From 03614efbd31b183d8675d6eabbec3527e7ef18ad Mon Sep 17 00:00:00 2001
From: lilydjwg
Date: Wed, 12 Jun 2024 00:29:01 +0800
Subject: [PATCH] pkg-cleanup: optimize gen-update by using pygit2 and reading the history once

---
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. Every token is unchanged; the two-space
indentation, the positions of blank context lines and the nesting depth of
the first context lines of the second hunk were inferred from Python syntax
and the hunk line counts. Verify against commit 03614ef before applying.
The author's e-mail address is absent from the From: header in that copy.

 pkg-cleanup/gen-update | 44 +++++++++++++++++++-----------------------
 pylib/gitutils.py      | 29 ++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 24 deletions(-)
 create mode 100644 pylib/gitutils.py

diff --git a/pkg-cleanup/gen-update b/pkg-cleanup/gen-update
index 6ffe393..82946d1 100755
--- a/pkg-cleanup/gen-update
+++ b/pkg-cleanup/gen-update
@@ -1,44 +1,39 @@
 #!/usr/bin/python3
 
+import sys, os
 import json
 from pathlib import Path
 import logging
 from collections import defaultdict
 from copy import deepcopy
 import time
-import subprocess
+
+import pygit2
 
 import yamlutils
 from lilac2 import lilacyaml
 import const
 
+sys.path.append(os.path.join(os.path.dirname(__file__), '../pylib'))
+import gitutils
+
 logger = logging.getLogger(__name__)
 
 REPODIR = Path(const.REPODIR)
 lilac_mail = 'lilac@build.archlinuxcn.org'
 
-def check_last_git_update(pkg):
-  cmd = [
-    "git", "log", "--format=%at %ae", "--", pkg,
-  ]
-
-  p = subprocess.Popen(
-    cmd, stdout=subprocess.PIPE, universal_newlines=True,
-    cwd = REPODIR,
-  )
-
-  try:
-    stdout = p.stdout
-    while True:
-      line = stdout.readline()
-      t, email = line.rstrip().split(None, 1)
-      if int(t) < time.time() - 30 * 86400:
-        return False
-      if email == lilac_mail:
-        continue
-      return True
-  finally:
-    p.terminate()
+def get_last_git_update():
+  repo = pygit2.Repository(REPODIR)
+  stop_time = time.time() - 30 * 86400
+  package_last_update = defaultdict(int)
+  for commit in gitutils.iter_commits(repo, stop_time, lilac_mail):
+    if len(commit.parents) == 1:
+      diff = repo.diff(commit, commit.parents[0])
+      pkgs = gitutils.get_touched_packages(diff)
+      for pkg in pkgs:
+        package_last_update[pkg] = max(package_last_update[pkg], commit.commit_time)
+
+  return package_last_update
 
 def main():
   with open('/home/lilydjwg/tmpfs/removed.json') as f:
@@ -73,9 +68,10 @@ def main():
     for m in old[name]:
       pkgs_to_remove[m].append(name)
 
+  package_last_update = get_last_git_update()
   for pkgs in pkgs_to_remove.values():
     for pkg in pkgs[:]:
-      if check_last_git_update(pkg):
+      if package_last_update.get(('archlinuxcn', pkg)):
         pkgs.remove(pkg)
 
   with open('/home/lilydjwg/tmpfs/update.yaml', 'w') as f:
diff --git a/pylib/gitutils.py b/pylib/gitutils.py
new file mode 100644
index 0000000..30b22b4
--- /dev/null
+++ b/pylib/gitutils.py
@@ -0,0 +1,29 @@
+def iter_commits(repo, stop_time, lilac_mail):
+  commit_hash = repo.head.target
+  commit = repo[commit_hash]
+  stack = [commit]
+  seen_commits = set()
+
+  while stack:
+    commit = stack.pop()
+    if commit.commit_time < stop_time:
+      continue
+    if commit.id.raw in seen_commits:
+      continue
+    stack.extend(commit.parents)
+    seen_commits.add(commit.id.raw)
+    if commit.author.email == lilac_mail:
+      continue
+
+    yield commit
+
+def get_touched_packages(diff):
+  ret = set()
+  for d in diff.deltas:
+    for a in [d.old_file, d.new_file]:
+      parts = a.path.split('/', 2)
+      if len(parts) == 3:
+        r, pkgbase, _ = parts
+        ret.add((r, pkgbase))
+  return ret
+