Skip to content

Commit

Permalink
[IMP] Harve: don't fetch all remotes at once
Browse files Browse the repository at this point in the history
Do it incrementally.
It is:
 - quicker
 - more space efficient

Also add logging to track progress.
  • Loading branch information
KangOl committed Feb 9, 2024
1 parent 242b559 commit 57a6dc1
Showing 1 changed file with 26 additions and 7 deletions.
33 changes: 26 additions & 7 deletions Harve.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ if [[ -z "$workdir" ]]; then
workdir="${XDG_CACHE_HOME:-${HOME}/.cache}/Harve"
fi

# Print a timestamped progress message to stdout.
# Arguments: $@ - message words; all of them are logged, joined by one space.
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline.
log() {
  # "$*" joins on the first character of IFS; pin it so the caller's IFS
  # cannot change the output.
  local IFS=' '
  # %(...)T is bash's built-in strftime; the argument -1 means "current time".
  printf '[%(%F %T)T] %s\n' -1 "$*"
}

mkdir -p "$workdir"
venv="${workdir}/.venv"

Expand All @@ -26,16 +30,19 @@ if [[ ! -d "$workdir/.git" ]]; then
fi

if [[ ! -d "$venv" ]]; then
log "creating virtualenv"
python3 -m venv "$venv"
"$venv/bin/pip" install -U pip
"$venv/bin/pip" install PyGithub
"$venv/bin/pip" --quiet --no-input install -U pip
"$venv/bin/pip" --quiet --no-input install PyGithub
fi

# Generate config file with remotes
log "get fork list"
"$venv/bin/python" > "$workdir/.git/config" <<EOP
#!/usr/bin/env python3
import os
import sys
from collections import defaultdict
from github import Github
Expand All @@ -46,9 +53,11 @@ REMOTE = """\
fetch = +refs/pull/*/head:refs/remotes/{remote}/pr/*
"""
GROUPS = defaultdict(list)
def forks(repo):
print(REMOTE.format(remote=repo.full_name, url=repo.ssh_url))
GROUPS[repo.full_name[0].lower()].append(repo.full_name)
if not repo.forks_count:
return
for frk in repo.get_forks():
Expand All @@ -60,19 +69,29 @@ def forks(repo):
# A token is required to get past the low rate limit on unauthenticated requests
gh = Github(os.getenv("GH_TOKEN"))
forks(gh.get_repo("odoo/odoo"))
print("[remotes]")
for group, remotes in GROUPS.items():
print(f" harve-{group} = {' '.join(remotes)}")
EOP

git="git -C $workdir/.git"

jobs=$(( ($(nproc) + 1) / 2))

# some repos may not be accessible, ignore errors
$git fetch --all --prune --quiet --no-auto-gc --multiple --jobs="$jobs" 2>/dev/null || true
groups=$($git config --local --name-only --get-regexp 'remotes\.' | cut -d. -f2 | sort | xargs)

rm -f "$workdir/.git/gc.log"
$git gc --quiet
# Fetch the remotes one group at a time and run gc after each group.
# $groups and $git are left unquoted on purpose: both rely on word splitting
# ($groups is a space-separated list, $git is a command plus its arguments).
for group in $groups; do
log "fetch group $group"
# Some repos may not be accessible: stderr is discarded and a failing fetch is ignored.
# The fetch skips auto-gc (--no-auto-gc); gc is run explicitly right after it.
$git fetch --prune --quiet --no-auto-gc --multiple --jobs="$jobs" "$group" 2>/dev/null || true
# NOTE(review): presumably clears a gc.log left by an earlier gc run so it cannot interfere with the next one; confirm.
rm -f "$workdir/.git/gc.log"
$git gc --quiet
done;

: ${COMMIT:=4295585aff34ba9881ed7f64bce3481e3d217dcd}
: "${COMMIT:=4295585aff34ba9881ed7f64bce3481e3d217dcd}"
log "search commit $COMMIT"
set -x
# The search for the hash returns an error if it is not found. That is the inverse of what we want.
$git branch --all --contains "${COMMIT}" || exit 0
Expand Down

0 comments on commit 57a6dc1

Please sign in to comment.