Skip to content

Commit

Permalink
Merge branch 'pack-path-walk'
Browse files Browse the repository at this point in the history
Here is where I think the meat of the RFC really lies. There are still
some rough edges, but the data will show that 'git pack-objects
--path-walk' has the potential to be an extremely effective way to pack
objects. (Caveats will come later in the analysis section.)
  • Loading branch information
dscho committed Sep 15, 2024
2 parents 26b7bc1 + e43582c commit f172042
Show file tree
Hide file tree
Showing 12 changed files with 364 additions and 37 deletions.
8 changes: 8 additions & 0 deletions Documentation/config/pack.txt
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,14 @@ pack.useSparse::
commits contain certain types of direct renames. Default is
`true`.

pack.usePathWalk::
When true, git will default to using the '--path-walk' option in
'git pack-objects' when the '--revs' option is present. This
algorithm groups objects by path to maximize the ability to
compute delta chains across historical versions of the same
object. This may disable other options, such as using bitmaps to
enumerate objects.

pack.preferBitmapTips::
When selecting which commits will receive bitmaps, prefer a
commit at the tip of any reference that is a suffix of any value
Expand Down
209 changes: 176 additions & 33 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,9 @@
#include "promisor-remote.h"
#include "pack-mtimes.h"
#include "parse-options.h"
#include "blob.h"
#include "tree.h"
#include "path-walk.h"

/*
* Objects we are going to pack are collected in the `to_pack` structure.
Expand Down Expand Up @@ -215,6 +218,7 @@ static int delta_search_threads;
static int pack_to_stdout;
static int sparse;
static int thin;
static int path_walk;
static int num_preferred_base;
static struct progress *progress_state;

Expand Down Expand Up @@ -3139,6 +3143,35 @@ static int add_ref_tag(const char *tag UNUSED, const struct object_id *oid,
return 0;
}

/*
 * Tell the caller whether 'entry' should be handed to the delta search.
 *
 * Returns 1 if the entry is a candidate and 0 if it must be skipped.
 * Dies when the type of an entry that is not a preferred base cannot
 * be determined.
 */
static int should_attempt_deltas(struct object_entry *entry)
{
	/*
	 * DELTA() being set means we already decided to reuse an existing
	 * delta from a pack ("reuse_delta &&" is implied), so there is
	 * nothing left to search for.
	 */
	if (DELTA(entry))
		return 0;

	if (!entry->type_valid)
		return 0;

	if (oe_size_less_than(&to_pack, entry, 50))
		return 0;

	if (entry->no_try_delta)
		return 0;

	if (oe_type(entry) < 0) {
		if (!entry->preferred_base)
			die(_("unable to get type of object %s"),
			    oid_to_hex(&entry->idx.oid));

		/*
		 * A preferred base that is not found is harmless: we
		 * don't have to include it anyway.
		 */
		return 0;
	}

	return 1;
}

static void prepare_pack(int window, int depth)
{
struct object_entry **delta_list;
Expand Down Expand Up @@ -3169,33 +3202,11 @@ static void prepare_pack(int window, int depth)
for (i = 0; i < to_pack.nr_objects; i++) {
struct object_entry *entry = to_pack.objects + i;

if (DELTA(entry))
/* This happens if we decided to reuse existing
* delta from a pack. "reuse_delta &&" is implied.
*/
continue;

if (!entry->type_valid ||
oe_size_less_than(&to_pack, entry, 50))
continue;

if (entry->no_try_delta)
if (!should_attempt_deltas(entry))
continue;

if (!entry->preferred_base) {
if (!entry->preferred_base)
nr_deltas++;
if (oe_type(entry) < 0)
die(_("unable to get type of object %s"),
oid_to_hex(&entry->idx.oid));
} else {
if (oe_type(entry) < 0) {
/*
* This object is not found, but we
* don't have to include it anyway.
*/
continue;
}
}

delta_list[n++] = entry;
}
Expand Down Expand Up @@ -4109,6 +4120,117 @@ static void mark_bitmap_preferred_tips(void)
}
}

/*
 * Look up 'oid' in 'repo' as an object of the given 'type' and report
 * whether it is interesting.
 *
 * Returns 1 when the lookup yields an object whose flags do not contain
 * UNINTERESTING. Returns 0 when the lookup yields nothing, when the
 * object is flagged UNINTERESTING, or when 'type' is not one of tag,
 * commit, tree or blob.
 */
static inline int is_oid_interesting(struct repository *repo,
				     struct object_id *oid,
				     enum object_type type)
{
	switch (type) {
	case OBJ_TAG: {
		struct tag *tag = lookup_tag(repo, oid);

		return tag && !(tag->object.flags & UNINTERESTING);
	}
	case OBJ_COMMIT: {
		struct commit *commit = lookup_commit(repo, oid);

		return commit && !(commit->object.flags & UNINTERESTING);
	}
	case OBJ_TREE: {
		struct tree *tree = lookup_tree(repo, oid);

		return tree && !(tree->object.flags & UNINTERESTING);
	}
	case OBJ_BLOB: {
		struct blob *blob = lookup_blob(repo, oid);

		return blob && !(blob->object.flags & UNINTERESTING);
	}
	default:
		/* Any other type is never considered interesting. */
		return 0;
	}
}

static int add_objects_by_path(const char *path,
struct oid_array *oids,
enum object_type type,
void *data)
{
struct object_entry **delta_list;
size_t oe_start = to_pack.nr_objects;
size_t oe_end;
unsigned int sub_list_size;
unsigned int *processed = data;

/*
* First, add all objects to the packing data, including the ones
* marked UNINTERESTING (translated to 'exclude') as they can be
* used as delta bases.
*/
for (size_t i = 0; i < oids->nr; i++) {
struct object_id *oid = &oids->oid[i];
int exclude = !is_oid_interesting(the_repository, oid, type);
add_object_entry(oid, type, path, exclude);
}

oe_end = to_pack.nr_objects;

/* We can skip delta calculations if it is a no-op. */
if (oe_end == oe_start || !window)
return 0;

sub_list_size = 0;
ALLOC_ARRAY(delta_list, oe_end - oe_start);

for (size_t i = 0; i < oe_end - oe_start; i++) {
struct object_entry *entry = to_pack.objects + oe_start + i;

if (!should_attempt_deltas(entry))
continue;

delta_list[sub_list_size++] = entry;
}

/*
* Find delta bases among this list of objects that all match the same
* path. This causes the delta compression to be interleaved in the
* object walk, which can lead to confusing progress indicators. This is
* also incompatible with threaded delta calculations. In the future,
* consider creating a list of regions in the full to_pack.objects array
* that could be picked up by the threaded delta computation.
*/
if (sub_list_size && window) {
QSORT(delta_list, sub_list_size, type_size_sort);
find_deltas(delta_list, &sub_list_size, window, depth, processed);
}

free(delta_list);
return 0;
}

static void get_object_list_path_walk(struct rev_info *revs)
{
struct path_walk_info info = PATH_WALK_INFO_INIT;
unsigned int processed = 0;

info.revs = revs;

info.revs->tag_objects = 1;
info.tags = 1;
info.commits = 1;
info.trees = 1;
info.blobs = 1;
info.path_fn = add_objects_by_path;
info.path_fn_data = &processed;

/*
* Allow the --[no-]sparse option to be interesting here, if only
* for testing purposes. Paths with no interesting objects will not
* contribute to the resulting pack, but only create noisy preferred
* base objects.
*/
info.prune_all_uninteresting = sparse;

if (walk_objects_by_path(&info))
die(_("failed to pack objects via path-walk"));
}

static void get_object_list(struct rev_info *revs, int ac, const char **av)
{
struct setup_revision_opt s_r_opt = {
Expand Down Expand Up @@ -4155,7 +4277,7 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)

warn_on_object_refname_ambiguity = save_warning;

if (use_bitmap_index && !get_object_list_from_bitmap(revs))
if (use_bitmap_index && !path_walk && !get_object_list_from_bitmap(revs))
return;

if (use_delta_islands)
Expand All @@ -4164,15 +4286,19 @@ static void get_object_list(struct rev_info *revs, int ac, const char **av)
if (write_bitmap_index)
mark_bitmap_preferred_tips();

if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
mark_edges_uninteresting(revs, show_edge, sparse);

if (!fn_show_object)
fn_show_object = show_object;
traverse_commit_list(revs,
show_commit, fn_show_object,
NULL);

if (path_walk) {
get_object_list_path_walk(revs);
} else {
if (prepare_revision_walk(revs))
die(_("revision walk setup failed"));
mark_edges_uninteresting(revs, show_edge, sparse);
traverse_commit_list(revs,
show_commit, fn_show_object,
NULL);
}

if (unpack_unreachable_expiration) {
revs->ignore_missing_links = 1;
Expand Down Expand Up @@ -4367,6 +4493,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
N_("use the sparse reachability algorithm")),
OPT_BOOL(0, "thin", &thin,
N_("create thin packs")),
OPT_BOOL(0, "path-walk", &path_walk,
N_("use the path-walk API to walk objects when possible")),
OPT_BOOL(0, "shallow", &shallow,
N_("create packs suitable for shallow fetches")),
OPT_BOOL(0, "honor-pack-keep", &ignore_packed_keep_on_disk,
Expand Down Expand Up @@ -4405,11 +4533,14 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)

disable_replace_refs();

path_walk = git_env_bool("GIT_TEST_PACK_PATH_WALK", -1);
sparse = git_env_bool("GIT_TEST_PACK_SPARSE", -1);
if (the_repository->gitdir) {
prepare_repo_settings(the_repository);
if (sparse < 0)
sparse = the_repository->settings.pack_use_sparse;
if (path_walk < 0)
path_walk = the_repository->settings.pack_use_path_walk;
if (the_repository->settings.pack_use_multi_pack_reuse)
allow_pack_reuse = MULTI_PACK_REUSE;
}
Expand Down Expand Up @@ -4447,7 +4578,19 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
window = 0;

strvec_push(&rp, "pack-objects");
if (thin) {

if (path_walk && filter_options.choice) {
warning(_("cannot use --filter with --path-walk"));
path_walk = 0;
}
if (path_walk) {
strvec_push(&rp, "--boundary");
/*
* We must disable the bitmaps because we are removing
* the --objects / --objects-edge[-aggressive] options.
*/
use_bitmap_index = 0;
} else if (thin) {
use_internal_rev_list = 1;
strvec_push(&rp, shallow
? "--objects-edge-aggressive"
Expand Down
5 changes: 5 additions & 0 deletions builtin/repack.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ struct pack_objects_args {
int no_reuse_object;
int quiet;
int local;
int path_walk;
struct list_objects_filter_options filter_options;
};

Expand Down Expand Up @@ -288,6 +289,8 @@ static void prepare_pack_objects(struct child_process *cmd,
strvec_pushf(&cmd->args, "--no-reuse-delta");
if (args->no_reuse_object)
strvec_pushf(&cmd->args, "--no-reuse-object");
if (args->path_walk)
strvec_pushf(&cmd->args, "--path-walk");
if (args->local)
strvec_push(&cmd->args, "--local");
if (args->quiet)
Expand Down Expand Up @@ -1157,6 +1160,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
N_("pass --no-reuse-delta to git-pack-objects")),
OPT_BOOL('F', NULL, &po_args.no_reuse_object,
N_("pass --no-reuse-object to git-pack-objects")),
OPT_BOOL(0, "path-walk", &po_args.path_walk,
N_("pass --path-walk to git-pack-objects")),
OPT_NEGBIT('n', NULL, &run_update_server_info,
N_("do not run git-update-server-info"), 1),
OPT__QUIET(&po_args.quiet, N_("be quiet")),
Expand Down
Loading

0 comments on commit f172042

Please sign in to comment.