From c145b9e6b3c52055ba0f5677d62374c81de899f1 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <stolee@gmail.com>
Date: Sat, 7 Sep 2024 19:43:45 -0400
Subject: [PATCH 1/4] pack-objects: add --full-name-hash option

RFC NOTE: this is essentially the same as the patch introduced
independently of the RFC, but now is on top of the --path-walk option
instead. This is included in the RFC for comparison purposes.

RFC NOTE: As you can see from the details below, the --full-name-hash
option essentially attempts to do similar things as the --path-walk
option, but sometimes misses the mark. Collisions still happen with the
--full-name-hash option, leading to some misses. However, in cases where
the default name-hash algorithm has low collision rates and deltas are
actually desired across objects with similar names but different full
names, the --path-walk option can still take advantage of the default
name hash approach.

Here are the new performance details simulating a single push in an
internal monorepo using a lot of paths that collide in the default name
hash. We can see that --full-name-hash gets close to the --path-walk
option's size.

Test                                           this tree
--------------------------------------------------------------
5313.2: thin pack                              2.43(2.92+0.14)
5313.3: thin pack size                                    4.5M
5313.4: thin pack with --full-name-hash        0.31(0.49+0.12)
5313.5: thin pack size with --full-name-hash             15.5K
5313.6: thin pack with --path-walk             0.35(0.31+0.04)
5313.7: thin pack size with --path-walk                  14.2K

However, when simulating pushes on repositories that do not have issues
with name-hash collisions, the --full-name-hash option presents a
potential of worse delta calculations, such as this example using my
local Git repository:

Test                                           this tree
--------------------------------------------------------------
5313.2: thin pack                              0.03(0.01+0.01)
5313.3: thin pack size                                     475
5313.4: thin pack with --full-name-hash        0.02(0.01+0.01)
5313.5: thin pack size with --full-name-hash             14.8K
5313.6: thin pack with --path-walk             0.02(0.01+0.01)
5313.7: thin pack size with --path-walk                    475

Note that the path-walk option found the same delta bases as the default
options in this case.

In the full repack case, the --full-name-hash option may be preferable
because it interacts well with other advanced features, such as using
bitmap indexes and tracking delta islands.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 builtin/pack-objects.c       | 20 +++++++++++++++-----
 builtin/repack.c             |  5 +++++
 pack-objects.h               | 20 ++++++++++++++++++++
 t/perf/p5313-pack-objects.sh | 26 ++++++++++++++++++++++++++
 4 files changed, 66 insertions(+), 5 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index d510c095d64eb8..094d253fa8d8d3 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -270,6 +270,14 @@ struct configured_exclusion {
 static struct oidmap configured_exclusions;
 
 static struct oidset excluded_by_config;
+static int use_full_name_hash;
+
+static inline uint32_t pack_name_hash_fn(const char *name)
+{
+	if (use_full_name_hash)
+		return pack_full_name_hash(name);
+	return pack_name_hash(name);
+}
 
 /*
  * stats
@@ -1674,7 +1682,7 @@ static int add_object_entry(const struct object_id *oid, enum object_type type,
 		return 0;
 	}
 
-	create_object_entry(oid, type, pack_name_hash(name),
+	create_object_entry(oid, type, pack_name_hash_fn(name),
 			    exclude, name && no_try_delta(name),
 			    found_pack, found_offset);
 	return 1;
@@ -1888,7 +1896,7 @@ static void add_preferred_base_object(const char *name)
 {
 	struct pbase_tree *it;
 	size_t cmplen;
-	unsigned hash = pack_name_hash(name);
+	unsigned hash = pack_name_hash_fn(name);
 
 	if (!num_preferred_base || check_pbase_path(hash))
 		return;
@@ -3405,7 +3413,7 @@ static void show_object_pack_hint(struct object *object, const char *name,
 	 * here using a now in order to perhaps improve the delta selection
 	 * process.
 	 */
-	oe->hash = pack_name_hash(name);
+	oe->hash = pack_name_hash_fn(name);
 	oe->no_try_delta = name && no_try_delta(name);
 
 	stdin_packs_hints_nr++;
@@ -3555,7 +3563,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 	entry = packlist_find(&to_pack, oid);
 	if (entry) {
 		if (name) {
-			entry->hash = pack_name_hash(name);
+			entry->hash = pack_name_hash_fn(name);
 			entry->no_try_delta = no_try_delta(name);
 		}
 	} else {
@@ -3578,7 +3586,7 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type
 			return;
 		}
 
-		entry = create_object_entry(oid, type, pack_name_hash(name),
+		entry = create_object_entry(oid, type, pack_name_hash_fn(name),
 					    0, name && no_try_delta(name),
 					    pack, offset);
 	}
@@ -4525,6 +4533,8 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 		OPT_STRING_LIST(0, "uri-protocol", &uri_protocols,
 				N_("protocol"),
 				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
+		OPT_BOOL(0, "full-name-hash", &use_full_name_hash,
+			 N_("optimize delta compression across identical path names over time")),
 		OPT_END(),
 	};
 
diff --git a/builtin/repack.c b/builtin/repack.c
index fe1454d32dd80a..575dfdab4e78dc 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -58,6 +58,7 @@ struct pack_objects_args {
 	int quiet;
 	int local;
 	int path_walk;
+	int full_name_hash;
 	struct list_objects_filter_options filter_options;
 };
 
@@ -291,6 +292,8 @@ static void prepare_pack_objects(struct child_process *cmd,
 		strvec_pushf(&cmd->args, "--no-reuse-object");
 	if (args->path_walk)
 		strvec_pushf(&cmd->args, "--path-walk");
+	if (args->full_name_hash)
+		strvec_pushf(&cmd->args, "--full-name-hash");
 	if (args->local)
 		strvec_push(&cmd->args,  "--local");
 	if (args->quiet)
@@ -1162,6 +1165,8 @@ int cmd_repack(int argc, const char **argv, const char *prefix)
 				N_("pass --no-reuse-object to git-pack-objects")),
 		OPT_BOOL(0, "path-walk", &po_args.path_walk,
 				N_("pass --path-walk to git-pack-objects")),
+		OPT_BOOL(0, "full-name-hash", &po_args.full_name_hash,
+				N_("pass --full-name-hash to git-pack-objects")),
 		OPT_NEGBIT('n', NULL, &run_update_server_info,
 				N_("do not run git-update-server-info"), 1),
 		OPT__QUIET(&po_args.quiet, N_("be quiet")),
diff --git a/pack-objects.h b/pack-objects.h
index b9898a4e64b8b4..50097552d03f20 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -207,6 +207,26 @@ static inline uint32_t pack_name_hash(const char *name)
 	return hash;
 }
 
+static inline uint32_t pack_full_name_hash(const char *name)
+{
+	const uint32_t bigp = 1234572167U;
+	uint32_t c, hash = bigp;
+
+	if (!name)
+		return 0;
+
+	/*
+	 * Just do the dumbest thing possible: add random multiples of a
+	 * large prime number with a binary shift. Goal is not cryptographic,
+	 * but generally uniformly distributed.
+	 */
+	while ((c = *name++) != 0) {
+		hash += c * bigp;
+		hash = (hash >> 5) | (hash << 27);
+	}
+	return hash;
+}
+
 static inline enum object_type oe_type(const struct object_entry *e)
 {
 	return e->type_valid ? e->type_ : OBJ_BAD;
diff --git a/t/perf/p5313-pack-objects.sh b/t/perf/p5313-pack-objects.sh
index 48fc05bb6c6ce3..b3b7fff8abf3bf 100755
--- a/t/perf/p5313-pack-objects.sh
+++ b/t/perf/p5313-pack-objects.sh
@@ -28,6 +28,14 @@ test_size 'thin pack size' '
 	wc -c <out
 '
 
+test_perf 'thin pack with --full-name-hash' '
+	git pack-objects --thin --stdout --revs --sparse --full-name-hash <in-thin >out
+'
+
+test_size 'thin pack size with --full-name-hash' '
+	wc -c <out
+'
+
 test_perf 'thin pack with --path-walk' '
 	git pack-objects --thin --stdout --revs --sparse --path-walk <in-thin >out
 '
@@ -44,6 +52,14 @@ test_size 'big recent pack size' '
 	wc -c <out
 '
 
+test_perf 'big recent pack with --full-name-hash' '
+	git pack-objects --stdout --revs --full-name-hash <in-big-recent >out
+'
+
+test_size 'big recent pack size with --full-name-hash' '
+	wc -c <out
+'
+
 test_perf 'big recent pack with --path-walk' '
 	git pack-objects --stdout --revs --path-walk <in-big-recent >out
 '
@@ -62,6 +78,16 @@ test_size 'full repack size' '
 		       sort -nr | head -n 1
 '
 
+test_perf 'full repack with --full-name-hash' '
+	git repack -adf --no-write-bitmap-index --full-name-hash
+'
+
+test_size 'full repack size with --full-name-hash' '
+	du -a .git/objects/pack | \
+	   awk "{ print \$1; }" | \
+		       sort -nr | head -n 1
+'
+
 test_perf 'full repack with --path-walk' '
 	git repack -adf --no-write-bitmap-index --path-walk
 '

From 72191a0cba8acce3b3b7ba0c6d704198a2c86689 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <stolee@gmail.com>
Date: Sun, 8 Sep 2024 19:30:57 -0400
Subject: [PATCH 2/4] test-name-hash: add helper to compute name-hash functions

Using this tool, we can count how many distinct name-hash values exist
within a list of paths. Examples include

 git ls-tree -r --name-only HEAD | \
	     test-tool name-hash | \
  	      awk "{print \$1;}" | \
  		 sort -ns | uniq | wc -l

which outputs the number of distinct name-hash values that appear at
HEAD. Or, the following which presents the resulting name-hash values of
maximum multiplicity:

 git ls-tree -r --name-only HEAD | \
	     test-tool name-hash | \
	      awk "{print \$1;}" | \
	       sort -n | uniq -c | sort -nr | head -n 25

For an internal monorepo with around a quarter million paths at HEAD,
the highest multiplicity for the standard name-hash function was 14,424
while the full name-hash algorithm had only seven hash values with any
collision, with a maximum multiplicity of two.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 Makefile                  |  1 +
 t/helper/test-name-hash.c | 23 +++++++++++++++++++++++
 t/helper/test-tool.c      |  1 +
 t/helper/test-tool.h      |  1 +
 4 files changed, 26 insertions(+)
 create mode 100644 t/helper/test-name-hash.c

diff --git a/Makefile b/Makefile
index 076f7581541740..ef73d24a969efe 100644
--- a/Makefile
+++ b/Makefile
@@ -814,6 +814,7 @@ TEST_BUILTINS_OBJS += test-lazy-init-name-hash.o
 TEST_BUILTINS_OBJS += test-match-trees.o
 TEST_BUILTINS_OBJS += test-mergesort.o
 TEST_BUILTINS_OBJS += test-mktemp.o
+TEST_BUILTINS_OBJS += test-name-hash.o
 TEST_BUILTINS_OBJS += test-oid-array.o
 TEST_BUILTINS_OBJS += test-online-cpus.o
 TEST_BUILTINS_OBJS += test-pack-mtimes.o
diff --git a/t/helper/test-name-hash.c b/t/helper/test-name-hash.c
new file mode 100644
index 00000000000000..c82ccd7cefd860
--- /dev/null
+++ b/t/helper/test-name-hash.c
@@ -0,0 +1,23 @@
+/*
+ * test-name-hash.c: Read a list of paths over stdin and report on their
+ * name-hash and full name-hash.
+ */
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "pack-objects.h"
+#include "strbuf.h"
+
+int cmd__name_hash(int argc, const char **argv)
+{
+	struct strbuf line = STRBUF_INIT;
+
+	while (!strbuf_getline(&line, stdin)) {
+		uint32_t name_hash = pack_name_hash(line.buf);
+		uint32_t full_hash = pack_full_name_hash(line.buf);
+
+		printf("%10"PRIu32"\t%10"PRIu32"\t%s\n", name_hash, full_hash, line.buf);
+	}
+
+	return 0;
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index af909330f0eab6..eeb10fae59cf8b 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -44,6 +44,7 @@ static struct test_cmd cmds[] = {
 	{ "match-trees", cmd__match_trees },
 	{ "mergesort", cmd__mergesort },
 	{ "mktemp", cmd__mktemp },
+	{ "name-hash", cmd__name_hash },
 	{ "oid-array", cmd__oid_array },
 	{ "online-cpus", cmd__online_cpus },
 	{ "pack-mtimes", cmd__pack_mtimes },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 6b6a514323ff10..bd5f26d8a7c3a2 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -38,6 +38,7 @@ int cmd__lazy_init_name_hash(int argc, const char **argv);
 int cmd__match_trees(int argc, const char **argv);
 int cmd__mergesort(int argc, const char **argv);
 int cmd__mktemp(int argc, const char **argv);
+int cmd__name_hash(int argc, const char **argv);
 int cmd__online_cpus(int argc, const char **argv);
 int cmd__pack_mtimes(int argc, const char **argv);
 int cmd__parse_options(int argc, const char **argv);

From 5039f0348dc67ee31484002fa65ecd07f1a033b4 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <stolee@gmail.com>
Date: Sun, 8 Sep 2024 21:04:52 -0400
Subject: [PATCH 3/4] p5314: add a size test for name-hash collisions

This test helps inform someone as to the behavior of the name-hash
algorithms for their repo based on the paths at HEAD.

For example, the microsoft/fluentui repo had these statistics at time of
committing:

Test                                              this tree
-----------------------------------------------------------------
5314.1: paths at head                                       19.6K
5314.2: number of distinct name-hashes                       8.2K
5314.3: number of distinct full-name-hashes                 19.6K
5314.4: maximum multiplicity of name-hashes                   279
5314.5: maximum multiplicity of fullname-hashes                 1

That demonstrates that of the nearly twenty thousand path names, they
are assigned around eight thousand distinct values. 279 paths are
assigned to a single value, leading the packing algorithm to sort
objects from those paths together, by size.

In this repository, no collisions occur for the full-name-hash
algorithm.

In a more extreme example, an internal monorepo had a much worse
collision rate:

Test                                              this tree
-----------------------------------------------------------------
5314.1: paths at head                                      221.6K
5314.2: number of distinct name-hashes                      72.0K
5314.3: number of distinct full-name-hashes                221.6K
5314.4: maximum multiplicity of name-hashes                 14.4K
5314.5: maximum multiplicity of fullname-hashes                 2

Even in this repository with many more paths at HEAD, the collision rate
was low and the maximum number of paths being grouped into a single
bucket by the full-path-name algorithm was two.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 t/perf/p5314-name-hash.sh | 41 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100755 t/perf/p5314-name-hash.sh

diff --git a/t/perf/p5314-name-hash.sh b/t/perf/p5314-name-hash.sh
new file mode 100755
index 00000000000000..9fe26612facc1d
--- /dev/null
+++ b/t/perf/p5314-name-hash.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+
+test_description='Tests pack performance using bitmaps'
+. ./perf-lib.sh
+
+GIT_TEST_PASSING_SANITIZE_LEAK=0
+export GIT_TEST_PASSING_SANITIZE_LEAK
+
+test_perf_large_repo
+
+test_size 'paths at head' '
+	git ls-tree -r --name-only HEAD >path-list &&
+	wc -l <path-list
+'
+
+test_size 'number of distinct name-hashes' '
+	cat path-list | test-tool name-hash >name-hashes &&
+	cat name-hashes | awk "{ print \$1; }" | sort -n | uniq -c >name-hash-count &&
+	wc -l <name-hash-count
+'
+
+test_size 'number of distinct full-name-hashes' '
+	cat name-hashes | awk "{ print \$2; }" | sort -n | uniq -c >full-name-hash-count &&
+	wc -l <full-name-hash-count
+'
+
+test_size 'maximum multiplicity of name-hashes' '
+	cat name-hash-count | \
+		sort -nr | \
+		head -n 1 | \
+		awk "{ print \$1; }"
+'
+
+test_size 'maximum multiplicity of fullname-hashes' '
+	cat full-name-hash-count | \
+		sort -nr | \
+		head -n 1 | \
+		awk "{ print \$1; }"
+'
+
+test_done

From 88fee5b4aefbbeaedfaa3c7e8ecdf5f148978a09 Mon Sep 17 00:00:00 2001
From: Derrick Stolee <stolee@gmail.com>
Date: Wed, 28 Aug 2024 12:06:43 -0400
Subject: [PATCH 4/4] pack-objects: output debug info about deltas

In order to debug what is going on during delta calculations, add a
--debug-file=<file> option to 'git pack-objects'. This leads to sending
a JSON-formatted description of the delta information to that file.

Signed-off-by: Derrick Stolee <stolee@gmail.com>
---
 builtin/pack-objects.c | 69 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index 094d253fa8d8d3..080b3f98bef32f 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -50,6 +50,9 @@
  */
 static struct packing_data to_pack;
 
+static FILE *delta_file;
+static int delta_file_nr;
+
 static inline struct object_entry *oe_delta(
 		const struct packing_data *pack,
 		const struct object_entry *e)
@@ -516,6 +519,14 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 	hdrlen = encode_in_pack_object_header(header, sizeof(header),
 					      type, size);
 
+	if (delta_file) {
+		if (delta_file_nr++)
+			fprintf(delta_file, ",\n");
+		fprintf(delta_file, "\t\t{\n");
+		fprintf(delta_file, "\t\t\t\"oid\" : \"%s\",\n", oid_to_hex(&entry->idx.oid));
+		fprintf(delta_file, "\t\t\t\"size\" : %"PRIuMAX",\n", datalen);
+	}
+
 	if (type == OBJ_OFS_DELTA) {
 		/*
 		 * Deltas with relative base contain an additional
@@ -536,6 +547,11 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		hashwrite(f, header, hdrlen);
 		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 		hdrlen += sizeof(dheader) - pos;
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"OFS\",\n");
+			fprintf(delta_file, "\t\t\t\"offset\" : %"PRIuMAX",\n", ofs);
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else if (type == OBJ_REF_DELTA) {
 		/*
 		 * Deltas with a base reference contain
@@ -550,6 +566,10 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		hashwrite(f, header, hdrlen);
 		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 		hdrlen += hashsz;
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"REF\",\n");
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else {
 		if (limit && hdrlen + datalen + hashsz >= limit) {
 			if (st)
@@ -559,6 +579,10 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		}
 		hashwrite(f, header, hdrlen);
 	}
+
+	if (delta_file)
+		fprintf(delta_file, "\t\t\t\"reused\" : false\n\t\t}");
+
 	if (st) {
 		datalen = write_large_blob_data(st, f, &entry->idx.oid);
 		close_istream(st);
@@ -619,6 +643,14 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 		return write_no_reuse_object(f, entry, limit, usable_delta);
 	}
 
+	if (delta_file) {
+		if (delta_file_nr++)
+			fprintf(delta_file, ",\n");
+		fprintf(delta_file, "\t\t{\n");
+		fprintf(delta_file, "\t\t\t\"oid\" : \"%s\",\n", oid_to_hex(&entry->idx.oid));
+		fprintf(delta_file, "\t\t\t\"size\" : %"PRIuMAX",\n", entry_size);
+	}
+
 	if (type == OBJ_OFS_DELTA) {
 		off_t ofs = entry->idx.offset - DELTA(entry)->idx.offset;
 		unsigned pos = sizeof(dheader) - 1;
@@ -633,6 +665,12 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 		hashwrite(f, dheader + pos, sizeof(dheader) - pos);
 		hdrlen += sizeof(dheader) - pos;
 		reused_delta++;
+
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"OFS\",\n");
+			fprintf(delta_file, "\t\t\t\"offset\" : %"PRIuMAX",\n", ofs);
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else if (type == OBJ_REF_DELTA) {
 		if (limit && hdrlen + hashsz + datalen + hashsz >= limit) {
 			unuse_pack(&w_curs);
@@ -642,6 +680,10 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 		hashwrite(f, DELTA(entry)->idx.oid.hash, hashsz);
 		hdrlen += hashsz;
 		reused_delta++;
+		if (delta_file) {
+			fprintf(delta_file, "\t\t\t\"delta_type\" : \"REF\",\n");
+			fprintf(delta_file, "\t\t\t\"delta_base\" : \"%s\",\n", oid_to_hex(&DELTA(entry)->idx.oid));
+		}
 	} else {
 		if (limit && hdrlen + datalen + hashsz >= limit) {
 			unuse_pack(&w_curs);
@@ -652,6 +694,10 @@ static off_t write_reuse_object(struct hashfile *f, struct object_entry *entry,
 	copy_pack_data(f, p, &w_curs, offset, datalen);
 	unuse_pack(&w_curs);
 	reused++;
+
+	if (delta_file)
+		fprintf(delta_file, "\t\t\t\"reused\" : true\n\t\t}");
+
 	return hdrlen + datalen;
 }
 
@@ -1264,6 +1310,11 @@ static void write_pack_file(void)
 	ALLOC_ARRAY(written_list, to_pack.nr_objects);
 	write_order = compute_write_order();
 
+	if (delta_file) {
+		fprintf(delta_file, "{\n\t\"num_objects\" : %"PRIu32",\n", to_pack.nr_objects);
+		fprintf(delta_file, "\t\"objects\" : [\n");
+	}
+
 	do {
 		unsigned char hash[GIT_MAX_RAWSZ];
 		char *pack_tmp_name = NULL;
@@ -1412,6 +1463,9 @@ static void write_pack_file(void)
 		    written, nr_result);
 	trace2_data_intmax("pack-objects", the_repository,
 			   "write_pack_file/wrote", nr_result);
+
+	if (delta_file)
+		fprintf(delta_file, "\n\t]\n}");
 }
 
 static int no_try_delta(const char *path)
@@ -4429,6 +4483,7 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	struct string_list keep_pack_list = STRING_LIST_INIT_NODUP;
 	struct list_objects_filter_options filter_options =
 		LIST_OBJECTS_FILTER_INIT;
+	const char *delta_file_name = NULL;
 
 	struct option pack_objects_options[] = {
 		OPT_CALLBACK_F('q', "quiet", &progress, NULL,
@@ -4535,6 +4590,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 				N_("exclude any configured uploadpack.blobpackfileuri with this protocol")),
 		OPT_BOOL(0, "full-name-hash", &use_full_name_hash,
 			 N_("optimize delta compression across identical path names over time")),
+		OPT_STRING(0, "delta-file", &delta_file_name,
+				N_("filename"),
+				N_("output delta compression details to the given file")),
 		OPT_END(),
 	};
 
@@ -4572,6 +4630,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	if (pack_to_stdout != !base_name || argc)
 		usage_with_options(pack_usage, pack_objects_options);
 
+	if (delta_file_name) {
+		delta_file = fopen(delta_file_name, "w");
+		if (!delta_file)
+			die_errno("failed to open '%s'", delta_file_name);
+		trace2_printf("opened '%s' for writing deltas", delta_file_name);
+	}
 	if (depth < 0)
 		depth = 0;
 	if (depth >= (1 << OE_DEPTH_BITS)) {
@@ -4795,5 +4859,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	list_objects_filter_release(&filter_options);
 	strvec_clear(&rp);
 
+	if (delta_file) {
+		fflush(delta_file);
+		fclose(delta_file);
+	}
+
 	return 0;
 }