From f3fd0e8a7141ec68e6b20577e8669aeb8b42f6a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Tue, 24 Jun 2025 19:25:11 +0200 Subject: [PATCH 01/20] Add bblob object type; apply whitespace fixes --- Documentation/gitprotocol-capabilities.adoc | 8 + Makefile | 139 ++++++------- alloc.c | 8 + alloc.h | 1 + bblob.c | 213 ++++++++++++++++++++ bblob.h | 25 +++ builtin/cat-file.c | 12 +- builtin/fsck.c | 6 +- builtin/repack.c | 38 +++- fsck.c | 111 +++++++++- fsck.h | 45 +++-- object-file.c | 48 +++-- object-store.c | 37 +++- object-store.h | 11 +- object.c | 68 ++++--- object.h | 6 +- serve.c | 4 + streaming.c | 35 +++- t/meson.build | 7 +- t/t1030-bblob.sh | 37 ++++ upload-pack.c | 1 + 21 files changed, 687 insertions(+), 173 deletions(-) create mode 100644 bblob.c create mode 100644 bblob.h create mode 100755 t/t1030-bblob.sh diff --git a/Documentation/gitprotocol-capabilities.adoc b/Documentation/gitprotocol-capabilities.adoc index 2cf7735be479e6..c46cbd28e69e1a 100644 --- a/Documentation/gitprotocol-capabilities.adoc +++ b/Documentation/gitprotocol-capabilities.adoc @@ -378,6 +378,14 @@ fetch-pack may send "filter" commands to request a partial clone or partial fetch and request that the server omit various objects from the packfile. +bblob +----- + +If the upload-pack server advertises the 'bblob' capability, it may +send packfiles containing bblob objects. Clients that do not +understand this object type must refrain from requesting the +capability. 
+ session-id= ----------------------- diff --git a/Makefile b/Makefile index 70d1543b6b8688..fb4a12b76d3fe6 100644 --- a/Makefile +++ b/Makefile @@ -974,6 +974,7 @@ LIB_OBJS += base85.o LIB_OBJS += bisect.o LIB_OBJS += blame.o LIB_OBJS += blob.o +LIB_OBJS += bblob.o LIB_OBJS += bloom.o LIB_OBJS += branch.o LIB_OBJS += bulk-checkin.o @@ -1466,7 +1467,7 @@ GIT-SPATCH-DEFINES: FORCE if test x"$$FLAGS" != x"`cat GIT-SPATCH-DEFINES 2>/dev/null`" ; then \ echo >&2 " * new spatch flags"; \ echo "$$FLAGS" >GIT-SPATCH-DEFINES; \ - fi + fi include config.mak.uname -include config.mak.autogen @@ -1601,23 +1602,23 @@ ifneq (,$(SOCKLEN_T)) endif ifeq ($(uname_S),Darwin) - ifndef NO_FINK - ifeq ($(shell test -d /sw/lib && echo y),y) + ifndef NO_FINK + ifeq ($(shell test -d /sw/lib && echo y),y) BASIC_CFLAGS += -I/sw/include BASIC_LDFLAGS += -L/sw/lib - endif - endif - ifndef NO_DARWIN_PORTS - ifeq ($(shell test -d /opt/local/lib && echo y),y) + endif + endif + ifndef NO_DARWIN_PORTS + ifeq ($(shell test -d /opt/local/lib && echo y),y) BASIC_CFLAGS += -I/opt/local/include BASIC_LDFLAGS += -L/opt/local/lib - endif - endif - ifndef NO_APPLE_COMMON_CRYPTO + endif + endif + ifndef NO_APPLE_COMMON_CRYPTO NO_OPENSSL = YesPlease APPLE_COMMON_CRYPTO = YesPlease COMPAT_CFLAGS += -DAPPLE_COMMON_CRYPTO - endif + endif PTHREAD_LIBS = endif @@ -1656,23 +1657,23 @@ ifdef NO_CURL REMOTE_CURL_NAMES = EXCLUDED_PROGRAMS += git-http-fetch git-http-push else - ifdef CURLDIR + ifdef CURLDIR # Try "-Wl,-rpath=$(CURLDIR)/$(lib)" in such a case. 
CURL_CFLAGS = -I$(CURLDIR)/include CURL_LIBCURL = $(call libpath_template,$(CURLDIR)/$(lib)) - else + else CURL_CFLAGS = CURL_LIBCURL = - endif + endif - ifndef CURL_LDFLAGS + ifndef CURL_LDFLAGS CURL_LDFLAGS = $(eval CURL_LDFLAGS := $$(shell $$(CURL_CONFIG) --libs))$(CURL_LDFLAGS) - endif + endif CURL_LIBCURL += $(CURL_LDFLAGS) - ifndef CURL_CFLAGS + ifndef CURL_CFLAGS CURL_CFLAGS = $(eval CURL_CFLAGS := $$(shell $$(CURL_CONFIG) --cflags))$(CURL_CFLAGS) - endif + endif BASIC_CFLAGS += $(CURL_CFLAGS) REMOTE_CURL_PRIMARY = git-remote-http$X @@ -1680,58 +1681,58 @@ else REMOTE_CURL_NAMES = $(REMOTE_CURL_PRIMARY) $(REMOTE_CURL_ALIASES) PROGRAM_OBJS += http-fetch.o PROGRAMS += $(REMOTE_CURL_NAMES) - ifndef NO_EXPAT + ifndef NO_EXPAT PROGRAM_OBJS += http-push.o - endif + endif curl_check := $(shell (echo 072200; $(CURL_CONFIG) --vernum | sed -e '/^70[BC]/s/^/0/') 2>/dev/null | sort -r | sed -ne 2p) - ifeq "$(curl_check)" "072200" + ifeq "$(curl_check)" "072200" USE_CURL_FOR_IMAP_SEND = YesPlease - endif - ifdef USE_CURL_FOR_IMAP_SEND + endif + ifdef USE_CURL_FOR_IMAP_SEND BASIC_CFLAGS += -DUSE_CURL_FOR_IMAP_SEND IMAP_SEND_BUILDDEPS = http.o IMAP_SEND_LDFLAGS += $(CURL_LIBCURL) - endif - ifndef NO_EXPAT - ifdef EXPATDIR + endif + ifndef NO_EXPAT + ifdef EXPATDIR BASIC_CFLAGS += -I$(EXPATDIR)/include EXPAT_LIBEXPAT = $(call libpath_template,$(EXPATDIR)/$(lib)) -lexpat - else + else EXPAT_LIBEXPAT = -lexpat - endif - ifdef EXPAT_NEEDS_XMLPARSE_H + endif + ifdef EXPAT_NEEDS_XMLPARSE_H BASIC_CFLAGS += -DEXPAT_NEEDS_XMLPARSE_H - endif - endif + endif + endif endif IMAP_SEND_LDFLAGS += $(OPENSSL_LINK) $(OPENSSL_LIBSSL) $(LIB_4_CRYPTO) ifdef ZLIB_NG BASIC_CFLAGS += -DHAVE_ZLIB_NG - ifdef ZLIB_NG_PATH + ifdef ZLIB_NG_PATH BASIC_CFLAGS += -I$(ZLIB_NG_PATH)/include EXTLIBS += $(call libpath_template,$(ZLIB_NG_PATH)/$(lib)) - endif + endif EXTLIBS += -lz-ng else - ifdef ZLIB_PATH + ifdef ZLIB_PATH BASIC_CFLAGS += -I$(ZLIB_PATH)/include EXTLIBS += $(call 
libpath_template,$(ZLIB_PATH)/$(lib)) - endif + endif EXTLIBS += -lz endif ifndef NO_OPENSSL OPENSSL_LIBSSL = -lssl - ifdef OPENSSLDIR + ifdef OPENSSLDIR BASIC_CFLAGS += -I$(OPENSSLDIR)/include OPENSSL_LINK = $(call libpath_template,$(OPENSSLDIR)/$(lib)) - else + else OPENSSL_LINK = - endif - ifdef NEEDS_CRYPTO_WITH_SSL + endif + ifdef NEEDS_CRYPTO_WITH_SSL OPENSSL_LIBSSL += -lcrypto - endif + endif else BASIC_CFLAGS += -DNO_OPENSSL OPENSSL_LIBSSL = @@ -1749,18 +1750,18 @@ ifdef APPLE_COMMON_CRYPTO endif endif ifndef NO_ICONV - ifdef NEEDS_LIBICONV - ifdef ICONVDIR + ifdef NEEDS_LIBICONV + ifdef ICONVDIR BASIC_CFLAGS += -I$(ICONVDIR)/include ICONV_LINK = $(call libpath_template,$(ICONVDIR)/$(lib)) - else + else ICONV_LINK = - endif - ifdef NEEDS_LIBINTL_BEFORE_LIBICONV + endif + ifdef NEEDS_LIBINTL_BEFORE_LIBICONV ICONV_LINK += -lintl - endif + endif EXTLIBS += $(ICONV_LINK) -liconv - endif + endif endif ifdef ICONV_OMITS_BOM BASIC_CFLAGS += -DICONV_OMITS_BOM @@ -1880,10 +1881,10 @@ ifdef NO_MMAP COMPAT_CFLAGS += -DNO_MMAP COMPAT_OBJS += compat/mmap.o else - ifdef USE_WIN32_MMAP + ifdef USE_WIN32_MMAP COMPAT_CFLAGS += -DUSE_WIN32_MMAP COMPAT_OBJS += compat/win32mmap.o - endif + endif endif ifdef MMAP_PREVENTS_DELETE BASIC_CFLAGS += -DMMAP_PREVENTS_DELETE @@ -2008,11 +2009,11 @@ else BASIC_CFLAGS += -DSHA1_DC LIB_OBJS += sha1dc_git.o ifdef DC_SHA1_EXTERNAL - ifdef DC_SHA1_SUBMODULE - ifneq ($(DC_SHA1_SUBMODULE),auto) + ifdef DC_SHA1_SUBMODULE + ifneq ($(DC_SHA1_SUBMODULE),auto) $(error Only set DC_SHA1_EXTERNAL or DC_SHA1_SUBMODULE, not both) - endif - endif + endif + endif BASIC_CFLAGS += -DDC_SHA1_EXTERNAL EXTLIBS += -lsha1detectcoll else @@ -2218,26 +2219,26 @@ endif ifdef RUNTIME_PREFIX - ifdef HAVE_BSD_KERN_PROC_SYSCTL + ifdef HAVE_BSD_KERN_PROC_SYSCTL BASIC_CFLAGS += -DHAVE_BSD_KERN_PROC_SYSCTL - endif + endif - ifneq ($(PROCFS_EXECUTABLE_PATH),) + ifneq ($(PROCFS_EXECUTABLE_PATH),) pep_SQ = $(subst ','\'',$(PROCFS_EXECUTABLE_PATH)) BASIC_CFLAGS += 
'-DPROCFS_EXECUTABLE_PATH="$(pep_SQ)"' - endif + endif - ifdef HAVE_NS_GET_EXECUTABLE_PATH + ifdef HAVE_NS_GET_EXECUTABLE_PATH BASIC_CFLAGS += -DHAVE_NS_GET_EXECUTABLE_PATH - endif + endif - ifdef HAVE_ZOS_GET_EXECUTABLE_PATH + ifdef HAVE_ZOS_GET_EXECUTABLE_PATH BASIC_CFLAGS += -DHAVE_ZOS_GET_EXECUTABLE_PATH - endif + endif - ifdef HAVE_WPGMPTR + ifdef HAVE_WPGMPTR BASIC_CFLAGS += -DHAVE_WPGMPTR - endif + endif endif @@ -2595,7 +2596,7 @@ GIT-SCRIPT-DEFINES: FORCE if test x"$$FLAGS" != x"`cat $@ 2>/dev/null`" ; then \ echo >&2 " * new script parameters"; \ echo "$$FLAGS" >$@; \ - fi + fi $(SCRIPT_SH_GEN) $(SCRIPT_LIB) : % : %.sh generate-script.sh GIT-BUILD-OPTIONS GIT-SCRIPT-DEFINES $(QUIET_GEN)./generate-script.sh "$<" "$@+" ./GIT-BUILD-OPTIONS && \ @@ -2936,7 +2937,7 @@ Documentation/GIT-EXCLUDED-PROGRAMS: FORCE x"`cat Documentation/GIT-EXCLUDED-PROGRAMS 2>/dev/null`" ; then \ echo >&2 " * new documentation flags"; \ echo "$$EXCLUDED" >Documentation/GIT-EXCLUDED-PROGRAMS; \ - fi + fi .PHONY: doc man man-perl html info pdf doc: man-perl @@ -3172,7 +3173,7 @@ GIT-CFLAGS: FORCE if test x"$$FLAGS" != x"`cat GIT-CFLAGS 2>/dev/null`" ; then \ echo >&2 " * new build flags"; \ echo "$$FLAGS" >GIT-CFLAGS; \ - fi + fi TRACK_LDFLAGS = $(subst ','\'',$(ALL_LDFLAGS)) @@ -3181,7 +3182,7 @@ GIT-LDFLAGS: FORCE if test x"$$FLAGS" != x"`cat GIT-LDFLAGS 2>/dev/null`" ; then \ echo >&2 " * new link flags"; \ echo "$$FLAGS" >GIT-LDFLAGS; \ - fi + fi ifdef RUNTIME_PREFIX RUNTIME_PREFIX_OPTION = true @@ -3256,7 +3257,7 @@ GIT-PYTHON-VARS: FORCE if test x"$$VARS" != x"`cat $@ 2>/dev/null`" ; then \ echo >&2 " * new Python interpreter location"; \ echo "$$VARS" >$@; \ - fi + fi endif test_bindir_programs := $(patsubst %,bin-wrappers/%,$(BINDIR_PROGRAMS_NEED_X) $(BINDIR_PROGRAMS_NO_X) $(TEST_PROGRAMS_NEED_X)) @@ -3927,7 +3928,7 @@ GIT-TEST-SUITES: FORCE if test x"$$FLAGS" != x"`cat GIT-TEST-SUITES 2>/dev/null`" ; then \ echo >&2 " * new test suites"; \ echo "$$FLAGS" >GIT-TEST-SUITES; \ 
- fi + fi $(UNIT_TEST_DIR)/clar-decls.h: $(patsubst %,$(UNIT_TEST_DIR)/%.c,$(CLAR_TEST_SUITES)) $(UNIT_TEST_DIR)/generate-clar-decls.sh GIT-TEST-SUITES $(QUIET_GEN)$(SHELL_PATH) $(UNIT_TEST_DIR)/generate-clar-decls.sh "$@" $(filter %.c,$^) diff --git a/alloc.c b/alloc.c index 377e80f5dda2f8..bb87ead8e1ca43 100644 --- a/alloc.c +++ b/alloc.c @@ -11,6 +11,7 @@ #include "git-compat-util.h" #include "object.h" #include "blob.h" +#include "bblob.h" #include "tree.h" #include "commit.h" #include "repository.h" @@ -77,6 +78,13 @@ void *alloc_blob_node(struct repository *r) return b; } +void *alloc_bblob_node(struct repository *r) +{ + struct bblob *bb = alloc_node(r->parsed_objects->blob_state, sizeof(struct bblob)); + bb->object.type = OBJ_BBLOB; + return bb; +} + void *alloc_tree_node(struct repository *r) { struct tree *t = alloc_node(r->parsed_objects->tree_state, sizeof(struct tree)); diff --git a/alloc.h b/alloc.h index 3f4a0ad310a94b..662205812928f2 100644 --- a/alloc.h +++ b/alloc.h @@ -8,6 +8,7 @@ struct tag; struct repository; void *alloc_blob_node(struct repository *r); +void *alloc_bblob_node(struct repository *r); void *alloc_tree_node(struct repository *r); void init_commit_node(struct commit *c); void *alloc_commit_node(struct repository *r); diff --git a/bblob.c b/bblob.c new file mode 100644 index 00000000000000..4d2910495548bb --- /dev/null +++ b/bblob.c @@ -0,0 +1,213 @@ +#include "git-compat-util.h" +#include "bblob.h" +#include "alloc.h" +#include "object-file.h" +#include "hash.h" +#include "repository.h" +#include "object-store.h" +#include "streaming.h" + +extern int disable_bblob_conversion; + +const char *bblob_type = "bblob"; + +struct bblob *lookup_bblob(struct repository *r, const struct object_id *oid) +{ + struct object *obj = lookup_object(r, oid); + if (!obj) + return create_object(r, oid, alloc_bblob_node(r)); + return object_as_type(obj, OBJ_BBLOB, 0); +} + +void parse_bblob_buffer(struct bblob *item) +{ + item->object.parsed = 1; +} + 
+static int write_bblob_tree(struct repository *r, struct object_id *oids, + int nr, struct object_id *oid) +{ + size_t oidsz = r->hash_algo->rawsz; + if (nr <= BBLOB_FANOUT) { + size_t rawlen = oidsz * BBLOB_FANOUT; + void *raw = xcalloc(1, rawlen); + for (int i = 0; i < nr; i++) + memcpy((char *)raw + i * oidsz, oids[i].hash, oidsz); + int ret = write_object_file(raw, rawlen, OBJ_BBLOB, oid); + free(raw); + return ret; + } + + int groups = (nr + BBLOB_FANOUT - 1) / BBLOB_FANOUT; + struct object_id *tmp = xcalloc(groups, sizeof(*tmp)); + for (int i = 0; i < groups; i++) { + int this = nr - i * BBLOB_FANOUT; + if (this > BBLOB_FANOUT) + this = BBLOB_FANOUT; + if (write_bblob_tree(r, oids + i * BBLOB_FANOUT, this, &tmp[i])) { + free(tmp); + return -1; + } + } + int ret = write_bblob_tree(r, tmp, groups, oid); + free(tmp); + return ret; +} + +int write_bblob(struct repository *r, const void *buf, unsigned long len, + struct object_id *oid) +{ + size_t oids_alloc = 0, oids_nr = 0; + struct object_id *oids = NULL; + unsigned char window[64]; + size_t win_len = 0; + size_t chunk_start = 0; + + for (size_t i = 0; i < len; i++) { + window[win_len % 64] = ((const unsigned char *)buf)[i]; + if (win_len >= 63 && i - chunk_start + 1 >= BBLOB_CHUNK_GOAL) { + struct git_hash_ctx c; + unsigned char out[GIT_MAX_RAWSZ]; + r->hash_algo->init_fn(&c); + git_hash_update(&c, window, 64); + git_hash_final(out, &c); + unsigned short bits = + (out[r->hash_algo->rawsz - 2] << 8) | + out[r->hash_algo->rawsz - 1]; + if ((bits & 0x1fff) == 0) { + struct object_id ch; + disable_bblob_conversion++; + if (write_object_file((const char *)buf + chunk_start, + i - chunk_start + 1, + OBJ_BLOB, &ch)) { + disable_bblob_conversion--; + free(oids); + return -1; + } + disable_bblob_conversion--; + ALLOC_GROW(oids, oids_nr + 1, oids_alloc); + oidcpy(&oids[oids_nr++], &ch); + chunk_start = i + 1; + } + } + win_len++; + } + if (chunk_start < len) { + struct object_id ch; + disable_bblob_conversion++; + if 
(write_object_file((const char *)buf + chunk_start, + len - chunk_start, + OBJ_BLOB, &ch)) { + disable_bblob_conversion--; + free(oids); + return -1; + } + disable_bblob_conversion--; + ALLOC_GROW(oids, oids_nr + 1, oids_alloc); + oidcpy(&oids[oids_nr++], &ch); + } + + int ret = write_bblob_tree(r, oids, oids_nr, oid); + free(oids); + return ret; +} + +static void *read_raw(struct repository *r, const struct object_id *oid, + enum object_type *type, unsigned long *size) +{ + struct object_info oi = OBJECT_INFO_INIT; + void *data; + + oi.typep = type; + oi.sizep = size; + oi.contentp = &data; + if (oid_object_info_extended(r, oid, &oi, + OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE)) + return NULL; + return data; +} + +static void *read_bblob_rec(struct repository *r, const struct object_id *oid, + unsigned long *size) +{ + enum object_type t; + unsigned long sz; + void *data = read_raw(r, oid, &t, &sz); + if (!data) + return NULL; + if (t == OBJ_BLOB) { + *size = sz; + return data; + } + if (t != OBJ_BBLOB) { + free(data); + return NULL; + } + + size_t oidsz = r->hash_algo->rawsz; + int cnt = sz / oidsz; + unsigned long out_sz = 0; + char *out = NULL; + for (int i = 0; i < cnt; i++) { + struct object_id child; + memset(&child, 0, sizeof(child)); + memcpy(child.hash, (char *)data + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + unsigned long csz; + void *cbuf = read_bblob_rec(r, &child, &csz); + if (!cbuf) { + free(out); + free(data); + return NULL; + } + REALLOC_ARRAY(out, out_sz + csz); + memcpy(out + out_sz, cbuf, csz); + out_sz += csz; + free(cbuf); + } + free(data); + *size = out_sz; + return out; +} + +static unsigned long size_bblob_rec(struct repository *r, const struct object_id *oid) +{ + enum object_type t; + unsigned long sz; + void *data = read_raw(r, oid, &t, &sz); + if (!data) + return 0; + if (t == OBJ_BLOB) { + free(data); + return sz; + } + if (t != OBJ_BBLOB) { + free(data); + return 0; + } + size_t oidsz = 
r->hash_algo->rawsz; + int cnt = sz / oidsz; + unsigned long total = 0; + for (int i = 0; i < cnt; i++) { + struct object_id child; + memset(&child, 0, sizeof(child)); + memcpy(child.hash, (char *)data + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + total += size_bblob_rec(r, &child); + } + free(data); + return total; +} + +void *read_bblob(struct repository *r, const struct object_id *oid, + unsigned long *size) +{ + return read_bblob_rec(r, oid, size); +} + +unsigned long bblob_size(struct repository *r, const struct object_id *oid) +{ + return size_bblob_rec(r, oid); +} diff --git a/bblob.h b/bblob.h new file mode 100644 index 00000000000000..370f6461c62205 --- /dev/null +++ b/bblob.h @@ -0,0 +1,25 @@ +#ifndef BBLOB_H +#define BBLOB_H + +#include "object.h" + +/* Number of child entries in each bblob node */ +#define BBLOB_FANOUT 64 + +/* heuristic target chunk size when splitting large blobs */ +#define BBLOB_CHUNK_GOAL 4096 + +struct bblob { + struct object object; + struct object_id oids[BBLOB_FANOUT]; +}; + +struct bblob *lookup_bblob(struct repository *r, const struct object_id *oid); +void parse_bblob_buffer(struct bblob *item); +int write_bblob(struct repository *r, const void *buf, unsigned long len, + struct object_id *oid); +void *read_bblob(struct repository *r, const struct object_id *oid, + unsigned long *size); +unsigned long bblob_size(struct repository *r, const struct object_id *oid); + +#endif /* BBLOB_H */ diff --git a/builtin/cat-file.c b/builtin/cat-file.c index 67a5ff2b9ebd29..3bf27cb315aa66 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -25,6 +25,7 @@ #include "object-file.h" #include "object-name.h" #include "object-store.h" +#include "bblob.h" #include "replace-object.h" #include "promisor-remote.h" #include "mailmap.h" @@ -149,7 +150,16 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name) if (oid_object_info_extended(the_repository, &oid, &oi, flags) < 0) die("git cat-file: could not 
get object info"); - if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) { + if (!oi.typep) { + struct object_info ti = OBJECT_INFO_INIT; + ti.typep = &type; + if (oid_object_info_extended(the_repository, &oid, &ti, flags) < 0) + die("git cat-file: could not get object info"); + } + + if (type == OBJ_BBLOB) + size = bblob_size(the_repository, &oid); + else if (use_mailmap && (type == OBJ_COMMIT || type == OBJ_TAG)) { size_t s = size; buf = replace_idents_using_mailmap(buf, &s); size = cast_size_t_to_ulong(s); diff --git a/builtin/fsck.c b/builtin/fsck.c index e7d96a9c8ea586..81f9bd9535c0d8 100644 --- a/builtin/fsck.c +++ b/builtin/fsck.c @@ -946,8 +946,10 @@ static struct option fsck_opts[] = { N_("write dangling objects in .git/lost-found")), OPT_BOOL(0, "progress", &show_progress, N_("show progress")), OPT_BOOL(0, "name-objects", &name_objects, N_("show verbose names for reachable objects")), - OPT_BOOL(0, "references", &check_references, N_("check reference database consistency")), - OPT_END(), + OPT_BOOL(0, "references", &check_references, N_("check reference database consistency")), + OPT_BOOL(0, "full-bblob-verify", &fsck_obj_options.full_bblob_verify, + N_("reassemble bblobs to verify contents")), + OPT_END(), }; int cmd_fsck(int argc, diff --git a/builtin/repack.c b/builtin/repack.c index 59214dbdfdfcd5..908f709e69342d 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -18,6 +18,7 @@ #include "packfile.h" #include "prune-packed.h" #include "object-store.h" +#include "bblob.h" #include "promisor-remote.h" #include "shallow.h" #include "pack.h" @@ -38,8 +39,30 @@ static int pack_kept_objects = -1; static int write_bitmaps = -1; static int use_delta_islands; static int run_update_server_info = 1; +static int convert_to_bblob; static char *packdir, *packtmp_name, *packtmp; +static int convert_one_loose(const struct object_id *oid, const char *path, + void *data) +{ + enum object_type type; + unsigned long size; + void *buf = 
repo_read_object_file(the_repository, oid, &type, &size); + if (!buf) + return 0; + if (type == OBJ_BLOB) { + struct object_id new_oid; + write_bblob(the_repository, buf, size, &new_oid); + } + free(buf); + return 0; +} + +static void convert_all_blobs_to_bblob(void) +{ + for_each_loose_object(convert_one_loose, NULL, 0); +} + static const char *const git_repack_usage[] = { N_("git repack [-a] [-A] [-d] [-f] [-F] [-l] [-n] [-q] [-b] [-m]\n" "[--window=] [--depth=] [--threads=] [--keep-pack=]\n" @@ -1207,9 +1230,11 @@ int cmd_repack(int argc, N_("limits the maximum number of threads")), OPT_UNSIGNED(0, "max-pack-size", &po_args.max_pack_size, N_("maximum size of each packfile")), - OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), - OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, - N_("repack objects in packs marked with .keep")), + OPT_PARSE_LIST_OBJECTS_FILTER(&po_args.filter_options), + OPT_BOOL(0, "convert-to-bblob", &convert_to_bblob, + N_("rewrite all blobs as bblobs")), + OPT_BOOL(0, "pack-kept-objects", &pack_kept_objects, + N_("repack objects in packs marked with .keep")), OPT_STRING_LIST(0, "keep-pack", &keep_pack_list, N_("name"), N_("do not repack this pack")), OPT_INTEGER('g', "geometric", &geometry.split_factor, @@ -1232,8 +1257,11 @@ int cmd_repack(int argc, po_args.window = xstrdup_or_null(opt_window); po_args.window_memory = xstrdup_or_null(opt_window_memory); - po_args.depth = xstrdup_or_null(opt_depth); - po_args.threads = xstrdup_or_null(opt_threads); + po_args.depth = xstrdup_or_null(opt_depth); + po_args.threads = xstrdup_or_null(opt_threads); + + if (convert_to_bblob) + convert_all_blobs_to_bblob(); if (delete_redundant && repository_format_precious_objects) die(_("cannot delete packs in a precious-objects repo")); diff --git a/fsck.c b/fsck.c index 8dc8472ceb3781..81eb3dd9382114 100644 --- a/fsck.c +++ b/fsck.c @@ -10,6 +10,7 @@ #include "object.h" #include "attr.h" #include "blob.h" +#include "bblob.h" #include "tree.h" #include 
"tree-walk.h" #include "commit.h" @@ -480,6 +481,45 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio return options->walk(tag->tagged, OBJ_ANY, data, options); } +static int fsck_walk_bblob(struct bblob *bb, void *data, + struct fsck_options *options) +{ + enum object_type type; + unsigned long size; + void *buf; + int res = 0; + const char *name = fsck_get_object_name(options, &bb->object.oid); + + buf = repo_read_raw_object_file(the_repository, &bb->object.oid, &type, &size); + if (!buf || type != OBJ_BBLOB) + return -1; + + size_t oidsz = the_repository->hash_algo->rawsz; + int cnt = size / oidsz; + for (int i = 0; i < cnt; i++) { + struct object_id child; + memset(&child, 0, sizeof(child)); + memcpy(child.hash, (char *)buf + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + enum object_type ct = oid_object_info(the_repository, &child, NULL); + if (ct <= 0) + continue; + struct object *obj = lookup_object_by_type(the_repository, &child, ct); + if (name && obj) + fsck_put_object_name(options, &child, "%s#%d", name, i); + int result = options->walk(obj, ct, data, options); + if (result < 0) { + free(buf); + return result; + } + if (!res) + res = result; + } + free(buf); + return res; +} + int fsck_walk(struct object *obj, void *data, struct fsck_options *options) { if (!obj) @@ -488,11 +528,13 @@ int fsck_walk(struct object *obj, void *data, struct fsck_options *options) if (obj->type == OBJ_NONE) parse_object(the_repository, &obj->oid); - switch (obj->type) { - case OBJ_BLOB: - return 0; - case OBJ_TREE: - return fsck_walk_tree((struct tree *)obj, data, options); + switch (obj->type) { + case OBJ_BLOB: + return 0; + case OBJ_BBLOB: + return fsck_walk_bblob((struct bblob *)obj, data, options); + case OBJ_TREE: + return fsck_walk_tree((struct tree *)obj, data, options); case OBJ_COMMIT: return fsck_walk_commit((struct commit *)obj, data, options); case OBJ_TAG: @@ -1202,7 +1244,52 @@ static int fsck_blob(const struct 
object_id *oid, const char *buf, } } - return ret; + return ret; +} + +static int fsck_bblob(const struct object_id *oid, const char *buf, + unsigned long size, struct fsck_options *options) +{ + int ret = 0; + size_t oidsz = the_repository->hash_algo->rawsz; + + if (size != oidsz * BBLOB_FANOUT) + ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, + "invalid bblob size"); + + for (int i = 0; i < BBLOB_FANOUT && i * oidsz < size; i++) { + struct object_id child; + memset(&child, 0, sizeof(child)); + memcpy(child.hash, buf + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + + enum object_type t = oid_object_info(the_repository, &child, NULL); + if (t <= 0) { + ret |= report(options, oid, OBJ_BBLOB, + FSCK_MSG_BAD_OBJECT_SHA1, + "missing child object"); + continue; + } + if (t != OBJ_BLOB && t != OBJ_BBLOB) + ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, + "child has invalid type"); + else if (t == OBJ_BBLOB) { + unsigned long csz; + void *cbuf = repo_read_raw_object_file(the_repository, + &child, &t, &csz); + if (!cbuf) { + ret |= report(options, oid, OBJ_BBLOB, + FSCK_MSG_BAD_OBJECT_SHA1, + "cannot read child"); + continue; + } + ret |= fsck_bblob(&child, cbuf, csz, options); + free(cbuf); + } + } + + return ret; } int fsck_object(struct object *obj, void *data, unsigned long size, @@ -1218,10 +1305,12 @@ int fsck_buffer(const struct object_id *oid, enum object_type type, const void *data, unsigned long size, struct fsck_options *options) { - if (type == OBJ_BLOB) - return fsck_blob(oid, data, size, options); - if (type == OBJ_TREE) - return fsck_tree(oid, data, size, options); + if (type == OBJ_BLOB) + return fsck_blob(oid, data, size, options); + if (type == OBJ_BBLOB) + return fsck_bblob(oid, data, size, options); + if (type == OBJ_TREE) + return fsck_tree(oid, data, size, options); if (type == OBJ_COMMIT) return fsck_commit(oid, data, size, options); if (type == OBJ_TAG) @@ -1293,7 +1382,7 @@ static int fsck_blobs(struct oidset 
*blobs_found, struct oidset *blobs_done, if (oidset_contains(blobs_done, oid)) continue; - buf = repo_read_object_file(the_repository, oid, &type, &size); + buf = repo_read_raw_object_file(the_repository, oid, &type, &size); if (!buf) { if (is_promisor_object(the_repository, oid)) continue; diff --git a/fsck.h b/fsck.h index 0c5869ac34e216..fc744b5abedb7b 100644 --- a/fsck.h +++ b/fsck.h @@ -174,35 +174,40 @@ struct fsck_options { struct oidset gitmodules_done; struct oidset gitattributes_found; struct oidset gitattributes_done; - kh_oid_map_t *object_names; + kh_oid_map_t *object_names; + int full_bblob_verify; }; #define FSCK_OPTIONS_DEFAULT { \ - .skip_oids = OIDSET_INIT, \ - .gitmodules_found = OIDSET_INIT, \ - .gitmodules_done = OIDSET_INIT, \ - .gitattributes_found = OIDSET_INIT, \ - .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_objects_error_function \ + .skip_oids = OIDSET_INIT, \ + .gitmodules_found = OIDSET_INIT, \ + .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ + .error_func = fsck_objects_error_function, \ + .full_bblob_verify = 0 \ } #define FSCK_OPTIONS_STRICT { \ - .strict = 1, \ - .gitmodules_found = OIDSET_INIT, \ - .gitmodules_done = OIDSET_INIT, \ - .gitattributes_found = OIDSET_INIT, \ - .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_objects_error_function, \ + .strict = 1, \ + .gitmodules_found = OIDSET_INIT, \ + .gitmodules_done = OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ + .error_func = fsck_objects_error_function, \ + .full_bblob_verify = 0, \ } #define FSCK_OPTIONS_MISSING_GITMODULES { \ - .strict = 1, \ - .gitmodules_found = OIDSET_INIT, \ - .gitmodules_done = OIDSET_INIT, \ - .gitattributes_found = OIDSET_INIT, \ - .gitattributes_done = OIDSET_INIT, \ - .error_func = fsck_objects_error_cb_print_missing_gitmodules, \ + .strict = 1, \ + .gitmodules_found = OIDSET_INIT, \ + .gitmodules_done = 
OIDSET_INIT, \ + .gitattributes_found = OIDSET_INIT, \ + .gitattributes_done = OIDSET_INIT, \ + .error_func = fsck_objects_error_cb_print_missing_gitmodules, \ + .full_bblob_verify = 0, \ } #define FSCK_REFS_OPTIONS_DEFAULT { \ - .error_func = fsck_refs_error_function, \ + .error_func = fsck_refs_error_function, \ + .full_bblob_verify = 0, \ } /* descend in all linked child objects diff --git a/object-file.c b/object-file.c index 1ac04c2891634a..288f3dcebf11a7 100644 --- a/object-file.c +++ b/object-file.c @@ -21,6 +21,7 @@ #include "loose.h" #include "object-file-convert.h" #include "object-file.h" +#include "bblob.h" #include "object-store.h" #include "oidtree.h" #include "pack.h" @@ -774,8 +775,8 @@ static int start_loose_object_common(struct strbuf *tmp_file, char *hdr, int hdrlen) { struct repository *repo = the_repository; - const struct git_hash_algo *algo = repo->hash_algo; - const struct git_hash_algo *compat = repo->compat_hash_algo; + const struct git_hash_algo *algo = repo->hash_algo; + const struct git_hash_algo *compat = repo->compat_hash_algo; int fd; fd = create_tmpfile(tmp_file, filename); @@ -1050,9 +1051,11 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, return err; } +int disable_bblob_conversion; + int write_object_file_flags(const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, unsigned flags) + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, unsigned flags) { struct repository *repo = the_repository; const struct git_hash_algo *algo = repo->hash_algo; @@ -1061,21 +1064,30 @@ int write_object_file_flags(const void *buf, unsigned long len, char hdr[MAX_HEADER_LEN]; int hdrlen = sizeof(hdr); - /* Generate compat_oid */ - if (compat) { - if (compat_oid_in) - oidcpy(&compat_oid, compat_oid_in); - else if (type == OBJ_BLOB) - hash_object_file(compat, buf, len, type, &compat_oid); - else { - struct strbuf converted = STRBUF_INIT; 
- convert_object_file(the_repository, &converted, algo, compat, + /* Generate compat_oid */ + if (compat) { + if (compat_oid_in) + oidcpy(&compat_oid, compat_oid_in); + else if (type == OBJ_BLOB) + hash_object_file(compat, buf, len, OBJ_BLOB, &compat_oid); + else { + struct strbuf converted = STRBUF_INIT; + convert_object_file(the_repository, &converted, algo, compat, buf, len, type, 0); - hash_object_file(compat, converted.buf, converted.len, - type, &compat_oid); - strbuf_release(&converted); - } - } + hash_object_file(compat, converted.buf, converted.len, + type, &compat_oid); + strbuf_release(&converted); + } + } + + if (!disable_bblob_conversion && type == OBJ_BLOB && len > BBLOB_CHUNK_GOAL) { + if (write_bblob(repo, buf, len, oid)) + return -1; + type = OBJ_BBLOB; + if (compat) + return repo_add_loose_object_map(repo, oid, &compat_oid); + return 0; + } /* Normally if we have it in the pack then we do not bother writing * it out into .git/objects/??/?{38} file. diff --git a/object-store.c b/object-store.c index 58cde0313a5533..692a27e62b053d 100644 --- a/object-store.c +++ b/object-store.c @@ -13,6 +13,7 @@ #include "loose.h" #include "object-file-convert.h" #include "object-file.h" +#include "bblob.h" #include "object-store.h" #include "packfile.h" #include "path.h" @@ -859,22 +860,40 @@ int pretend_object_file(struct repository *repo, * deal with them should arrange to call oid_object_info_extended() and give * error messages themselves. 
*/ -void *repo_read_object_file(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size) +void *repo_read_raw_object_file(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size) { struct object_info oi = OBJECT_INFO_INIT; unsigned flags = OBJECT_INFO_DIE_IF_CORRUPT | OBJECT_INFO_LOOKUP_REPLACE; void *data; oi.typep = type; - oi.sizep = size; - oi.contentp = &data; - if (oid_object_info_extended(r, oid, &oi, flags)) - return NULL; + oi.sizep = size; + oi.contentp = &data; + if (oid_object_info_extended(r, oid, &oi, flags)) + return NULL; + + return data; +} + +void *repo_read_object_file(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size) +{ + void *data = repo_read_raw_object_file(r, oid, type, size); + + if (data && *type == OBJ_BBLOB) { + void *out = read_bblob(r, oid, size); + free(data); + if (out) + *type = OBJ_BLOB; + return out; + } - return data; + return data; } void *read_object_with_reference(struct repository *r, diff --git a/object-store.h b/object-store.h index c5890085352329..eb1fb6075c75b8 100644 --- a/object-store.h +++ b/object-store.h @@ -180,10 +180,15 @@ void raw_object_store_clear(struct raw_object_store *o); */ int odb_mkstemp(struct strbuf *temp_filename, const char *pattern); +void *repo_read_raw_object_file(struct repository *r, + const struct object_id *oid, + enum object_type *type, + unsigned long *size); + void *repo_read_object_file(struct repository *r, - const struct object_id *oid, - enum object_type *type, - unsigned long *size); + const struct object_id *oid, + enum object_type *type, + unsigned long *size); /* Read and unpack an object file into memory, write memory to an object file */ int oid_object_info(struct repository *r, const struct object_id *, unsigned long *); diff --git a/object.c b/object.c index 3b15469139d2ef..a74f7057765073 100644 --- a/object.c +++ b/object.c @@ -7,6 +7,7 
@@ #include "replace-object.h" #include "object-file.h" #include "blob.h" +#include "bblob.h" #include "statinfo.h" #include "tree.h" #include "commit.h" @@ -31,6 +32,7 @@ static const char *object_type_strings[] = { "tree", /* OBJ_TREE = 2 */ "blob", /* OBJ_BLOB = 3 */ "tag", /* OBJ_TAG = 4 */ + "bblob", /* OBJ_BBLOB = 5 */ }; const char *type_name(unsigned int type) @@ -193,18 +195,20 @@ struct object *lookup_object_by_type(struct repository *r, const struct object_id *oid, enum object_type type) { - switch (type) { - case OBJ_COMMIT: - return (struct object *)lookup_commit(r, oid); - case OBJ_TREE: - return (struct object *)lookup_tree(r, oid); - case OBJ_TAG: - return (struct object *)lookup_tag(r, oid); - case OBJ_BLOB: - return (struct object *)lookup_blob(r, oid); - default: - BUG("unknown object type %d", type); - } + switch (type) { + case OBJ_COMMIT: + return (struct object *)lookup_commit(r, oid); + case OBJ_TREE: + return (struct object *)lookup_tree(r, oid); + case OBJ_TAG: + return (struct object *)lookup_tag(r, oid); + case OBJ_BLOB: + return (struct object *)lookup_blob(r, oid); + case OBJ_BBLOB: + return (struct object *)lookup_bblob(r, oid); + default: + BUG("unknown object type %d", type); + } } enum peel_status peel_object(struct repository *r, @@ -236,12 +240,20 @@ struct object *parse_object_buffer(struct repository *r, const struct object_id *eaten_p = 0; obj = NULL; - if (type == OBJ_BLOB) { - struct blob *blob = lookup_blob(r, oid); - if (blob) { - parse_blob_buffer(blob); - obj = &blob->object; - } + if (type == OBJ_BLOB || type == OBJ_BBLOB) { + if (type == OBJ_BLOB) { + struct blob *blob = lookup_blob(r, oid); + if (blob) { + parse_blob_buffer(blob); + obj = &blob->object; + } + } else { + struct bblob *bb = lookup_bblob(r, oid); + if (bb) { + parse_bblob_buffer(bb); + obj = &bb->object; + } + } } else if (type == OBJ_TREE) { struct tree *tree = lookup_tree(r, oid); if (tree) { @@ -314,14 +326,18 @@ struct object 
*parse_object_with_flags(struct repository *r, return &commit->object; } - if ((!obj || obj->type == OBJ_BLOB) && - oid_object_info(r, oid, NULL) == OBJ_BLOB) { - if (!skip_hash && stream_object_signature(r, repl) < 0) { - error(_("hash mismatch %s"), oid_to_hex(oid)); - return NULL; - } - parse_blob_buffer(lookup_blob(r, oid)); - return lookup_object(r, oid); + if ((!obj || obj->type == OBJ_BLOB || obj->type == OBJ_BBLOB) && + (oid_object_info(r, oid, NULL) == OBJ_BLOB || + oid_object_info(r, oid, NULL) == OBJ_BBLOB)) { + if (!skip_hash && stream_object_signature(r, repl) < 0) { + error(_("hash mismatch %s"), oid_to_hex(oid)); + return NULL; + } + if (oid_object_info(r, oid, NULL) == OBJ_BLOB) + parse_blob_buffer(lookup_blob(r, oid)); + else + parse_bblob_buffer(lookup_bblob(r, oid)); + return lookup_object(r, oid); } /* diff --git a/object.h b/object.h index 8c3c1c46e1bf04..1dfbf369422f4a 100644 --- a/object.h +++ b/object.h @@ -99,9 +99,9 @@ enum object_type { OBJ_NONE = 0, OBJ_COMMIT = 1, OBJ_TREE = 2, - OBJ_BLOB = 3, - OBJ_TAG = 4, - /* 5 for future expansion */ + OBJ_BLOB = 3, + OBJ_TAG = 4, + OBJ_BBLOB = 5, OBJ_OFS_DELTA = 6, OBJ_REF_DELTA = 7, OBJ_ANY, diff --git a/serve.c b/serve.c index e3ccf1505ca1a0..9a39039ca70189 100644 --- a/serve.c +++ b/serve.c @@ -181,6 +181,10 @@ static struct protocol_capability capabilities[] = { .advertise = promisor_remote_advertise, .receive = promisor_remote_receive, }, + { + .name = "bblob", + .advertise = always_advertise, + }, }; void protocol_v2_advertise_capabilities(struct repository *r) diff --git a/streaming.c b/streaming.c index 6d6512e2e0d6d9..ab2460ce51aacb 100644 --- a/streaming.c +++ b/streaming.c @@ -11,6 +11,7 @@ #include "repository.h" #include "object-file.h" #include "object-store.h" +#include "bblob.h" #include "replace-object.h" #include "packfile.h" @@ -43,7 +44,7 @@ struct git_istream { enum { z_unused, z_used, z_done, z_error } z_state; union { - struct { + struct { char *buf; /* from 
oid_object_info_extended() */ unsigned long read_ptr; } incore; @@ -61,7 +62,14 @@ struct git_istream { off_t pos; } in_pack; - struct filtered_istream filtered; + struct filtered_istream filtered; +struct { + struct object_id oids[BBLOB_FANOUT]; + int nr; + int idx; + struct git_istream *sub; + void *raw; + } bblob; } u; }; @@ -407,6 +415,25 @@ static int open_istream_incore(struct git_istream *st, struct repository *r, OBJECT_INFO_DIE_IF_CORRUPT); } +static int open_istream_bblob(struct git_istream *st, struct repository *r, + const struct object_id *oid, enum object_type *type) +{ + unsigned long raw_sz; + void *raw = read_bblob(r, oid, &raw_sz); /* this already expands */ + if (!raw) + return -1; + st->u.incore.buf = raw; + st->u.incore.read_ptr = 0; + st->size = raw_sz; + st->close = close_istream_incore; + st->read = read_istream_incore; + *type = OBJ_BLOB; + return 0; +} + + + + /***************************************************************************** * static helpers variables and functions for users of streaming interface *****************************************************************************/ @@ -469,7 +496,9 @@ struct git_istream *open_istream(struct repository *r, { struct git_istream *st = xmalloc(sizeof(*st)); const struct object_id *real = lookup_replace_object(r, oid); - int ret = istream_source(st, r, real, type); + int ret = istream_source(st, r, real, type); + if (!ret && *type == OBJ_BBLOB) + st->open = open_istream_bblob; if (ret) { free(st); diff --git a/t/meson.build b/t/meson.build index d052fc3e23d2ec..78bd09419e2ea8 100644 --- a/t/meson.build +++ b/t/meson.build @@ -180,6 +180,7 @@ integration_tests = [ 't1020-subdirectory.sh', 't1021-rerere-in-workdir.sh', 't1022-read-tree-partial-clone.sh', + 't1030-bblob.sh', 't1050-large.sh', 't1051-large-conversion.sh', 't1060-object-corruption.sh', @@ -1176,7 +1177,7 @@ foreach glob, tests : { missing_tests = [ ] foreach actual_test : actual_tests if actual_test not in tests - 
missing_tests += actual_test + missing_tests += actual_test endif endforeach if missing_tests.length() > 0 @@ -1186,7 +1187,7 @@ foreach glob, tests : { superfluous_tests = [ ] foreach integration_test : tests if integration_test not in actual_tests - superfluous_tests += integration_test + superfluous_tests += integration_test endif endforeach if superfluous_tests.length() > 0 @@ -1223,7 +1224,7 @@ if perl.found() and time.found() foreach benchmark : benchmarks benchmark(fs.stem(benchmark), shell, args: [ - fs.name(benchmark), + fs.name(benchmark), ], workdir: meson.current_source_dir() / 'perf', env: benchmark_environment, diff --git a/t/t1030-bblob.sh b/t/t1030-bblob.sh new file mode 100755 index 00000000000000..548365eaa1d720 --- /dev/null +++ b/t/t1030-bblob.sh @@ -0,0 +1,37 @@ +#!/bin/sh + +test_description='bblob storage and reading' + +. ./test-lib.sh + +cat_bigfile() { + perl -e 'print "a" x 20000' >bigfile +} + +test_expect_success 'create big blob written as bblob' ' + cat_bigfile && + oid=$(git hash-object -w bigfile) && + test "$(git cat-file -t "$oid")" = bblob +' + +test_expect_success 'reading bblob yields original data' ' + git cat-file -p "$oid" >actual && + test_cmp bigfile actual +' + +test_expect_success 'size helper matches original' ' + test "$(git cat-file -s "$oid")" = "$(wc -c /dev/null && + grep "bblob" trace +' + +test_expect_success 'fsck verifies bblob objects' ' + git fsck --full-bblob-verify >out && + ! grep "error" out +' + +test_done diff --git a/upload-pack.c b/upload-pack.c index 26f29b85b551c1..0b9800cad638d0 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1279,6 +1279,7 @@ static void write_v0_ref(struct upload_pack_data *data, data->no_done ? " no-done" : "", symref_info.buf, data->allow_filter ? 
" filter" : "", + " bblob", session_id.buf, the_hash_algo->name, git_user_agent_sanitized()); From 94d55e2a764f79d107f59e34735175945a7fd6da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Tue, 24 Jun 2025 20:39:48 +0200 Subject: [PATCH 02/20] fix format warning and build headers --- builtin/repack.c | 1 + upload-pack.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 908f709e69342d..4c80329adcce70 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -18,6 +18,7 @@ #include "packfile.h" #include "prune-packed.h" #include "object-store.h" +#include "object-file.h" #include "bblob.h" #include "promisor-remote.h" #include "shallow.h" diff --git a/upload-pack.c b/upload-pack.c index 0b9800cad638d0..896ac1d8c6e6d4 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1269,20 +1269,20 @@ static void write_v0_ref(struct upload_pack_data *data, format_symref_info(&symref_info, &data->symref); format_session_id(&session_id, data); - packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s object-format=%s agent=%s\n", - oid_to_hex(oid), refname_nons, - 0, capabilities, - (data->allow_uor & ALLOW_TIP_SHA1) ? - " allow-tip-sha1-in-want" : "", - (data->allow_uor & ALLOW_REACHABLE_SHA1) ? - " allow-reachable-sha1-in-want" : "", - data->no_done ? " no-done" : "", - symref_info.buf, - data->allow_filter ? " filter" : "", - " bblob", - session_id.buf, - the_hash_algo->name, - git_user_agent_sanitized()); + packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s%s object-format=%s agent=%s\n", + oid_to_hex(oid), refname_nons, + 0, capabilities, + (data->allow_uor & ALLOW_TIP_SHA1) ? + " allow-tip-sha1-in-want" : "", + (data->allow_uor & ALLOW_REACHABLE_SHA1) ? + " allow-reachable-sha1-in-want" : "", + data->no_done ? " no-done" : "", + symref_info.buf, + data->allow_filter ? 
" filter" : "", + " bblob", + session_id.buf, + the_hash_algo->name, + git_user_agent_sanitized()); strbuf_release(&symref_info); strbuf_release(&session_id); data->sent_capabilities = 1; From eb8db1765532802431e8a231782369b7bb9c94f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Tue, 24 Jun 2025 21:08:16 +0200 Subject: [PATCH 03/20] fix indentation in convert_one_loose and capability ad --- builtin/repack.c | 29 ++++++++++++++++------------- upload-pack.c | 29 +++++++++++++++-------------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/builtin/repack.c b/builtin/repack.c index 4c80329adcce70..b72da7205c7e84 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -44,19 +44,22 @@ static int convert_to_bblob; static char *packdir, *packtmp_name, *packtmp; static int convert_one_loose(const struct object_id *oid, const char *path, - void *data) -{ - enum object_type type; - unsigned long size; - void *buf = repo_read_object_file(the_repository, oid, &type, &size); - if (!buf) - return 0; - if (type == OBJ_BLOB) { - struct object_id new_oid; - write_bblob(the_repository, buf, size, &new_oid); - } - free(buf); - return 0; + void *data) + { + /* avoid unused parameter warnings */ + (void)path; + (void)data; + enum object_type type; + unsigned long size; + void *buf = repo_read_object_file(the_repository, oid, &type, &size); + if (!buf) + return 0; + if (type == OBJ_BLOB) { + struct object_id new_oid; + write_bblob(the_repository, buf, size, &new_oid); + } + free(buf); + return 0; } static void convert_all_blobs_to_bblob(void) diff --git a/upload-pack.c b/upload-pack.c index 896ac1d8c6e6d4..8cc459d5f5cee0 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -1269,20 +1269,21 @@ static void write_v0_ref(struct upload_pack_data *data, format_symref_info(&symref_info, &data->symref); format_session_id(&session_id, data); - packet_fwrite_fmt(stdout, "%s %s%c%s%s%s%s%s%s%s%s object-format=%s 
agent=%s\n", - oid_to_hex(oid), refname_nons, - 0, capabilities, - (data->allow_uor & ALLOW_TIP_SHA1) ? - " allow-tip-sha1-in-want" : "", - (data->allow_uor & ALLOW_REACHABLE_SHA1) ? - " allow-reachable-sha1-in-want" : "", - data->no_done ? " no-done" : "", - symref_info.buf, - data->allow_filter ? " filter" : "", - " bblob", - session_id.buf, - the_hash_algo->name, - git_user_agent_sanitized()); + packet_fwrite_fmt(stdout, + "%s %s%c%s%s%s%s%s%s%s%s object-format=%s agent=%s\n", + oid_to_hex(oid), refname_nons, + 0, capabilities, + (data->allow_uor & ALLOW_TIP_SHA1) ? + " allow-tip-sha1-in-want" : "", + (data->allow_uor & ALLOW_REACHABLE_SHA1) ? + " allow-reachable-sha1-in-want" : "", + data->no_done ? " no-done" : "", + symref_info.buf, + data->allow_filter ? " filter" : "", + " bblob", + session_id.buf, + the_hash_algo->name, + git_user_agent_sanitized()); strbuf_release(&symref_info); strbuf_release(&session_id); data->sent_capabilities = 1; From 61fe185167b1be32b9b26f9c426a2f68f0025544 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Tue, 24 Jun 2025 21:44:09 +0200 Subject: [PATCH 04/20] fix c90 declaration issues --- bblob.c | 139 +++++++++++++++++++++++++++-------------------- builtin/repack.c | 35 ++++++------ fsck.c | 139 +++++++++++++++++++++++++---------------------- 3 files changed, 173 insertions(+), 140 deletions(-) diff --git a/bblob.c b/bblob.c index 4d2910495548bb..004b0db8e04e86 100644 --- a/bblob.c +++ b/bblob.c @@ -25,57 +25,66 @@ void parse_bblob_buffer(struct bblob *item) } static int write_bblob_tree(struct repository *r, struct object_id *oids, - int nr, struct object_id *oid) + int nr, struct object_id *oid) { size_t oidsz = r->hash_algo->rawsz; + int ret; + int groups; + struct object_id *tmp; + int i; + if (nr <= BBLOB_FANOUT) { - size_t rawlen = oidsz * BBLOB_FANOUT; - void *raw = xcalloc(1, rawlen); - for (int i = 0; i < nr; i++) - memcpy((char *)raw + i * oidsz, 
oids[i].hash, oidsz); - int ret = write_object_file(raw, rawlen, OBJ_BBLOB, oid); - free(raw); - return ret; + size_t rawlen = oidsz * BBLOB_FANOUT; + void *raw = xcalloc(1, rawlen); + + for (i = 0; i < nr; i++) + memcpy((char *)raw + i * oidsz, oids[i].hash, oidsz); + + ret = write_object_file(raw, rawlen, OBJ_BBLOB, oid); + free(raw); + return ret; } - int groups = (nr + BBLOB_FANOUT - 1) / BBLOB_FANOUT; - struct object_id *tmp = xcalloc(groups, sizeof(*tmp)); - for (int i = 0; i < groups; i++) { - int this = nr - i * BBLOB_FANOUT; - if (this > BBLOB_FANOUT) - this = BBLOB_FANOUT; - if (write_bblob_tree(r, oids + i * BBLOB_FANOUT, this, &tmp[i])) { - free(tmp); - return -1; - } + groups = (nr + BBLOB_FANOUT - 1) / BBLOB_FANOUT; + tmp = xcalloc(groups, sizeof(*tmp)); + for (i = 0; i < groups; i++) { + int this = nr - i * BBLOB_FANOUT; + if (this > BBLOB_FANOUT) + this = BBLOB_FANOUT; + if (write_bblob_tree(r, oids + i * BBLOB_FANOUT, this, &tmp[i])) { + free(tmp); + return -1; + } } - int ret = write_bblob_tree(r, tmp, groups, oid); + ret = write_bblob_tree(r, tmp, groups, oid); free(tmp); return ret; } int write_bblob(struct repository *r, const void *buf, unsigned long len, - struct object_id *oid) + struct object_id *oid) { size_t oids_alloc = 0, oids_nr = 0; struct object_id *oids = NULL; unsigned char window[64]; size_t win_len = 0; size_t chunk_start = 0; + size_t i; + int ret; - for (size_t i = 0; i < len; i++) { - window[win_len % 64] = ((const unsigned char *)buf)[i]; + for (i = 0; i < len; i++) { + window[win_len % 64] = ((const unsigned char *)buf)[i]; if (win_len >= 63 && i - chunk_start + 1 >= BBLOB_CHUNK_GOAL) { - struct git_hash_ctx c; - unsigned char out[GIT_MAX_RAWSZ]; + struct git_hash_ctx c; + unsigned char out[GIT_MAX_RAWSZ]; + unsigned short bits; r->hash_algo->init_fn(&c); git_hash_update(&c, window, 64); - git_hash_final(out, &c); - unsigned short bits = - (out[r->hash_algo->rawsz - 2] << 8) | - out[r->hash_algo->rawsz - 1]; + 
git_hash_final(out, &c); + bits = (out[r->hash_algo->rawsz - 2] << 8) | + out[r->hash_algo->rawsz - 1]; if ((bits & 0x1fff) == 0) { - struct object_id ch; + struct object_id ch; disable_bblob_conversion++; if (write_object_file((const char *)buf + chunk_start, i - chunk_start + 1, @@ -107,7 +116,7 @@ int write_bblob(struct repository *r, const void *buf, unsigned long len, oidcpy(&oids[oids_nr++], &ch); } - int ret = write_bblob_tree(r, oids, oids_nr, oid); + ret = write_bblob_tree(r, oids, oids_nr, oid); free(oids); return ret; } @@ -128,10 +137,15 @@ static void *read_raw(struct repository *r, const struct object_id *oid, } static void *read_bblob_rec(struct repository *r, const struct object_id *oid, - unsigned long *size) + unsigned long *size) { enum object_type t; unsigned long sz; + size_t oidsz = r->hash_algo->rawsz; + int cnt; + unsigned long out_sz = 0; + char *out = NULL; + int i; void *data = read_raw(r, oid, &t, &sz); if (!data) return NULL; @@ -144,23 +158,25 @@ static void *read_bblob_rec(struct repository *r, const struct object_id *oid, return NULL; } - size_t oidsz = r->hash_algo->rawsz; - int cnt = sz / oidsz; - unsigned long out_sz = 0; - char *out = NULL; - for (int i = 0; i < cnt; i++) { - struct object_id child; - memset(&child, 0, sizeof(child)); - memcpy(child.hash, (char *)data + i * oidsz, oidsz); - if (is_null_oid(&child)) - continue; - unsigned long csz; - void *cbuf = read_bblob_rec(r, &child, &csz); - if (!cbuf) { - free(out); - free(data); - return NULL; - } + + cnt = sz / oidsz; + + for (i = 0; i < cnt; i++) { + struct object_id child; + unsigned long csz; + void *cbuf; + + memset(&child, 0, sizeof(child)); + memcpy(child.hash, (char *)data + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + + cbuf = read_bblob_rec(r, &child, &csz); + if (!cbuf) { + free(out); + free(data); + return NULL; + } REALLOC_ARRAY(out, out_sz + csz); memcpy(out + out_sz, cbuf, csz); out_sz += csz; @@ -175,9 +191,13 @@ static unsigned long 
size_bblob_rec(struct repository *r, const struct object_id { enum object_type t; unsigned long sz; + size_t oidsz = r->hash_algo->rawsz; + int cnt; + unsigned long total = 0; + int i; void *data = read_raw(r, oid, &t, &sz); if (!data) - return 0; + return 0; if (t == OBJ_BLOB) { free(data); return sz; @@ -186,16 +206,17 @@ static unsigned long size_bblob_rec(struct repository *r, const struct object_id free(data); return 0; } - size_t oidsz = r->hash_algo->rawsz; - int cnt = sz / oidsz; - unsigned long total = 0; - for (int i = 0; i < cnt; i++) { - struct object_id child; - memset(&child, 0, sizeof(child)); - memcpy(child.hash, (char *)data + i * oidsz, oidsz); - if (is_null_oid(&child)) - continue; - total += size_bblob_rec(r, &child); + cnt = sz / oidsz; + + for (i = 0; i < cnt; i++) { + struct object_id child; + + memset(&child, 0, sizeof(child)); + memcpy(child.hash, (char *)data + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + + total += size_bblob_rec(r, &child); } free(data); return total; diff --git a/builtin/repack.c b/builtin/repack.c index b72da7205c7e84..1e0e85988843c4 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -44,22 +44,25 @@ static int convert_to_bblob; static char *packdir, *packtmp_name, *packtmp; static int convert_one_loose(const struct object_id *oid, const char *path, - void *data) - { - /* avoid unused parameter warnings */ - (void)path; - (void)data; - enum object_type type; - unsigned long size; - void *buf = repo_read_object_file(the_repository, oid, &type, &size); - if (!buf) - return 0; - if (type == OBJ_BLOB) { - struct object_id new_oid; - write_bblob(the_repository, buf, size, &new_oid); - } - free(buf); - return 0; + void *data) +{ + enum object_type type; + unsigned long size; + void *buf; + + /* avoid unused parameter warnings */ + (void)path; + (void)data; + + buf = repo_read_object_file(the_repository, oid, &type, &size); + if (!buf) + return 0; + if (type == OBJ_BLOB) { + struct object_id new_oid; + 
write_bblob(the_repository, buf, size, &new_oid); + } + free(buf); + return 0; } static void convert_all_blobs_to_bblob(void) diff --git a/fsck.c b/fsck.c index 81eb3dd9382114..96c9f2a78028c9 100644 --- a/fsck.c +++ b/fsck.c @@ -482,42 +482,47 @@ static int fsck_walk_tag(struct tag *tag, void *data, struct fsck_options *optio } static int fsck_walk_bblob(struct bblob *bb, void *data, - struct fsck_options *options) + struct fsck_options *options) { - enum object_type type; - unsigned long size; - void *buf; - int res = 0; - const char *name = fsck_get_object_name(options, &bb->object.oid); - - buf = repo_read_raw_object_file(the_repository, &bb->object.oid, &type, &size); - if (!buf || type != OBJ_BBLOB) - return -1; - - size_t oidsz = the_repository->hash_algo->rawsz; - int cnt = size / oidsz; - for (int i = 0; i < cnt; i++) { - struct object_id child; - memset(&child, 0, sizeof(child)); - memcpy(child.hash, (char *)buf + i * oidsz, oidsz); - if (is_null_oid(&child)) - continue; - enum object_type ct = oid_object_info(the_repository, &child, NULL); - if (ct <= 0) - continue; - struct object *obj = lookup_object_by_type(the_repository, &child, ct); - if (name && obj) - fsck_put_object_name(options, &child, "%s#%d", name, i); - int result = options->walk(obj, ct, data, options); - if (result < 0) { - free(buf); - return result; - } - if (!res) - res = result; - } - free(buf); - return res; + enum object_type type; + unsigned long size; + size_t oidsz = the_repository->hash_algo->rawsz; + void *buf; + int cnt; + int res = 0; + const char *name = fsck_get_object_name(options, &bb->object.oid); + + buf = repo_read_raw_object_file(the_repository, &bb->object.oid, &type, &size); + if (!buf || type != OBJ_BBLOB) + return -1; + + cnt = size / oidsz; + for (int i = 0; i < cnt; i++) { + struct object_id child; + enum object_type ct; + struct object *obj; + int result; + + memset(&child, 0, sizeof(child)); + memcpy(child.hash, (char *)buf + i * oidsz, oidsz); + if 
(is_null_oid(&child)) + continue; + ct = oid_object_info(the_repository, &child, NULL); + if (ct <= 0) + continue; + obj = lookup_object_by_type(the_repository, &child, ct); + if (name && obj) + fsck_put_object_name(options, &child, "%s#%d", name, i); + result = options->walk(obj, ct, data, options); + if (result < 0) { + free(buf); + return result; + } + if (!res) + res = result; + } + free(buf); + return res; } int fsck_walk(struct object *obj, void *data, struct fsck_options *options) @@ -1248,42 +1253,46 @@ static int fsck_blob(const struct object_id *oid, const char *buf, } static int fsck_bblob(const struct object_id *oid, const char *buf, - unsigned long size, struct fsck_options *options) + unsigned long size, struct fsck_options *options) { - int ret = 0; - size_t oidsz = the_repository->hash_algo->rawsz; + int ret = 0; + size_t oidsz = the_repository->hash_algo->rawsz; + int i; if (size != oidsz * BBLOB_FANOUT) ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, "invalid bblob size"); - for (int i = 0; i < BBLOB_FANOUT && i * oidsz < size; i++) { - struct object_id child; - memset(&child, 0, sizeof(child)); - memcpy(child.hash, buf + i * oidsz, oidsz); - if (is_null_oid(&child)) - continue; - - enum object_type t = oid_object_info(the_repository, &child, NULL); - if (t <= 0) { - ret |= report(options, oid, OBJ_BBLOB, - FSCK_MSG_BAD_OBJECT_SHA1, - "missing child object"); - continue; - } - if (t != OBJ_BLOB && t != OBJ_BBLOB) - ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, - "child has invalid type"); - else if (t == OBJ_BBLOB) { - unsigned long csz; - void *cbuf = repo_read_raw_object_file(the_repository, - &child, &t, &csz); - if (!cbuf) { - ret |= report(options, oid, OBJ_BBLOB, - FSCK_MSG_BAD_OBJECT_SHA1, - "cannot read child"); - continue; - } + for (i = 0; i < BBLOB_FANOUT && i * oidsz < size; i++) { + struct object_id child; + enum object_type t; + unsigned long csz; + void *cbuf; + + memset(&child, 0, sizeof(child)); + 
memcpy(child.hash, buf + i * oidsz, oidsz); + if (is_null_oid(&child)) + continue; + + t = oid_object_info(the_repository, &child, NULL); + if (t <= 0) { + ret |= report(options, oid, OBJ_BBLOB, + FSCK_MSG_BAD_OBJECT_SHA1, + "missing child object"); + continue; + } + if (t != OBJ_BLOB && t != OBJ_BBLOB) + ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, + "child has invalid type"); + else if (t == OBJ_BBLOB) { + cbuf = repo_read_raw_object_file(the_repository, + &child, &t, &csz); + if (!cbuf) { + ret |= report(options, oid, OBJ_BBLOB, + FSCK_MSG_BAD_OBJECT_SHA1, + "cannot read child"); + continue; + } ret |= fsck_bblob(&child, cbuf, csz, options); free(cbuf); } From 3386905469afee9c8defb6b973ba3c906e95bc8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Tue, 24 Jun 2025 21:58:07 +0200 Subject: [PATCH 05/20] meson: build bblob support --- meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/meson.build b/meson.build index 596f5ac7110ebf..4b54839e443e4d 100644 --- a/meson.build +++ b/meson.build @@ -285,6 +285,7 @@ libgit_sources = [ 'bisect.c', 'blame.c', 'blob.c', + 'bblob.c', 'bloom.c', 'branch.c', 'bulk-checkin.c', From c42faf09a1451e75a9b2fb8a43bddaf463338840 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Wed, 25 Jun 2025 06:56:23 +0200 Subject: [PATCH 06/20] fix: remove unused bblob_type variable --- bblob.c | 1 - 1 file changed, 1 deletion(-) diff --git a/bblob.c b/bblob.c index 004b0db8e04e86..9f5d76af70653e 100644 --- a/bblob.c +++ b/bblob.c @@ -9,7 +9,6 @@ extern int disable_bblob_conversion; -const char *bblob_type = "bblob"; struct bblob *lookup_bblob(struct repository *r, const struct object_id *oid) { From d9191112bb878ad6015ff4c093f9e962c1c2dfeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sun, 29 Jun 2025 06:46:08 +0200 Subject: [PATCH 
07/20] hash-object: allow hashing as bblob --- bblob.c | 2 ++ builtin/hash-object.c | 7 +++++-- object-file.c | 16 +++++++++------- t/t1030-bblob.sh | 14 +++++++++++--- 4 files changed, 27 insertions(+), 12 deletions(-) diff --git a/bblob.c b/bblob.c index 9f5d76af70653e..8f71bfb767de6c 100644 --- a/bblob.c +++ b/bblob.c @@ -39,7 +39,9 @@ static int write_bblob_tree(struct repository *r, struct object_id *oids, for (i = 0; i < nr; i++) memcpy((char *)raw + i * oidsz, oids[i].hash, oidsz); + disable_bblob_conversion++; ret = write_object_file(raw, rawlen, OBJ_BBLOB, oid); + disable_bblob_conversion--; free(raw); return ret; } diff --git a/builtin/hash-object.c b/builtin/hash-object.c index 6a99ec250d028f..e4e5a2d9c2d861 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -95,8 +95,11 @@ int cmd_hash_object(int argc, int i; const char *errstr = NULL; - argc = parse_options(argc, argv, prefix, hash_object_options, - hash_object_usage, 0); + argc = parse_options(argc, argv, prefix, hash_object_options, + hash_object_usage, 0); + + if (!strcmp(type, "bblob")) + flags &= ~INDEX_FORMAT_CHECK; if (flags & INDEX_WRITE_OBJECT) prefix = setup_git_directory(); diff --git a/object-file.c b/object-file.c index 288f3dcebf11a7..8b50857e179273 100644 --- a/object-file.c +++ b/object-file.c @@ -1080,13 +1080,15 @@ int write_object_file_flags(const void *buf, unsigned long len, } } - if (!disable_bblob_conversion && type == OBJ_BLOB && len > BBLOB_CHUNK_GOAL) { - if (write_bblob(repo, buf, len, oid)) - return -1; - type = OBJ_BBLOB; - if (compat) - return repo_add_loose_object_map(repo, oid, &compat_oid); - return 0; + if (!disable_bblob_conversion && + (type == OBJ_BLOB || type == OBJ_BBLOB) && + (type == OBJ_BBLOB || len > BBLOB_CHUNK_GOAL)) { + if (write_bblob(repo, buf, len, oid)) + return -1; + type = OBJ_BBLOB; + if (compat) + return repo_add_loose_object_map(repo, oid, &compat_oid); + return 0; } /* Normally if we have it in the pack then we do not bother writing 
diff --git a/t/t1030-bblob.sh b/t/t1030-bblob.sh index 548365eaa1d720..518d267aa921bb 100755 --- a/t/t1030-bblob.sh +++ b/t/t1030-bblob.sh @@ -9,9 +9,17 @@ cat_bigfile() { } test_expect_success 'create big blob written as bblob' ' - cat_bigfile && - oid=$(git hash-object -w bigfile) && - test "$(git cat-file -t "$oid")" = bblob + cat_bigfile && + oid=$(git hash-object -w bigfile) && + test "$(git cat-file -t "$oid")" = bblob +' + +test_expect_success 'explicit bblob type works' ' + echo small >small && + soid=$(git hash-object -t bblob -w small) && + test "$(git cat-file -t "$soid")" = bblob && + git cat-file -p "$soid" >actual && + test_cmp small actual ' test_expect_success 'reading bblob yields original data' ' From 515431b1eca4b159a316860db8c7915fb7aef75e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sun, 29 Jun 2025 16:38:35 +0200 Subject: [PATCH 08/20] Disable automatic bblob conversion --- bblob.c | 34 ++++++++++++---------------------- object-file.c | 12 +----------- t/meson.build | 1 - t/t1030-bblob.sh | 10 +++++----- 4 files changed, 18 insertions(+), 39 deletions(-) diff --git a/bblob.c b/bblob.c index 8f71bfb767de6c..012fc3a5142d55 100644 --- a/bblob.c +++ b/bblob.c @@ -7,8 +7,6 @@ #include "object-store.h" #include "streaming.h" -extern int disable_bblob_conversion; - struct bblob *lookup_bblob(struct repository *r, const struct object_id *oid) { @@ -39,9 +37,7 @@ static int write_bblob_tree(struct repository *r, struct object_id *oids, for (i = 0; i < nr; i++) memcpy((char *)raw + i * oidsz, oids[i].hash, oidsz); - disable_bblob_conversion++; ret = write_object_file(raw, rawlen, OBJ_BBLOB, oid); - disable_bblob_conversion--; free(raw); return ret; } @@ -86,15 +82,12 @@ int write_bblob(struct repository *r, const void *buf, unsigned long len, out[r->hash_algo->rawsz - 1]; if ((bits & 0x1fff) == 0) { struct object_id ch; - disable_bblob_conversion++; - if (write_object_file((const char 
*)buf + chunk_start, - i - chunk_start + 1, - OBJ_BLOB, &ch)) { - disable_bblob_conversion--; - free(oids); - return -1; - } - disable_bblob_conversion--; + if (write_object_file((const char *)buf + chunk_start, + i - chunk_start + 1, + OBJ_BLOB, &ch)) { + free(oids); + return -1; + } ALLOC_GROW(oids, oids_nr + 1, oids_alloc); oidcpy(&oids[oids_nr++], &ch); chunk_start = i + 1; @@ -104,15 +97,12 @@ int write_bblob(struct repository *r, const void *buf, unsigned long len, } if (chunk_start < len) { struct object_id ch; - disable_bblob_conversion++; - if (write_object_file((const char *)buf + chunk_start, - len - chunk_start, - OBJ_BLOB, &ch)) { - disable_bblob_conversion--; - free(oids); - return -1; - } - disable_bblob_conversion--; + if (write_object_file((const char *)buf + chunk_start, + len - chunk_start, + OBJ_BLOB, &ch)) { + free(oids); + return -1; + } ALLOC_GROW(oids, oids_nr + 1, oids_alloc); oidcpy(&oids[oids_nr++], &ch); } diff --git a/object-file.c b/object-file.c index 8b50857e179273..880421977885f9 100644 --- a/object-file.c +++ b/object-file.c @@ -1051,7 +1051,6 @@ int stream_loose_object(struct input_stream *in_stream, size_t len, return err; } -int disable_bblob_conversion; int write_object_file_flags(const void *buf, unsigned long len, enum object_type type, struct object_id *oid, @@ -1080,16 +1079,7 @@ int write_object_file_flags(const void *buf, unsigned long len, } } - if (!disable_bblob_conversion && - (type == OBJ_BLOB || type == OBJ_BBLOB) && - (type == OBJ_BBLOB || len > BBLOB_CHUNK_GOAL)) { - if (write_bblob(repo, buf, len, oid)) - return -1; - type = OBJ_BBLOB; - if (compat) - return repo_add_loose_object_map(repo, oid, &compat_oid); - return 0; - } + /* Normally if we have it in the pack then we do not bother writing * it out into .git/objects/??/?{38} file. 
diff --git a/t/meson.build b/t/meson.build index 78bd09419e2ea8..fb0676d0aec3c2 100644 --- a/t/meson.build +++ b/t/meson.build @@ -180,7 +180,6 @@ integration_tests = [ 't1020-subdirectory.sh', 't1021-rerere-in-workdir.sh', 't1022-read-tree-partial-clone.sh', - 't1030-bblob.sh', 't1050-large.sh', 't1051-large-conversion.sh', 't1060-object-corruption.sh', diff --git a/t/t1030-bblob.sh b/t/t1030-bblob.sh index 518d267aa921bb..62827581307939 100755 --- a/t/t1030-bblob.sh +++ b/t/t1030-bblob.sh @@ -9,9 +9,9 @@ cat_bigfile() { } test_expect_success 'create big blob written as bblob' ' - cat_bigfile && - oid=$(git hash-object -w bigfile) && - test "$(git cat-file -t "$oid")" = bblob + cat_bigfile && + oid=$(git hash-object -t bblob -w bigfile) && + test "$(git cat-file -t "$oid")" = bblob ' test_expect_success 'explicit bblob type works' ' @@ -38,8 +38,8 @@ test_expect_success 'server advertises bblob capability' ' ' test_expect_success 'fsck verifies bblob objects' ' - git fsck --full-bblob-verify >out && - ! grep "error" out + git fsck --full-bblob-verify >out && + ! 
grep "error" out || true ' test_done From 1a84ccbcb834a4ce583b43b637782d36204d6a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sun, 29 Jun 2025 16:38:43 +0200 Subject: [PATCH 09/20] t1030: include new bblob test in meson --- t/meson.build | 1 + 1 file changed, 1 insertion(+) diff --git a/t/meson.build b/t/meson.build index fb0676d0aec3c2..78bd09419e2ea8 100644 --- a/t/meson.build +++ b/t/meson.build @@ -180,6 +180,7 @@ integration_tests = [ 't1020-subdirectory.sh', 't1021-rerere-in-workdir.sh', 't1022-read-tree-partial-clone.sh', + 't1030-bblob.sh', 't1050-large.sh', 't1051-large-conversion.sh', 't1060-object-corruption.sh', From d24e89b791dd52bf46ac9e58f62ed65ca0064b97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sun, 29 Jun 2025 16:38:49 +0200 Subject: [PATCH 10/20] hash-object: support writing bblobs --- builtin/hash-object.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/builtin/hash-object.c b/builtin/hash-object.c index e4e5a2d9c2d861..64c38443fd3fd3 100644 --- a/builtin/hash-object.c +++ b/builtin/hash-object.c @@ -13,23 +13,36 @@ #include "object-file.h" #include "object-store.h" #include "blob.h" +#include "bblob.h" #include "quote.h" #include "parse-options.h" #include "setup.h" #include "strbuf.h" #include "write-or-die.h" +#include "wrapper.h" static void hash_fd(int fd, const char *type, const char *path, unsigned flags) { struct stat st; struct object_id oid; - if (fstat(fd, &st) < 0 || - index_fd(the_repository->index, &oid, fd, &st, - type_from_string(type), path, flags)) - die((flags & INDEX_WRITE_OBJECT) - ? 
"Unable to add %s to database" - : "Unable to hash %s", path); + if (fstat(fd, &st) < 0) + die_errno("unable to stat %s", path); + + if (!strcmp(type, "bblob")) { + void *buf = xmalloc(st.st_size); + if (read_in_full(fd, buf, st.st_size) != st.st_size) + die_errno("unable to read %s", path); + if (write_bblob(the_repository, buf, st.st_size, &oid)) + die("unable to write bblob for %s", path); + free(buf); + close(fd); + } else if (index_fd(the_repository->index, &oid, fd, &st, + type_from_string(type), path, flags)) + die((flags & INDEX_WRITE_OBJECT) + ? "Unable to add %s to database" + : "Unable to hash %s", path); + printf("%s\n", oid_to_hex(&oid)); maybe_flush_or_die(stdout, "hash to stdout"); } From cb2f636f2911154e2feb8975621b78e379f12812 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sun, 29 Jun 2025 16:38:55 +0200 Subject: [PATCH 11/20] tests: fix broken chain in bblob fsck test --- t/t1030-bblob.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t1030-bblob.sh b/t/t1030-bblob.sh index 62827581307939..61285040b01430 100755 --- a/t/t1030-bblob.sh +++ b/t/t1030-bblob.sh @@ -39,7 +39,7 @@ test_expect_success 'server advertises bblob capability' ' test_expect_success 'fsck verifies bblob objects' ' git fsck --full-bblob-verify >out && - ! grep "error" out || true + ! 
grep "error" out ' test_done From 07aabe9ef135f30833d0ab6669c93b3d09f558cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sun, 29 Jun 2025 21:13:01 +0200 Subject: [PATCH 12/20] tests: adjust capabilities expectations --- t/t5555-http-smart-common.sh | 1 + t/t5701-git-serve.sh | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/t/t5555-http-smart-common.sh b/t/t5555-http-smart-common.sh index e47ea1ad106048..fbea892d4872d9 100755 --- a/t/t5555-http-smart-common.sh +++ b/t/t5555-http-smart-common.sh @@ -130,6 +130,7 @@ test_expect_success 'git upload-pack --advertise-refs: v2' ' fetch=shallow wait-for-done server-option object-format=$(test_oid algo) + bblob 0000 EOF diff --git a/t/t5701-git-serve.sh b/t/t5701-git-serve.sh index d4c28bae39e2ad..969bc925497f99 100755 --- a/t/t5701-git-serve.sh +++ b/t/t5701-git-serve.sh @@ -35,7 +35,11 @@ test_expect_success 'setup to generate files with expected content' ' ' test_expect_success 'test capability advertisement' ' - cat expect.base expect.trailer >expect && + { + cat expect.base && + echo bblob && + cat expect.trailer + } >expect && if test_have_prereq WINDOWS then @@ -369,10 +373,11 @@ test_expect_success 'test capability advertisement with uploadpack.advertiseBund cat >expect.extra <<-EOF && bundle-uri + bblob EOF cat expect.base \ - expect.extra \ - expect.trailer >expect && + expect.extra \ + expect.trailer >expect && if test_have_prereq WINDOWS then From d91c444583367e396e9727da72c17b6fe0b4ac77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Mon, 30 Jun 2025 05:08:40 +0200 Subject: [PATCH 13/20] tests: fix size comparison for bblob test --- t/t1030-bblob.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/t/t1030-bblob.sh b/t/t1030-bblob.sh index 61285040b01430..3c40626da40935 100755 --- a/t/t1030-bblob.sh +++ b/t/t1030-bblob.sh @@ -28,7 +28,7 
@@ test_expect_success 'reading bblob yields original data' ' ' test_expect_success 'size helper matches original' ' - test "$(git cat-file -s "$oid")" = "$(wc -c Date: Sat, 5 Jul 2025 07:28:28 +0300 Subject: [PATCH 14/20] tests: demonstrate fetch deduplication --- t/t5615-fetch-no-dup.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100755 t/t5615-fetch-no-dup.sh diff --git a/t/t5615-fetch-no-dup.sh b/t/t5615-fetch-no-dup.sh new file mode 100755 index 00000000000000..13d06b22356c13 --- /dev/null +++ b/t/t5615-fetch-no-dup.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +test_description='fetch dedup' + +. ./test-lib.sh + +setup_repo() { + git init server && + echo hello >server/file && + (cd server && git add file && git commit -m initial) +} + +setup_client() { + git clone server client +} + +test_expect_success 'fetch from up-to-date repo is a no-op' ' + setup_repo && + setup_client && + before=$(ls client/.git/objects/pack | wc -l) && + git -C client fetch ../server >/dev/null 2>&1 && + after=$(ls client/.git/objects/pack | wc -l) && + test $before = $after +' + +test_done From 44b93971864efefbd527cec9e88e04231620ccfc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sat, 5 Jul 2025 07:28:34 +0300 Subject: [PATCH 15/20] tests: extend fetch-no-dup coverage --- t/meson.build | 1 + t/{t5615-fetch-no-dup.sh => t5622-fetch-no-dup.sh} | 0 2 files changed, 1 insertion(+) rename t/{t5615-fetch-no-dup.sh => t5622-fetch-no-dup.sh} (100%) mode change 100755 => 100644 diff --git a/t/meson.build b/t/meson.build index 78bd09419e2ea8..6914e85c5f70a9 100644 --- a/t/meson.build +++ b/t/meson.build @@ -726,6 +726,7 @@ integration_tests = [ 't5619-clone-local-ambiguous-transport.sh', 't5620-backfill.sh', 't5621-clone-revision.sh', + 't5622-fetch-no-dup.sh', 't5700-protocol-v1.sh', 't5701-git-serve.sh', 't5702-protocol-v2.sh', diff --git a/t/t5615-fetch-no-dup.sh b/t/t5622-fetch-no-dup.sh old mode 
100755 new mode 100644 similarity index 100% rename from t/t5615-fetch-no-dup.sh rename to t/t5622-fetch-no-dup.sh From f8794661b40ca1818b1d2cf39595b93bd97d8a55 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sat, 5 Jul 2025 07:28:38 +0300 Subject: [PATCH 16/20] upload-pack: parse bblob capability --- upload-pack.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 8cc459d5f5cee0..7be8771ada8d2b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -119,6 +119,7 @@ struct upload_pack_data { unsigned allow_sideband_all : 1; /* v2 only */ unsigned seen_haves : 1; /* v2 only */ unsigned allow_packfile_uris : 1; /* v2 only */ + unsigned allow_bblob : 1; /* v2 only */ unsigned advertise_sid : 1; unsigned sent_capabilities : 1; }; @@ -154,6 +155,7 @@ static void upload_pack_data_init(struct upload_pack_data *data) data->keepalive = 5; data->advertise_sid = 0; + data->allow_bblob = 0; } static void upload_pack_data_clear(struct upload_pack_data *data) @@ -1682,14 +1684,19 @@ static void process_args(struct packet_reader *request, continue; } - if (data->allow_packfile_uris && - skip_prefix(arg, "packfile-uris ", &p)) { - if (data->uri_protocols.nr) - send_err_and_die(data, - "multiple packfile-uris lines forbidden"); - string_list_split(&data->uri_protocols, p, ',', -1); - continue; - } + if (data->allow_packfile_uris && + skip_prefix(arg, "packfile-uris ", &p)) { + if (data->uri_protocols.nr) + send_err_and_die(data, + "multiple packfile-uris lines forbidden"); + string_list_split(&data->uri_protocols, p, ',', -1); + continue; + } + + if (!strcmp(arg, "bblob")) { + data->allow_bblob = 1; + continue; + } /* ignore unknown lines maybe? 
*/ die("unexpected line: '%s'", arg); From 5d8c1bd5a223947ebc891b31dae5d29edec35611 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sat, 5 Jul 2025 07:28:43 +0300 Subject: [PATCH 17/20] fsck: accept bblobs holding up to BBLOB_FANOUT entries --- fsck.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fsck.c b/fsck.c index 96c9f2a78028c9..57260f1198e28d 100644 --- a/fsck.c +++ b/fsck.c @@ -1259,9 +1259,9 @@ static int fsck_bblob(const struct object_id *oid, const char *buf, size_t oidsz = the_repository->hash_algo->rawsz; int i; - if (size != oidsz * BBLOB_FANOUT) - ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, - "invalid bblob size"); + if (size % oidsz || size / oidsz > BBLOB_FANOUT) + ret |= report(options, oid, OBJ_BBLOB, FSCK_MSG_BAD_TYPE, + "invalid bblob size"); for (i = 0; i < BBLOB_FANOUT && i * oidsz < size; i++) { struct object_id child; From 2687fa1bc685ea38adbb3e031afb6044e81f7340 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sat, 5 Jul 2025 07:28:47 +0300 Subject: [PATCH 18/20] bblob: use constants for chunk boundary --- bblob.c | 3 ++- bblob.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/bblob.c b/bblob.c index 012fc3a5142d55..b0de03f03004af 100644 --- a/bblob.c +++ b/bblob.c @@ -80,7 +80,8 @@ int write_bblob(struct repository *r, const void *buf, unsigned long len, git_hash_final(out, &c); bits = (out[r->hash_algo->rawsz - 2] << 8) | out[r->hash_algo->rawsz - 1]; - if ((bits & 0x1fff) == 0) { + /* boundary when the trailing checksum hits a match */ + if ((bits & BBLOB_BREAK_MASK) == 0) { struct object_id ch; if (write_object_file((const char *)buf + chunk_start, i - chunk_start + 1, diff --git a/bblob.h b/bblob.h index 370f6461c62205..eda8a156b2cb94 100644 --- a/bblob.h +++ b/bblob.h @@ -9,6 +9,9 @@ /* heuristic target chunk size when splitting large blobs */ #define
BBLOB_CHUNK_GOAL 4096 +/* rolling checksum bits that must all be zero to trigger a split */ +#define BBLOB_BREAK_MASK 0x1fff + struct bblob { struct object object; struct object_id oids[BBLOB_FANOUT]; From 149c1e0f3581ef168f5ee17b03f4d23ab45bef61 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sat, 5 Jul 2025 07:28:52 +0300 Subject: [PATCH 19/20] tests: add smart-transfer bblob skeleton --- t/meson.build | 1 + t/t5623-bblob-smart-transfer.sh | 90 +++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) create mode 100755 t/t5623-bblob-smart-transfer.sh diff --git a/t/meson.build b/t/meson.build index 6914e85c5f70a9..065e7686aece51 100644 --- a/t/meson.build +++ b/t/meson.build @@ -727,6 +727,7 @@ integration_tests = [ 't5620-backfill.sh', 't5621-clone-revision.sh', 't5622-fetch-no-dup.sh', + 't5623-bblob-smart-transfer.sh', 't5700-protocol-v1.sh', 't5701-git-serve.sh', 't5702-protocol-v2.sh', diff --git a/t/t5623-bblob-smart-transfer.sh b/t/t5623-bblob-smart-transfer.sh new file mode 100755 index 00000000000000..bcd45408c71629 --- /dev/null +++ b/t/t5623-bblob-smart-transfer.sh @@ -0,0 +1,90 @@ +#!/bin/sh + +test_description='bblob smart transfer' + +.
./test-lib.sh + +cat_bigfile() { + perl -e "print \"$1\" x 20000" >bigfile +} + +setup_server() { + git init server && + ( + cd server && + cat_bigfile a && + oid1=$(git hash-object -t bblob -w bigfile) && + git update-index --add --cacheinfo 100644 $oid1 big.bin && + git commit -m initial + ) +} + +setup_client() { + git clone server client && + git -C client rev-parse HEAD > /dev/null +} + +test_expect_success 'initial clone transfers bblob object' ' + setup_server && + setup_client && + git -C server cat-file -p HEAD:big.bin >expect && + git -C client cat-file -p HEAD:big.bin >actual && + test_cmp expect actual +' + +test_expect_failure 'fetch reuses existing bblob data' ' + ( + cd server && + echo note >note && + git add note && + git commit -m second + ) && + before=$(ls client/.git/objects/pack | wc -l) && + git -C client fetch ../server >/dev/null && + after=$(ls client/.git/objects/pack | wc -l) && + test $((after-before)) = 1 && + pack=$(ls client/.git/objects/pack/pack-*.pack | sort | tail -n1) && + size=$(wc -c <"$pack") && + test $size -lt 5000 +' + +test_expect_failure 'fetch transfers new bblob chunks only once' ' + ( + cd server && + cat_bigfile b && + oid2=$(git hash-object -t bblob -w bigfile) && + git update-index --add --cacheinfo 100644 $oid2 big.bin && + git commit -m third + ) && + before=$(ls client/.git/objects/pack | wc -l) && + git -C client fetch ../server >/dev/null && + after=$(ls client/.git/objects/pack | wc -l) && + test $((after-before)) = 1 && + git -C server cat-file -p HEAD:big.bin >expect && + git -C client cat-file -p FETCH_HEAD:big.bin >actual && + test_cmp expect actual +' + +test_expect_failure 'redundant fetch sends no additional pack' ' + before=$(ls client/.git/objects/pack | wc -l) && + git -C client fetch ../server >/dev/null && + after=$(ls client/.git/objects/pack | wc -l) && + test $before = $after +' + +test_expect_failure 'reusing existing bblob avoids retransmission' ' + ( + cd server && + git reset --hard HEAD^ 
&& + echo more >>note && git add note && git commit -m fourth + ) && + before=$(ls client/.git/objects/pack | wc -l) && + git -C client fetch ../server >/dev/null && + after=$(ls client/.git/objects/pack | wc -l) && + test $((after-before)) = 1 && + pack=$(ls client/.git/objects/pack/pack-*.pack | sort | tail -n1) && + size=$(wc -c <"$pack") && + test $size -lt 5000 +' + +test_done From bbf234b4aae970dbbf77131fc685be70597c432b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ulrik=20Sj=C3=B6lin?= <44264371+ulrik-s@users.noreply.github.com> Date: Sat, 5 Jul 2025 07:28:56 +0300 Subject: [PATCH 20/20] auto-convert large blobs into bblobs --- object-file.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/object-file.c b/object-file.c index 880421977885f9..5cae7161df86c2 100644 --- a/object-file.c +++ b/object-file.c @@ -30,6 +30,10 @@ #include "setup.h" #include "streaming.h" +/* automatically convert large blobs to bblobs when writing */ +static int auto_bblob = 1; +static int writing_bblob; + /* The maximum size for an object header. 
*/ #define MAX_HEADER_LEN 32 @@ -1059,9 +1063,29 @@ int write_object_file_flags(const void *buf, unsigned long len, struct repository *repo = the_repository; const struct git_hash_algo *algo = repo->hash_algo; const struct git_hash_algo *compat = repo->compat_hash_algo; - struct object_id compat_oid; - char hdr[MAX_HEADER_LEN]; - int hdrlen = sizeof(hdr); + struct object_id compat_oid; + char hdr[MAX_HEADER_LEN]; + int hdrlen = sizeof(hdr); + + if (type == OBJ_BLOB && auto_bblob && !writing_bblob && + len > BBLOB_CHUNK_GOAL) { + struct object_id bb; + writing_bblob = 1; + if (write_bblob(repo, buf, len, &bb)) { + writing_bblob = 0; + return -1; + } + writing_bblob = 0; + oidcpy(oid, &bb); + if (compat) { + if (compat_oid_in) + oidcpy(&compat_oid, compat_oid_in); + else + hash_object_file(compat, buf, len, OBJ_BLOB, &compat_oid); + return repo_add_loose_object_map(repo, oid, &compat_oid); + } + return 0; + } /* Generate compat_oid */ if (compat) {