diff --git a/Documentation/technical/native-odb-api.txt b/Documentation/technical/native-odb-api.txt
new file mode 100644
index 00000000000000..df6552b7f1c7be
--- /dev/null
+++ b/Documentation/technical/native-odb-api.txt
@@ -0,0 +1,54 @@
+Native ODB API overview
+=======================
+
+Git's native object database (ODB) exposes `struct object_database` and
+`struct odb_source` as the central data structures for working with local
+and alternate object stores (see `odb.h`).
+The API provides helpers to create a database (`odb_new()`), attach paths as
+object sources, and read or write objects through functions such as
+`odb_write_object_ext()` that operate on the local repository's primary
+object directory (see `odb.h`).
+
+A consumer that wants to experiment with custom storage can allocate its own
+ODB using `odb_new()`, populate `struct odb_source` entries, and reuse Git's
+object hashing helpers (for example `hash_object_file()`) to stay compatible
+with Git's loose-object format (see `object-file.h` and `odb.c`).
+
+Simple ODB example helper
+-------------------------
+
+The `test-tool simple-odb` helper demonstrates a minimal object database that
+stores entries in a single text file. It hashes payloads using
+`hash_object_file()`, encodes the data as hexadecimal, and records the type,
+size, and object ID for later retrieval (see `t/helper/test-simple-odb.c`).
+The helper exposes commands to initialize the store, append blobs, list the
+stored object IDs, and read an object's payload back onto disk while reporting
+its type.
+
+The accompanying regression test (`t/t0039-simple-odb.sh`) uses the helper to
+write and read blobs, verifying both the listing order and payload
+round-tripping to illustrate how an alternative backing store can interoperate
+with Git's hashing rules.
+
+Comparison with other ODB APIs
+------------------------------
+
+* **libgit2** exposes a `git_odb` type with pluggable backends and callbacks
+  that are registered globally. Implementers provide `read`, `write`, and
+  iteration function pointers, but the integration is centered around a single
+  multi-backend registry instead of Git's notion of one primary source plus a
+  linked list of alternates.
+* **gitoxide** (gix) models its ODB as a layered `gix_odb::Store`, combining
+  a cache and multiple stores selected via configuration. Custom stores
+  implement the `Store` trait and are typically used by wiring them into a
+  `Repository` configuration object.
+
+Key compatibility considerations
+--------------------------------
+
+Git's native API must preserve backwards compatibility with repositories that
+may be accessed by older clients, so helpers should reuse Git's hashing
+functions and object formats rather than inventing new on-disk layouts. This is
+why the example focuses on hashing payloads with `hash_object_file()` and
+expressing data in a reversible textual format, mirroring how loose objects are
+validated today.
diff --git a/Makefile b/Makefile
index 7ea149598d8ed8..be81fd1bc08fbf 100644
--- a/Makefile
+++ b/Makefile
@@ -854,6 +854,7 @@ TEST_BUILTINS_OBJS += test-sha1.o
 TEST_BUILTINS_OBJS += test-sha256.o
 TEST_BUILTINS_OBJS += test-sigchain.o
 TEST_BUILTINS_OBJS += test-simple-ipc.o
+TEST_BUILTINS_OBJS += test-simple-odb.o
 TEST_BUILTINS_OBJS += test-string-list.o
 TEST_BUILTINS_OBJS += test-submodule-config.o
 TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
diff --git a/t/helper/test-simple-odb.c b/t/helper/test-simple-odb.c
new file mode 100644
index 00000000000000..a8f3e996a627ca
--- /dev/null
+++ b/t/helper/test-simple-odb.c
@@ -0,0 +1,361 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "hash.h"
+#include "hex.h"
+#include "object-file.h"
+#include "object.h"
+#include "setup.h"
+#include "strbuf.h"
+
+/*
+ * On-disk layout of a "simple odb": one header line naming the hash
+ * algorithm, then two lines per object:
+ *
+ *   simple-odb v1 <hash-algo>
+ *   <oid> <type> <size>
+ *   <payload encoded as lowercase hex>
+ */
+struct simple_odb_header {
+	const struct git_hash_algo *algo;
+};
+
+static const char simple_odb_magic[] = "simple-odb v1";
+
+/* Emit the header line recording which hash algorithm the store uses. */
+static void simple_odb_write_header(FILE *f, const struct git_hash_algo *algo)
+{
+	fprintf(f, "%s %s\n", simple_odb_magic, algo->name);
+}
+
+/*
+ * Parse the first line of `f` into `out`. Dies when the line is missing,
+ * does not start with the magic string, or names an unknown algorithm.
+ */
+static void simple_odb_read_header(FILE *f, struct simple_odb_header *out)
+{
+	struct strbuf line = STRBUF_INIT;
+	const char *algo_name;
+	int algo;
+
+	if (strbuf_getline_lf(&line, f) == EOF)
+		die("invalid simple odb file: missing header");
+
+	if (!skip_prefix(line.buf, simple_odb_magic, &algo_name) ||
+	    *algo_name != ' ')
+		die("invalid simple odb header: '%s'", line.buf);
+
+	algo_name++;
+	/* Lookup failure is reported as GIT_HASH_UNKNOWN (0), never < 0. */
+	algo = hash_algo_by_name(algo_name);
+	if (algo == GIT_HASH_UNKNOWN)
+		die("unknown hash algorithm '%s' in simple odb", algo_name);
+
+	out->algo = &hash_algos[algo];
+	strbuf_release(&line);
+}
+
+/*
+ * Pick the hash algorithm for a new store: the enclosing repository's
+ * algorithm when one is discovered, SHA-1 otherwise.
+ */
+static const struct git_hash_algo *detect_default_algo(void)
+{
+	int nongit_ok = 0;
+
+	setup_git_directory_gently(&nongit_ok);
+	if (!nongit_ok && the_repository->hash_algo)
+		return the_repository->hash_algo;
+
+	return &hash_algos[GIT_HASH_SHA1];
+}
+
+/*
+ * Open the store at `path` with stdio `mode`. Modes containing 'r' or '+'
+ * get their header validated into `header`; a '+' mode without 'r' (i.e.
+ * "a+") is then repositioned at end-of-file, ready for appending.
+ */
+static FILE *simple_odb_open(const char *path, const char *mode,
+			     struct simple_odb_header *header)
+{
+	FILE *f = xfopen(path, mode);
+
+	if (strchr(mode, 'r')) {
+		rewind(f);
+		simple_odb_read_header(f, header);
+	} else if (strchr(mode, '+')) {
+		rewind(f);
+		simple_odb_read_header(f, header);
+		if (fseek(f, 0, SEEK_END))
+			die_errno("unable to seek in '%s'", path);
+	}
+
+	return f;
+}
+
+/* Append `len` bytes of `data` to `out` as lowercase hex digits. */
+static void simple_odb_encode_hex(struct strbuf *out, const void *data, size_t len)
+{
+	static const char hex[] = "0123456789abcdef";
+	const unsigned char *bytes = data;
+
+	strbuf_grow(out, len * 2);
+	for (size_t i = 0; i < len; i++) {
+		strbuf_addch(out, hex[bytes[i] >> 4]);
+		strbuf_addch(out, hex[bytes[i] & 0x0f]);
+	}
+}
+
+/*
+ * Replace the contents of `out` with the bytes encoded by the `hexlen`
+ * hex digits at `hex`. Decoding completes before `out` is modified, which
+ * matters because simple_odb_read_entry() passes `out`'s own buffer as `hex`.
+ */
+static void simple_odb_decode_hex(struct strbuf *out, const char *hex, size_t hexlen)
+{
+	size_t bytes;
+	char *buf;
+
+	if (hexlen % 2)
+		die("invalid hex payload length");
+
+	bytes = hexlen / 2;
+	buf = xmalloc(bytes + 1);
+	if (hex_to_bytes((unsigned char *)buf, hex, bytes))
+		die("invalid hex payload data");
+	buf[bytes] = '\0';
+	strbuf_attach(out, buf, bytes, bytes + 1);
+}
+
+/* Create a new, empty store at `path`; dies if the path already exists. */
+static int simple_odb_init(const char *path, const struct git_hash_algo *algo)
+{
+	FILE *f;
+
+	if (!access(path, F_OK))
+		die("simple odb '%s' already exists", path);
+
+	f = xfopen(path, "w");
+	simple_odb_write_header(f, algo);
+	/* A failed write may only surface when the stream is flushed. */
+	if (ferror(f) || fclose(f))
+		die_errno("unable to write simple odb '%s'", path);
+	return 0;
+}
+
+/*
+ * Hash `payload` as an object of `type` using the store's algorithm,
+ * append the entry to the store at `path`, and return its ID in `oid`.
+ */
+static int simple_odb_write_entry(const char *path, enum object_type type,
+				  struct strbuf *payload, struct object_id *oid)
+{
+	FILE *f;
+	struct simple_odb_header header;
+	struct strbuf hex = STRBUF_INIT;
+	char oid_hex[GIT_MAX_HEXSZ + 1];
+
+	f = simple_odb_open(path, "a+", &header);
+
+	hash_object_file(header.algo, payload->buf, payload->len, type, oid);
+	oid_to_hex_r(oid_hex, oid);
+	simple_odb_encode_hex(&hex, payload->buf, payload->len);
+
+	fprintf(f, "%s %s %" PRIuMAX "\n", oid_hex, type_name(type), (uintmax_t)payload->len);
+	fwrite(hex.buf, 1, hex.len, f);
+	fputc('\n', f);
+	/* The stream's error flag is sticky, so one check covers all writes. */
+	if (ferror(f) || fclose(f))
+		die_errno("unable to append to simple odb '%s'", path);
+
+	strbuf_release(&hex);
+	return 0;
+}
+
+/*
+ * Look up `oid` in the store at `path`. On a hit, write the decoded
+ * payload to `out_path` (when non-NULL) and print the object type on
+ * stdout; dies if the object is absent or an entry is malformed.
+ */
+static int simple_odb_read_entry(const char *path, const struct object_id *oid,
+				 const char *out_path)
+{
+	FILE *f;
+	struct simple_odb_header header;
+	struct strbuf line = STRBUF_INIT;
+	struct strbuf data = STRBUF_INIT;
+	struct object_id current;
+	int found = 0;
+
+	f = simple_odb_open(path, "r", &header);
+
+	while (strbuf_getline_lf(&line, f) != EOF) {
+		char *type_str, *size_str;
+		char *endptr;
+		enum object_type type;
+		uintmax_t size;
+
+		if (!line.len)
+			continue;
+
+		/* Split "<oid> <type> <size>" in place. */
+		type_str = strchr(line.buf, ' ');
+		if (!type_str)
+			die("corrupt simple odb entry header");
+		*type_str++ = '\0';
+
+		size_str = strchr(type_str, ' ');
+		if (!size_str)
+			die("corrupt simple odb entry header");
+		*size_str++ = '\0';
+
+		if (get_oid_hex_algop(line.buf, &current, header.algo))
+			die("invalid object id '%s' in simple odb", line.buf);
+
+		type = type_from_string_gently(type_str, -1, 1);
+		if (type < 0)
+			die("invalid type '%s' in simple odb", type_str);
+
+		/* strtoumax() alone accepts "", leading blanks and signs. */
+		errno = 0;
+		size = strtoumax(size_str, &endptr, 10);
+		if (!isdigit(*size_str) || *endptr || errno == ERANGE)
+			die("invalid size '%s' in simple odb", size_str);
+
+		if (strbuf_getline_lf(&data, f) == EOF)
+			die("missing payload in simple odb entry");
+
+		/* Divide rather than compute size * 2, which could wrap. */
+		if (data.len % 2 || data.len / 2 != size)
+			die("corrupt payload for '%s'", line.buf);
+
+		if (!oideq(&current, oid))
+			continue;
+
+		simple_odb_decode_hex(&data, data.buf, data.len);
+		found = 1;
+
+		if (out_path) {
+			int out = xopen(out_path, O_CREAT | O_TRUNC | O_WRONLY, 0666);
+			if (write_in_full(out, data.buf, data.len) < 0)
+				die_errno("unable to write '%s'", out_path);
+			if (close(out))
+				die_errno("unable to close '%s'", out_path);
+		}
+
+		printf("%s\n", type_name(type));
+		break;
+	}
+
+	if (!found)
+		die("object %s not found in simple odb", oid_to_hex(oid));
+
+	strbuf_release(&line);
+	strbuf_release(&data);
+	fclose(f);
+	return 0;
+}
+
+/* Print the object ID of every entry in the store at `path`, in file order. */
+static int simple_odb_list(const char *path)
+{
+	FILE *f;
+	struct simple_odb_header header;
+	struct strbuf line = STRBUF_INIT;
+
+	f = simple_odb_open(path, "r", &header);
+
+	while (strbuf_getline_lf(&line, f) != EOF) {
+		char *type_str;
+
+		if (!line.len)
+			continue;
+
+		type_str = strchr(line.buf, ' ');
+		if (!type_str)
+			die("corrupt simple odb entry header");
+		*type_str = '\0';
+		printf("%s\n", line.buf);
+
+		/* Skip the payload line; its absence is as fatal as when reading. */
+		if (strbuf_getline_lf(&line, f) == EOF)
+			die("missing payload in simple odb entry");
+	}
+
+	strbuf_release(&line);
+	fclose(f);
+	return 0;
+}
+
+/*
+ * Entry point for "test-tool simple-odb":
+ *
+ *   init <path> [algo]        create an empty store
+ *   write <path> <type>       store stdin as an object, print its ID
+ *   read <path> <oid> <out>   write the payload to <out>, print its type
+ *   list <path>               print all object IDs
+ */
+int cmd__simple_odb(int argc, const char **argv)
+{
+	if (argc < 2)
+		die("test-tool simple-odb <command> [args]");
+
+	argv++;
+	argc--;
+
+	if (!strcmp(argv[0], "init")) {
+		const struct git_hash_algo *algo = detect_default_algo();
+
+		if (argc < 2 || argc > 3)
+			die("usage: test-tool simple-odb init <path> [algo]");
+		if (argc == 3) {
+			int idx = hash_algo_by_name(argv[2]);
+
+			/* GIT_HASH_UNKNOWN (0) signals failure, not < 0. */
+			if (idx == GIT_HASH_UNKNOWN)
+				die("unknown hash algorithm '%s'", argv[2]);
+			algo = &hash_algos[idx];
+		}
+		simple_odb_init(argv[1], algo);
+		return 0;
+	} else if (!strcmp(argv[0], "write")) {
+		struct strbuf payload = STRBUF_INIT;
+		struct object_id oid;
+		enum object_type type;
+		char oid_hex[GIT_MAX_HEXSZ + 1];
+
+		if (argc != 3)
+			die("usage: test-tool simple-odb write <path> <type>");
+
+		type = type_from_string_gently(argv[2], -1, 0);
+		if (type < 0)
+			die("unknown object type '%s'", argv[2]);
+
+		if (strbuf_read(&payload, 0, 0) < 0)
+			die_errno("failed to read payload");
+
+		simple_odb_write_entry(argv[1], type, &payload, &oid);
+		oid_to_hex_r(oid_hex, &oid);
+		printf("%s\n", oid_hex);
+		strbuf_release(&payload);
+		return 0;
+	} else if (!strcmp(argv[0], "read")) {
+		struct object_id oid;
+
+		if (argc != 4)
+			die("usage: test-tool simple-odb read <path> <oid> <out>");
+		/* get_oid_hex_any() yields GIT_HASH_UNKNOWN when nothing parses. */
+		if (get_oid_hex_any(argv[2], &oid) == GIT_HASH_UNKNOWN)
+			die("invalid object id '%s'", argv[2]);
+		simple_odb_read_entry(argv[1], &oid, argv[3]);
+		return 0;
+	} else if (!strcmp(argv[0], "list")) {
+		if (argc != 2)
+			die("usage: test-tool simple-odb list <path>");
+		simple_odb_list(argv[1]);
+		return 0;
+	}
+
+	die("unknown simple-odb command '%s'", argv[0]);
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index a7abc618b3887e..02a910404b4885 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -77,6 +77,7 @@ static struct test_cmd cmds[] = {
 	{ "sha256", cmd__sha256 },
 	{ "sigchain", cmd__sigchain },
 	{ "simple-ipc", cmd__simple_ipc },
+	{ "simple-odb", cmd__simple_odb },
 	{ "string-list", cmd__string_list },
 	{ "submodule", cmd__submodule },
 	{ "submodule-config", cmd__submodule_config },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 7f150fa1eb9ad2..5b9e3fe0b0f5d1 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -70,6 +70,7 @@ int cmd__sha1_unsafe(int argc, const char **argv);
 int cmd__sha256(int argc, const char **argv);
 int cmd__sigchain(int argc, const char **argv);
 int cmd__simple_ipc(int argc, const char **argv);
+int cmd__simple_odb(int argc, const char **argv);
 int cmd__string_list(int argc, const char **argv);
 int cmd__submodule(int argc, const char **argv);
 int cmd__submodule_config(int argc, const char **argv);
diff --git a/t/t0039-simple-odb.sh b/t/t0039-simple-odb.sh
new file mode 100755
index 00000000000000..e89045c8c5574a
--- /dev/null
+++ b/t/t0039-simple-odb.sh
@@ -0,0 +1,50 @@
+#!/bin/sh
+
+TEST_NO_CREATE_REPO=1
+
+test_description='exercise the native simple ODB helper'
+
+. ./test-lib.sh
+
+ODB_FILE=simple.odb
+
+write_blob () {
+	printf %s "$1" | test-tool simple-odb write "$ODB_FILE" blob
+}
+
+test_expect_success 'initialize simple object database' '
+	test-tool simple-odb init "$ODB_FILE"
+'
+
+test_expect_success 'init rejects an unknown hash algorithm' '
+	test_must_fail test-tool simple-odb init bogus.odb no-such-algo &&
+	test_path_is_missing bogus.odb
+'
+
+test_expect_success 'write and list blob entry' '
+	test_when_finished "rm -f expect_oid list" &&
+	oid=$(write_blob foo) &&
+	test-tool simple-odb list "$ODB_FILE" >list &&
+	echo "$oid" >expect_oid &&
+	test_cmp expect_oid list
+'
+
+test_expect_success 'read blob content back' '
+	test_when_finished "rm -f out expect_type actual_type expect_payload" &&
+	printf foo >expect_payload &&
+	echo blob >expect_type &&
+	test-tool simple-odb read "$ODB_FILE" "$oid" out >actual_type &&
+	test_cmp expect_type actual_type &&
+	test_cmp expect_payload out
+'
+
+# write a second object to ensure we append correctly
+test_expect_success 'append another blob' '
+	test_when_finished "rm -f expect_list list" &&
+	oid2=$(write_blob bar) &&
+	test-tool simple-odb list "$ODB_FILE" >list &&
+	printf "%s\n%s\n" "$oid" "$oid2" >expect_list &&
+	test_cmp expect_list list
+'
+
+test_done