From 75350b3bee91b3d53ca2d6d04a219374e7df270a Mon Sep 17 00:00:00 2001 From: John McBride Date: Sun, 15 Mar 2026 08:52:53 -0400 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20feat:=20Supports=20pgvector?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Towards PCC-245 Signed-off-by: John McBride --- go.mod | 7 +- go.sum | 58 ++++- pkg/vector/pgvector/pgvector.go | 286 +++++++++++++++++++++ pkg/vector/pgvector/pgvector_suite_test.go | 13 + pkg/vector/pgvector/pgvector_test.go | 35 +++ pkg/vector/utils/new.go | 14 + 6 files changed, 400 insertions(+), 13 deletions(-) create mode 100644 pkg/vector/pgvector/pgvector.go create mode 100644 pkg/vector/pgvector/pgvector_suite_test.go create mode 100644 pkg/vector/pgvector/pgvector_test.go diff --git a/go.mod b/go.mod index 0ff561a..38b50d0 100644 --- a/go.mod +++ b/go.mod @@ -22,6 +22,7 @@ require ( github.com/muesli/termenv v0.16.0 github.com/onsi/ginkgo/v2 v2.27.4 github.com/onsi/gomega v1.39.0 + github.com/pgvector/pgvector-go v0.3.0 github.com/posthog/posthog-go v1.10.0 github.com/qdrant/go-client v1.17.1 github.com/segmentio/kafka-go v0.4.50 @@ -54,7 +55,7 @@ require ( github.com/dlclark/regexp2 v1.11.0 // indirect github.com/go-logfmt/logfmt v0.6.0 // indirect github.com/go-logr/logr v1.4.3 // indirect - github.com/go-openapi/inflect v0.19.0 // indirect + github.com/go-openapi/inflect v0.21.0 // indirect github.com/go-task/slim-sprig/v3 v3.0.0 // indirect github.com/go-viper/mapstructure/v2 v2.4.0 // indirect github.com/goccy/go-json v0.10.5 // indirect @@ -63,7 +64,7 @@ require ( github.com/google/pprof v0.0.0-20250403155104-27863c87afa6 // indirect github.com/gorilla/css v1.0.1 // indirect github.com/hashicorp/golang-lru/v2 v2.0.7 // indirect - github.com/hashicorp/hcl/v2 v2.18.1 // indirect + github.com/hashicorp/hcl/v2 v2.23.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile 
v0.0.0-20240606120523-5a60cdf6a761 // indirect @@ -92,7 +93,7 @@ require ( github.com/yosida95/uritemplate/v3 v3.0.2 // indirect github.com/yuin/goldmark v1.7.8 // indirect github.com/yuin/goldmark-emoji v1.0.5 // indirect - github.com/zclconf/go-cty v1.14.4 // indirect + github.com/zclconf/go-cty v1.16.2 // indirect github.com/zclconf/go-cty-yaml v1.1.0 // indirect go.yaml.in/yaml/v3 v3.0.4 // indirect golang.org/x/exp v0.0.0-20231006140011-7918f672742d // indirect diff --git a/go.sum b/go.sum index bc437c7..c62eb5d 100644 --- a/go.sum +++ b/go.sum @@ -90,8 +90,12 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-openapi/inflect v0.19.0 h1:9jCH9scKIbHeV9m12SmPilScz6krDxKRasNNSNPXu/4= -github.com/go-openapi/inflect v0.19.0/go.mod h1:lHpZVlpIQqLyKwJ4N+YSc9hchQy/i12fJykb83CRBH4= +github.com/go-openapi/inflect v0.21.0 h1:FoBjBTQEcbg2cJUWX6uwL9OyIW8eqc9k4KhN4lfbeYk= +github.com/go-openapi/inflect v0.21.0/go.mod h1:INezMuUu7SJQc2AyR3WO0DqqYUJSj8Kb4hBd7WtjlAw= +github.com/go-pg/pg/v10 v10.11.0 h1:CMKJqLgTrfpE/aOVeLdybezR2om071Vh38OLZjsyMI0= +github.com/go-pg/pg/v10 v10.11.0/go.mod h1:4BpHRoxE61y4Onpof3x1a2SQvi9c+q1dJnrNdMjsroA= +github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU= +github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68= @@ -122,8 +126,8 @@ github.com/gorilla/css v1.0.1 h1:ntNaBIghp6JmvWnxbZKANoLyuXTPZ4cAMlo6RyhlbO8= github.com/gorilla/css 
v1.0.1/go.mod h1:BvnYkspnSzMmwRK+b8/xgNPLiIuNZr6vbZBTPQ2A3b0= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= -github.com/hashicorp/hcl/v2 v2.18.1 h1:6nxnOJFku1EuSawSD81fuviYUV8DxFr3fp2dUi3ZYSo= -github.com/hashicorp/hcl/v2 v2.18.1/go.mod h1:ThLC89FV4p9MPW804KVbe/cEXoQ8NZEh+JtMeeGErHE= +github.com/hashicorp/hcl/v2 v2.23.0 h1:Fphj1/gCylPxHutVSEOf2fBOh1VE4AuLV7+kbJf3qos= +github.com/hashicorp/hcl/v2 v2.23.0/go.mod h1:62ZYHrXgPoX8xBnzl8QzbWq4dyDsDtfCRgIq1rbJEvA= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= @@ -136,6 +140,12 @@ github.com/jackc/pgx/v5 v5.8.0 h1:TYPDoleBBme0xGSAX3/+NujXXtpZn9HBONkQC7IEZSo= github.com/jackc/pgx/v5 v5.8.0/go.mod h1:QVeDInX2m9VyzvNeiCJVjCkNFqzsNb43204HshNSZKw= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= +github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= github.com/joshdk/go-junit v1.0.0 h1:S86cUKIdwBHWwA6xCmFlf3RTLfVXYQfvanM5Uh+K6GE= github.com/joshdk/go-junit v1.0.0/go.mod h1:TiiV0PqkaNfFXjEiyjWM3XXrhVyCa1K4Zfga6W52ung= github.com/klauspost/compress v1.18.4 h1:RPhnKRAQ4Fh8zU2FY/6ZFDwTVTxgJ/EMydqSTzE9a2c= @@ 
-144,8 +154,8 @@ github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= -github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw= +github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o= github.com/lucasb-eyer/go-colorful v1.3.0 h1:2/yBRLdWBZKrf7gB40FoiKfAWYQ0lqNcbuQwVHXptag= github.com/lucasb-eyer/go-colorful v1.3.0/go.mod h1:R4dSotOR9KMtayYi1e77YzuveK+i7ruzyGqttikkLy0= github.com/maruel/natural v1.1.1 h1:Hja7XhhmvEFhcByqDoHz9QZbkWey+COd9xWfCfn1ioo= @@ -179,6 +189,8 @@ github.com/onsi/gomega v1.39.0 h1:y2ROC3hKFmQZJNFeGAMeHZKkjBL65mIZcvrLQBF9k6Q= github.com/onsi/gomega v1.39.0/go.mod h1:ZCU1pkQcXDO5Sl9/VVEGlDyp+zm0m1cmeG5TOzLgdh4= github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pgvector/pgvector-go v0.3.0 h1:Ij+Yt78R//uYqs3Zk35evZFvr+G0blW0OUN+Q2D1RWc= +github.com/pgvector/pgvector-go v0.3.0/go.mod h1:duFy+PXWfW7QQd5ibqutBO4GxLsUZ9RVXhFZGIBsWSA= github.com/pierrec/lz4/v4 v4.1.15 h1:MO0/ucJhngq7299dKLwIMtgTfbkoSPF6AoMYDd8Q4q0= github.com/pierrec/lz4/v4 v4.1.15/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -198,8 +210,6 @@ github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDc github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= github.com/segmentio/kafka-go v0.4.50 
h1:mcyC3tT5WeyWzrFbd6O374t+hmcu1NKt2Pu1L3QaXmc= github.com/segmentio/kafka-go v0.4.50/go.mod h1:Y1gn60kzLEEaW28YshXyk2+VCUKbJ3Qr6DrnT3i4+9E= -github.com/sergi/go-diff v1.3.1 h1:xkr+Oxo4BOQKmkn/B9eMK0g5Kg/983T9DqqPHwYqD+8= -github.com/sergi/go-diff v1.3.1/go.mod h1:aMJSSKb2lpPvRNec0+w3fl7LP9IOFzdc9Pa4NFbPK1I= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= @@ -228,10 +238,28 @@ github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= github.com/tidwall/sjson v1.2.5 h1:kLy8mja+1c9jlljvWTlSazM7cKDRfJuR/bOJhcY5NcY= github.com/tidwall/sjson v1.2.5/go.mod h1:Fvgq9kS/6ociJEDnK0Fk1cpYF4FIW6ZF7LAe+6jwd28= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs= +github.com/uptrace/bun v1.1.12 h1:sOjDVHxNTuM6dNGaba0wUuz7KvDE1BmNu9Gqs2gJSXQ= +github.com/uptrace/bun v1.1.12/go.mod h1:NPG6JGULBeQ9IU6yHp7YGELRa5Agmd7ATZdz4tGZ6z0= +github.com/uptrace/bun/dialect/pgdialect v1.1.12 h1:m/CM1UfOkoBTglGO5CUTKnIKKOApOYxkcP2qn0F9tJk= +github.com/uptrace/bun/dialect/pgdialect v1.1.12/go.mod h1:Ij6WIxQILxLlL2frUBxUBOZJtLElD2QQNDcu/PWDHTc= +github.com/uptrace/bun/driver/pgdriver v1.1.12 h1:3rRWB1GK0psTJrHwxzNfEij2MLibggiLdTqjTtfHc1w= +github.com/uptrace/bun/driver/pgdriver v1.1.12/go.mod h1:ssYUP+qwSEgeDDS1xm2XBip9el1y9Mi5mTAvLoiADLM= github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/valyala/fasthttp v1.62.0 
h1:8dKRBX/y2rCzyc6903Zu1+3qN0H/d2MsxPPmVNamiH0= github.com/valyala/fasthttp v1.62.0/go.mod h1:FCINgr4GKdKqV8Q0xv8b+UxPV+H/O5nNFo3D+r54Htg= +github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94= +github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= +github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= +github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= +github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vbd1qPqc= +github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= @@ -249,8 +277,10 @@ github.com/yuin/goldmark v1.7.8 h1:iERMLn0/QJeHFhxSt3p6PeN9mGnvIKSpG9YYorDMnic= github.com/yuin/goldmark v1.7.8/go.mod h1:uzxRWxtg69N339t3louHJ7+O03ezfj6PlliRlaOzY1E= github.com/yuin/goldmark-emoji v1.0.5 h1:EMVWyCGPlXJfUXBXpuMu+ii3TIaxbVBnEX9uaDC4cIk= github.com/yuin/goldmark-emoji v1.0.5/go.mod h1:tTkZEbwu5wkPmgTcitqddVxY9osFZiavD+r4AzQrh1U= -github.com/zclconf/go-cty v1.14.4 h1:uXXczd9QDGsgu0i/QFR/hzI5NYCHLf6NQw/atrbnhq8= -github.com/zclconf/go-cty v1.14.4/go.mod h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= +github.com/zclconf/go-cty v1.16.2 h1:LAJSwc3v81IRBZyUVQDUdZ7hs3SYs9jv0eZJDWHD/70= +github.com/zclconf/go-cty v1.16.2/go.mod 
h1:VvMs5i0vgZdhYawQNq5kePSpLAoz8u1xvZgrPIxfnZE= +github.com/zclconf/go-cty-debug v0.0.0-20240509010212-0d6042c53940 h1:4r45xpDWB6ZMSMNJFMOjqrGHynW3DIBuR2H9j0ug+Mo= +github.com/zclconf/go-cty-debug v0.0.0-20240509010212-0d6042c53940/go.mod h1:CmBdvvj3nqzfzJ6nTCIwDTPZ56aVGvDrmztiO5g3qrM= github.com/zclconf/go-cty-yaml v1.1.0 h1:nP+jp0qPHv2IhUVqmQSzjvqAWcObN0KBkUl2rWBdig0= github.com/zclconf/go-cty-yaml v1.1.0/go.mod h1:9YLUH4g7lOhVWqUbctnVlZ5KLpg7JAprQNgxSZ1Gyxs= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= @@ -267,6 +297,8 @@ go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZY go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA= go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts= +golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos= golang.org/x/exp v0.0.0-20231006140011-7918f672742d h1:jtJma62tbqLibJ5sFQz8bKtEM8rJBtfilJ2qTU199MI= golang.org/x/exp v0.0.0-20231006140011-7918f672742d/go.mod h1:ldy0pHrwJyGW56pPQzzkH36rKxoZW1tw7ZJpeKx+hdo= golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c= @@ -300,3 +332,9 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EV gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/postgres v1.5.4 h1:Iyrp9Meh3GmbSuyIAGyjkN+n9K+GHX9b9MqsTL4EJCo= +gorm.io/driver/postgres v1.5.4/go.mod h1:Bgo89+h0CRcdA33Y6frlaHHVuTdOf87pmyzwW9C/BH0= +gorm.io/gorm v1.25.5 h1:zR9lOiiYf09VNh5Q1gphfyia1JpiClIWG9hQaxB/mls= +gorm.io/gorm v1.25.5/go.mod 
h1:hbnx/Oo0ChWMn1BIhpy1oYozzpM15i4YPuHDmfYtwg8=
+mellium.im/sasl v0.3.1 h1:wE0LW6g7U83vhvxjC1IY8DnXM+EU095yeo8XClvCdfo=
+mellium.im/sasl v0.3.1/go.mod h1:xm59PUYpZHhgQ9ZqoJ5QaCqzWMi8IeS49dhp6plPCzw=
diff --git a/pkg/vector/pgvector/pgvector.go b/pkg/vector/pgvector/pgvector.go
new file mode 100644
index 0000000..fd0e55d
--- /dev/null
+++ b/pkg/vector/pgvector/pgvector.go
@@ -0,0 +1,317 @@
+// Package pgvector provides a PostgreSQL-backed vector driver using the pgvector extension.
+package pgvector
+
+import (
+	"context"
+	"errors"
+	"fmt"
+	"log/slog"
+
+	"github.com/jackc/pgx/v5"
+	"github.com/jackc/pgx/v5/pgxpool"
+	pgvec "github.com/pgvector/pgvector-go"
+
+	"github.com/papercomputeco/tapes/pkg/vector"
+)
+
+const (
+	// DefaultTableName is the default table name for storing vector documents.
+	DefaultTableName = "tapes_embeddings"
+
+	// defaultTopK is the result limit Query applies when topK is not positive.
+	defaultTopK = 10
+
+	// maxHNSWDimensions is the widest vector column pgvector can cover with an
+	// HNSW index; wider columns are searched with an exact sequential scan.
+	maxHNSWDimensions = 2000
+)
+
+// Driver implements vector.Driver using PostgreSQL with the pgvector extension.
+type Driver struct {
+	pool       *pgxpool.Pool
+	tableName  string
+	dimensions uint
+	logger     *slog.Logger
+}
+
+// Config holds configuration for the pgvector driver.
+type Config struct {
+	// ConnString is the PostgreSQL connection string (e.g. "postgres://user:pass@host:5432/db").
+	ConnString string
+
+	// TableName is the name of the table to store embeddings in.
+	// Defaults to DefaultTableName if empty.
+	TableName string
+
+	// Dimensions is the number of dimensions for the embedding vectors.
+	Dimensions uint
+}
+
+// NewDriver creates a new pgvector driver connected to PostgreSQL.
+//
+// It validates the config, opens a connection pool, pings the server and
+// ensures the extension, table and index exist, closing the pool again if any
+// of those steps fail. A nil logger is replaced with slog.Default().
+func NewDriver(c Config, log *slog.Logger) (*Driver, error) {
+	if c.ConnString == "" {
+		return nil, errors.New("pgvector connection string must be provided")
+	}
+
+	if c.Dimensions == 0 {
+		return nil, errors.New("pgvector embedding dimensions cannot be 0, must be configured")
+	}
+
+	// Every method logs through d.logger, so never store a nil logger.
+	if log == nil {
+		log = slog.Default()
+	}
+
+	tableName := c.TableName
+	if tableName == "" {
+		tableName = DefaultTableName
+	}
+
+	ctx := context.Background()
+
+	pool, err := pgxpool.New(ctx, c.ConnString)
+	if err != nil {
+		return nil, fmt.Errorf("creating pgx connection pool: %w", err)
+	}
+
+	// Verify connectivity
+	if err := pool.Ping(ctx); err != nil {
+		pool.Close()
+		return nil, fmt.Errorf("%w: %w", vector.ErrConnection, err)
+	}
+
+	d := &Driver{
+		pool:       pool,
+		tableName:  tableName,
+		dimensions: c.Dimensions,
+		logger:     log,
+	}
+
+	if err := d.ensureSchema(ctx); err != nil {
+		pool.Close()
+		return nil, fmt.Errorf("ensuring schema: %w", err)
+	}
+
+	log.Info("connected to PostgreSQL with pgvector",
+		"table", tableName,
+		"dimensions", c.Dimensions,
+	)
+
+	return d, nil
+}
+
+// table returns the table name quoted as a SQL identifier. Identifiers cannot
+// be sent as bind parameters, so statements interpolate this quoted form.
+func (d *Driver) table() string {
+	return pgx.Identifier{d.tableName}.Sanitize()
+}
+
+// ensureSchema idempotently creates the vector extension, the embeddings table
+// and, when the configured dimensions allow it, an HNSW cosine-distance index.
+func (d *Driver) ensureSchema(ctx context.Context) error {
+	// Enable the pgvector extension
+	if _, err := d.pool.Exec(ctx, `CREATE EXTENSION IF NOT EXISTS vector`); err != nil {
+		return fmt.Errorf("enabling vector extension: %w", err)
+	}
+
+	// Create the embeddings table
+	createTable := fmt.Sprintf(`
+		CREATE TABLE IF NOT EXISTS %s (
+			id TEXT PRIMARY KEY,
+			hash TEXT NOT NULL DEFAULT '',
+			embedding vector(%d) NOT NULL
+		)
+	`, d.table(), d.dimensions)
+
+	if _, err := d.pool.Exec(ctx, createTable); err != nil {
+		return fmt.Errorf("creating table: %w", err)
+	}
+
+	// pgvector cannot build an HNSW index over a vector column wider than
+	// maxHNSWDimensions. Skip the index rather than fail startup; queries
+	// still work, just without index acceleration.
+	if d.dimensions > maxHNSWDimensions {
+		d.logger.Warn("skipping pgvector HNSW index, dimensions exceed index limit",
+			"table", d.tableName,
+			"dimensions", d.dimensions,
+			"limit", maxHNSWDimensions,
+		)
+		return nil
+	}
+
+	// Create a cosine distance index for efficient similarity search.
+	// IVFFlat requires rows to exist for training, so use HNSW which works on empty tables.
+	indexName := pgx.Identifier{d.tableName + "_embedding_idx"}.Sanitize()
+	createIndex := fmt.Sprintf(`
+		CREATE INDEX IF NOT EXISTS %s
+		ON %s
+		USING hnsw (embedding vector_cosine_ops)
+	`, indexName, d.table())
+
+	if _, err := d.pool.Exec(ctx, createIndex); err != nil {
+		return fmt.Errorf("creating index: %w", err)
+	}
+
+	return nil
+}
+
+// Add stores documents with their embeddings.
+// If a document with the same ID already exists, it is updated.
+func (d *Driver) Add(ctx context.Context, docs []vector.Document) error {
+	if len(docs) == 0 {
+		return nil
+	}
+
+	query := fmt.Sprintf(`
+		INSERT INTO %s (id, hash, embedding)
+		VALUES ($1, $2, $3)
+		ON CONFLICT (id) DO UPDATE SET
+			hash = EXCLUDED.hash,
+			embedding = EXCLUDED.embedding
+	`, d.table())
+
+	batch := &pgx.Batch{}
+	for _, doc := range docs {
+		batch.Queue(query, doc.ID, doc.Hash, pgvec.NewVector(doc.Embedding))
+	}
+
+	br := d.pool.SendBatch(ctx, batch)
+	defer br.Close()
+
+	// Batch results are read in queue order, so the i-th Exec reports on docs[i].
+	for _, doc := range docs {
+		if _, err := br.Exec(); err != nil {
+			return fmt.Errorf("upserting document %q: %w", doc.ID, err)
+		}
+	}
+
+	d.logger.Debug("added documents to pgvector", "count", len(docs))
+	return nil
+}
+
+// Query finds the topK most similar documents to the given embedding.
+// Uses cosine distance; results are ordered by similarity (highest first).
+// A topK that is zero or negative falls back to defaultTopK.
+func (d *Driver) Query(ctx context.Context, embedding []float32, topK int) ([]vector.QueryResult, error) {
+	if topK <= 0 {
+		topK = defaultTopK
+	}
+
+	// Cosine distance: 0 = identical, 2 = opposite.
+	// Convert to similarity score: 1 - distance gives [−1, 1] range.
+	query := fmt.Sprintf(`
+		SELECT id, hash, embedding, 1 - (embedding <=> $1) AS score
+		FROM %s
+		ORDER BY embedding <=> $1
+		LIMIT $2
+	`, d.table())
+
+	rows, err := d.pool.Query(ctx, query, pgvec.NewVector(embedding), topK)
+	if err != nil {
+		return nil, fmt.Errorf("querying vectors: %w", err)
+	}
+	defer rows.Close()
+
+	var results []vector.QueryResult
+	for rows.Next() {
+		var (
+			id    string
+			hash  string
+			emb   pgvec.Vector
+			score float32
+		)
+		if err := rows.Scan(&id, &hash, &emb, &score); err != nil {
+			return nil, fmt.Errorf("scanning query result: %w", err)
+		}
+
+		results = append(results, vector.QueryResult{
+			Document: vector.Document{
+				ID:        id,
+				Hash:      hash,
+				Embedding: emb.Slice(),
+			},
+			Score: score,
+		})
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterating query results: %w", err)
+	}
+
+	d.logger.Debug("queried pgvector", "results", len(results))
+	return results, nil
+}
+
+// Get retrieves documents by their IDs.
+// IDs without a matching row are simply absent from the result.
+func (d *Driver) Get(ctx context.Context, ids []string) ([]vector.Document, error) {
+	if len(ids) == 0 {
+		return nil, nil
+	}
+
+	// Bind all IDs as one text[] parameter instead of one placeholder per ID.
+	// The statement text stays constant and large ID lists cannot run into
+	// PostgreSQL's 65535 bind-parameter limit.
+	query := fmt.Sprintf(`
+		SELECT id, hash, embedding
+		FROM %s
+		WHERE id = ANY($1)
+	`, d.table())
+
+	rows, err := d.pool.Query(ctx, query, ids)
+	if err != nil {
+		return nil, fmt.Errorf("querying documents: %w", err)
+	}
+	defer rows.Close()
+
+	var docs []vector.Document
+	for rows.Next() {
+		var (
+			id   string
+			hash string
+			emb  pgvec.Vector
+		)
+		if err := rows.Scan(&id, &hash, &emb); err != nil {
+			return nil, fmt.Errorf("scanning document: %w", err)
+		}
+		docs = append(docs, vector.Document{
+			ID:        id,
+			Hash:      hash,
+			Embedding: emb.Slice(),
+		})
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, fmt.Errorf("iterating documents: %w", err)
+	}
+
+	return docs, nil
+}
+
+// Delete removes documents by their IDs.
+// IDs that do not exist are ignored.
+func (d *Driver) Delete(ctx context.Context, ids []string) error {
+	if len(ids) == 0 {
+		return nil
+	}
+
+	// A single text[] parameter keeps the statement constant for any ID count.
+	query := fmt.Sprintf(`DELETE FROM %s WHERE id = ANY($1)`, d.table())
+
+	if _, err := d.pool.Exec(ctx, query, ids); err != nil {
+		return fmt.Errorf("deleting documents: %w", err)
+	}
+
+	d.logger.Debug("deleted documents from pgvector", "count", len(ids))
+	return nil
+}
+
+// Close releases resources held by the driver.
+func (d *Driver) Close() error {
+	d.pool.Close()
+	return nil
+}
diff --git a/pkg/vector/pgvector/pgvector_suite_test.go b/pkg/vector/pgvector/pgvector_suite_test.go
new file mode 100644
index 0000000..260be20
--- /dev/null
+++ b/pkg/vector/pgvector/pgvector_suite_test.go
@@ -0,0 +1,13 @@
+package pgvector_test
+
+import (
+	"testing"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+func TestPgvector(t *testing.T) {
+	RegisterFailHandler(Fail)
+	RunSpecs(t, "Pgvector Vector Suite")
+}
diff --git a/pkg/vector/pgvector/pgvector_test.go b/pkg/vector/pgvector/pgvector_test.go
new file mode 100644
index 0000000..d471a8d
--- /dev/null
+++ b/pkg/vector/pgvector/pgvector_test.go
@@ -0,0 +1,35 @@
+package pgvector_test
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+
+	"github.com/papercomputeco/tapes/pkg/vector"
+	"github.com/papercomputeco/tapes/pkg/vector/pgvector"
+)
+
+var _ = Describe("Driver", func() {
+	Describe("Interface compliance", func() {
+		It("should implement vector.Driver interface", func() {
+			var _ vector.Driver = (*pgvector.Driver)(nil)
+		})
+	})
+
+	Describe("NewDriver", func() {
+		It("should return an error when connection string is empty", func() {
+			_, err := pgvector.NewDriver(pgvector.Config{
+				Dimensions: 128,
+			}, nil)
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("connection string must be provided"))
+		})
+
+		It("should return an error when dimensions is zero", func() {
+			_, err := pgvector.NewDriver(pgvector.Config{
+				ConnString: "postgres://localhost:5432/test",
+			}, nil)
+			Expect(err).To(HaveOccurred())
+			Expect(err.Error()).To(ContainSubstring("dimensions cannot be 0"))
+		})
+	})
+})
diff --git a/pkg/vector/utils/new.go b/pkg/vector/utils/new.go
index 0ff0c80..014f304 100644
--- a/pkg/vector/utils/new.go
+++ b/pkg/vector/utils/new.go
@@ -10,6 +10,7 @@ import (
 
 	"github.com/papercomputeco/tapes/pkg/vector"
 	"github.com/papercomputeco/tapes/pkg/vector/chroma"
+	"github.com/papercomputeco/tapes/pkg/vector/pgvector"
 	"github.com/papercomputeco/tapes/pkg/vector/qdrant"
 	"github.com/papercomputeco/tapes/pkg/vector/sqlitevec"
 )
@@ -29,6 +30,8 @@ func NewVectorDriver(o *NewVectorDriverOpts) (vector.Driver, error) {
 		return newSqliteVecDriver(o)
 	case "qdrant":
 		return newQdrantDriver(o)
+	case "pgvector":
+		return newPgvectorDriver(o)
 	default:
 		return nil, fmt.Errorf("unsupported vector store provider: %s", o.ProviderType)
 	}
@@ -93,3 +96,14 @@ func newQdrantDriver(o *NewVectorDriverOpts) (vector.Driver, error) {
 		Dimensions: uint64(o.Dimensions),
 	}, o.Logger)
 }
+
+func newPgvectorDriver(o *NewVectorDriverOpts) (vector.Driver, error) {
+	if o.Target == "" {
+		return nil, errors.New("pgvector target connection string must be provided")
+	}
+
+	return pgvector.NewDriver(pgvector.Config{
+		ConnString: o.Target,
+		Dimensions: o.Dimensions,
+	}, o.Logger)
+}