-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathquery.sql
More file actions
executable file
·117 lines (104 loc) · 3.24 KB
/
query.sql
File metadata and controls
executable file
·117 lines (104 loc) · 3.24 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
-- name: CreateDocument :one
INSERT INTO documents (file_path)
VALUES ($1)
ON CONFLICT (file_path) DO UPDATE
SET updated_at = CURRENT_TIMESTAMP
RETURNING *;
-- name: GetDocument :one
SELECT * FROM documents WHERE id = $1 LIMIT 1;
-- name: ListDocuments :many
SELECT * FROM documents ORDER BY created_at DESC;
-- name: DeleteDocument :exec
DELETE FROM documents
WHERE id = $1;
-- name: CreateEmbedding :one
INSERT INTO embeddings (
document_id,
model_name,
embedding,
chunk_text,
chunk_size,
created_at,
metadata,
metadata_hash,
embedding_text
) VALUES (
$1, $2, $3, $4, length($4), DEFAULT, $5, $6, $7
)
RETURNING id, document_id, model_name, embedding, chunk_text, chunk_size, created_at, metadata, metadata_hash, embedding_text;
-- name: GetEmbedding :one
SELECT e.* FROM embeddings e
JOIN documents d ON d.id = e.document_id
WHERE e.id = $1 LIMIT 1;
-- name: DeleteEmbeddings :exec
DELETE FROM embeddings;
-- name: ListDocumentEmbeddings :many
SELECT e.* FROM embeddings e
JOIN documents d ON d.id = e.document_id
WHERE e.document_id = $1
AND (sqlc.narg(metadata_hash)::text IS NULL OR sqlc.narg(metadata_hash)::text = e.metadata_hash);
-- name: GetStorageStats :one
SELECT
COUNT(e.id) as embedding_count,
SUM(e.chunk_size) as total_bytes,
COUNT(DISTINCT d.id) as document_count
FROM embeddings e
LEFT JOIN documents d ON e.document_id = d.id;
-- name: FindTopKNNEmbeddings :many
SELECT
e.id,
e.document_id,
e.chunk_text,
e.chunk_size,
d.file_path,
e.metadata,
(e.embedding <=> sqlc.arg(query_embedding)::vector)::float8 as distance,
(1 - (e.embedding <=> sqlc.arg(query_embedding)::vector))::float8 as similarity
FROM embeddings e
JOIN documents d ON d.id = e.document_id
WHERE e.model_name = sqlc.arg(model_name)
AND (sqlc.narg(metadata_hash)::text IS NULL OR sqlc.narg(metadata_hash)::text = e.metadata_hash)
ORDER BY e.embedding <=> sqlc.arg(query_embedding)::vector ASC
LIMIT sqlc.arg(k);
-- name: FindSimilarEmbeddingsInDocument :many
WITH similarity_scores AS (
SELECT
e.id,
e.document_id,
e.chunk_text,
e.chunk_size,
e.metadata,
d.file_path,
1 - (e.embedding <=> sqlc.arg(query_embedding)::vector) as similarity
FROM embeddings e
JOIN documents d ON d.id = e.document_id
WHERE e.document_id = sqlc.arg(document_id)
AND e.model_name = sqlc.arg(model_name)
AND 1 - (e.embedding <=> sqlc.arg(query_embedding)::vector) > sqlc.arg(similarity_threshold)
AND (sqlc.narg(metadata_hash)::text IS NULL OR sqlc.narg(metadata_hash)::text = e.metadata_hash)
)
SELECT *
FROM similarity_scores
ORDER BY similarity DESC
LIMIT sqlc.arg(max_results);
-- name: GetStats :one
SELECT
COUNT(DISTINCT d.id) as document_count,
COUNT(e.id) as chunk_count,
COALESCE(SUM(e.chunk_size), 0) as total_bytes
FROM documents d
LEFT JOIN embeddings e ON e.document_id = d.id;
-- name: ListChunks :many
SELECT
e.id,
e.chunk_text,
e.metadata,
e.chunk_size,
e.model_name,
e.created_at,
d.file_path,
d.id as document_id
FROM embeddings e
JOIN documents d ON d.id = e.document_id
WHERE (sqlc.narg(metadata_hash)::text IS NULL OR e.metadata_hash = sqlc.narg(metadata_hash)::text)
ORDER BY e.created_at DESC;