diff --git a/.changeset/searchable-json-query-api.md b/.changeset/searchable-json-query-api.md new file mode 100644 index 00000000..c543b8c5 --- /dev/null +++ b/.changeset/searchable-json-query-api.md @@ -0,0 +1,6 @@ +--- +"@cipherstash/protect": major +"@cipherstash/schema": major +--- + +Add searchable JSON query API with path and containment query support diff --git a/.gitignore b/.gitignore index fc7ee438..599d7e98 100644 --- a/.gitignore +++ b/.gitignore @@ -66,3 +66,10 @@ mise.local.toml cipherstash.toml cipherstash.secret.toml sql/cipherstash-*.sql + +# work files +.claude/ +.serena/ +.work/ +**/.work/ +PR_REVIEW.md diff --git a/docs/concepts/aws-kms-vs-cipherstash-comparison.md b/docs/concepts/aws-kms-vs-cipherstash-comparison.md index d0a52f19..932e0210 100644 --- a/docs/concepts/aws-kms-vs-cipherstash-comparison.md +++ b/docs/concepts/aws-kms-vs-cipherstash-comparison.md @@ -165,11 +165,12 @@ const encryptResult = await protectClient.encrypt( ); // Create search terms and query directly in PostgreSQL -const searchTerms = await protectClient.createSearchTerms({ - terms: ['secret'], +const searchTerms = await protectClient.encryptQuery([{ + value: 'secret', column: users.email, table: users, -}); + queryType: queryTypes.freeTextSearch, +}]); // Use with your ORM (Drizzle integration included) ``` diff --git a/docs/concepts/searchable-encryption.md b/docs/concepts/searchable-encryption.md index 56ca41fa..2a461ff8 100644 --- a/docs/concepts/searchable-encryption.md +++ b/docs/concepts/searchable-encryption.md @@ -69,14 +69,16 @@ CipherStash uses [EQL](https://github.com/cipherstash/encrypt-query-language) to // 1) Encrypt the search term const searchTerm = 'alice.johnson@example.com' -const encryptedParam = await protectClient.createSearchTerms([{ +const encryptedParam = await protectClient.encryptQuery([{ value: searchTerm, table: protectedUsers, // Reference to the Protect table schema column: protectedUsers.email, // Your Protect column definition + 
queryType: queryTypes.equality, // Use 'equality' for exact match queries }]) if (encryptedParam.failure) { // Handle the failure + throw new Error(encryptedParam.failure.message) } // 2) Build an equality query noting that EQL must be installed in order for the operation to work successfully @@ -86,10 +88,9 @@ const equalitySQL = ` WHERE email = $1 ` -// 3) Execute the query, passing in the Postgres column name -// and the encrypted search term as the second parameter +// 3) Execute the query, passing in the encrypted search term // (client is an arbitrary Postgres client) -const result = await client.query(equalitySQL, [ protectedUser.email.getName(), encryptedParam.data ]) +const result = await client.query(equalitySQL, [encryptedParam.data[0]]) ``` Using the above approach, Protect.js is generating the EQL payloads and which means you never have to drop down to writing complex SQL queries. @@ -132,7 +133,7 @@ With searchable encryption, you can: With searchable encryption: - Data can be encrypted, stored, and searched in your existing PostgreSQL database. -- Encrypted data can be searched using equality, free text search, and range queries. +- Encrypted data can be searched using equality, free text search, range queries, and JSON path/containment queries. - Data remains encrypted, and will be decrypted using the Protect.js library in your application. - Queries are blazing fast, and won't slow down your application experience. - Every decryption event is logged, giving you an audit trail of data access events. 
diff --git a/docs/getting-started.md b/docs/getting-started.md index 84c154c0..a51f2414 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -254,6 +254,14 @@ CREATE TABLE users ( ); ``` +## Next steps + +Now that you have the basics working, explore these advanced features: + +- **[Searchable Encryption](./reference/searchable-encryption-postgres.md)** - Learn how to search encrypted data using `encryptQuery()` with PostgreSQL and EQL +- **[Model Operations](./reference/model-operations.md)** - Encrypt and decrypt entire objects with bulk operations +- **[Schema Configuration](./reference/schema.md)** - Configure indexes for equality, free text search, range queries, and JSON search + --- ### Didn't find what you wanted? diff --git a/docs/reference/model-operations.md b/docs/reference/model-operations.md index 5a241214..bf62d076 100644 --- a/docs/reference/model-operations.md +++ b/docs/reference/model-operations.md @@ -75,7 +75,7 @@ For better performance when working with multiple models, use these bulk encrypt ### Bulk encryption ```typescript -const users = [ +const usersList = [ { id: "1", email: "user1@example.com", @@ -88,7 +88,7 @@ const users = [ }, ]; -const encryptedResult = await protectClient.bulkEncryptModels(users, users); +const encryptedResult = await protectClient.bulkEncryptModels(usersList, usersSchema); if (encryptedResult.failure) { console.error("Bulk encryption failed:", encryptedResult.failure.message); diff --git a/docs/reference/schema.md b/docs/reference/schema.md index b828bdf4..9977cc39 100644 --- a/docs/reference/schema.md +++ b/docs/reference/schema.md @@ -76,9 +76,29 @@ export const protectedUsers = csTable("users", { }); ``` +### Searchable JSON + +To enable searching within JSON columns, use the `searchableJson()` method. This automatically sets the column data type to `json` and configures the necessary indexes for path and containment queries. 
+ +```ts +import { csTable, csColumn } from "@cipherstash/protect"; + +export const protectedUsers = csTable("users", { + metadata: csColumn("metadata").searchableJson(), +}); +``` + +> [!WARNING] +> `searchableJson()` is mutually exclusive with other index types (`equality()`, `freeTextSearch()`, `orderAndRange()`) on the same column. Combining them will result in runtime errors. This is enforced by the encryption backend, not at the TypeScript type level. + + ### Nested objects -Protect.js supports nested objects in your schema, allowing you to encrypt **but not search on** nested properties. You can define nested objects up to 3 levels deep. +Protect.js supports nested objects in your schema, allowing you to encrypt nested properties. You can define nested objects up to 3 levels deep using `csValue`. For **searchable** JSON data, use `.searchableJson()` on a JSON column instead. + +> [!TIP] +> If you need to search within JSON data, use `.searchableJson()` on the column instead of nested `csValue` definitions. See [Searchable JSON](#searchable-json) above. + This is useful for data stores that have less structured data, like NoSQL databases. You can define nested objects by using the `csValue` function to define a value in a nested object. The value naming convention of the `csValue` function is a dot-separated string of the nested object path, e.g. `profile.name` or `profile.address.street`. 
@@ -105,15 +125,15 @@ export const protectedUsers = csTable("users", { ``` When working with nested objects: -- Searchable encryption is not supported on nested objects +- Searchable encryption is not supported on nested `csValue` objects (use `.searchableJson()` for searchable JSON) - Each level can have its own encrypted fields - The maximum nesting depth is 3 levels - Null and undefined values are supported at any level - Optional nested objects are supported > [!WARNING] -> TODO: The schema builder does not validate the values you supply to the `csValue` or `csColumn` functions. -> These values are meant to be unique, and and cause unexpected behavior if they are not defined correctly. +> The schema builder does not currently validate the values you supply to the `csValue` or `csColumn` functions. +> These values must be unique within your schema - duplicate values may cause unexpected behavior. ## Available index options @@ -124,8 +144,12 @@ The following index options are available for your schema: | equality | Enables a exact index for equality queries. | `WHERE email = 'example@example.com'` | | freeTextSearch | Enables a match index for free text queries. | `WHERE description LIKE '%example%'` | | orderAndRange | Enables an sorting and range queries index. | `ORDER BY price ASC` | +| searchableJson | Enables searching inside JSON columns. | `WHERE data->'user'->>'email' = '...'` | -You can chain these methods to your column to configure them in any combination. +You can chain `equality()`, `freeTextSearch()`, and `orderAndRange()` methods in any combination. + +> [!WARNING] +> `searchableJson()` is **mutually exclusive** with other index types. Do not combine `searchableJson()` with `equality()`, `freeTextSearch()`, or `orderAndRange()` on the same column. 
## Initializing the Protect client diff --git a/docs/reference/searchable-encryption-postgres.md b/docs/reference/searchable-encryption-postgres.md index 74ead6a4..ec369427 100644 --- a/docs/reference/searchable-encryption-postgres.md +++ b/docs/reference/searchable-encryption-postgres.md @@ -7,6 +7,11 @@ This reference guide outlines the different query patterns you can use to search - [Prerequisites](#prerequisites) - [What is EQL?](#what-is-eql) - [Setting up your schema](#setting-up-your-schema) +- [Deprecated Functions](#deprecated-functions) +- [Unified Query Encryption API](#unified-query-encryption-api) +- [JSON Search](#json-search) + - [Creating JSON Search Terms](#creating-json-search-terms) + - [Using JSON Search Terms in PostgreSQL](#using-json-search-terms-in-postgresql) - [Search capabilities](#search-capabilities) - [Exact matching](#exact-matching) - [Free text search](#free-text-search) @@ -15,7 +20,6 @@ This reference guide outlines the different query patterns you can use to search - [Using Raw PostgreSQL Client (pg)](#using-raw-postgresql-client-pg) - [Using Supabase SDK](#using-supabase-sdk) - [Best practices](#best-practices) -- [Common use cases](#common-use-cases) ## Prerequisites @@ -60,49 +64,278 @@ const schema = csTable('users', { }) ``` -## The `createSearchTerms` function +## Deprecated Functions -The `createSearchTerms` function is used to create search terms used in the SQL query. - -The function takes an array of objects, each with the following properties: - -| Property | Description | -|----------|-------------| -| `value` | The value to search for | -| `column` | The column to search in | -| `table` | The table to search in | -| `returnType` | The type of return value to expect from the SQL query. Required for PostgreSQL composite types. | - -**Return types:** +> [!WARNING] +> The `createSearchTerms` function is deprecated and will be removed in v2.0. Use the unified `encryptQuery` function instead. 
See [Unified Query Encryption API](#unified-query-encryption-api). -- `eql` (default) - EQL encrypted payload -- `composite-literal` - EQL encrypted payload wrapped in a composite literal -- `escaped-composite-literal` - EQL encrypted payload wrapped in an escaped composite literal +### `createSearchTerms` (deprecated) -Example: +The `createSearchTerms` function was the original API for creating search terms. It has been superseded by `encryptQuery`. ```typescript +// DEPRECATED - use encryptQuery instead const term = await protectClient.createSearchTerms([{ value: 'user@example.com', column: schema.email, table: schema, returnType: 'composite-literal' -}, { - value: '18', - column: schema.age, +}]) + +// NEW - use encryptQuery with queryType +const queryTerm = await protectClient.encryptQuery([{ + value: 'user@example.com', + column: schema.email, table: schema, + queryType: queryTypes.equality, returnType: 'composite-literal' }]) +``` + +See [Migration from Deprecated Functions](#migration-from-deprecated-functions) for a complete migration guide. 
+ +## Unified Query Encryption API + +The `encryptQuery` function handles both single values and batch operations: + +### Single Value + +```typescript +// Encrypt a single value with explicit query type +const term = await protectClient.encryptQuery('admin@example.com', { + column: usersSchema.email, + table: usersSchema, + queryType: queryTypes.equality, +}) if (term.failure) { // Handle the error } -console.log(term.data) // array of search terms +// Use the encrypted term in your query +console.log(term.data) // encrypted search term +``` + +### Batch Operations + +```typescript +// Encrypt multiple terms in one call +const terms = await protectClient.encryptQuery([ + // Scalar term with explicit query type + { value: 'admin@example.com', column: users.email, table: users, queryType: queryTypes.equality }, + + // JSON path query (searchableJson implicit) + { path: 'user.email', value: 'test@example.com', column: jsonSchema.metadata, table: jsonSchema }, + + // JSON containment query (searchableJson implicit) + { contains: { role: 'admin' }, column: jsonSchema.metadata, table: jsonSchema }, +]) + +if (terms.failure) { + // Handle the error +} + +// Access encrypted terms +console.log(terms.data) // array of encrypted terms ``` +### Migration from Deprecated Functions + +| Old API | New API | +|---------|---------| +| `createSearchTerms([{ value, column, table }])` | `encryptQuery([{ value, column, table, queryType }])` with `ScalarQueryTerm` | +| `createSearchTerms([{ path, value, column, table }])` | `encryptQuery([{ path, value, column, table }])` with `JsonPathQueryTerm` | +| `createSearchTerms([{ containmentType: 'contains', value, ... }])` | `encryptQuery([{ contains: {...}, column, table }])` with `JsonContainsQueryTerm` | +| `createSearchTerms([{ containmentType: 'contained_by', value, ... 
}])` | `encryptQuery([{ containedBy: {...}, column, table }])` with `JsonContainedByQueryTerm` | + > [!NOTE] -> As a developer, you must track the index of the search term in the array when using the `createSearchTerms` function. +> The `createSearchTerms` function is deprecated. Use `encryptQuery` for all query encryption needs. + +### Query Term Types + +The `encryptQuery` function accepts different query term types. These types are exported from `@cipherstash/protect`: + +```typescript +import { + // Query term types + type QueryTerm, + type ScalarQueryTerm, + type JsonPathQueryTerm, + type JsonContainsQueryTerm, + type JsonContainedByQueryTerm, + // Type guards for runtime type checking + isScalarQueryTerm, + isJsonPathQueryTerm, + isJsonContainsQueryTerm, + isJsonContainedByQueryTerm, +} from '@cipherstash/protect' +``` + +**Type definitions:** + +| Type | Properties | Use Case | +|------|------------|----------| +| `ScalarQueryTerm` | `value`, `column`, `table`, `queryType?`, `queryOp?` | Scalar value queries (equality, freeTextSearch, orderAndRange) | +| `JsonPathQueryTerm` | `path`, `value?`, `column`, `table` | JSON path access queries | +| `JsonContainsQueryTerm` | `contains`, `column`, `table` | JSON containment (`@>`) queries | +| `JsonContainedByQueryTerm` | `containedBy`, `column`, `table` | JSON contained-by (`<@`) queries | + +**Type guards:** + +Type guards are useful when working with mixed query results: + +```typescript +const terms = await protectClient.encryptQuery([ + { value: 'user@example.com', column: schema.email, table: schema, queryType: queryTypes.equality }, + { contains: { role: 'admin' }, column: schema.metadata, table: schema }, +]) + +if (terms.failure) { + // Handle error +} + +for (const term of terms.data) { + if (isScalarQueryTerm(term)) { + // Handle scalar term + } else if (isJsonContainsQueryTerm(term)) { + // Handle containment term - access term.sv + } +} +``` + +## JSON Search + +For querying encrypted JSON columns 
configured with `.searchableJson()`, use the `encryptQuery` function with JSON-specific term types. + +### Creating JSON Search Terms + +#### Path Queries + +Used for finding records where a specific path in the JSON equals a value. + +| Property | Description | +|----------|-------------| +| `path` | The path to the field (e.g., `'user.email'` or `['user', 'email']`) | +| `value` | The value to match at that path | +| `column` | The column definition from the schema | +| `table` | The table definition | + +```typescript +// Path query - SQL equivalent: WHERE metadata->'user'->>'email' = 'alice@example.com' +const pathTerms = await protectClient.encryptQuery([{ + path: 'user.email', + value: 'alice@example.com', + column: schema.metadata, + table: schema +}]) + +if (pathTerms.failure) { + // Handle the error +} +``` + +#### Containment Queries + +Used for finding records where the JSON column contains a specific JSON structure (subset). + +**Contains Query (`@>` operator)** - Find records where JSON contains the specified structure: + +| Property | Description | +|----------|-------------| +| `contains` | The JSON object/array structure to search for | +| `column` | The column definition from the schema | +| `table` | The table definition | + +```typescript +// Containment query - SQL equivalent: WHERE metadata @> '{"roles": ["admin"]}' +const containmentTerms = await protectClient.encryptQuery([{ + contains: { roles: ['admin'] }, + column: schema.metadata, + table: schema +}]) + +if (containmentTerms.failure) { + // Handle the error +} +``` + +**Contained-By Query (`<@` operator)** - Find records where JSON is contained by the specified structure: + +| Property | Description | +|----------|-------------| +| `containedBy` | The JSON superset to check against | +| `column` | The column definition from the schema | +| `table` | The table definition | + +```typescript +// Contained-by query - SQL equivalent: WHERE metadata <@ '{"permissions": ["read", "write", 
"admin"]}' +const containedByTerms = await protectClient.encryptQuery([{ + containedBy: { permissions: ['read', 'write', 'admin'] }, + column: schema.metadata, + table: schema +}]) + +if (containedByTerms.failure) { + // Handle the error +} +``` + +### Using JSON Search Terms in PostgreSQL + +When searching encrypted JSON columns, you use the `searchableJson` query type which supports both path access and containment operators. + +#### Path Search (Access Operator) + +Equivalent to `data->'path'->>'field' = 'value'`. + +```typescript +const terms = await protectClient.encryptQuery([{ + path: '$.user.email', // JSON path syntax + value: 'alice@example.com', + column: schema.metadata, + table: schema +}]) + +if (terms.failure) { + // Handle the error +} + +// The generated term contains a selector and the encrypted term +const term = terms.data[0] + +// EQL function equivalent to: metadata->'user'->>'email' = 'alice@example.com' +const query = ` + SELECT * FROM users + WHERE eql_ste_vec_u64_8_128_access(metadata, $1) = $2 +` +// Bind parameters: [term.s, term.c] +``` + +#### Containment Search + +Equivalent to `data @> '{"key": "value"}'`. 
+ +```typescript +const terms = await protectClient.encryptQuery([{ + contains: { tags: ['premium'] }, + column: schema.metadata, + table: schema +}]) + +if (terms.failure) { + // Handle the error +} + +// Containment terms return a vector of terms to match +const termVector = terms.data[0].sv + +// EQL function equivalent to: metadata @> '{"tags": ["premium"]}' +const query = ` + SELECT * FROM users + WHERE eql_ste_vec_u64_8_128_contains(metadata, $1) +` +// Bind parameter: [JSON.stringify(termVector)] +``` ## Search capabilities @@ -112,10 +345,11 @@ Use `.equality()` when you need to find exact matches: ```typescript // Find user with specific email -const term = await protectClient.createSearchTerms([{ +const term = await protectClient.encryptQuery([{ value: 'user@example.com', column: schema.email, table: schema, + queryType: queryTypes.equality, returnType: 'composite-literal' // Required for PostgreSQL composite types }]) @@ -136,10 +370,11 @@ Use `.freeTextSearch()` for text-based searches: ```typescript // Search for users with emails containing "example" -const term = await protectClient.createSearchTerms([{ +const term = await protectClient.encryptQuery([{ value: 'example', column: schema.email, table: schema, + queryType: queryTypes.freeTextSearch, // Use 'freeTextSearch' for text search queries returnType: 'composite-literal' }]) @@ -206,10 +441,11 @@ await client.query( ) // Search encrypted data -const searchTerm = await protectClient.createSearchTerms([{ +const searchTerm = await protectClient.encryptQuery([{ value: 'example.com', column: schema.email, table: schema, + queryType: queryTypes.freeTextSearch, // Use 'freeTextSearch' for text search returnType: 'composite-literal' }]) @@ -259,7 +495,8 @@ For Supabase users, we provide a specific implementation guide. [Read more about ## Performance optimization -TODO: make docs for creating Postgres Indexes on columns that require searches. 
At the moment EQL v2 doesn't support creating indexes while also using the out-of-the-box operator and operator families. The solution is to create an index using the EQL functions and then using the EQL functions directly in your SQL statments, which isn't the best experience. +> [!NOTE] +> Documentation for creating PostgreSQL indexes on encrypted columns is coming soon. Currently, EQL v2 requires using EQL functions directly in SQL statements when creating indexes. ### Didn't find what you wanted? diff --git a/docs/reference/supabase-sdk.md b/docs/reference/supabase-sdk.md index 594c3122..330370be 100644 --- a/docs/reference/supabase-sdk.md +++ b/docs/reference/supabase-sdk.md @@ -174,7 +174,7 @@ ALTER DEFAULT PRIVILEGES FOR ROLE postgres IN SCHEMA eql_v2 GRANT ALL ON SEQUENC When searching encrypted data, you need to convert the encrypted payload into a format that PostgreSQL and the Supabase SDK can understand. The encrypted payload needs to be converted to a raw composite type format by double stringifying the JSON: ```typescript -const searchTerms = await protectClient.createSearchTerms([ +const searchTerms = await protectClient.encryptQuery([ { value: 'billy@example.com', column: users.email, @@ -189,7 +189,7 @@ const searchTerm = searchTerms.data[0] For certain queries, when including the encrypted search term with an operator that uses the string logic syntax, you need to use the 'escaped-composite-literal' return type: ```typescript -const searchTerms = await protectClient.createSearchTerms([ +const searchTerms = await protectClient.encryptQuery([ { value: 'billy@example.com', column: users.email, @@ -208,7 +208,7 @@ Here are examples of different ways to search encrypted data using the Supabase ### Equality Search ```typescript -const searchTerms = await protectClient.createSearchTerms([ +const searchTerms = await protectClient.encryptQuery([ { value: 'billy@example.com', column: users.email, @@ -226,7 +226,7 @@ const { data, error } = await supabase 
### Pattern Matching Search ```typescript -const searchTerms = await protectClient.createSearchTerms([ +const searchTerms = await protectClient.encryptQuery([ { value: 'example.com', column: users.email, @@ -247,7 +247,7 @@ When you need to search for multiple encrypted values, you can use the IN operat ```typescript // Encrypt multiple search terms -const searchTerms = await protectClient.createSearchTerms([ +const searchTerms = await protectClient.encryptQuery([ { value: 'value1', column: users.name, @@ -275,7 +275,7 @@ You can combine multiple encrypted search conditions using the `.or()` syntax. T ```typescript // Encrypt search terms for different columns -const searchTerms = await protectClient.createSearchTerms([ +const searchTerms = await protectClient.encryptQuery([ { value: 'user@example.com', column: users.email, diff --git a/examples/basic/index.ts b/examples/basic/index.ts index 9e9e8fce..1926eb73 100644 --- a/examples/basic/index.ts +++ b/examples/basic/index.ts @@ -1,6 +1,7 @@ import 'dotenv/config' import readline from 'node:readline' import { protectClient, users } from './protect' +import { queryTypes } from '@cipherstash/protect' const rl = readline.createInterface({ input: process.stdin, @@ -69,6 +70,54 @@ async function main() { console.log('Bulk encrypted data:', bulkEncryptResult.data) + const queryData = await protectClient.encryptQuery('test', { + column: users.name, + table: users, + queryType: queryTypes.equality, + }) + + if (queryData.failure) { + throw new Error(`[protect]: ${queryData.failure.message}`) + } + + console.log('Query data:', queryData.data) + + const queryData1 = await protectClient.encryptQuery('test', { + column: users.name, + table: users, + queryType: queryTypes.freeTextSearch, + }) + + if (queryData1.failure) { + throw new Error(`[protect]: ${queryData1.failure.message}`) + } + + console.log('Query data:', queryData1.data) + + const queryData2 = await protectClient.encryptQuery('test', { + column: users.name, + 
table: users, + queryType: queryTypes.orderAndRange, + }) + + if (queryData2.failure) { + throw new Error(`[protect]: ${queryData2.failure.message}`) + } + + console.log('Query data:', queryData2.data) + + // const queryData3 = await protectClient.encryptQuery('test', { + // path: '$.name', + // column: users.data, + // table: users, + // }) + + // if (queryData3.failure) { + // throw new Error(`[protect]: ${queryData3.failure.message}`) + // } + + // console.log('Query data:', queryData3.data) + rl.close() } diff --git a/examples/basic/protect.ts b/examples/basic/protect.ts index 0feb8f63..b54e3335 100644 --- a/examples/basic/protect.ts +++ b/examples/basic/protect.ts @@ -7,7 +7,8 @@ import { } from '@cipherstash/protect' export const users = csTable('users', { - name: csColumn('name'), + name: csColumn('name').equality().orderAndRange().freeTextSearch(), + data: csColumn('data').dataType('json').searchableJson(), }) const config: ProtectClientConfig = { diff --git a/packages/drizzle/README.md b/packages/drizzle/README.md index 2f4e82ff..f673316e 100644 --- a/packages/drizzle/README.md +++ b/packages/drizzle/README.md @@ -248,7 +248,7 @@ const results = await db ``` > [!TIP] -> **Performance Tip**: Using `protectOps.and()` batches all encryption operations into a single `createSearchTerms` call, which is more efficient than awaiting each operator individually. +> **Performance Tip**: Using `protectOps.and()` batches all encryption operations into a single `encryptQuery` call, which is more efficient than awaiting each operator individually. 
## Available Operators diff --git a/packages/drizzle/__tests__/json-operators.test.ts b/packages/drizzle/__tests__/json-operators.test.ts new file mode 100644 index 00000000..90002f03 --- /dev/null +++ b/packages/drizzle/__tests__/json-operators.test.ts @@ -0,0 +1,633 @@ +import { describe, expect, it } from 'vitest' +import { pgTable } from 'drizzle-orm/pg-core' +import { encryptedType, getEncryptedColumnConfig, extractProtectSchema } from '../src/pg' +import { normalizePath, JsonPathBuilder, isLazyJsonOperator, type LazyJsonOperator } from '../src/pg/json-operators' + +describe('searchableJson column config', () => { + it('should store searchableJson config on encrypted column', () => { + const testTable = pgTable('test', { + metadata: encryptedType<{ user: { email: string } }>('metadata', { + dataType: 'json', + searchableJson: true, + }), + }) + + const config = getEncryptedColumnConfig('metadata', testTable.metadata) + expect(config).toBeDefined() + expect(config?.searchableJson).toBe(true) + expect(config?.dataType).toBe('json') + }) + + it('should default searchableJson to undefined when not specified', () => { + const testTable = pgTable('test', { + profile: encryptedType<{ name: string }>('profile', { + dataType: 'json', + }), + }) + + const config = getEncryptedColumnConfig('profile', testTable.profile) + expect(config).toBeDefined() + expect(config?.searchableJson).toBeUndefined() + }) +}) + +describe('schema extraction with searchableJson', () => { + it('should extract searchableJson config to ProtectColumn', () => { + const testTable = pgTable('test_json', { + metadata: encryptedType<{ user: { email: string } }>('metadata', { + dataType: 'json', + searchableJson: true, + }), + }) + + const protectSchema = extractProtectSchema(testTable) + const builtSchema = protectSchema.build() + + // The column should have ste_vec index configured + expect(builtSchema.columns.metadata).toBeDefined() + const columnConfig = builtSchema.columns.metadata + 
expect(columnConfig.indexes.ste_vec).toBeDefined() + }) + + it('should not add ste_vec index when searchableJson is not set', () => { + const testTable = pgTable('test_json_no_search', { + profile: encryptedType<{ name: string }>('profile', { + dataType: 'json', + }), + }) + + const protectSchema = extractProtectSchema(testTable) + const builtSchema = protectSchema.build() + + expect(builtSchema.columns.profile).toBeDefined() + const columnConfig = builtSchema.columns.profile + expect(columnConfig.indexes.ste_vec).toBeUndefined() + }) +}) + +describe('normalizePath', () => { + it('should strip $. prefix from JSONPath format', () => { + expect(normalizePath('$.user.email')).toBe('user.email') + }) + + it('should handle root path $', () => { + expect(normalizePath('$')).toBe('') + }) + + it('should pass through dot notation unchanged', () => { + expect(normalizePath('user.email')).toBe('user.email') + }) + + it('should handle array index notation', () => { + expect(normalizePath('$.items[0].name')).toBe('items[0].name') + }) + + it('should handle empty string', () => { + expect(normalizePath('')).toBe('') + }) +}) + +describe('JsonPathBuilder', () => { + const testTable = pgTable('test_builder', { + metadata: encryptedType<{ user: { email: string } }>('metadata', { + dataType: 'json', + searchableJson: true, + }), + }) + + it('should be instantiable with column and path', () => { + const builder = new JsonPathBuilder( + testTable.metadata, + 'user.email', + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, // protectClient mock + ) + + expect(builder).toBeDefined() + expect(builder.getPath()).toBe('user.email') + }) +}) + +describe('LazyJsonOperator', () => { + it('should identify lazy JSON operators with value encryption', () => { + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator: 'json_eq', + path: 'user.email', + value: 'test@example.com', + encryptionType: 'value', + columnInfo: {} 
as any, + execute: () => ({} as any), + } + + expect(isLazyJsonOperator(lazyOp)).toBe(true) + }) + + it('should identify lazy JSON operators with selector encryption', () => { + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator: 'json_array_length_gt', + path: 'items', + comparisonValue: 5, + encryptionType: 'selector', + columnInfo: {} as any, + execute: () => ({} as any), + } + + expect(isLazyJsonOperator(lazyOp)).toBe(true) + }) + + it('should identify lazy JSON operators with no encryption', () => { + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator: 'json_array_length_gt', + path: '', // root path + comparisonValue: 5, + encryptionType: 'none', + columnInfo: {} as any, + execute: () => ({} as any), + } + + expect(isLazyJsonOperator(lazyOp)).toBe(true) + }) + + it('should return false for regular lazy operators', () => { + // Note: This tests that isLazyJsonOperator correctly distinguishes JSON operators + // from regular lazy operators. The `needsEncryption` field is used by regular + // lazy operators (in operators.ts), NOT by JSON operators. + // JSON operators use `encryptionType: 'value' | 'selector' | 'none'` instead. 
+ const regularLazyOp = { + __isLazyOperator: true, + operator: 'eq', + left: {}, + right: 'value', + needsEncryption: true, // Regular lazy operator field - NOT used for JSON operators + columnInfo: {}, + execute: () => ({}), + } + + expect(isLazyJsonOperator(regularLazyOp)).toBe(false) + }) + + it('should return false for non-objects', () => { + expect(isLazyJsonOperator(null)).toBe(false) + expect(isLazyJsonOperator(undefined)).toBe(false) + expect(isLazyJsonOperator('string')).toBe(false) + }) +}) + +describe('JsonPathBuilder value methods', () => { + const rootBuilder = new JsonPathBuilder( + { name: 'metadata' } as any, + '', // root path + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, + ) + + const nestedBuilder = new JsonPathBuilder( + { name: 'metadata' } as any, + 'items', // nested path + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, + ) + + it('get() should return Promise resolving to SQL expression', async () => { + // Note: Full test requires mock protectClient for selector encryption + // This tests the root path case which doesn't need encryption + const sqlExpr = await rootBuilder.get() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('getSync() on root should return SQL expression', () => { + const sqlExpr = rootBuilder.getSync() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('getSync() on non-root without selector should throw', () => { + expect(() => nestedBuilder.getSync()).toThrow(/selector/) + }) + + it('getSync() on non-root with selector should return SQL expression', () => { + const selector = 'pre_encrypted_selector_hash' + const sqlExpr = nestedBuilder.getSync(selector) + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('arrayLength() should return a new JsonPathBuilder in array-length mode', () => { + const lengthBuilder = 
nestedBuilder.arrayLength() + expect(lengthBuilder).toBeInstanceOf(JsonPathBuilder) + }) + + it('arrayLength().gt() on non-root path should use selector encryption', () => { + const lazyOp = nestedBuilder.arrayLength().gt(5) + expect(lazyOp.operator).toBe('json_array_length_gt') + expect(lazyOp.comparisonValue).toBe(5) + expect(lazyOp.encryptionType).toBe('selector') // Non-root needs selector + }) + + it('arrayLength().gt() on root path should use no encryption', () => { + const lazyOp = rootBuilder.arrayLength().gt(5) + expect(lazyOp.operator).toBe('json_array_length_gt') + expect(lazyOp.comparisonValue).toBe(5) + expect(lazyOp.encryptionType).toBe('none') // Root needs no encryption + }) +}) + +describe('JsonPathBuilder.eq()', () => { + it('should return a lazy JSON operator with value encryption', () => { + const builder = new JsonPathBuilder( + {} as any, // column mock + 'user.email', + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, // protectClient mock + ) + + const lazyOp = builder.eq('test@example.com') + + expect(isLazyJsonOperator(lazyOp)).toBe(true) + expect(lazyOp.operator).toBe('json_eq') + expect(lazyOp.path).toBe('user.email') + expect(lazyOp.value).toBe('test@example.com') + expect(lazyOp.encryptionType).toBe('value') + }) +}) + +describe('JsonPathBuilder array methods', () => { + const rootBuilder = new JsonPathBuilder( + { name: 'tags' } as any, + '', // root path - column IS the array + { columnName: 'tags', config: { searchableJson: true } } as any, + {} as any, + ) + + const nestedBuilder = new JsonPathBuilder( + { name: 'metadata' } as any, + 'items', // nested path + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, + ) + + it('elements() on root should return Promise resolving to SQL', async () => { + const sqlExpr = await rootBuilder.elements() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('elementsSync() on root should return 
SQL directly', () => { + const sqlExpr = rootBuilder.elementsSync() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('elementsSync() on nested path without selector should throw', () => { + expect(() => nestedBuilder.elementsSync()).toThrow(/selector/) + }) + + it('elementsSync() on nested path with selector should return SQL', () => { + const selector = 'pre_encrypted_selector_hash' + const sqlExpr = nestedBuilder.elementsSync(selector) + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('elementsText() on root should return Promise resolving to SQL', async () => { + const sqlExpr = await rootBuilder.elementsText() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('elementsTextSync() on root should return SQL directly', () => { + const sqlExpr = rootBuilder.elementsTextSync() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('elementsTextSync() on nested path without selector should throw', () => { + expect(() => nestedBuilder.elementsTextSync()).toThrow(/selector/) + }) +}) + +describe('JsonPathBuilder comparison methods', () => { + const builder = new JsonPathBuilder( + {} as any, + 'user.role', + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, + ) + + it('ne() should return json_ne operator', () => { + const lazyOp = builder.ne('admin') + expect(lazyOp.operator).toBe('json_ne') + }) + + it('contains() should return json_contains operator', () => { + const lazyOp = builder.contains({ role: 'admin' }) + expect(lazyOp.operator).toBe('json_contains') + }) + + it('containedBy() should return json_contained_by operator', () => { + const lazyOp = builder.containedBy({ permissions: ['read', 'write'] }) + expect(lazyOp.operator).toBe('json_contained_by') + }) +}) + +describe('JsonPathBuilder path query methods', () => { + const builder = new JsonPathBuilder( + { name: 
'metadata' } as any, + 'items', // dot-notation path + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, + ) + + const rootBuilder = new JsonPathBuilder( + { name: 'metadata' } as any, + '', // root path + { columnName: 'metadata', config: { searchableJson: true } } as any, + {} as any, + ) + + it('pathExtract() should return SQL with encrypted selector for non-root', async () => { + // pathExtract() encrypts the current path to get a selector + // Then uses eql_v2.jsonb_path_query(column, selector) + const sqlExpr = await builder.pathExtract() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('pathExtractFirst() should return SQL with encrypted selector for non-root', async () => { + const sqlExpr = await builder.pathExtractFirst() + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) + + it('pathExtract() on root should throw (SRF not applicable to root)', async () => { + await expect(rootBuilder.pathExtract()).rejects.toThrow(/root path/) + }) + + it('pathExtractFirst() on root should return column directly', async () => { + const sqlExpr = await rootBuilder.pathExtractFirst() + expect(sqlExpr).toBeDefined() + }) + + it('pathExtractWithSelector() should accept pre-encrypted selector', () => { + // For advanced users who already have an encrypted selector + const selector = 'pre_encrypted_selector_hash' + const sqlExpr = builder.pathExtractWithSelector(selector) + expect(sqlExpr).toBeDefined() + expect(typeof sqlExpr.getSQL).toBe('function') + }) +}) + +describe('createProtectOperators.jsonPath()', () => { + it('should return JsonPathBuilder for searchableJson column', async () => { + const testTable = pgTable('json_test', { + metadata: encryptedType<{ user: { email: string } }>('metadata', { + dataType: 'json', + searchableJson: true, + }), + }) + + const { createProtectOperators } = await import('../src/pg/operators.js') + const protectClientMock = {} as 
any + + const ops = createProtectOperators(protectClientMock) + + const builder = ops.jsonPath(testTable.metadata, '$.user.email') + expect(builder).toBeInstanceOf(JsonPathBuilder) + expect(builder.getPath()).toBe('user.email') + }) + + it('should throw error for column without searchableJson', async () => { + const testTable = pgTable('json_test_no_search', { + profile: encryptedType<{ name: string }>('profile', { + dataType: 'json', + }), + }) + + const { createProtectOperators } = await import('../src/pg/operators.js') + const protectClientMock = {} as any + + const ops = createProtectOperators(protectClientMock) + + expect(() => ops.jsonPath(testTable.profile, '$.name')).toThrow( + /searchableJson.*required/i + ) + }) + + it('should throw error for searchableJson without dataType json', async () => { + const testTable = pgTable('json_test_wrong_type', { + // Invalid config: searchableJson requires dataType: 'json' + data: encryptedType('data', { + searchableJson: true, + // Missing dataType: 'json' + }), + }) + + const { createProtectOperators } = await import('../src/pg/operators.js') + const protectClientMock = {} as any + + const ops = createProtectOperators(protectClientMock) + + expect(() => ops.jsonPath(testTable.data, '$.path')).toThrow( + /searchableJson.*dataType.*json/i + ) + }) +}) + +describe('or() with JSON operators', () => { + it('should accept JSON operators in condition list', async () => { + const testTable = pgTable('or_test', { + metadata: encryptedType<{ user: { email: string; role: string } }>('metadata', { + dataType: 'json', + searchableJson: true, + }), + name: encryptedType('name', { + equality: true, + }), + }) + + const schema = extractProtectSchema(testTable) + const { createProtectOperators } = await import('../src/pg/operators.js') + + // Mock protectClient with encryptQuery that returns mock encrypted values + const protectClient = { + schemas: [schema], + encryptQuery: async () => ({ + data: [{ s: 'mock_selector', v: 
'mock_encrypted_value' }], + failure: null, + }), + createSearchTerms: async () => ({ + data: ['mock_search_term'], + failure: null, + }), + } as any + const ops = createProtectOperators(protectClient) + + // Mix of regular and JSON operators - verifies both are properly handled + const result = await ops.or( + ops.eq(testTable.name, 'John'), // Regular operator + ops.jsonPath(testTable.metadata, '$.user.role').eq('admin'), // JSON operator + ) + + expect(result).toBeDefined() + expect(typeof result.getSQL).toBe('function') + }) +}) + +describe('and() with JSON operators', () => { + it('should accept JSON operators in condition list', async () => { + const testTable = pgTable('and_test', { + metadata: encryptedType<{ user: { email: string; role: string } }>('metadata', { + dataType: 'json', + searchableJson: true, + }), + name: encryptedType('name', { + equality: true, + }), + }) + + const schema = extractProtectSchema(testTable) + const { createProtectOperators } = await import('../src/pg/operators.js') + + // Mock protectClient with encryptQuery that returns mock encrypted values + const protectClient = { + schemas: [schema], + encryptQuery: async () => ({ + data: [{ s: 'mock_selector', v: 'mock_encrypted_value' }], + failure: null, + }), + createSearchTerms: async () => ({ + data: ['mock_search_term'], + failure: null, + }), + } as any + const ops = createProtectOperators(protectClient) + + // Mix of regular and JSON operators - verifies both are properly handled + const result = await ops.and( + ops.eq(testTable.name, 'John'), // Regular operator + ops.jsonPath(testTable.metadata, '$.user.role').eq('admin'), // JSON operator + ) + + expect(result).toBeDefined() + expect(typeof result.getSQL).toBe('function') + }) +}) + +describe('package exports', () => { + it('should export JsonPathBuilder class', () => { + expect(JsonPathBuilder).toBeDefined() + expect(typeof JsonPathBuilder).toBe('function') + }) + + it('should export isLazyJsonOperator type guard', () => { 
+ expect(isLazyJsonOperator).toBeDefined() + expect(typeof isLazyJsonOperator).toBe('function') + }) + + it('should export normalizePath helper', () => { + expect(normalizePath).toBeDefined() + expect(normalizePath('$.user.email')).toBe('user.email') + }) +}) + +describe('LazyJsonOperator.execute()', () => { + it('json_eq should produce correct SQL with encrypted value', async () => { + const { createJsonOperatorExecute } = await import('../src/pg/json-operators.js') + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator: 'json_eq', + path: 'user.email', + value: 'test@example.com', + encryptionType: 'value', + columnInfo: { columnName: 'metadata' } as any, + execute: createJsonOperatorExecute('json_eq', { name: 'metadata' } as any, 'user.email'), + } + + // Mock encrypted value (in practice this would be from protectClient.encryptQuery) + const encryptedValue = { s: 'selector_hash', v: 'encrypted_value' } + const sqlResult = lazyOp.execute(encryptedValue) + + // Check that the SQL is generated correctly + const sqlString = JSON.stringify(sqlResult) + expect(sqlString).toContain('jsonb_path_match') + expect(typeof sqlResult).toBe('object') + expect('getSQL' in sqlResult).toBe(true) + }) + + it('json_contains should produce correct SQL', async () => { + const { createJsonOperatorExecute } = await import('../src/pg/json-operators.js') + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator: 'json_contains', + path: '', + value: { role: 'admin' }, + encryptionType: 'value', + columnInfo: { columnName: 'metadata' } as any, + execute: createJsonOperatorExecute('json_contains', { name: 'metadata' } as any, ''), + } + + const encryptedValue = { o: { cs_ste_vec_index: 'encrypted_json' } } + const sqlResult = lazyOp.execute(encryptedValue) + + // Check that the SQL is generated correctly + const sqlString = JSON.stringify(sqlResult) + expect(sqlString).toContain('jsonb_contains') + 
expect(typeof sqlResult).toBe('object') + expect('getSQL' in sqlResult).toBe(true) + }) + + it('json_array_length_gt on root should produce correct SQL without encryption', async () => { + const rootBuilder = new JsonPathBuilder( + { name: 'tags' } as any, + '', // root path + { columnName: 'tags', config: { searchableJson: true } } as any, + {} as any, + ) + + const lazyOp = rootBuilder.arrayLength().gt(5) + const sqlResult = await lazyOp // Await to execute + + // Verify the result contains expected SQL elements + const sqlString = JSON.stringify(sqlResult) + expect(sqlString).toContain('jsonb_array_length') + expect(sqlString).toContain('eql_v2') + expect(sqlString).toContain('tags') // column name + expect(typeof sqlResult).toBe('object') + expect('getSQL' in sqlResult).toBe(true) + }) + + it('json_array_length_gt on nested path should use encrypted selector', async () => { + const nestedBuilder = new JsonPathBuilder( + { name: 'metadata' } as any, + 'items', // nested path + { columnName: 'metadata', config: { searchableJson: true } } as any, + { + encryptQuery: async () => ({ + failure: null, + data: [{ s: 'encrypted_selector_hash' }], + }), + } as any, + ) + + const lazyOp = nestedBuilder.arrayLength().gt(5) + const sqlResult = await lazyOp // Await to execute + + // Verify the result contains expected SQL elements + const sqlString = JSON.stringify(sqlResult) + expect(sqlString).toContain('jsonb_array_length') + expect(sqlString).toContain('jsonb_path_query_first') // For nested path extraction + expect(sqlString).toContain('eql_v2') + expect(sqlString).toContain('metadata') // column name + expect(typeof sqlResult).toBe('object') + expect('getSQL' in sqlResult).toBe(true) + }) +}) diff --git a/packages/drizzle/src/pg/index.ts b/packages/drizzle/src/pg/index.ts index f8f8bce2..b7dfb097 100644 --- a/packages/drizzle/src/pg/index.ts +++ b/packages/drizzle/src/pg/index.ts @@ -23,6 +23,11 @@ export type EncryptedColumnConfig = { * Enable order and range index 
for sorting and range queries. */ orderAndRange?: boolean + /** + * Enable searchable JSON index for JSON path and containment queries. + * Requires dataType to be 'json'. + */ + searchableJson?: boolean } /** @@ -188,3 +193,13 @@ export { extractProtectSchema } from './schema-extraction.js' // Re-export operators export { createProtectOperators } from './operators.js' + +// Re-export JSON operator utilities +export { + JsonPathBuilder, + isLazyJsonOperator, + normalizePath, + type LazyJsonOperator, + type JsonOperatorType, + type JsonEncryptionType, +} from './json-operators.js' diff --git a/packages/drizzle/src/pg/json-operators.ts b/packages/drizzle/src/pg/json-operators.ts new file mode 100644 index 00000000..e9bcf39c --- /dev/null +++ b/packages/drizzle/src/pg/json-operators.ts @@ -0,0 +1,779 @@ +import { sql, type SQLWrapper, type SQL, bindIfParam } from 'drizzle-orm' +import type { ProtectClient } from '@cipherstash/protect/client' +import type { + JsonPathQueryTerm, + JsonContainsQueryTerm, + JsonContainedByQueryTerm, + QueryTerm, +} from '@cipherstash/protect' +import type { ColumnInfo } from './operators.js' + +/** + * Normalizes a JSON path to dot notation format. + * Accepts both JSONPath format ($.user.email) and dot notation (user.email). + * + * @param path - The path in JSONPath or dot notation format + * @returns The normalized path in dot notation format + */ +export function normalizePath(path: string): string { + if (path === '$') { + return '' + } + if (path.startsWith('$.')) { + return path.slice(2) + } + return path +} + +/** + * JSON operator types for lazy evaluation. + * Array-length operators are separate to distinguish their encryption semantics. 
+ */ +export type JsonOperatorType = + | 'json_eq' + | 'json_ne' + | 'json_contains' + | 'json_contained_by' + | 'json_array_length_gt' + | 'json_array_length_gte' + | 'json_array_length_lt' + | 'json_array_length_lte' + +/** + * Encryption type for JSON operators: + * - 'value': Encrypt the comparison value (eq, ne, contains, containedBy) + * - 'selector': Encrypt the path to get selector hash (array-length on non-root) + * - 'none': No encryption needed (array-length on root path) + */ +export type JsonEncryptionType = 'value' | 'selector' | 'none' + +/** + * Lazy JSON operator that defers encryption until awaited or batched. + * Extends the lazy operator pattern to work with JSON path queries. + */ +export interface LazyJsonOperator { + readonly __isLazyOperator: true + readonly __isJsonOperator: true + readonly operator: JsonOperatorType + readonly path: string + readonly columnInfo: ColumnInfo + /** What type of encryption is needed for this operator */ + readonly encryptionType: JsonEncryptionType + /** For value-based operators (eq, contains, etc.) - the value to encrypt */ + readonly value?: unknown + /** For array-length operators - the plain numeric comparison value (NOT encrypted) */ + readonly comparisonValue?: number + /** Execute with encrypted payload (encrypted value OR selector depending on encryptionType) */ + execute(encryptedPayload?: unknown): SQL +} + +/** + * Type guard for lazy JSON operators + */ +export function isLazyJsonOperator(value: unknown): value is LazyJsonOperator { + return ( + typeof value === 'object' && + value !== null && + '__isLazyOperator' in value && + '__isJsonOperator' in value && + (value as LazyJsonOperator).__isLazyOperator === true && + (value as LazyJsonOperator).__isJsonOperator === true + ) +} + +/** + * Creates the execute function for JSON operators. + * Exported for testing and manual operator construction. 
+ *
+ * @param operator - Which JSON operator to generate SQL for
+ * @param column - The encrypted JSON column the operator targets
+ * @param path - Normalized (dot-notation) path; only consulted by array-length operators
+ * @param comparisonValue - Plain numeric operand for array-length operators.
+ *   Optional for backward compatibility; array-length operators throw when it is omitted.
+ * @returns A function that turns the encrypted payload (value or selector) into SQL
+ * @internal
+ */
+export function createJsonOperatorExecute(
+  operator: JsonOperatorType,
+  column: SQLWrapper,
+  path: string,
+  comparisonValue?: number,
+): (encryptedPayload?: unknown) => SQL {
+  return (encryptedPayload?: unknown) => {
+    switch (operator) {
+      case 'json_eq':
+        return sql`eql_v2.jsonb_path_match(${column}, ${bindIfParam(encryptedPayload, column)})`
+      case 'json_ne':
+        return sql`NOT eql_v2.jsonb_path_match(${column}, ${bindIfParam(encryptedPayload, column)})`
+      case 'json_contains':
+        return sql`eql_v2.jsonb_contains(${column}, ${bindIfParam(encryptedPayload, column)})`
+      case 'json_contained_by':
+        return sql`eql_v2.jsonb_contained_by(${column}, ${bindIfParam(encryptedPayload, column)})`
+      case 'json_array_length_gt':
+      case 'json_array_length_gte':
+      case 'json_array_length_lt':
+      case 'json_array_length_lte':
+        // For array-length operators the payload is the encrypted path selector,
+        // not an encrypted value; the numeric operand is passed through in plain form.
+        return createArrayLengthSql(
+          operator,
+          column,
+          path,
+          encryptedPayload as string | undefined,
+          comparisonValue,
+        )
+      default:
+        throw new Error(`Unknown JSON operator: ${operator}`)
+    }
+  }
+}
+
+/**
+ * Maps an array-length operator to its SQL comparison symbol.
+ * Anything that is not gte/lte/lt falls back to '>'.
+ * @internal
+ */
+function arrayLengthComparator(operator: JsonOperatorType): string {
+  switch (operator) {
+    case 'json_array_length_gte':
+      return '>='
+    case 'json_array_length_lte':
+      return '<='
+    case 'json_array_length_lt':
+      return '<'
+    default:
+      return '>'
+  }
+}
+
+/**
+ * Helper to create SQL for array-length comparisons.
+ *
+ * Root path (empty string): compares eql_v2.jsonb_array_length(column) directly.
+ * Nested path: extracts the array with eql_v2.jsonb_path_query_first(column, selector) first.
+ *
+ * @param operator - One of the json_array_length_* operators
+ * @param column - The encrypted JSON column
+ * @param path - Normalized path; empty (or whitespace-only) means root
+ * @param encryptedSelector - Encrypted selector for the path; required for nested paths
+ * @param comparisonValue - Plain numeric operand (never encrypted)
+ * @returns SQL expression comparing the array length against comparisonValue
+ * @throws Error if a nested path has no encrypted selector, or if comparisonValue is omitted
+ * @internal
+ */
+function createArrayLengthSql(
+  operator: JsonOperatorType,
+  column: SQLWrapper,
+  path: string,
+  encryptedSelector?: string,
+  comparisonValue?: number,
+): SQL {
+  const isRoot = path.trim() === ''
+
+  // Checked before the comparison value so callers that omit both still see
+  // the selector error first, as they did previously.
+  if (!isRoot && !encryptedSelector) {
+    throw new Error(
+      `Array length on nested path "${path}" requires encrypted selector. Use encryptionType: 'selector' and pass the encrypted selector to execute().`,
+    )
+  }
+
+  if (comparisonValue === undefined) {
+    throw new Error(
+      'Array length SQL generation requires comparison value. ' +
+        'This function should be called from createArrayLengthOperator context.',
+    )
+  }
+
+  // The comparator comes from a fixed set of symbols, so sql.raw() is safe here.
+  const compOp = arrayLengthComparator(operator)
+
+  if (isRoot) {
+    return sql`eql_v2.jsonb_array_length(${column}) ${sql.raw(compOp)} ${comparisonValue}`
+  }
+
+  return sql`eql_v2.jsonb_array_length(eql_v2.jsonb_path_query_first(${column}, ${encryptedSelector})) ${sql.raw(compOp)} ${comparisonValue}`
+}
+
+/**
+ * Builder for JSON path operations on encrypted columns.
+ * Provides chainable methods for comparison and value extraction.
+ */
+export class JsonPathBuilder {
+  private column: SQLWrapper
+  private path: string
+  private columnInfo: ColumnInfo
+  private protectClient: ProtectClient
+  /** When true, comparison methods (gt, gte, lt, lte) create array-length operators */
+  private isArrayLengthMode: boolean
+
+  constructor(
+    column: SQLWrapper,
+    path: string,
+    columnInfo: ColumnInfo,
+    protectClient: ProtectClient,
+    isArrayLengthMode = false,
+  ) {
+    this.column = column
+    this.path = path
+    this.columnInfo = columnInfo
+    this.protectClient = protectClient
+    this.isArrayLengthMode = isArrayLengthMode
+  }
+
+  /**
+   * Get the normalized path for this builder.
+   * @internal
+   */
+  getPath(): string {
+    return this.path
+  }
+
+  /**
+   * Get the column for this builder.
+   * @internal
+   */
+  getColumn(): SQLWrapper {
+    return this.column
+  }
+
+  /**
+   * Get the column info for this builder.
+   * @internal
+   */
+  getColumnInfo(): ColumnInfo {
+    return this.columnInfo
+  }
+
+  /**
+   * Equality comparison at the JSON path.
+   * Returns a lazy operator for deferred encryption and batching.
+   *
+   * @param value - The value to compare against
+   * @returns A lazy JSON operator that can be awaited or batched
+   *
+   * @example
+   * ```typescript
+   * await ops.jsonPath(users.metadata, '$.user.email').eq('test@example.com')
+   * ```
+   */
+  eq(value: unknown): LazyJsonOperator & Promise {
+    return this.createLazyJsonOperator('json_eq', value)
+  }
+
+  /**
+   * Not equal comparison at the JSON path.
+   *
+   * @param value - The value to compare against
+   * @returns A lazy JSON operator
+   */
+  ne(value: unknown): LazyJsonOperator & Promise {
+    return this.createLazyJsonOperator('json_ne', value)
+  }
+
+  /**
+   * JSON containment check (@> operator).
+ * Checks if the JSON at this path contains the specified object. + * + * @param obj - The object to check containment for + * @returns A lazy JSON operator + * + * @example + * ```typescript + * await ops.jsonPath(users.metadata, '$').contains({ role: 'admin' }) + * ``` + */ + contains(obj: Record): LazyJsonOperator & Promise { + return this.createLazyJsonOperator('json_contains', obj) + } + + /** + * Reverse JSON containment check (<@ operator). + * Checks if the JSON at this path is contained by the specified object. + * + * @param obj - The object to check containment against + * @returns A lazy JSON operator + */ + containedBy(obj: Record): LazyJsonOperator & Promise { + return this.createLazyJsonOperator('json_contained_by', obj) + } + + /** + * Extract values at the current path using an encrypted selector. + * Encrypts the current path to get a selector, then queries with it. + * + * IMPORTANT: This is a set-returning function (SRF) - it returns multiple rows. + * For root path, use the column directly or pathExtractFirst() instead. + * + * @throws Error if called on root path (use column directly for root) + * @returns Promise resolving to SQL expression for all matching values (SRF) + * + * @example + * ```typescript + * // Extract all items (returns multiple rows) + * const items = await ops.jsonPath(users.metadata, '$.items').pathExtract() + * ``` + */ + async pathExtract(): Promise { + if (this.isRootPath()) { + throw new Error( + 'pathExtract() is not supported for root path. For root, use the column directly in your query, or use pathExtractFirst() which returns a single value.', + ) + } + + // Non-root: encrypt path to get selector, then use jsonb_path_query (SRF) + const selector = await encryptPathSelector( + this.protectClient, + this.path, + this.columnInfo, + ) + return sql`eql_v2.jsonb_path_query(${this.column}, ${selector})` + } + + /** + * Extract the first value at the current path using an encrypted selector. 
+ * + * For root path: returns the column directly (the whole JSON IS the first/only value) + * For nested path: encrypts path to selector and uses eql_v2.jsonb_path_query_first + * + * @returns Promise resolving to SQL expression for the first matching value + */ + async pathExtractFirst(): Promise { + if (this.isRootPath()) { + // Root path: the column itself is the first/only value + return sql`${this.column}` + } + + // Non-root: encrypt path to get selector + const selector = await encryptPathSelector( + this.protectClient, + this.path, + this.columnInfo, + ) + return sql`eql_v2.jsonb_path_query_first(${this.column}, ${selector})` + } + + /** + * Extract values using a pre-encrypted selector. + * For advanced users who already have an encrypted selector hash. + * + * @param selector - Pre-encrypted selector hash + * @returns SQL expression for matching values + */ + pathExtractWithSelector(selector: string): SQL { + return sql`eql_v2.jsonb_path_query(${this.column}, ${selector})` + } + + /** + * Extract first value using a pre-encrypted selector. + * For advanced users who already have an encrypted selector hash. + * + * @param selector - Pre-encrypted selector hash + * @returns SQL expression for the first matching value + */ + pathExtractFirstWithSelector(selector: string): SQL { + return sql`eql_v2.jsonb_path_query_first(${this.column}, ${selector})` + } + + /** + * Extract the value at this JSON path. + * Returns a Promise resolving to SQL expression for use in SELECT clauses. 
+ * + * For root path: returns the column directly + * For nested path: encrypts path to selector and uses eql_v2.jsonb_path_query_first + * + * @returns Promise resolving to SQL expression for the value at the path + * + * @example + * ```typescript + * db.select({ + * email: await ops.jsonPath(users.metadata, '$.user.email').get() + * }).from(users) + * ``` + */ + async get(): Promise { + if (this.isRootPath()) { + // Root path: return column directly + return sql`${this.column}` + } + + // Non-root: encrypt path to get selector, then use jsonb_path_query_first + const selector = await encryptPathSelector( + this.protectClient, + this.path, + this.columnInfo, + ) + return sql`eql_v2.jsonb_path_query_first(${this.column}, ${selector})` + } + + /** + * Sync version of get() for use with pre-encrypted selectors. + * For root path, returns the column directly. + * For non-root paths, use get() (async) instead. + * + * @throws Error if called on non-root path without selector + * @param selector - Optional pre-encrypted selector for non-root paths + * @returns SQL expression for the value at the path + */ + getSync(selector?: string): SQL { + if (this.isRootPath()) { + return sql`${this.column}` + } + + if (!selector) { + throw new Error( + 'getSync() requires a selector for non-root paths. Use get() (async) instead, or provide a pre-encrypted selector.', + ) + } + + return sql`eql_v2.jsonb_path_query_first(${this.column}, ${selector})` + } + + /** + * Expand array elements to rows. + * Returns a Promise resolving to SQL expression using jsonb_array_elements. 
+ * + * For root path: eql_v2.jsonb_array_elements(column) + * For nested path: eql_v2.jsonb_array_elements(eql_v2.jsonb_path_query(column, selector)) + * + * @returns Promise resolving to SQL expression for array expansion + */ + async elements(): Promise { + if (this.isRootPath()) { + return sql`eql_v2.jsonb_array_elements(${this.column})` + } + + const selector = await encryptPathSelector( + this.protectClient, + this.path, + this.columnInfo, + ) + return sql`eql_v2.jsonb_array_elements(eql_v2.jsonb_path_query(${this.column}, ${selector}))` + } + + /** + * Expand array elements to text rows. + */ + async elementsText(): Promise { + if (this.isRootPath()) { + return sql`eql_v2.jsonb_array_elements_text(${this.column})` + } + + const selector = await encryptPathSelector( + this.protectClient, + this.path, + this.columnInfo, + ) + return sql`eql_v2.jsonb_array_elements_text(eql_v2.jsonb_path_query(${this.column}, ${selector}))` + } + + /** + * Sync version of elements() for root paths or with pre-encrypted selector. + */ + elementsSync(selector?: string): SQL { + if (this.isRootPath()) { + return sql`eql_v2.jsonb_array_elements(${this.column})` + } + + if (!selector) { + throw new Error( + 'elementsSync() requires a selector for non-root paths. Use elements() (async) instead, or provide a pre-encrypted selector.', + ) + } + + return sql`eql_v2.jsonb_array_elements(eql_v2.jsonb_path_query(${this.column}, ${selector}))` + } + + /** + * Sync version of elementsText() for root paths or with pre-encrypted selector. + */ + elementsTextSync(selector?: string): SQL { + if (this.isRootPath()) { + return sql`eql_v2.jsonb_array_elements_text(${this.column})` + } + + if (!selector) { + throw new Error( + 'elementsTextSync() requires a selector for non-root paths. 
Use elementsText() (async) instead, or provide a pre-encrypted selector.', + ) + } + + return sql`eql_v2.jsonb_array_elements_text(eql_v2.jsonb_path_query(${this.column}, ${selector}))` + } + + /** + * Get the length of the array at this JSON path. + * Returns a new JsonPathBuilder in "array-length mode" for comparison chaining. + * + * For root path: eql_v2.jsonb_array_length(column) + * For nested path: eql_v2.jsonb_array_length(eql_v2.jsonb_path_query_first(column, selector)) + * + * @returns A new JsonPathBuilder for array length comparisons + * + * @example + * ```typescript + * // Root array length + * await ops.jsonPath(users.tags, '$').arrayLength().gt(5) + * + * // Nested array length + * await ops.jsonPath(users.metadata, '$.items').arrayLength().gt(5) + * ``` + */ + arrayLength(): JsonPathBuilder { + // Return a new builder in array-length mode + // The original path is preserved (NOT modified with .__length__) + // The mode flag changes how gt/gte/lt/lte behave + return new JsonPathBuilder( + this.column, + this.path, // Keep original path + this.columnInfo, + this.protectClient, + true, // isArrayLengthMode = true + ) + } + + /** + * Greater than comparison. + * Behavior depends on mode: + * - In array-length mode: compares array length against numeric value + * - Otherwise: throws error (use eq() for value comparisons) + */ + gt(value: number): LazyJsonOperator & Promise { + if (!this.isArrayLengthMode) { + throw new Error( + 'gt() is only available after arrayLength(). Use eq() for value comparisons.', + ) + } + return this.createArrayLengthOperator('json_array_length_gt', value) + } + + /** + * Greater than or equal comparison (for arrayLength chaining). + */ + gte(value: number): LazyJsonOperator & Promise { + if (!this.isArrayLengthMode) { + throw new Error( + 'gte() is only available after arrayLength(). 
Use eq() for value comparisons.', + ) + } + return this.createArrayLengthOperator('json_array_length_gte', value) + } + + /** + * Less than comparison (for arrayLength chaining). + */ + lt(value: number): LazyJsonOperator & Promise { + if (!this.isArrayLengthMode) { + throw new Error( + 'lt() is only available after arrayLength(). Use eq() for value comparisons.', + ) + } + return this.createArrayLengthOperator('json_array_length_lt', value) + } + + /** + * Less than or equal comparison (for arrayLength chaining). + */ + lte(value: number): LazyJsonOperator & Promise { + if (!this.isArrayLengthMode) { + throw new Error( + 'lte() is only available after arrayLength(). Use eq() for value comparisons.', + ) + } + return this.createArrayLengthOperator('json_array_length_lte', value) + } + + /** + * Helper to determine if path is root (empty string or just whitespace) + */ + private isRootPath(): boolean { + return this.path === '' || this.path.trim() === '' + } + + /** + * Creates a lazy JSON operator for array-length comparisons. + * These have different encryption semantics than value-based operators: + * - Root path: no encryption needed + * - Non-root path: path selector needs encryption (NOT the comparison value) + * @internal + */ + private createArrayLengthOperator( + operator: JsonOperatorType, + comparisonValue: number, + ): LazyJsonOperator & Promise { + const column = this.column + const path = this.path + const columnInfo = this.columnInfo + const protectClient = this.protectClient + const isRoot = this.isRootPath() + + const compOp = operator.includes('_gte') + ? '>=' + : operator.includes('_gt') + ? '>' + : operator.includes('_lte') + ? '<=' + : operator.includes('_lt') + ? '<' + : '>' + + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator, + path, + comparisonValue, + columnInfo, + // Root path needs no encryption, non-root needs selector encryption + encryptionType: isRoot ? 
'none' : 'selector', + execute: (encryptedSelector?: string) => { + if (isRoot) { + return sql`eql_v2.jsonb_array_length(${column}) ${sql.raw(compOp)} ${comparisonValue}` + } + + if (!encryptedSelector) { + throw new Error( + `Array length on nested path "${path}" requires encrypted selector`, + ) + } + return sql`eql_v2.jsonb_array_length(eql_v2.jsonb_path_query_first(${column}, ${encryptedSelector})) ${sql.raw(compOp)} ${comparisonValue}` + }, + } + + // Create promise for direct await usage + const promise = new Promise((resolve, reject) => { + queueMicrotask(async () => { + try { + let selector: string | undefined + if (!isRoot) { + // Encrypt the path to get selector hash + selector = await encryptPathSelector( + protectClient, + path, + columnInfo, + ) + } + const result = lazyOp.execute(selector) + resolve(result) + } catch (error) { + reject(error) + } + }) + }) + + return Object.assign(promise, lazyOp) + } + + /** + * Creates a lazy JSON operator for deferred execution. + * @internal + */ + private createLazyJsonOperator( + operator: JsonOperatorType, + value: unknown, + ): LazyJsonOperator & Promise { + const column = this.column + const path = this.path + const columnInfo = this.columnInfo + const protectClient = this.protectClient + + // Create execute function using the factory + const executeFactory = createJsonOperatorExecute(operator, column, path) + + const lazyOp: LazyJsonOperator = { + __isLazyOperator: true, + __isJsonOperator: true, + operator, + path, + value, + encryptionType: 'value', // Value-based operators need the comparison value encrypted + columnInfo, + execute: executeFactory, + } + + // Create promise for direct await usage + // CRITICAL: Must encrypt the value before calling execute() + let executionStarted = false + const promise = new Promise((resolve, reject) => { + // No deferral happens here: startExecution() is invoked right away at the end of this executor + // The executionStarted flag only guards against queuing the microtask a second time + const 
startExecution = () => { + if (executionStarted) return + executionStarted = true + queueMicrotask(async () => { + try { + // Build QueryTerm and encrypt using same logic as and()/or() batching + const encrypted = await encryptSingleJsonOperator( + protectClient, + lazyOp, + ) + const result = lazyOp.execute(encrypted) + resolve(result) + } catch (error) { + reject(error) + } + }) + } + + // Start execution immediately - this maintains compatibility with the LazyOperator pattern + startExecution() + }) + + return Object.assign(promise, lazyOp) + } +} + +/** + * Encrypts a single JSON operator using the same logic as batch encryption. + * Used by both direct await and batched and()/or() operations. + * + * What is resolved depends on the operator: + * - encryptionType 'selector' (array length on a nested path): the encrypted path selector + * - json_eq / json_ne / json_contains / json_contained_by: the encrypted query term + * - anything else (e.g. array length on the root path): op.value unchanged + * + * @param protectClient - Client whose encryptQuery() performs the encryption + * @param op - The lazy JSON operator to prepare + * @returns The argument to pass to op.execute() + * @throws Error when encryptQuery() reports a failure + * @internal + */ +export async function encryptSingleJsonOperator( + protectClient: ProtectClient, + op: LazyJsonOperator, +): Promise { + // Selector-type operators expect the encrypted path selector in execute(), not a value. + // Resolve it here so batched and()/or() calls do not pass undefined, which makes execute() throw. + if (op.encryptionType === 'selector') { + return encryptPathSelector(protectClient, op.path, op.columnInfo) + } + + const { protectColumn, protectTable } = op.columnInfo + + if (!protectColumn || !protectTable) { + // If columnInfo is incomplete (e.g., in tests with mocks), return the value as-is + // In production, the columnInfo will always have these properties set + return op.value + } + + // Build QueryTerm based on operator type + let queryTerm: QueryTerm + + if (op.operator === 'json_eq' || op.operator === 'json_ne') { + queryTerm = { + path: op.path, + value: op.value as string | number, + column: protectColumn, + table: protectTable, + } satisfies JsonPathQueryTerm + } else if (op.operator === 'json_contains') { + queryTerm = { + contains: op.value as Record, + column: protectColumn, + table: protectTable, + } satisfies JsonContainsQueryTerm + } else if (op.operator === 'json_contained_by') { + queryTerm = { + containedBy: op.value as Record, + column: protectColumn, + table: protectTable, + } satisfies JsonContainedByQueryTerm + } else { + // No encryption applies to the remaining operators (e.g. array length on the root path) + // Selector-type operators were already resolved at the top of this function + return op.value + } + + const result = await 
protectClient.encryptQuery([queryTerm]) + + if (result.failure) { + throw new Error(`Failed to encrypt JSON query: ${result.failure.message}`) + } + + return result.data[0] +} + +/** + * Encrypts a JSON path to get its selector hash. + * Used for jsonb_path_query_first operations (e.g., array-length on non-root paths). + * @internal + */ +export async function encryptPathSelector( + protectClient: ProtectClient, + path: string, + columnInfo: ColumnInfo, +): Promise { + const { protectColumn, protectTable } = columnInfo + + if (!protectColumn || !protectTable) { + // If columnInfo is incomplete (e.g., in tests with mocks), return a placeholder selector + // In production, the columnInfo will always have these properties set + return 'mock_selector' + } + + // Use JsonPathQueryTerm without a value to get just the selector + const queryTerm: JsonPathQueryTerm = { + path, + column: protectColumn, + table: protectTable, + // No value - we just need the selector hash for path extraction + } + + const result = await protectClient.encryptQuery([queryTerm]) + + if (result.failure) { + throw new Error( + `Failed to encrypt path selector: ${result.failure.message}`, + ) + } + + // Extract the selector from the result + // JsonPathQueryTerm without value returns { s: selector } + const encrypted = result.data[0] as { s: string } + return encrypted.s +} diff --git a/packages/drizzle/src/pg/operators.ts b/packages/drizzle/src/pg/operators.ts index 7642b77b..4d06b641 100644 --- a/packages/drizzle/src/pg/operators.ts +++ b/packages/drizzle/src/pg/operators.ts @@ -38,6 +38,13 @@ import type { PgTable } from 'drizzle-orm/pg-core' import type { EncryptedColumnConfig } from './index.js' import { getEncryptedColumnConfig } from './index.js' import { extractProtectSchema } from './schema-extraction.js' +import { + JsonPathBuilder, + normalizePath, + isLazyJsonOperator, + type LazyJsonOperator, + encryptSingleJsonOperator, +} from './json-operators.js' // 
============================================================================ // Type Definitions and Type Guards @@ -180,7 +187,7 @@ function getProtectColumn( /** * Column metadata extracted from a Drizzle column */ -interface ColumnInfo { +export interface ColumnInfo { readonly protectColumn: ProtectColumn | undefined readonly config: (EncryptedColumnConfig & { name: string }) | undefined readonly protectTable: ProtectTable | undefined @@ -1053,6 +1060,87 @@ export function createProtectOperators(protectClient: ProtectClient): { arrayContains: typeof arrayContains arrayContained: typeof arrayContained arrayOverlaps: typeof arrayOverlaps + /** + * Create a JSON path builder for querying encrypted JSON columns. + * + * Provides a fluent API for: + * - Path-based comparisons: eq(), ne() + * - Containment checks: contains(), containedBy() + * - Array operations: arrayLength().gt/gte/lt/lte() + * - Value extraction: get(), elements(), elementsText() + * - Path extraction: pathExtract(), pathExtractFirst() + * + * ## Requirements + * + * The column must have both `dataType: 'json'` and `searchableJson: true` configured. + * + * ## Path Format + * + * Accepts both JSONPath format (`$.user.email`) and dot notation (`user.email`). + * The `$.` prefix is automatically stripped. 
+ * + * ## Encryption Semantics + * + * Different operations have different encryption requirements: + * - Value operations (eq, contains, etc.): Encrypts the comparison value + * - Array-length on root: No encryption needed + * - Array-length on nested path: Encrypts the path selector + * - Path extraction: Encrypts the path selector + * + * @param column - The encrypted JSON column + * @param path - The JSON path in JSONPath or dot notation format + * @returns A JsonPathBuilder for chaining operations + * + * @example + * Equality comparison at a path: + * ```typescript + * const result = await db + * .select() + * .from(users) + * .where(await ops.jsonPath(users.metadata, '$.user.email').eq('test@example.com')) + * ``` + * + * @example + * JSON containment check: + * ```typescript + * const admins = await db + * .select() + * .from(users) + * .where(await ops.jsonPath(users.metadata, '$').contains({ role: 'admin' })) + * ``` + * + * @example + * Array length comparison: + * ```typescript + * const activeUsers = await db + * .select() + * .from(users) + * .where(await ops.jsonPath(users.metadata, '$.items').arrayLength().gt(5)) + * ``` + * + * @example + * Value extraction in SELECT: + * ```typescript + * const emails = await db + * .select({ + * email: await ops.jsonPath(users.metadata, '$.user.email').get() + * }) + * .from(users) + * ``` + * + * @example + * Combining with other operators: + * ```typescript + * const result = await db + * .select() + * .from(users) + * .where(await ops.and( + * ops.eq(users.status, 'active'), + * ops.jsonPath(users.metadata, '$.user.role').eq('admin') + * )) + * ``` + */ + jsonPath: (column: SQLWrapper, path: string) => JsonPathBuilder } { // Create a cache for protect tables keyed by table name const protectTableCache = new Map>() @@ -1427,8 +1515,9 @@ export function createProtectOperators(protectClient: ProtectClient): { const protectAnd = async ( ...conditions: (SQL | SQLWrapper | Promise | undefined)[] ): Promise => { - 
// Single pass: separate lazy operators from regular conditions + // Collect all operator types for batched processing const lazyOperators: LazyOperator[] = [] + const lazyJsonOperators: LazyJsonOperator[] = [] const regularConditions: (SQL | SQLWrapper | undefined)[] = [] const regularPromises: Promise[] = [] @@ -1437,11 +1526,16 @@ export function createProtectOperators(protectClient: ProtectClient): { continue } - if (isLazyOperator(condition)) { + // Check for JSON operators FIRST (they are also LazyOperators) + if (isLazyJsonOperator(condition)) { + lazyJsonOperators.push(condition) + } else if (isLazyOperator(condition)) { lazyOperators.push(condition) } else if (condition instanceof Promise) { - // Check if promise is also a lazy operator - if (isLazyOperator(condition)) { + // Check if the promise is also a lazy operator + if (isLazyJsonOperator(condition)) { + lazyJsonOperators.push(condition) + } else if (isLazyOperator(condition)) { lazyOperators.push(condition) } else { regularPromises.push(condition) @@ -1451,10 +1545,26 @@ export function createProtectOperators(protectClient: ProtectClient): { } } + // Process JSON operators - they have different encryption logic + const jsonSqlConditions: SQL[] = [] + for (const jsonOp of lazyJsonOperators) { + try { + // JSON operators use their own encryption via encryptSingleJsonOperator + const encrypted = await encryptSingleJsonOperator(protectClient, jsonOp) + const sqlResult = jsonOp.execute(encrypted) + jsonSqlConditions.push(sqlResult) + } catch (error) { + // Log for diagnostics, then rethrow - a failed JSON operator aborts the whole and() + console.error(`Error processing JSON operator: ${error}`) + throw error + } + } + // If there are no lazy operators, just use Drizzle's and() if (lazyOperators.length === 0) { const allConditions: (SQL | SQLWrapper | undefined)[] = [ ...regularConditions, + ...jsonSqlConditions, ...(await Promise.all(regularPromises)), ] return and(...allConditions) ?? 
sql`true` @@ -1570,6 +1680,7 @@ export function createProtectOperators(protectClient: ProtectClient): { // Combine all conditions const allConditions: (SQL | SQLWrapper | undefined)[] = [ ...regularConditions, + ...jsonSqlConditions, ...sqlConditions, ...regularPromisesResults, ] @@ -1583,7 +1694,9 @@ export function createProtectOperators(protectClient: ProtectClient): { const protectOr = async ( ...conditions: (SQL | SQLWrapper | Promise | undefined)[] ): Promise => { + // Collect all operator types for batched processing const lazyOperators: LazyOperator[] = [] + const lazyJsonOperators: LazyJsonOperator[] = [] const regularConditions: (SQL | SQLWrapper | undefined)[] = [] const regularPromises: Promise[] = [] @@ -1592,10 +1705,16 @@ export function createProtectOperators(protectClient: ProtectClient): { continue } - if (isLazyOperator(condition)) { + // Check for JSON operators FIRST (they are also LazyOperators) + if (isLazyJsonOperator(condition)) { + lazyJsonOperators.push(condition) + } else if (isLazyOperator(condition)) { lazyOperators.push(condition) } else if (condition instanceof Promise) { - if (isLazyOperator(condition)) { + // Check if the promise is also a lazy operator + if (isLazyJsonOperator(condition)) { + lazyJsonOperators.push(condition) + } else if (isLazyOperator(condition)) { lazyOperators.push(condition) } else { regularPromises.push(condition) @@ -1605,9 +1724,26 @@ export function createProtectOperators(protectClient: ProtectClient): { } } + // Process JSON operators - they have different encryption logic + const jsonSqlConditions: SQL[] = [] + for (const jsonOp of lazyJsonOperators) { + try { + // JSON operators use their own encryption via encryptSingleJsonOperator + const encrypted = await encryptSingleJsonOperator(protectClient, jsonOp) + const sqlResult = jsonOp.execute(encrypted) + jsonSqlConditions.push(sqlResult) + } catch (error) { + // Log for diagnostics, then rethrow - a failed JSON operator aborts the whole or() + console.error(`Error 
processing JSON operator: ${error}`) + throw error + } + } + + // If there are no lazy operators, just use Drizzle's or() if (lazyOperators.length === 0) { const allConditions: (SQL | SQLWrapper | undefined)[] = [ ...regularConditions, + ...jsonSqlConditions, ...(await Promise.all(regularPromises)), ] return or(...allConditions) ?? sql`false` @@ -1717,6 +1853,7 @@ export function createProtectOperators(protectClient: ProtectClient): { const allConditions: (SQL | SQLWrapper | undefined)[] = [ ...regularConditions, + ...jsonSqlConditions, ...sqlConditions, ...regularPromisesResults, ] @@ -1724,6 +1861,43 @@ export function createProtectOperators(protectClient: ProtectClient): { return or(...allConditions) ?? sql`false` } + /** + * JSON path builder for searchable JSON columns + */ + const protectJsonPath = ( + column: SQLWrapper, + path: string, + ): JsonPathBuilder => { + const columnInfo = getColumnInfo( + column, + defaultProtectTable, + protectTableCache, + ) + + if (!columnInfo.config?.searchableJson) { + throw new ProtectConfigError( + `searchableJson is required for jsonPath() on column "${columnInfo.columnName}". Add { searchableJson: true } to the encryptedType() config.`, + { columnName: columnInfo.columnName, tableName: columnInfo.tableName }, + ) + } + + // Validate that dataType is 'json' when searchableJson is enabled + if (columnInfo.config.dataType !== 'json') { + throw new ProtectConfigError( + `searchableJson requires dataType: 'json' on column "${columnInfo.columnName}". 
Add { dataType: 'json', searchableJson: true } to the encryptedType() config.`, + { columnName: columnInfo.columnName, tableName: columnInfo.tableName }, + ) + } + + const normalizedPath = normalizePath(path) + return new JsonPathBuilder( + column, + normalizedPath, + columnInfo, + protectClient, + ) + } + return { // Comparison operators eq: protectEq, @@ -1766,5 +1940,8 @@ export function createProtectOperators(protectClient: ProtectClient): { arrayContains, arrayContained, arrayOverlaps, + + // JSON path builder + jsonPath: protectJsonPath, } } diff --git a/packages/drizzle/src/pg/schema-extraction.ts b/packages/drizzle/src/pg/schema-extraction.ts index a655e07c..664f2217 100644 --- a/packages/drizzle/src/pg/schema-extraction.ts +++ b/packages/drizzle/src/pg/schema-extraction.ts @@ -91,6 +91,10 @@ export function extractProtectSchema>( } } + if (config.searchableJson) { + csCol.searchableJson() + } + columns[actualColumnName] = csCol } } diff --git a/packages/protect-dynamodb/README.md b/packages/protect-dynamodb/README.md index e52ffe66..ffd2e84c 100644 --- a/packages/protect-dynamodb/README.md +++ b/packages/protect-dynamodb/README.md @@ -55,7 +55,7 @@ await docClient.send(new PutCommand({ })) // Create search terms for querying -const searchTermsResult = await protectDynamo.createSearchTerms([ +const searchTermsResult = await protectDynamo.encryptQuery([ { value: 'user@example.com', column: users.email, @@ -119,10 +119,10 @@ if (result.failure) { Create search terms for querying encrypted data: -- `createSearchTerms`: Creates search terms for one or more columns +- `encryptQuery`: Creates search terms for one or more columns ```typescript -const searchTermsResult = await protectDynamo.createSearchTerms([ +const searchTermsResult = await protectDynamo.encryptQuery([ { value: 'user@example.com', column: users.email, @@ -165,7 +165,7 @@ if (encryptResult.failure) { } // Query using search terms -const searchTermsResult = await protectDynamo.createSearchTerms([ 
+const searchTermsResult = await protectDynamo.encryptQuery([ { value: 'user@example.com', column: users.email, @@ -199,7 +199,7 @@ const table = { } // Create search terms for querying -const searchTermsResult = await protectDynamo.createSearchTerms([ +const searchTermsResult = await protectDynamo.encryptQuery([ { value: 'user@example.com', column: users.email, @@ -243,7 +243,7 @@ const table = { } // Create search terms for querying -const searchTermsResult = await protectDynamo.createSearchTerms([ +const searchTermsResult = await protectDynamo.encryptQuery([ { value: 'user@example.com', column: users.email, @@ -298,7 +298,7 @@ const table = { } // Create search terms for querying -const searchTermsResult = await protectDynamo.createSearchTerms([ +const searchTermsResult = await protectDynamo.encryptQuery([ { value: 'user@example.com', column: users.email, diff --git a/packages/protect/README.md b/packages/protect/README.md index fab455d0..f4b555ea 100644 --- a/packages/protect/README.md +++ b/packages/protect/README.md @@ -986,15 +986,107 @@ const bulkDecryptedResult = await protectClient ## Supported data types -Protect.js currently supports encrypting and decrypting text. -Other data types like booleans, dates, ints, floats, and JSON are well-supported in other CipherStash products, and will be coming to Protect.js soon. +Protect.js supports a number of different data types with support for additional types on the roadmap. + +| JS/TS Type | Available | Notes | +|--|--|--| +| `string` | ✅ | | +| `number` | ✅ | | +| `json` (opaque) | ✅ | | +| `json` (searchable) | ✅ | | +| `bigint` | ⚙️ | Coming soon | +| `boolean`| ⚙️ | Coming soon | +| `date` | ⚙️ | Coming soon | + +If you need support for other data types, please [raise an issue](https://github.com/cipherstash/protectjs/issues) and we'll do our best to add it to Protect.js. 
+ +### Type casting + +When encrypting types other than `string`, Protect requires the data type to be specified explicitly using the `dataType` function on the column definition. + +For example, to handle encryption of a `number` field called `score`: + +```ts +const users = csTable('users', { + score: csColumn('score').dataType('number') +}) +``` + +This means that any JavaScript/TypeScript `number` will encrypt correctly but if an attempt to encrypt a value of a different type is made the operation will fail with an error. +This is particularly important for searchable index schemes that require data types (and their encodings) to be consistent. + +In an unencrypted setup, this type checking is usually handled by the database (the column type in a table) but when the data is encrypted, the database can't determine what type the plaintext value should be so we must specify it in the Protect schema instead. + +> [!IMPORTANT] +> If the data type of a column is set to `bigint`, floating point numbers will be converted to integers (via truncation). + +### Handling of null and special values + +There are some important special cases to be aware of when encrypting values with Protect.js. +For example, encrypting `null` or `undefined` will just return a `null`/`undefined` value. + +When `dataType` is `number`, attempting to encrypt `NaN`, `Infinity` or `-Infinity` will fail with an error. +Encrypting `-0.0` will coerce the value into `0.0`. + +The table below summarizes these cases. + +| Data type | Plaintext | Encryption | +|--|--|--| +|`any`| `null` | `null` | +| `any` | `undefined` | `undefined` | +| `number` | `-0.0` | Encryption of `0.0` | +| `number` | `NaN` | _Error_ | +| `number` | `Infinity` | _Error_| +| `number` | `-Infinity` | _Error_| -Until support for other data types are available, you can express interest in this feature by adding a :+1: on this [GitHub Issue](https://github.com/cipherstash/protectjs/issues/48). 
## Searchable encryption Read more about [searching encrypted data](./docs/concepts/searchable-encryption.md) in the docs. +### Searchable JSON + +Protect.js allows you to perform deep searches within encrypted JSON documents. You can query nested fields, arrays, and objects without decrypting the entire document. + +To enable searchable JSON, configure your schema: + +```ts +// schema.ts +import { csTable, csColumn } from "@cipherstash/protect"; + +export const users = csTable("users", { + metadata: csColumn("metadata").searchableJson(), +}); +``` + +Then generate search terms for your queries: + +```ts +// index.ts +// Path query: find users with metadata.role = 'admin' +const searchTerms = await protectClient.encryptQuery([ + { + path: "role", // or "user.role" or ["user", "role"] + value: "admin", + column: users.metadata, + table: users, + } +]); + +// Containment query: find users where metadata contains { tags: ['premium'] } +const containmentTerms = await protectClient.encryptQuery([ + { + contains: { tags: ["premium"] }, + column: users.metadata, + table: users, + // For the reverse check, use `containedBy: { ... }` in place of `contains` + } +]); +``` + +These search terms can then be used in your database query (e.g., using SQL or an ORM). + + ## Multi-tenant encryption Protect.js supports multi-tenant encryption by using keysets. 
diff --git a/packages/protect/__tests__/audit.test.ts b/packages/protect/__tests__/audit.test.ts index 6d508f58..dca9f515 100644 --- a/packages/protect/__tests__/audit.test.ts +++ b/packages/protect/__tests__/audit.test.ts @@ -1,7 +1,8 @@ import 'dotenv/config' import { csColumn, csTable } from '@cipherstash/schema' import { beforeAll, describe, expect, it } from 'vitest' -import { LockContext, protect } from '../src' +import { protect } from '../src' +import { LockContext } from '../src/identify' const users = csTable('users', { auditable: csColumn('auditable'), diff --git a/packages/protect/__tests__/backward-compat.test.ts b/packages/protect/__tests__/backward-compat.test.ts index 46d39949..e853aa6c 100644 --- a/packages/protect/__tests__/backward-compat.test.ts +++ b/packages/protect/__tests__/backward-compat.test.ts @@ -2,6 +2,7 @@ import 'dotenv/config' import { csColumn, csTable } from '@cipherstash/schema' import { beforeAll, describe, expect, it } from 'vitest' import { protect } from '../src' +import type { Encrypted } from '../src/types' const users = csTable('users', { email: csColumn('email'), @@ -53,7 +54,7 @@ describe('k-field backward compatibility', () => { } // Decrypt should succeed even with legacy k field present - const result = await protectClient.decrypt(legacyPayload) + const result = await protectClient.decrypt(legacyPayload as Encrypted) if (result.failure) { throw new Error(`Decryption failed: ${result.failure.message}`) diff --git a/packages/protect/__tests__/batch-encrypt-query.test.ts b/packages/protect/__tests__/batch-encrypt-query.test.ts new file mode 100644 index 00000000..fbc46a6e --- /dev/null +++ b/packages/protect/__tests__/batch-encrypt-query.test.ts @@ -0,0 +1,338 @@ +import 'dotenv/config' +import { csColumn, csTable } from '@cipherstash/schema' +import { beforeAll, describe, expect, it } from 'vitest' +import { type QueryTerm, protect } from '../src' +import { queryTypes } from '../src/types' + +const users = 
csTable('users', { + email: csColumn('email').freeTextSearch().equality().orderAndRange(), + score: csColumn('score').dataType('number').orderAndRange(), +}) + +const jsonSchema = csTable('json_users', { + metadata: csColumn('metadata').searchableJson(), +}) + +let protectClient: Awaited> + +beforeAll(async () => { + protectClient = await protect({ schemas: [users, jsonSchema] }) +}) + +describe('encryptQuery batch overload', () => { + it('should return empty array for empty input', async () => { + const result = await protectClient.encryptQuery([]) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toEqual([]) + }) + + it('should encrypt batch of scalar terms', async () => { + const terms: QueryTerm[] = [ + { + value: 'test@example.com', + column: users.email, + table: users, + queryType: queryTypes.equality, + }, + { + value: 100, + column: users.score, + table: users, + queryType: queryTypes.orderAndRange, + }, + ] + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(2) + expect(result.data[0]).toHaveProperty('hm') // unique returns HMAC + }) +}) + +describe('encryptQuery batch - JSON path queries', () => { + it('should encrypt JSON path query with value', async () => { + const terms: QueryTerm[] = [ + { + path: 'user.email', + value: 'test@example.com', + column: jsonSchema.metadata, + table: jsonSchema, + }, + ] + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + expect(result.data[0]).toHaveProperty('s', 'json_users/metadata/user/email') + }) + + it('should encrypt JSON path query without value (selector only)', async () => { + const terms: QueryTerm[] = [ + { path: 'user.role', column: jsonSchema.metadata, table: jsonSchema }, + ] + + 
const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + expect(result.data[0]).toEqual({ s: 'json_users/metadata/user/role' }) + }) +}) + +describe('encryptQuery batch - JSON containment queries', () => { + it('should encrypt JSON contains query', async () => { + const terms: QueryTerm[] = [ + { + contains: { role: 'admin' }, + column: jsonSchema.metadata, + table: jsonSchema, + }, + ] + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + expect(result.data[0]).toHaveProperty('sv') + const svResult = result.data[0] as { sv: Array<{ s: string }> } + expect(svResult.sv).toHaveLength(1) + expect(svResult.sv[0]).toHaveProperty('s', 'json_users/metadata/role') + }) + + it('should encrypt JSON containedBy query', async () => { + const terms: QueryTerm[] = [ + { + containedBy: { status: 'active' }, + column: jsonSchema.metadata, + table: jsonSchema, + }, + ] + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + expect(result.data[0]).toHaveProperty('sv') + }) +}) + +describe('encryptQuery batch - mixed term types', () => { + it('should encrypt mixed batch of scalar and JSON terms', async () => { + const terms: QueryTerm[] = [ + { + value: 'test@example.com', + column: users.email, + table: users, + queryType: queryTypes.equality, + }, + { + path: 'user.email', + value: 'json@example.com', + column: jsonSchema.metadata, + table: jsonSchema, + }, + { + contains: { role: 'admin' }, + column: jsonSchema.metadata, + table: jsonSchema, + }, + ] + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + 
} + + expect(result.data).toHaveLength(3) + // First term: scalar unique + expect(result.data[0]).toHaveProperty('hm') + // Second term: JSON path with selector + expect(result.data[1]).toHaveProperty('s') + // Third term: JSON containment with sv array + expect(result.data[2]).toHaveProperty('sv') + }) +}) + +describe('encryptQuery batch - return type formatting', () => { + it('should format as composite-literal', async () => { + const terms: QueryTerm[] = [ + { + value: 'test@example.com', + column: users.email, + table: users, + queryType: queryTypes.equality, + returnType: 'composite-literal', + }, + ] + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(typeof result.data[0]).toBe('string') + expect(result.data[0]).toMatch(/^\(.*\)$/) + }) +}) + +describe('encryptQuery batch - readonly/as const support', () => { + it('should accept readonly array (as const)', async () => { + const terms = [ + { + value: 'test@example.com', + column: users.email, + table: users, + queryType: queryTypes.equality, + }, + ] as const + + const result = await protectClient.encryptQuery(terms) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + }) +}) + +describe('encryptQuery batch - auto-infer index type', () => { + it('should auto-infer index type when not specified', async () => { + const result = await protectClient.encryptQuery([ + { value: 'test@example.com', column: users.email, table: users }, + // No indexType - should auto-infer from column config + ]) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + // Auto-inferred result should be a valid encrypted payload + expect(result.data[0]).not.toBeNull() + expect(typeof result.data[0]).toBe('object') + expect(result.data[0]).toHaveProperty('c') + }) + + it('should use 
explicit index type when specified', async () => { + const result = await protectClient.encryptQuery([ + { + value: 'test@example.com', + column: users.email, + table: users, + queryType: queryTypes.equality, + }, + ]) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(1) + expect(result.data[0]).toHaveProperty('hm') // unique returns HMAC + }) + + it('should handle mixed batch with and without indexType', async () => { + const result = await protectClient.encryptQuery([ + // Explicit indexType + { + value: 'explicit@example.com', + column: users.email, + table: users, + queryType: queryTypes.equality, + }, + // Auto-infer indexType + { value: 'auto@example.com', column: users.email, table: users }, + // Another explicit indexType + { + value: 100, + column: users.score, + table: users, + queryType: queryTypes.orderAndRange, + }, + ]) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveLength(3) + // First term: explicit unique should have hm + expect(result.data[0]).toHaveProperty('hm') + // Second term: auto-inferred should be valid encrypted payload + expect(result.data[1]).not.toBeNull() + expect(typeof result.data[1]).toBe('object') + expect(result.data[1]).toHaveProperty('c') + // Third term: explicit ore should have valid encryption + expect(result.data[2]).not.toBeNull() + }) +}) + +describe('encryptQuery single-value - auto-infer index type', () => { + it('should auto-infer index type for single value when not specified', async () => { + const result = await protectClient.encryptQuery('test@example.com', { + column: users.email, + table: users, + // No indexType - should auto-infer from column config + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + // Auto-inferred result should be a valid encrypted payload + expect(result.data).not.toBeNull() + expect(typeof 
result.data).toBe('object') + expect(result.data).toHaveProperty('c') + }) + + it('should use explicit index type for single value when specified', async () => { + const result = await protectClient.encryptQuery('test@example.com', { + column: users.email, + table: users, + queryType: queryTypes.equality, + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toHaveProperty('hm') // unique returns HMAC + }) + + it('should handle null value with auto-infer', async () => { + const result = await protectClient.encryptQuery(null, { + column: users.email, + table: users, + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + expect(result.data).toBeNull() + }) +}) diff --git a/packages/protect/__tests__/bulk-protect.test.ts b/packages/protect/__tests__/bulk-protect.test.ts index 893bea86..8c2c3441 100644 --- a/packages/protect/__tests__/bulk-protect.test.ts +++ b/packages/protect/__tests__/bulk-protect.test.ts @@ -1,7 +1,8 @@ import 'dotenv/config' import { csColumn, csTable } from '@cipherstash/schema' import { beforeAll, describe, expect, it } from 'vitest' -import { type EncryptedPayload, LockContext, protect } from '../src' +import { type EncryptedPayload, protect } from '../src' +import { LockContext } from '../src/identify' const users = csTable('users', { email: csColumn('email').freeTextSearch().equality().orderAndRange(), diff --git a/packages/protect/__tests__/json-protect.test.ts b/packages/protect/__tests__/json-protect.test.ts index 66604400..7b7a812d 100644 --- a/packages/protect/__tests__/json-protect.test.ts +++ b/packages/protect/__tests__/json-protect.test.ts @@ -1,7 +1,8 @@ import 'dotenv/config' import { csColumn, csTable, csValue } from '@cipherstash/schema' import { beforeAll, describe, expect, it } from 'vitest' -import { LockContext, protect } from '../src' +import { protect } from '../src' +import { LockContext } from '../src/identify' const 
users = csTable('users', { email: csColumn('email').freeTextSearch().equality().orderAndRange(), diff --git a/packages/protect/__tests__/nested-models.test.ts b/packages/protect/__tests__/nested-models.test.ts index 8f44f809..aa0022f5 100644 --- a/packages/protect/__tests__/nested-models.test.ts +++ b/packages/protect/__tests__/nested-models.test.ts @@ -1,7 +1,7 @@ import 'dotenv/config' import { csColumn, csTable, csValue } from '@cipherstash/schema' import { describe, expect, it, vi } from 'vitest' -import { LockContext, protect } from '../src' +import { protect } from '../src' const users = csTable('users', { email: csColumn('email').freeTextSearch().equality().orderAndRange(), diff --git a/packages/protect/__tests__/number-protect.test.ts b/packages/protect/__tests__/number-protect.test.ts index 3ade327a..179f9c49 100644 --- a/packages/protect/__tests__/number-protect.test.ts +++ b/packages/protect/__tests__/number-protect.test.ts @@ -1,7 +1,8 @@ import 'dotenv/config' import { csColumn, csTable, csValue } from '@cipherstash/schema' import { beforeAll, describe, expect, it, test } from 'vitest' -import { LockContext, protect } from '../src' +import { protect } from '../src' +import { LockContext } from '../src/identify' const users = csTable('users', { email: csColumn('email').freeTextSearch().equality().orderAndRange(), diff --git a/packages/protect/__tests__/protect-ops.test.ts b/packages/protect/__tests__/protect-ops.test.ts index c7a2e276..49d6c461 100644 --- a/packages/protect/__tests__/protect-ops.test.ts +++ b/packages/protect/__tests__/protect-ops.test.ts @@ -1,7 +1,8 @@ import 'dotenv/config' import { csColumn, csTable } from '@cipherstash/schema' import { beforeAll, describe, expect, it } from 'vitest' -import { LockContext, protect } from '../src' +import { protect } from '../src' +import { LockContext } from '../src/identify' const users = csTable('users', { email: csColumn('email').freeTextSearch().equality().orderAndRange(), diff --git 
a/packages/protect/__tests__/query-term-guards.test.ts b/packages/protect/__tests__/query-term-guards.test.ts new file mode 100644 index 00000000..7f003ecf --- /dev/null +++ b/packages/protect/__tests__/query-term-guards.test.ts @@ -0,0 +1,397 @@ +import { csColumn, csTable } from '@cipherstash/schema' +import { describe, expect, it } from 'vitest' +import { + isJsonContainedByQueryTerm, + isJsonContainsQueryTerm, + isJsonPathQueryTerm, + isScalarQueryTerm, +} from '../src/query-term-guards' +import { queryTypes } from '../src/types' +const users = csTable('users', { + email: csColumn('email').freeTextSearch().equality().orderAndRange(), +}) + +describe('query-term-guards', () => { + describe('isScalarQueryTerm', () => { + it('should return true when both value and queryType are present', () => { + const term = { + value: 'test', + queryType: queryTypes.equality, + column: users.email, + table: users, + } + expect(isScalarQueryTerm(term)).toBe(true) + }) + + it('should return true with all properties including optional ones', () => { + const term = { + value: 'test', + queryType: queryTypes.orderAndRange, + column: users.email, + table: users, + } + expect(isScalarQueryTerm(term)).toBe(true) + }) + + it('should return false when value is missing', () => { + const term = { + queryType: queryTypes.equality, + column: users.email, + table: users, + } + // @ts-expect-error - value is missing + expect(isScalarQueryTerm(term)).toBe(false) + }) + + it('should return true when queryType is missing (optional - auto-inferred)', () => { + const term = { + value: 'test', + column: users.email, + table: users, + } + // queryType is now optional - terms without it use auto-inference + expect(isScalarQueryTerm(term)).toBe(true) + }) + + it('should return false when both value and queryType are missing', () => { + const term = { + column: users.email, + table: users, + } + // @ts-expect-error - value is missing + expect(isScalarQueryTerm(term)).toBe(false) + }) + + it('should 
return false for empty object', () => { + const term = {} + + // @ts-expect-error - empty object is not a valid query term + expect(isScalarQueryTerm(term)).toBe(false) + }) + + it('should return true with extra properties present', () => { + const term = { + value: 'test', + queryType: queryTypes.freeTextSearch, + column: users.email, + table: users, + extraProp: 'extra', + anotherProp: 123, + } + expect(isScalarQueryTerm(term)).toBe(true) + }) + + it('should return true even when queryType is null (property exists)', () => { + const term = { + value: 'test', + queryType: null, + column: users.email, + table: users, + } + + // @ts-expect-error - queryType is null + expect(isScalarQueryTerm(term)).toBe(true) + }) + + it('should return true even when value is undefined (property exists)', () => { + const term = { + value: undefined, + queryType: queryTypes.equality, + column: users.email, + table: users, + } + + // @ts-expect-error - value is undefined + expect(isScalarQueryTerm(term)).toBe(true) + }) + + it('should return true even when queryType is undefined (property exists)', () => { + const term = { + value: 'test', + queryType: undefined, + column: users.email, + table: users, + } + + expect(isScalarQueryTerm(term)).toBe(true) + }) + }) + + describe('isJsonPathQueryTerm', () => { + it('should return true when path property exists', () => { + const term = { + path: 'user.email', + column: users.email, + table: users, + } + + expect(isJsonPathQueryTerm(term)).toBe(true) + }) + + it('should return true with all properties including optional ones', () => { + const term = { + path: 'user.name', + value: 'John', + column: users.email, + table: users, + } + + expect(isJsonPathQueryTerm(term)).toBe(true) + }) + + it('should return true with extra properties', () => { + const term = { + path: 'data.nested.field', + column: users.email, + table: users, + extraProp: 'extra', + anotherField: 42, + } + expect(isJsonPathQueryTerm(term)).toBe(true) + }) + + it('should return 
false when path property is missing', () => { + const term = { + column: users.email, + table: users, + value: 'test', + } + + expect(isJsonPathQueryTerm(term)).toBe(false) + }) + + it('should return false for empty object', () => { + const term = {} + + // @ts-expect-error - empty object is not a valid query term + expect(isJsonPathQueryTerm(term)).toBe(false) + }) + + it('should return true even when path is null', () => { + const term = { + path: null, + column: users.email, + table: users, + } + + // @ts-expect-error - path is missing + expect(isJsonPathQueryTerm(term)).toBe(true) + }) + + it('should return true even when path is undefined', () => { + const term = { + path: undefined, + column: users.email, + table: users, + } + + // @ts-expect-error - path is undefined + expect(isJsonPathQueryTerm(term)).toBe(true) + }) + + it('should return false when path-like property with different name', () => { + const term = { + pathName: 'user.email', + column: users.email, + table: users, + } + + // @ts-expect-error - pathName is not a valid property + expect(isJsonPathQueryTerm(term)).toBe(false) + }) + }) + + describe('isJsonContainsQueryTerm', () => { + it('should return true when contains property exists', () => { + const term = { + contains: { key: 'value' }, + column: users.email, + table: users, + } + expect(isJsonContainsQueryTerm(term)).toBe(true) + }) + + it('should return true with empty object as contains', () => { + const term = { + contains: {}, + column: users.email, + table: users, + } + expect(isJsonContainsQueryTerm(term)).toBe(true) + }) + + it('should return true with complex nested object as contains', () => { + const term = { + contains: { + user: { + email: 'test@example.com', + roles: ['admin', 'user'], + }, + }, + column: users.email, + table: users, + } + expect(isJsonContainsQueryTerm(term)).toBe(true) + }) + + it('should return true with extra properties', () => { + const term = { + contains: { status: 'active' }, + column: users.email, + 
table: users, + extraProp: 'extra', + anotherField: 42, + } + + expect(isJsonContainsQueryTerm(term)).toBe(true) + }) + + it('should return false when contains property is missing', () => { + const term = { + column: users.email, + table: users, + data: { key: 'value' }, + } + + // @ts-expect-error - contains is missing + expect(isJsonContainsQueryTerm(term)).toBe(false) + }) + + it('should return false for empty object', () => { + const term = {} + + // @ts-expect-error - empty object is not a valid query term + expect(isJsonContainsQueryTerm(term)).toBe(false) + }) + + it('should return true even when contains is null', () => { + const term = { + contains: null, + column: users.email, + table: users, + } + + // @ts-expect-error - contains is null + expect(isJsonContainsQueryTerm(term)).toBe(true) + }) + + it('should return true even when contains is undefined', () => { + const term = { + contains: undefined, + column: users.email, + table: users, + } + + // @ts-expect-error - contains is undefined + expect(isJsonContainsQueryTerm(term)).toBe(true) + }) + + it('should return false when contains-like property with different name', () => { + const term = { + containsData: { key: 'value' }, + column: users.email, + table: users, + } + + // @ts-expect-error - containsData is not a valid property + expect(isJsonContainsQueryTerm(term)).toBe(false) + }) + }) + + describe('isJsonContainedByQueryTerm', () => { + it('should return true when containedBy property exists', () => { + const term = { + containedBy: { key: 'value' }, + column: users.email, + table: users, + } + expect(isJsonContainedByQueryTerm(term)).toBe(true) + }) + + it('should return true with empty object as containedBy', () => { + const term = { + containedBy: {}, + column: users.email, + table: users, + } + expect(isJsonContainedByQueryTerm(term)).toBe(true) + }) + + it('should return true with complex nested object as containedBy', () => { + const term = { + containedBy: { + permissions: { + read: true, 
+ write: false, + admin: true, + }, + }, + column: users.email, + table: users, + } + expect(isJsonContainedByQueryTerm(term)).toBe(true) + }) + + it('should return true with extra properties', () => { + const term = { + containedBy: { status: 'active' }, + column: users.email, + table: users, + extraProp: 'extra', + anotherField: 42, + } + expect(isJsonContainedByQueryTerm(term)).toBe(true) + }) + + it('should return false when containedBy property is missing', () => { + const term = { + column: users.email, + table: users, + data: { key: 'value' }, + } + + // @ts-expect-error - containedBy is missing + expect(isJsonContainedByQueryTerm(term)).toBe(false) + }) + + it('should return false for empty object', () => { + const term = {} + + // @ts-expect-error - empty object is not a valid query term + expect(isJsonContainedByQueryTerm(term)).toBe(false) + }) + + it('should return true even when containedBy is null', () => { + const term = { + containedBy: null, + column: users.email, + table: users, + } + + // @ts-expect-error - containedBy is null + expect(isJsonContainedByQueryTerm(term)).toBe(true) + }) + + it('should return true even when containedBy is undefined', () => { + const term = { + containedBy: undefined, + column: users.email, + table: users, + } + + // @ts-expect-error - containedBy is undefined + expect(isJsonContainedByQueryTerm(term)).toBe(true) + }) + + it('should return false when containedBy-like property with different name', () => { + const term = { + containedByData: { key: 'value' }, + column: users.email, + table: users, + } + + // @ts-expect-error - containedByData is not a valid property + expect(isJsonContainedByQueryTerm(term)).toBe(false) + }) + }) +}) diff --git a/packages/protect/__tests__/query-terms.test.ts b/packages/protect/__tests__/query-terms.test.ts new file mode 100644 index 00000000..d9592319 --- /dev/null +++ b/packages/protect/__tests__/query-terms.test.ts @@ -0,0 +1,86 @@ +import 'dotenv/config' +import { csColumn, 
csTable } from '@cipherstash/schema' +import { beforeAll, describe, expect, it } from 'vitest' +import { protect } from '../src' +import { queryTypes } from '../src/types' + +const users = csTable('users', { + email: csColumn('email').freeTextSearch().equality().orderAndRange(), + score: csColumn('score').dataType('number').orderAndRange(), +}) + +// Schema with searchableJson for ste_vec tests +const jsonSchema = csTable('json_users', { + metadata: csColumn('metadata').searchableJson(), +}) + +let protectClient: Awaited> + +beforeAll(async () => { + protectClient = await protect({ schemas: [users, jsonSchema] }) +}) + +describe('encryptQuery', () => { + it('should encrypt query with unique index', async () => { + const result = await protectClient.encryptQuery('test@example.com', { + column: users.email, + table: users, + queryType: queryTypes.equality, + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + // Unique index returns 'hm' (HMAC) + expect(result.data).toHaveProperty('hm') + }) + + it('should encrypt query with ore index', async () => { + const result = await protectClient.encryptQuery(100, { + column: users.score, + table: users, + queryType: queryTypes.orderAndRange, + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + // Check for some metadata keys besides identifier 'i' and version 'v' + const keys = Object.keys(result.data || {}) + const metaKeys = keys.filter((k) => k !== 'i' && k !== 'v') + expect(metaKeys.length).toBeGreaterThan(0) + }) + + it('should encrypt query with match index', async () => { + const result = await protectClient.encryptQuery('test', { + column: users.email, + table: users, + queryType: queryTypes.freeTextSearch, + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + const keys = Object.keys(result.data || {}) + const metaKeys = keys.filter((k) => k !== 'i' && k !== 'v') + 
expect(metaKeys.length).toBeGreaterThan(0) + }) + + it('should handle null value in encryptQuery', async () => { + const result = await protectClient.encryptQuery(null, { + column: users.email, + table: users, + queryType: queryTypes.equality, + }) + + if (result.failure) { + throw new Error(`[protect]: ${result.failure.message}`) + } + + // Null should produce null output (passthrough behavior) + expect(result.data).toBeNull() + }) +}) diff --git a/packages/protect/__tests__/search-terms.test.ts b/packages/protect/__tests__/search-terms.test.ts deleted file mode 100644 index f3cef7fe..00000000 --- a/packages/protect/__tests__/search-terms.test.ts +++ /dev/null @@ -1,90 +0,0 @@ -import 'dotenv/config' -import { csColumn, csTable } from '@cipherstash/schema' -import { describe, expect, it } from 'vitest' -import { type SearchTerm, protect } from '../src' - -const users = csTable('users', { - email: csColumn('email').freeTextSearch().equality().orderAndRange(), - address: csColumn('address').freeTextSearch(), -}) - -describe('create search terms', () => { - it('should create search terms with default return type', async () => { - const protectClient = await protect({ schemas: [users] }) - - const searchTerms = [ - { - value: 'hello', - column: users.email, - table: users, - }, - { - value: 'world', - column: users.address, - table: users, - }, - ] as SearchTerm[] - - const searchTermsResult = await protectClient.createSearchTerms(searchTerms) - - if (searchTermsResult.failure) { - throw new Error(`[protect]: ${searchTermsResult.failure.message}`) - } - - expect(searchTermsResult.data).toEqual( - expect.arrayContaining([ - expect.objectContaining({ - c: expect.any(String), - }), - ]), - ) - }, 30000) - - it('should create search terms with composite-literal return type', async () => { - const protectClient = await protect({ schemas: [users] }) - - const searchTerms = [ - { - value: 'hello', - column: users.email, - table: users, - returnType: 'composite-literal', - }, 
- ] as SearchTerm[] - - const searchTermsResult = await protectClient.createSearchTerms(searchTerms) - - if (searchTermsResult.failure) { - throw new Error(`[protect]: ${searchTermsResult.failure.message}`) - } - - const result = searchTermsResult.data[0] as string - expect(result).toMatch(/^\(.*\)$/) - expect(() => JSON.parse(result.slice(1, -1))).not.toThrow() - }, 30000) - - it('should create search terms with escaped-composite-literal return type', async () => { - const protectClient = await protect({ schemas: [users] }) - - const searchTerms = [ - { - value: 'hello', - column: users.email, - table: users, - returnType: 'escaped-composite-literal', - }, - ] as SearchTerm[] - - const searchTermsResult = await protectClient.createSearchTerms(searchTerms) - - if (searchTermsResult.failure) { - throw new Error(`[protect]: ${searchTermsResult.failure.message}`) - } - - const result = searchTermsResult.data[0] as string - expect(result).toMatch(/^".*"$/) - const unescaped = JSON.parse(result) - expect(unescaped).toMatch(/^\(.*\)$/) - expect(() => JSON.parse(unescaped.slice(1, -1))).not.toThrow() - }, 30000) -}) diff --git a/packages/protect/package.json b/packages/protect/package.json index 02c6458f..d831d686 100644 --- a/packages/protect/package.json +++ b/packages/protect/package.json @@ -69,7 +69,7 @@ }, "dependencies": { "@byteslice/result": "^0.2.0", - "@cipherstash/protect-ffi": "0.19.0", + "@cipherstash/protect-ffi": "0.20.0", "@cipherstash/schema": "workspace:*", "@stricli/core": "^1.2.5", "zod": "^3.24.2" diff --git a/packages/protect/src/ffi/index.ts b/packages/protect/src/ffi/index.ts index 8f7fa35d..67360f07 100644 --- a/packages/protect/src/ffi/index.ts +++ b/packages/protect/src/ffi/index.ts @@ -16,10 +16,15 @@ import type { Client, Decrypted, EncryptOptions, + EncryptQueryOptions, Encrypted, KeysetIdentifier, + QuerySearchTerm, + QueryTerm, SearchTerm, } from '../types' +import { isQueryTermArray } from '../query-term-guards' +import { 
BatchEncryptQueryOperation } from './operations/batch-encrypt-query' import { BulkDecryptOperation } from './operations/bulk-decrypt' import { BulkDecryptModelsOperation } from './operations/bulk-decrypt-models' import { BulkEncryptOperation } from './operations/bulk-encrypt' @@ -28,6 +33,7 @@ import { DecryptOperation } from './operations/decrypt' import { DecryptModelOperation } from './operations/decrypt-model' import { EncryptOperation } from './operations/encrypt' import { EncryptModelOperation } from './operations/encrypt-model' +import { EncryptQueryOperation } from './operations/encrypt-query' import { SearchTermsOperation } from './operations/search-terms' export const noClientError = () => @@ -307,6 +313,8 @@ export class ProtectClient { } /** + * @deprecated Use `encryptQuery(terms)` instead with QueryTerm types. + * * Create search terms to use in a query searching encrypted data * Usage: * await eqlClient.createSearchTerms(searchTerms) @@ -316,6 +324,90 @@ export class ProtectClient { return new SearchTermsOperation(this.client, terms) } + /** + * Encrypt a single value for query operations with explicit index type control. + * + * This method produces SEM-only payloads optimized for database queries, + * allowing you to specify which index type to use. 
+ * + * @param plaintext - The value to encrypt for querying + * @param opts - Options specifying the column, table, index type, and optional query operation + * @returns An EncryptQueryOperation that can be awaited or chained with withLockContext + * + * @example + * ```typescript + * // Encrypt for ORE range query + * const term = await protectClient.encryptQuery(100, { + * column: usersSchema.score, + * table: usersSchema, + * queryType: 'orderAndRange', + * }) + * ``` + * + * @see {@link https://cipherstash.com/docs/platform/searchable-encryption/supported-queries | Supported Query Types} + */ + encryptQuery( + plaintext: JsPlaintext | null, + opts: EncryptQueryOptions, + ): EncryptQueryOperation + + /** + * Encrypt multiple query terms in batch with explicit control over each term. + * + * Supports scalar terms (with explicit queryType), JSON path queries, and JSON containment queries. + * JSON queries implicitly use searchableJson query type. + * + * @param terms - Array of query terms to encrypt + * @returns A BatchEncryptQueryOperation that can be awaited or chained with withLockContext + * + * @example + * ```typescript + * const terms = await protectClient.encryptQuery([ + * // Scalar term with explicit queryType + * { value: 'admin@example.com', column: users.email, table: users, queryType: 'equality' }, + * // JSON path query (searchableJson implicit) + * { path: 'user.email', value: 'test@example.com', column: jsonSchema.metadata, table: jsonSchema }, + * // JSON containment query (searchableJson implicit) + * { contains: { role: 'admin' }, column: jsonSchema.metadata, table: jsonSchema }, + * ]) + * ``` + * + * @remarks + * Note: Empty arrays `[]` are routed to the batch overload and resolve to an + * empty result; they are never treated as a scalar plaintext value for the + * single-value overload.
+ */ + encryptQuery(terms: readonly QueryTerm[]): BatchEncryptQueryOperation + + // Implementation + encryptQuery( + plaintextOrTerms: JsPlaintext | null | readonly QueryTerm[], + opts?: EncryptQueryOptions, + ): EncryptQueryOperation | BatchEncryptQueryOperation { + // Check if this is a QueryTerm array by looking for QueryTerm-specific properties + // This is needed because JsPlaintext includes JsPlaintext[] which overlaps with QueryTerm[] + // Empty arrays are explicitly handled as batch operations (return empty result) + if (Array.isArray(plaintextOrTerms)) { + if (plaintextOrTerms.length === 0 || isQueryTermArray(plaintextOrTerms)) { + return new BatchEncryptQueryOperation( + this.client, + plaintextOrTerms as unknown as readonly QueryTerm[], + ) + } + } + // Non-array values pass through to single-value encryption + if (!opts) { + throw new Error( + 'encryptQuery requires options when called with a single value', + ) + } + return new EncryptQueryOperation( + this.client, + plaintextOrTerms as JsPlaintext | null, + opts, + ) + } + /** e.g., debugging or environment info */ clientInfo() { return { diff --git a/packages/protect/src/ffi/operations/batch-encrypt-query.ts b/packages/protect/src/ffi/operations/batch-encrypt-query.ts new file mode 100644 index 00000000..ca0ae075 --- /dev/null +++ b/packages/protect/src/ffi/operations/batch-encrypt-query.ts @@ -0,0 +1,331 @@ +import { type Result, withResult } from '@byteslice/result' +import { encryptBulk, encryptQueryBulk } from '@cipherstash/protect-ffi' +import { type ProtectError, ProtectErrorTypes } from '../..' 
+import { logger } from '../../../../utils/logger' +import type { Context, CtsToken, LockContext } from '../../identify' +import { + isJsonContainedByQueryTerm, + isJsonContainsQueryTerm, + isJsonPathQueryTerm, + isScalarQueryTerm, +} from '../../query-term-guards' +import type { + Client, + Encrypted, + EncryptedSearchTerm, + QueryTypeName, + JsPlaintext, + QueryOpName, + QueryTerm, +} from '../../types' +import { queryTypeToFfi } from '../../types' +import { noClientError } from '../index' +import { + buildNestedObject, + flattenJson, + pathToSelector, +} from './json-path-utils' +import { ProtectOperation } from './base-operation' + +/** Tracks which items belong to which term for reassembly */ +type JsonEncryptionItem = { + selector: string + isContainment: boolean + plaintext: JsPlaintext + column: string + table: string + queryOp: QueryOpName +} + +/** + * Helper to check if a scalar term has an explicit queryType + */ +function hasExplicitQueryType( + term: QueryTerm, +): term is QueryTerm & { queryType: QueryTypeName } { + return 'queryType' in term && term.queryType !== undefined +} + +/** + * Helper function to encrypt batch query terms + */ +async function encryptBatchQueryTermsHelper( + client: Client, + terms: readonly QueryTerm[], + metadata: Record | undefined, + lockContextData: { context: Context; ctsToken: CtsToken } | undefined, +): Promise { + if (!client) { + throw noClientError() + } + + // Partition terms by type + // Scalar terms WITH queryType → encryptQueryBulk (explicit control) + const scalarWithQueryType: Array<{ term: QueryTerm; index: number }> = [] + // Scalar terms WITHOUT queryType → encryptBulk (auto-infer) + const scalarAutoInfer: Array<{ term: QueryTerm; index: number }> = [] + const jsonItemsWithIndex: JsonEncryptionItem[] = [] + + for (let i = 0; i < terms.length; i++) { + const term = terms[i] + + if (isScalarQueryTerm(term)) { + if (hasExplicitQueryType(term)) { + scalarWithQueryType.push({ term, index: i }) + } else { + 
scalarAutoInfer.push({ term, index: i }) + } + } else if (isJsonContainsQueryTerm(term)) { + // Validate ste_vec index + const columnConfig = term.column.build() + if (!columnConfig.indexes.ste_vec) { + throw new Error( + `Column "${term.column.getName()}" does not have ste_vec index configured. Use .searchableJson() when defining the column.`, + ) + } + + const prefix = `${term.table.tableName}/${term.column.getName()}` + const pairs = flattenJson(term.contains, prefix) + for (const pair of pairs) { + jsonItemsWithIndex.push({ + selector: pair.selector, + isContainment: true, + plaintext: pair.value, + column: term.column.getName(), + table: term.table.tableName, + queryOp: 'default', + }) + } + } else if (isJsonContainedByQueryTerm(term)) { + // Validate ste_vec index + const columnConfig = term.column.build() + if (!columnConfig.indexes.ste_vec) { + throw new Error( + `Column "${term.column.getName()}" does not have ste_vec index configured. Use .searchableJson() when defining the column.`, + ) + } + + const prefix = `${term.table.tableName}/${term.column.getName()}` + const pairs = flattenJson(term.containedBy, prefix) + for (const pair of pairs) { + jsonItemsWithIndex.push({ + selector: pair.selector, + isContainment: true, + plaintext: pair.value, + column: term.column.getName(), + table: term.table.tableName, + queryOp: 'default', + }) + } + } else if (isJsonPathQueryTerm(term)) { + // Validate ste_vec index + const columnConfig = term.column.build() + if (!columnConfig.indexes.ste_vec) { + throw new Error( + `Column "${term.column.getName()}" does not have ste_vec index configured. Use .searchableJson() when defining the column.`, + ) + } + + const prefix = `${term.table.tableName}/${term.column.getName()}` + + if (term.value !== undefined) { + const pathArray = Array.isArray(term.path) + ? 
term.path + : term.path.split('.') + const wrappedValue = buildNestedObject(pathArray, term.value) + jsonItemsWithIndex.push({ + selector: pathToSelector(term.path, prefix), + isContainment: false, + plaintext: wrappedValue, + column: term.column.getName(), + table: term.table.tableName, + queryOp: 'default', + }) + } + // Path-only terms (no value) don't need encryption + } + } + + // Encrypt scalar terms WITH explicit queryType using encryptQueryBulk + const scalarExplicitEncrypted = + scalarWithQueryType.length > 0 + ? await encryptQueryBulk(client, { + queries: scalarWithQueryType.map(({ term }) => { + if (!isScalarQueryTerm(term)) + throw new Error('Expected scalar term') + const query = { + plaintext: term.value, + column: term.column.getName(), + table: term.table.tableName, + indexType: queryTypeToFfi[term.queryType!], + queryOp: term.queryOp, + } + if (lockContextData) { + return { ...query, lockContext: lockContextData.context } + } + return query + }), + ...(lockContextData && { serviceToken: lockContextData.ctsToken }), + unverifiedContext: metadata, + }) + : [] + + // Encrypt scalar terms WITHOUT queryType using encryptBulk (auto-infer) + const scalarAutoInferEncrypted = + scalarAutoInfer.length > 0 + ? await encryptBulk(client, { + plaintexts: scalarAutoInfer.map(({ term }) => { + if (!isScalarQueryTerm(term)) + throw new Error('Expected scalar term') + const plaintext = { + plaintext: term.value, + column: term.column.getName(), + table: term.table.tableName, + } + if (lockContextData) { + return { ...plaintext, lockContext: lockContextData.context } + } + return plaintext + }), + ...(lockContextData && { serviceToken: lockContextData.ctsToken }), + unverifiedContext: metadata, + }) + : [] + + // Encrypt JSON terms with encryptQueryBulk (searchableJson index) + const jsonEncrypted = + jsonItemsWithIndex.length > 0 + ? 
await encryptQueryBulk(client, { + queries: jsonItemsWithIndex.map((item) => { + const query = { + plaintext: item.plaintext, + column: item.column, + table: item.table, + indexType: queryTypeToFfi.searchableJson, + queryOp: item.queryOp, + } + if (lockContextData) { + return { ...query, lockContext: lockContextData.context } + } + return query + }), + ...(lockContextData && { serviceToken: lockContextData.ctsToken }), + unverifiedContext: metadata, + }) + : [] + + // Reassemble results in original order + const results: EncryptedSearchTerm[] = new Array(terms.length) + let scalarExplicitIdx = 0 + let scalarAutoInferIdx = 0 + let jsonIdx = 0 + + for (let i = 0; i < terms.length; i++) { + const term = terms[i] + + if (isScalarQueryTerm(term)) { + // Determine which result array to pull from based on whether term had explicit queryType + let encrypted: Encrypted + if (hasExplicitQueryType(term)) { + encrypted = scalarExplicitEncrypted[scalarExplicitIdx] + scalarExplicitIdx++ + } else { + encrypted = scalarAutoInferEncrypted[scalarAutoInferIdx] + scalarAutoInferIdx++ + } + + if (term.returnType === 'composite-literal') { + results[i] = `(${JSON.stringify(JSON.stringify(encrypted))})` + } else if (term.returnType === 'escaped-composite-literal') { + results[i] = + `${JSON.stringify(`(${JSON.stringify(JSON.stringify(encrypted))})`)}` + } else { + results[i] = encrypted + } + } else if (isJsonContainsQueryTerm(term)) { + const prefix = `${term.table.tableName}/${term.column.getName()}` + const pairs = flattenJson(term.contains, prefix) + const svEntries: Array> = [] + + for (const pair of pairs) { + svEntries.push({ + ...jsonEncrypted[jsonIdx], + s: pair.selector, + }) + jsonIdx++ + } + + results[i] = { sv: svEntries } as Encrypted + } else if (isJsonContainedByQueryTerm(term)) { + const prefix = `${term.table.tableName}/${term.column.getName()}` + const pairs = flattenJson(term.containedBy, prefix) + const svEntries: Array> = [] + + for (const pair of pairs) { + 
svEntries.push({ + ...jsonEncrypted[jsonIdx], + s: pair.selector, + }) + jsonIdx++ + } + + results[i] = { sv: svEntries } as Encrypted + } else if (isJsonPathQueryTerm(term)) { + const prefix = `${term.table.tableName}/${term.column.getName()}` + + if (term.value !== undefined) { + const selector = pathToSelector(term.path, prefix) + results[i] = { + ...jsonEncrypted[jsonIdx], + s: selector, + } as Encrypted + jsonIdx++ + } else { + const selector = pathToSelector(term.path, prefix) + results[i] = { s: selector } as Encrypted + } + } + } + + return results +} + +/** + * @internal + * Operation for encrypting multiple query terms in batch. + * See {@link ProtectClient.encryptQuery} for the public interface. + */ +export class BatchEncryptQueryOperation extends ProtectOperation< + EncryptedSearchTerm[] +> { + private client: Client + private terms: readonly QueryTerm[] + + constructor(client: Client, terms: readonly QueryTerm[]) { + super() + this.client = client + this.terms = terms + } + + public async execute(): Promise> { + logger.debug('Encrypting batch query terms', { + termCount: this.terms.length, + }) + + return await withResult( + async () => { + const { metadata } = this.getAuditData() + return await encryptBatchQueryTermsHelper( + this.client, + this.terms, + metadata, + undefined, + ) + }, + (error) => ({ + type: ProtectErrorTypes.EncryptionError, + message: error.message, + }), + ) + } +} diff --git a/packages/protect/src/ffi/operations/encrypt-query.ts b/packages/protect/src/ffi/operations/encrypt-query.ts new file mode 100644 index 00000000..cccb24e8 --- /dev/null +++ b/packages/protect/src/ffi/operations/encrypt-query.ts @@ -0,0 +1,206 @@ +import { type Result, withResult } from '@byteslice/result' +import { + type JsPlaintext, + encryptBulk, + encryptQuery as ffiEncryptQuery, +} from '@cipherstash/protect-ffi' +import type { + ProtectColumn, + ProtectTable, + ProtectTableColumn, + ProtectValue, +} from '@cipherstash/schema' +import { type 
ProtectError, ProtectErrorTypes } from '../..' +import { logger } from '../../../../utils/logger' +import type { LockContext } from '../../identify' +import type { + Client, + EncryptQueryOptions, + Encrypted, + QueryTypeName, + QueryOpName, +} from '../../types' +import { queryTypeToFfi } from '../../types' +import { noClientError } from '../index' +import { ProtectOperation } from './base-operation' + +/** + * @internal + * Operation for encrypting a single query term. + * When queryType is provided, uses explicit query type control via ffiEncryptQuery. + * When queryType is omitted, auto-infers from column config via encryptBulk. + * See {@link ProtectClient.encryptQuery} for the public interface and documentation. + */ +export class EncryptQueryOperation extends ProtectOperation { + private client: Client + private plaintext: JsPlaintext | null + private column: ProtectColumn | ProtectValue + private table: ProtectTable + private queryType?: QueryTypeName + private queryOp?: QueryOpName + + constructor( + client: Client, + plaintext: JsPlaintext | null, + opts: EncryptQueryOptions, + ) { + super() + this.client = client + this.plaintext = plaintext + this.column = opts.column + this.table = opts.table + this.queryType = opts.queryType + this.queryOp = opts.queryOp + } + + public withLockContext( + lockContext: LockContext, + ): EncryptQueryOperationWithLockContext { + return new EncryptQueryOperationWithLockContext(this, lockContext) + } + + public async execute(): Promise> { + logger.debug('Encrypting query WITHOUT a lock context', { + column: this.column.getName(), + table: this.table.tableName, + queryType: this.queryType, + queryOp: this.queryOp, + }) + + return await withResult( + async () => { + if (!this.client) { + throw noClientError() + } + + if (this.plaintext === null) { + return null + } + + const { metadata } = this.getAuditData() + + // Use explicit query type if provided, otherwise auto-infer via encryptBulk + if (this.queryType !== undefined) { 
+ return await ffiEncryptQuery(this.client, { + plaintext: this.plaintext, + column: this.column.getName(), + table: this.table.tableName, + indexType: queryTypeToFfi[this.queryType], + queryOp: this.queryOp, + unverifiedContext: metadata, + }) + } + + // Auto-infer query type via encryptBulk + const results = await encryptBulk(this.client, { + plaintexts: [ + { + plaintext: this.plaintext, + column: this.column.getName(), + table: this.table.tableName, + }, + ], + unverifiedContext: metadata, + }) + return results[0] + }, + (error) => ({ + type: ProtectErrorTypes.EncryptionError, + message: error.message, + }), + ) + } + + public getOperation(): { + client: Client + plaintext: JsPlaintext | null + column: ProtectColumn | ProtectValue + table: ProtectTable + queryType?: QueryTypeName + queryOp?: QueryOpName + } { + return { + client: this.client, + plaintext: this.plaintext, + column: this.column, + table: this.table, + queryType: this.queryType, + queryOp: this.queryOp, + } + } +} + +export class EncryptQueryOperationWithLockContext extends ProtectOperation { + private operation: EncryptQueryOperation + private lockContext: LockContext + + constructor(operation: EncryptQueryOperation, lockContext: LockContext) { + super() + this.operation = operation + this.lockContext = lockContext + } + + public async execute(): Promise> { + return await withResult( + async () => { + const { client, plaintext, column, table, queryType, queryOp } = + this.operation.getOperation() + + logger.debug('Encrypting query WITH a lock context', { + column: column.getName(), + table: table.tableName, + queryType, + queryOp, + }) + + if (!client) { + throw noClientError() + } + + if (plaintext === null) { + return null + } + + const { metadata } = this.getAuditData() + const context = await this.lockContext.getLockContext() + + if (context.failure) { + throw new Error(`[protect]: ${context.failure.message}`) + } + + // Use explicit query type if provided, otherwise auto-infer via 
encryptBulk + if (queryType !== undefined) { + return await ffiEncryptQuery(client, { + plaintext, + column: column.getName(), + table: table.tableName, + indexType: queryTypeToFfi[queryType], + queryOp, + lockContext: context.data.context, + serviceToken: context.data.ctsToken, + unverifiedContext: metadata, + }) + } + + // Auto-infer query type via encryptBulk with lock context + const results = await encryptBulk(client, { + plaintexts: [ + { + plaintext, + column: column.getName(), + table: table.tableName, + lockContext: context.data.context, + }, + ], + serviceToken: context.data.ctsToken, + unverifiedContext: metadata, + }) + return results[0] + }, + (error) => ({ + type: ProtectErrorTypes.EncryptionError, + message: error.message, + }), + ) + } +} diff --git a/packages/protect/src/ffi/operations/json-path-utils.ts b/packages/protect/src/ffi/operations/json-path-utils.ts new file mode 100644 index 00000000..b9127742 --- /dev/null +++ b/packages/protect/src/ffi/operations/json-path-utils.ts @@ -0,0 +1,60 @@ +import type { JsonPath } from '../../types' + +/** + * Converts a path to SteVec selector format: prefix/path/to/key + */ +export function pathToSelector(path: JsonPath, prefix: string): string { + const pathArray = Array.isArray(path) ? path : path.split('.') + return `${prefix}/${pathArray.join('/')}` +} + +/** + * Build a nested JSON object from a path array and a leaf value. + * E.g., ['user', 'role'], 'admin' => { user: { role: 'admin' } } + */ +export function buildNestedObject( + path: string[], + value: unknown, +): Record { + if (path.length === 0) { + return value as Record + } + if (path.length === 1) { + return { [path[0]]: value } + } + const [first, ...rest] = path + return { [first]: buildNestedObject(rest, value) } +} + +/** + * Flattens nested JSON into path-value pairs for containment queries. + * Returns the selector and a JSON object containing the value at the path. 
+ */ +export function flattenJson( + obj: Record, + prefix: string, + currentPath: string[] = [], +): Array<{ selector: string; value: Record }> { + const results: Array<{ selector: string; value: Record }> = + [] + + for (const [key, value] of Object.entries(obj)) { + const newPath = [...currentPath, key] + + if (value !== null && typeof value === 'object' && !Array.isArray(value)) { + results.push( + ...flattenJson(value as Record, prefix, newPath), + ) + } else { + // Leaf value (primitive, null, or array - only plain objects recurse): wrap it in a JSON object representing its path + // This is needed because ste_vec_term expects JSON objects + const wrappedValue = buildNestedObject(newPath, value) + results.push({ + selector: `${prefix}/${newPath.join('/')}`, + value: wrappedValue, + }) + } + } + + return results +} diff --git a/packages/protect/src/ffi/operations/search-terms.ts b/packages/protect/src/ffi/operations/search-terms.ts index 3949ee2e..cee4c4d6 100644 --- a/packages/protect/src/ffi/operations/search-terms.ts +++ b/packages/protect/src/ffi/operations/search-terms.ts @@ -1,11 +1,249 @@ import { type Result, withResult } from '@byteslice/result' -import { encryptBulk } from '@cipherstash/protect-ffi' +import { encryptBulk, encryptQueryBulk } from '@cipherstash/protect-ffi' import { type ProtectError, ProtectErrorTypes } from '../..' 
import { logger } from '../../../../utils/logger' -import type { Client, EncryptedSearchTerm, SearchTerm } from '../../types' +import type { Context, CtsToken, LockContext } from '../../identify' +import type { + Client, + Encrypted, + EncryptedSearchTerm, + JsPlaintext, + JsonContainmentSearchTerm, + JsonPathSearchTerm, + QueryOpName, + SearchTerm, + SimpleSearchTerm, +} from '../../types' +import { queryTypeToFfi } from '../../types' import { noClientError } from '../index' +import { buildNestedObject, flattenJson, pathToSelector } from './json-path-utils' import { ProtectOperation } from './base-operation' +/** + * Type guard to check if a search term is a JSON path search term + */ +function isJsonPathTerm(term: SearchTerm): term is JsonPathSearchTerm { + return 'path' in term +} + +/** + * Type guard to check if a search term is a JSON containment search term + */ +function isJsonContainmentTerm( + term: SearchTerm, +): term is JsonContainmentSearchTerm { + return 'containmentType' in term +} + +/** + * Type guard to check if a search term is a simple value search term + */ +function isSimpleSearchTerm(term: SearchTerm): term is SimpleSearchTerm { + return !isJsonPathTerm(term) && !isJsonContainmentTerm(term) +} + +/** Tracks which items belong to which term for reassembly */ +type JsonEncryptionItem = { + termIndex: number + selector: string + isContainment: boolean + plaintext: JsPlaintext + column: string + table: string + queryOp: QueryOpName +} + +/** + * Helper function to encrypt search terms + * Shared logic between SearchTermsOperation and SearchTermsOperationWithLockContext + * @param client The client to use for encryption + * @param terms The search terms to encrypt + * @param metadata Audit metadata for encryption + * @param lockContextData Optional lock context data { context: Context; ctsToken: CtsToken } + */ +async function encryptSearchTermsHelper( + client: Client, + terms: SearchTerm[], + metadata: Record | undefined, + lockContextData: { 
context: Context; ctsToken: CtsToken } | undefined, +): Promise { + if (!client) { + throw noClientError() + } + + // Partition terms by type + const simpleTermsWithIndex: Array<{ term: SimpleSearchTerm; index: number }> = + [] + const jsonItemsWithIndex: JsonEncryptionItem[] = [] + + for (let i = 0; i < terms.length; i++) { + const term = terms[i] + + if (isSimpleSearchTerm(term)) { + simpleTermsWithIndex.push({ term, index: i }) + } else if (isJsonContainmentTerm(term)) { + // Containment query - validate ste_vec index + const columnConfig = term.column.build() + + if (!columnConfig.indexes.ste_vec) { + throw new Error( + `Column "${term.column.getName()}" does not have ste_vec index configured. Use .searchableJson() when defining the column.`, + ) + } + + // Always use full table/column prefix + const prefix = `${term.table.tableName}/${term.column.getName()}` + + // Flatten and add all leaf values + const pairs = flattenJson(term.value, prefix) + for (const pair of pairs) { + jsonItemsWithIndex.push({ + termIndex: i, + selector: pair.selector, + isContainment: true, + plaintext: pair.value, + column: term.column.getName(), + table: term.table.tableName, + queryOp: 'default', + }) + } + } else if (isJsonPathTerm(term)) { + // Path query - validate ste_vec index + const columnConfig = term.column.build() + + if (!columnConfig.indexes.ste_vec) { + throw new Error( + `Column "${term.column.getName()}" does not have ste_vec index configured. Use .searchableJson() when defining the column.`, + ) + } + + // Always use full table/column prefix + const prefix = `${term.table.tableName}/${term.column.getName()}` + + if (term.value !== undefined) { + // Path query with value - wrap in nested object + const pathArray = Array.isArray(term.path) + ? 
term.path + : term.path.split('.') + const wrappedValue = buildNestedObject(pathArray, term.value) + jsonItemsWithIndex.push({ + termIndex: i, + selector: pathToSelector(term.path, prefix), + isContainment: false, + plaintext: wrappedValue, + column: term.column.getName(), + table: term.table.tableName, + queryOp: 'default', + }) + } + // Path-only terms (no value) don't need encryption + } + } + + // Encrypt simple terms with encryptBulk + const simpleEncrypted = + simpleTermsWithIndex.length > 0 + ? await encryptBulk(client, { + plaintexts: simpleTermsWithIndex.map(({ term }) => { + const plaintext = { + plaintext: term.value, + column: term.column.getName(), + table: term.table.tableName, + } + // Add lock context if provided + if (lockContextData) { + return { ...plaintext, lockContext: lockContextData.context } + } + return plaintext + }), + ...(lockContextData && { serviceToken: lockContextData.ctsToken }), + unverifiedContext: metadata, + }) + : [] + + // Encrypt JSON terms with encryptQueryBulk (searchableJson index) + const jsonEncrypted = + jsonItemsWithIndex.length > 0 + ? 
await encryptQueryBulk(client, { + queries: jsonItemsWithIndex.map((item) => { + const query = { + plaintext: item.plaintext, + column: item.column, + table: item.table, + indexType: queryTypeToFfi.searchableJson, + queryOp: item.queryOp, + } + // Add lock context if provided + if (lockContextData) { + return { ...query, lockContext: lockContextData.context } + } + return query + }), + ...(lockContextData && { serviceToken: lockContextData.ctsToken }), + unverifiedContext: metadata, + }) + : [] + + // Reassemble results in original order + const results: EncryptedSearchTerm[] = new Array(terms.length) + let simpleIdx = 0 + let jsonIdx = 0 + + for (let i = 0; i < terms.length; i++) { + const term = terms[i] + + if (isSimpleSearchTerm(term)) { + const encrypted = simpleEncrypted[simpleIdx] + simpleIdx++ + + // Apply return type formatting + if (term.returnType === 'composite-literal') { + results[i] = `(${JSON.stringify(JSON.stringify(encrypted))})` + } else if (term.returnType === 'escaped-composite-literal') { + results[i] = + `${JSON.stringify(`(${JSON.stringify(JSON.stringify(encrypted))})`)}` + } else { + results[i] = encrypted + } + } else if (isJsonContainmentTerm(term)) { + // Gather all encrypted values for this containment term + // Always use full table/column prefix + const prefix = `${term.table.tableName}/${term.column.getName()}` + const pairs = flattenJson(term.value, prefix) + const svEntries: Array> = [] + + for (const pair of pairs) { + svEntries.push({ + ...jsonEncrypted[jsonIdx], + s: pair.selector, + }) + jsonIdx++ + } + + results[i] = { sv: svEntries } as Encrypted + } else if (isJsonPathTerm(term)) { + // Always use full table/column prefix + const prefix = `${term.table.tableName}/${term.column.getName()}` + + if (term.value !== undefined) { + // Path query with value + const selector = pathToSelector(term.path, prefix) + results[i] = { + ...jsonEncrypted[jsonIdx], + s: selector, + } as Encrypted + jsonIdx++ + } else { + // Path-only (no 
value comparison) + const selector = pathToSelector(term.path, prefix) + results[i] = { s: selector } as Encrypted + } + } + } + + return results +} + export class SearchTermsOperation extends ProtectOperation< EncryptedSearchTerm[] > { @@ -25,32 +263,73 @@ export class SearchTermsOperation extends ProtectOperation< return await withResult( async () => { - if (!this.client) { - throw noClientError() - } - const { metadata } = this.getAuditData() - const encryptedSearchTerms = await encryptBulk(this.client, { - plaintexts: this.terms.map((term) => ({ - plaintext: term.value, - column: term.column.getName(), - table: term.table.tableName, - })), - unverifiedContext: metadata, - }) + // Call helper with no lock context + const results = await encryptSearchTermsHelper( + this.client, + this.terms, + metadata, + undefined, + ) - return this.terms.map((term, index) => { - if (term.returnType === 'composite-literal') { - return `(${JSON.stringify(JSON.stringify(encryptedSearchTerms[index]))})` - } + return results + }, + (error) => ({ + type: ProtectErrorTypes.EncryptionError, + message: error.message, + }), + ) + } - if (term.returnType === 'escaped-composite-literal') { - return `${JSON.stringify(`(${JSON.stringify(JSON.stringify(encryptedSearchTerms[index]))})`)}` - } + public withLockContext( + lockContext: LockContext, + ): SearchTermsOperationWithLockContext { + return new SearchTermsOperationWithLockContext(this, lockContext) + } - return encryptedSearchTerms[index] + public getOperation() { + return { client: this.client, terms: this.terms } + } +} + +export class SearchTermsOperationWithLockContext extends ProtectOperation< + EncryptedSearchTerm[] +> { + private operation: SearchTermsOperation + private lockContext: LockContext + + constructor(operation: SearchTermsOperation, lockContext: LockContext) { + super() + this.operation = operation + this.lockContext = lockContext + } + + public async execute(): Promise> { + return await withResult( + async () => { + 
const { client, terms } = this.operation.getOperation() + + logger.debug('Creating search terms WITH lock context', { + termCount: terms.length, }) + + const { metadata } = this.getAuditData() + const context = await this.lockContext.getLockContext() + + if (context.failure) { + throw new Error(`[protect]: ${context.failure.message}`) + } + + // Call helper with lock context + const results = await encryptSearchTermsHelper( + client, + terms, + metadata, + { context: context.data.context, ctsToken: context.data.ctsToken }, + ) + + return results }, (error) => ({ type: ProtectErrorTypes.EncryptionError, diff --git a/packages/protect/src/index.ts b/packages/protect/src/index.ts index 54d4a8d9..650cb3c4 100644 --- a/packages/protect/src/index.ts +++ b/packages/protect/src/index.ts @@ -98,6 +98,9 @@ export type { DecryptOperation } from './ffi/operations/decrypt' export type { DecryptModelOperation } from './ffi/operations/decrypt-model' export type { EncryptModelOperation } from './ffi/operations/encrypt-model' export type { EncryptOperation } from './ffi/operations/encrypt' +export type { SearchTermsOperation } from './ffi/operations/search-terms' +export type { EncryptQueryOperation } from './ffi/operations/encrypt-query' +export type { BatchEncryptQueryOperation } from './ffi/operations/batch-encrypt-query' export { csTable, csColumn, csValue } from '@cipherstash/schema' export type { @@ -116,4 +119,55 @@ export type { GetLockContextResponse, } from './identify' export * from './helpers' -export * from './types' + +// Explicitly export only the public types (not internal query types) +export type { + Client, + Encrypted, + EncryptedPayload, + EncryptedData, + SearchTerm, + SimpleSearchTerm, + KeysetIdentifier, + EncryptedSearchTerm, + EncryptPayload, + EncryptOptions, + EncryptQueryOptions, + EncryptedFields, + OtherFields, + DecryptedFields, + Decrypted, + BulkEncryptPayload, + BulkEncryptedData, + BulkDecryptPayload, + BulkDecryptedData, + DecryptionResult, + 
QuerySearchTerm, + JsonSearchTerm, + JsonPath, + JsonPathSearchTerm, + JsonContainmentSearchTerm, + // New unified QueryTerm types + QueryTerm, + ScalarQueryTermBase, + JsonQueryTermBase, + ScalarQueryTerm, + JsonPathQueryTerm, + JsonContainsQueryTerm, + JsonContainedByQueryTerm, + // Query option types (used in ScalarQueryTerm) + QueryTypeName, + QueryOpName, +} from './types' + +// Export queryTypes constant for explicit query type selection +export { queryTypes } from './types' + +// Export type guards +export { + isScalarQueryTerm, + isJsonPathQueryTerm, + isJsonContainsQueryTerm, + isJsonContainedByQueryTerm, +} from './query-term-guards' +export type { JsPlaintext } from '@cipherstash/protect-ffi' diff --git a/packages/protect/src/query-term-guards.ts b/packages/protect/src/query-term-guards.ts new file mode 100644 index 00000000..b313ddd6 --- /dev/null +++ b/packages/protect/src/query-term-guards.ts @@ -0,0 +1,64 @@ +import type { + JsonContainedByQueryTerm, + JsonContainsQueryTerm, + JsonPathQueryTerm, + QueryTerm, + ScalarQueryTerm, +} from './types' + +/** + * Type guard for scalar query terms. + * Scalar terms have 'value' but not JSON-specific properties (path, contains, containedBy). + * Note: queryType is now optional for scalar terms (auto-inferred when omitted). 
+ */ +export function isScalarQueryTerm(term: QueryTerm): term is ScalarQueryTerm { + return ( + 'value' in term && + !('path' in term) && + !('contains' in term) && + !('containedBy' in term) + ) +} + +/** + * Type guard for JSON path query terms (have path) + */ +export function isJsonPathQueryTerm( + term: QueryTerm, +): term is JsonPathQueryTerm { + return 'path' in term +} + +/** + * Type guard for JSON contains query terms (have contains) + */ +export function isJsonContainsQueryTerm( + term: QueryTerm, +): term is JsonContainsQueryTerm { + return 'contains' in term +} + +/** + * Type guard for JSON containedBy query terms (have containedBy) + */ +export function isJsonContainedByQueryTerm( + term: QueryTerm, +): term is JsonContainedByQueryTerm { + return 'containedBy' in term +} + +/** + * Type guard to check if an array contains QueryTerm objects (only the first element is inspected; an empty array returns false). + * Requires BOTH QueryTerm-specific properties (column and table), which every QueryTerm variant carries, to distinguish + * from JsPlaintext[] which can also be an array of objects (e.g. JSON data that happens to have a 'table' or 'column' key). + */ +export function isQueryTermArray( + arr: readonly unknown[], +): arr is readonly QueryTerm[] { + return ( + arr.length > 0 && + typeof arr[0] === 'object' && + arr[0] !== null && + ('column' in arr[0] && 'table' in arr[0]) + ) +} diff --git a/packages/protect/src/types.ts b/packages/protect/src/types.ts index 7dc15705..872e3803 100644 --- a/packages/protect/src/types.ts +++ b/packages/protect/src/types.ts @@ -1,8 +1,68 @@ import type { Encrypted as CipherStashEncrypted, - JsPlaintext, + JsPlaintext as FfiJsPlaintext, newClient, } from '@cipherstash/protect-ffi' + +export type { JsPlaintext } from '@cipherstash/protect-ffi' + +/** + * Query type for query encryption operations. 
+ * Matches the schema builder methods: .orderAndRange(), .freeTextSearch(), .equality(), .searchableJson() + * + * - `'orderAndRange'`: Order-Revealing Encryption for range queries (<, >, BETWEEN) + * {@link https://cipherstash.com/docs/platform/searchable-encryption/supported-queries/range | Range Queries} + * - `'freeTextSearch'`: Fuzzy/substring search + * {@link https://cipherstash.com/docs/platform/searchable-encryption/supported-queries/match | Match Queries} + * - `'equality'`: Exact equality matching + * {@link https://cipherstash.com/docs/platform/searchable-encryption/supported-queries/exact | Exact Queries} + * - `'searchableJson'`: Structured Text Encryption Vector for JSON path/containment queries + * {@link https://cipherstash.com/docs/platform/searchable-encryption/supported-queries/json | JSON Queries} + */ +export type QueryTypeName = 'orderAndRange' | 'freeTextSearch' | 'equality' | 'searchableJson' + +/** + * Internal FFI index type names. + * @internal + */ +export type FfiIndexTypeName = 'ore' | 'match' | 'unique' | 'ste_vec' + +/** + * Query type constants for use with encryptQuery(). + * + * @example + * import { queryTypes } from '@cipherstash/protect' + * await protectClient.encryptQuery('value', { + * column: users.email, + * table: users, + * queryType: queryTypes.freeTextSearch, + * }) + */ +export const queryTypes = { + orderAndRange: 'orderAndRange', + freeTextSearch: 'freeTextSearch', + equality: 'equality', + searchableJson: 'searchableJson', +} as const satisfies Record + +/** + * Maps user-friendly query type names to FFI index type names. + * @internal + */ +export const queryTypeToFfi: Record = { + orderAndRange: 'ore', + freeTextSearch: 'match', + equality: 'unique', + searchableJson: 'ste_vec', +} + +/** + * Query operation type for ste_vec index. 
+ * - 'default': Standard JSON query using column's cast_type + * - 'ste_vec_selector': JSON path selection ($.user.email) + * - 'ste_vec_term': JSON containment (@>) + */ +export type QueryOpName = 'default' | 'ste_vec_selector' | 'ste_vec_term' import type { ProtectColumn, ProtectTable, @@ -33,15 +93,222 @@ export type EncryptedPayload = Encrypted | null export type EncryptedData = Encrypted | null /** - * Represents a value that will be encrypted and used in a search + * Simple search term for basic value encryption (original SearchTerm behavior) */ -export type SearchTerm = { - value: JsPlaintext +export type SimpleSearchTerm = { + value: FfiJsPlaintext column: ProtectColumn table: ProtectTable returnType?: 'eql' | 'composite-literal' | 'escaped-composite-literal' } +/** + * Represents a value that will be encrypted and used in a search. + * Can be a simple value search, JSON path search, or JSON containment search. + */ +export type SearchTerm = + | SimpleSearchTerm + | JsonPathSearchTerm + | JsonContainmentSearchTerm + +/** + * Options for encrypting a query term with encryptQuery(). + * + * When queryType is omitted, the query type is auto-inferred from the column configuration. + * When queryType is provided, it explicitly controls which index to use. + */ +export type EncryptQueryOptions = { + /** The column definition from the schema */ + column: ProtectColumn | ProtectValue + /** The table definition from the schema */ + table: ProtectTable + /** Which query type to use for the query (optional - auto-inferred if omitted) */ + queryType?: QueryTypeName + /** Query operation (defaults to 'default') */ + queryOp?: QueryOpName +} + +/** + * Individual query payload for bulk query operations. + * Used internally for query encryption operations. + * @deprecated This type is not directly used in the public API. Use QueryTerm types with encryptQuery() instead. 
+ */ +export type QuerySearchTerm = { + /** The value to encrypt for querying */ + value: FfiJsPlaintext + /** The column definition */ + column: ProtectColumn | ProtectValue + /** The table definition */ + table: ProtectTable + /** Which query type to use */ + queryType: QueryTypeName + /** Query operation (optional, defaults to 'default') */ + queryOp?: QueryOpName + /** Return format for the encrypted result */ + returnType?: 'eql' | 'composite-literal' | 'escaped-composite-literal' +} + +/** + * Base type for scalar query terms (accepts ProtectColumn | ProtectValue) + */ +export type ScalarQueryTermBase = { + /** The column definition (can be ProtectColumn or ProtectValue) */ + column: ProtectColumn | ProtectValue + /** The table definition */ + table: ProtectTable + /** Return format for the encrypted result */ + returnType?: 'eql' | 'composite-literal' | 'escaped-composite-literal' +} + +/** + * Base type for JSON query terms (requires ProtectColumn for .build() access) + * Note: returnType is not supported for JSON terms as they return structured objects + */ +export type JsonQueryTermBase = { + /** The column definition (must be ProtectColumn with .searchableJson()) */ + column: ProtectColumn + /** The table definition */ + table: ProtectTable +} + +/** + * Scalar query term for standard column queries (equality, orderAndRange, freeTextSearch indexes). + * + * When queryType is omitted, the query type is auto-inferred from the column configuration. + * When queryType is provided, it explicitly controls which index to use. 
+ * + * @example + * ```typescript + * // Auto-infer query type from column config + * const term: ScalarQueryTerm = { + * value: 'admin@example.com', + * column: users.email, + * table: users, + * } + * + * // Explicit query type control + * const explicitTerm: ScalarQueryTerm = { + * value: 'admin@example.com', + * column: users.email, + * table: users, + * queryType: 'equality', + * } + * ``` + */ +export type ScalarQueryTerm = ScalarQueryTermBase & { + /** The value to encrypt for querying */ + value: FfiJsPlaintext + /** Which query type to use (optional - auto-inferred if omitted) */ + queryType?: QueryTypeName + /** Query operation (optional, defaults to 'default') */ + queryOp?: QueryOpName +} + +/** + * JSON path query term for searchableJson indexed columns. + * Query type is implicitly 'searchableJson'. + * Column must be defined with .searchableJson(). + * + * @example + * ```typescript + * const term: JsonPathQueryTerm = { + * path: 'user.email', + * value: 'admin@example.com', + * column: metadata, + * table: documents, + * } + * ``` + */ +export type JsonPathQueryTerm = JsonQueryTermBase & { + /** The path to navigate to in the JSON */ + path: JsonPath + /** The value to compare at the path (optional, for WHERE clauses) */ + value?: FfiJsPlaintext +} + +/** + * JSON containment query term for @> operator. + * Query type is implicitly 'searchableJson'. + * Column must be defined with .searchableJson(). + * + * @example + * ```typescript + * const term: JsonContainsQueryTerm = { + * contains: { status: 'active', role: 'admin' }, + * column: metadata, + * table: documents, + * } + * ``` + */ +export type JsonContainsQueryTerm = JsonQueryTermBase & { + /** The JSON object to search for (PostgreSQL @> operator) */ + contains: Record +} + +/** + * JSON containment query term for <@ operator. + * Query type is implicitly 'searchableJson'. + * Column must be defined with .searchableJson(). 
+ * + * @example + * ```typescript + * const term: JsonContainedByQueryTerm = { + * containedBy: { permissions: ['read', 'write', 'admin'] }, + * column: metadata, + * table: documents, + * } + * ``` + */ +export type JsonContainedByQueryTerm = JsonQueryTermBase & { + /** The JSON object to be contained by (PostgreSQL <@ operator) */ + containedBy: Record +} + +/** + * Union type for all query term variants in batch encryptQuery operations. + */ +export type QueryTerm = + | ScalarQueryTerm + | JsonPathQueryTerm + | JsonContainsQueryTerm + | JsonContainedByQueryTerm + +/** + * JSON path - either dot-notation string ('user.email') or array of keys (['user', 'email']) + */ +export type JsonPath = string | string[] + +/** + * Search term for JSON containment queries (@> / <@) + */ +export type JsonContainmentSearchTerm = { + /** The JSON object or partial object to search for */ + value: Record + column: ProtectColumn + table: ProtectTable + /** Type of containment: 'contains' for @>, 'contained_by' for <@ */ + containmentType: 'contains' | 'contained_by' + returnType?: 'eql' | 'composite-literal' | 'escaped-composite-literal' +} + +/** + * Search term for JSON path access queries (-> / ->>) + */ +export type JsonPathSearchTerm = { + /** The path to navigate to in the JSON */ + path: JsonPath + /** The value to compare at the path (optional, for WHERE clauses) */ + value?: FfiJsPlaintext + column: ProtectColumn + table: ProtectTable + returnType?: 'eql' | 'composite-literal' | 'escaped-composite-literal' +} + +/** + * Union type for JSON search operations + */ +export type JsonSearchTerm = JsonContainmentSearchTerm | JsonPathSearchTerm + export type KeysetIdentifier = | { name: string @@ -61,7 +328,7 @@ export type EncryptedSearchTerm = Encrypted | string /** * Represents a payload to be encrypted using the `encrypt` function */ -export type EncryptPayload = JsPlaintext | null +export type EncryptPayload = FfiJsPlaintext | null /** * Represents the options for 
encrypting a payload using the `encrypt` function @@ -102,12 +369,12 @@ export type Decrypted = OtherFields & DecryptedFields */ export type BulkEncryptPayload = Array<{ id?: string - plaintext: JsPlaintext | null + plaintext: FfiJsPlaintext | null }> export type BulkEncryptedData = Array<{ id?: string; data: Encrypted }> export type BulkDecryptPayload = Array<{ id?: string; data: Encrypted }> -export type BulkDecryptedData = Array> +export type BulkDecryptedData = Array> type DecryptionSuccess = { error?: never diff --git a/packages/schema/__tests__/schema.test.ts b/packages/schema/__tests__/schema.test.ts index d1d99a51..7d2a117f 100644 --- a/packages/schema/__tests__/schema.test.ts +++ b/packages/schema/__tests__/schema.test.ts @@ -131,7 +131,7 @@ describe('Schema with nested columns', () => { }) // NOTE: Leaving this test commented out until stevec indexing for JSON is supported. - /*it('should handle ste_vec index for JSON columns', () => { + it('should handle ste_vec index for JSON columns', () => { const users = csTable('users', { json: csColumn('json').dataType('jsonb').searchableJson(), } as const) @@ -142,5 +142,5 @@ describe('Schema with nested columns', () => { expect(config.tables.users.json.indexes.ste_vec?.prefix).toEqual( 'users/json', ) - })*/ + }) }) diff --git a/packages/schema/__tests__/searchable-json.test.ts b/packages/schema/__tests__/searchable-json.test.ts new file mode 100644 index 00000000..ec8187cb --- /dev/null +++ b/packages/schema/__tests__/searchable-json.test.ts @@ -0,0 +1,41 @@ +import { describe, expect, it } from 'vitest' +import { buildEncryptConfig, csColumn, csTable } from '../src' + +describe('searchableJson schema method', () => { + it('should configure ste_vec index with correct prefix', () => { + const users = csTable('users', { + metadata: csColumn('metadata').searchableJson(), + }) + + const config = buildEncryptConfig(users) + + expect(config.tables.users.metadata.cast_as).toBe('json') + 
expect(config.tables.users.metadata.indexes.ste_vec).toBeDefined() + expect(config.tables.users.metadata.indexes.ste_vec?.prefix).toBe( + 'users/metadata', + ) + }) + + it('should allow chaining with other column methods', () => { + const users = csTable('users', { + data: csColumn('data').searchableJson(), + }) + + const config = buildEncryptConfig(users) + + expect(config.tables.users.data.cast_as).toBe('json') + expect(config.tables.users.data.indexes.ste_vec?.prefix).toBe('users/data') + }) + + it('should work alongside regular encrypted columns', () => { + const users = csTable('users', { + email: csColumn('email').equality(), + metadata: csColumn('metadata').searchableJson(), + }) + + const config = buildEncryptConfig(users) + + expect(config.tables.users.email.indexes.unique).toBeDefined() + expect(config.tables.users.metadata.indexes.ste_vec).toBeDefined() + }) +}) diff --git a/packages/schema/src/index.ts b/packages/schema/src/index.ts index b12b30de..706a2088 100644 --- a/packages/schema/src/index.ts +++ b/packages/schema/src/index.ts @@ -211,13 +211,16 @@ export class ProtectColumn { } /** - * Enable a STE Vec index, uses the column name for the index. + * Enable a STE Vec index for searchable JSON columns. + * This automatically sets the cast_as to 'json' and configures the ste_vec index. + * The prefix is automatically inferred as 'table/column' during build. */ - // NOTE: Leaving this commented out until stevec indexing for JSON is supported. - /*searchableJson() { + searchableJson() { + this.castAsValue = 'json' + // Use column name as temporary prefix; will be replaced with table/column during table build this.indexesValue.ste_vec = { prefix: this.columnName } return this - }*/ + } build() { return { @@ -265,11 +268,8 @@ export class ProtectTable { if (builder instanceof ProtectColumn) { const builtColumn = builder.build() - // Hanlde building the ste_vec index for JSON columns so users don't have to pass the prefix. 
- if ( - builtColumn.cast_as === 'json' && - builtColumn.indexes.ste_vec?.prefix === 'enabled' - ) { + // Set ste_vec prefix to table/column (overwriting any temporary prefix) + if (builtColumn.indexes.ste_vec) { builtColumns[colName] = { ...builtColumn, indexes: { @@ -342,7 +342,16 @@ export function buildEncryptConfig( for (const tb of protectTables) { const tableDef = tb.build() - config.tables[tableDef.tableName] = tableDef.columns + const tableName = tableDef.tableName + + // Set ste_vec prefix to table/column (overwriting any temporary prefix) + for (const [columnName, columnConfig] of Object.entries(tableDef.columns)) { + if (columnConfig.indexes.ste_vec) { + columnConfig.indexes.ste_vec.prefix = `${tableName}/${columnName}` + } + } + + config.tables[tableName] = tableDef.columns } return config diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3912a3b7..c096e189 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -512,8 +512,8 @@ importers: specifier: ^0.2.0 version: 0.2.2 '@cipherstash/protect-ffi': - specifier: 0.19.0 - version: 0.19.0 + specifier: 0.20.0 + version: 0.20.0 '@cipherstash/schema': specifier: workspace:* version: link:../schema @@ -1061,38 +1061,38 @@ packages: '@changesets/write@0.4.0': resolution: {integrity: sha512-CdTLvIOPiCNuH71pyDu3rA+Q0n65cmAbXnwWH84rKGiFumFzkmHNT8KHTMEchcxN+Kl8I54xGUhJ7l3E7X396Q==} - '@cipherstash/protect-ffi-darwin-arm64@0.19.0': - resolution: {integrity: sha512-0U/paHskpD2SCiy4T4s5Ery112n5MfT3cXudCZ0m82x03SiK5sU9SNtD2tI0tJhcUlDP9TsFUKnYEEZPFR8pUA==} + '@cipherstash/protect-ffi-darwin-arm64@0.20.0': + resolution: {integrity: sha512-XXUBMqKCbOJh9J+iVH9tcBIIFDUqHI5m2ttwDmgCyOALn6wkPSAXqQn32JsFYJa0RsYLjxU5MxvJ+AfTlvMj4Q==} cpu: [arm64] os: [darwin] - '@cipherstash/protect-ffi-darwin-x64@0.19.0': - resolution: {integrity: sha512-gbPomTjvBCO7eZsMLGzMVv0Al/TZQ3SOfLWCRzRdWzff3BIC+wPrqJJBbpxIb/WRG7Ak8ceRSdMkrnhQnlsYHA==} + '@cipherstash/protect-ffi-darwin-x64@0.20.0': + resolution: {integrity: 
sha512-3wcU4hneNOGFcDAxrxE6o1Swh3xYnuJTu7rA1Txp4STDgb64rhm7otTOgiP0kY82yX++gzU9yZfdR0ceYSBmJQ==} cpu: [x64] os: [darwin] - '@cipherstash/protect-ffi-linux-arm64-gnu@0.19.0': - resolution: {integrity: sha512-z4ZFJGrmxlsZM5arFyLeeiod8z5SONPYLYnQnO+HG9CH+ra2jRhCvA5qvPjF1+/7vL/zpuV+9MhVJGTt7Vo38A==} + '@cipherstash/protect-ffi-linux-arm64-gnu@0.20.0': + resolution: {integrity: sha512-JARa2NnlzpDvWoijuTrDHF8H/IVMeqcuWsEy2oxQI5MkQXL3PrbBwTJ++2oZ835/b6L80xebz6OBNNPTlyJq9Q==} cpu: [arm64] os: [linux] - '@cipherstash/protect-ffi-linux-x64-gnu@0.19.0': - resolution: {integrity: sha512-ZD3YSzGdgtN7Elsp4rKGBREvbhsYNIt5ywnme8JEgVID7UFENQK5WmsLr20ZbMT1C37TaMRS5ZuIS+loSZvE5Q==} + '@cipherstash/protect-ffi-linux-x64-gnu@0.20.0': + resolution: {integrity: sha512-WF0LjsUAV38IDGOcat6NIsEE37dnjV2oG1A5g0vG1SX91nQLWFsH6UaxwGzygOa/NOZKkULdHL16v0ziFntOmg==} cpu: [x64] os: [linux] - '@cipherstash/protect-ffi-linux-x64-musl@0.19.0': - resolution: {integrity: sha512-dngMn6EP2016fwJMg8yeZiJJ/lDOiZ5lkA8fMrVxkr/pv6t7x8m1pdbh4TuLA4OSozm2MLXFu/SZInPwdWZu/w==} + '@cipherstash/protect-ffi-linux-x64-musl@0.20.0': + resolution: {integrity: sha512-EDaX+cUORQxzREC5aZ1XuJRrycvAC1Fx2F4glb3XMACTCZXVVA7KPD5SJRTIWmPuAjHOGo8ZdXcvfjA0Xo7bDw==} cpu: [x64] os: [linux] - '@cipherstash/protect-ffi-win32-x64-msvc@0.19.0': - resolution: {integrity: sha512-A0WaKj+8WtO+synaMUbOy4a34/s7urJemXj5nC/8EKS8ppGcAJR5pZqV4+RV57j0pQSSR52BAvAenuQEGyKZPA==} + '@cipherstash/protect-ffi-win32-x64-msvc@0.20.0': + resolution: {integrity: sha512-5lTJVKwpoOpnKQGBnhVl0FwMV+eiqpoMMmQoqBreZwNOF/MwrI6f0gfyEz9oG+3tnKQrMcJ+X4HMU1RKPDRKpQ==} cpu: [x64] os: [win32] - '@cipherstash/protect-ffi@0.19.0': - resolution: {integrity: sha512-UfPwO2axmi4O18Wwv87wDg1aGU1RHIEZoWtb/nEYWQgXDOhYtKmWcKQic0MMednBeHAF972pNsrw9Dxhs0ZxXw==} + '@cipherstash/protect-ffi@0.20.0': + resolution: {integrity: sha512-SG5I03pqrGeVjC6+s26/fX84+ar+zGv9IDEipdFBB2ZYjEXuGE/dPd//AcF+jJU4Alldtt95cv0wIXMQbfWXCw==} '@clerk/backend@2.28.0': resolution: {integrity: 
sha512-rd0hWrU7VES/CEYwnyaXDDHzDXYIaSzI5G03KLUfxLyOQSChU0ZUeViDYyXEsjZgAQqiUP1TFykh9JU2YlaNYg==} @@ -1589,6 +1589,12 @@ packages: peerDependencies: eslint: ^6.0.0 || ^7.0.0 || >=8.0.0 + '@eslint-community/eslint-utils@4.9.1': + resolution: {integrity: sha512-phrYmNiYppR7znFEdqgfWHXR6NCkZEK7hwWDHZUjit/2/U0r6XvkDl0SYnoM51Hq7FhCGdLDT6zxCCOY1hexsQ==} + engines: {node: ^12.22.0 || ^14.17.0 || >=16.0.0} + peerDependencies: + eslint: ^6.0.0 || ^7.0.0 || >=8.0.0 + '@eslint-community/regexpp@4.12.2': resolution: {integrity: sha512-EriSTlt5OC9/7SXkRSCAhfSxxoSUgBm33OH+IkwbdpgoqsSsUg7y3uh+IICI/Qg4BBWr3U2i39RpmycbxMq4ew==} engines: {node: ^12.0.0 || ^14.0.0 || >=16.0.0} @@ -4247,8 +4253,8 @@ packages: engines: {node: '>=4'} hasBin: true - esquery@1.6.0: - resolution: {integrity: sha512-ca9pw9fomFcKPvFLXhBKUK90ZvGibiGOvRJNbjljY7s7uq/5YO4BOzcYtJqExdx99rF6aAcnRxHmcUHcz6sQsg==} + esquery@1.7.0: + resolution: {integrity: sha512-Ap6G0WQwcU/LHsvLwON1fAQX9Zp0A2Y6Y/cJBl9r/JbW90Zyg4/zbG6zzKa2OTALELarYHmKu0GhpM5EO+7T0g==} engines: {node: '>=0.10'} esrecurse@4.3.0: @@ -7432,34 +7438,34 @@ snapshots: human-id: 4.1.3 prettier: 2.8.8 - '@cipherstash/protect-ffi-darwin-arm64@0.19.0': + '@cipherstash/protect-ffi-darwin-arm64@0.20.0': optional: true - '@cipherstash/protect-ffi-darwin-x64@0.19.0': + '@cipherstash/protect-ffi-darwin-x64@0.20.0': optional: true - '@cipherstash/protect-ffi-linux-arm64-gnu@0.19.0': + '@cipherstash/protect-ffi-linux-arm64-gnu@0.20.0': optional: true - '@cipherstash/protect-ffi-linux-x64-gnu@0.19.0': + '@cipherstash/protect-ffi-linux-x64-gnu@0.20.0': optional: true - '@cipherstash/protect-ffi-linux-x64-musl@0.19.0': + '@cipherstash/protect-ffi-linux-x64-musl@0.20.0': optional: true - '@cipherstash/protect-ffi-win32-x64-msvc@0.19.0': + '@cipherstash/protect-ffi-win32-x64-msvc@0.20.0': optional: true - '@cipherstash/protect-ffi@0.19.0': + '@cipherstash/protect-ffi@0.20.0': dependencies: '@neon-rs/load': 0.1.82 optionalDependencies: - 
'@cipherstash/protect-ffi-darwin-arm64': 0.19.0 - '@cipherstash/protect-ffi-darwin-x64': 0.19.0 - '@cipherstash/protect-ffi-linux-arm64-gnu': 0.19.0 - '@cipherstash/protect-ffi-linux-x64-gnu': 0.19.0 - '@cipherstash/protect-ffi-linux-x64-musl': 0.19.0 - '@cipherstash/protect-ffi-win32-x64-msvc': 0.19.0 + '@cipherstash/protect-ffi-darwin-arm64': 0.20.0 + '@cipherstash/protect-ffi-darwin-x64': 0.20.0 + '@cipherstash/protect-ffi-linux-arm64-gnu': 0.20.0 + '@cipherstash/protect-ffi-linux-x64-gnu': 0.20.0 + '@cipherstash/protect-ffi-linux-x64-musl': 0.20.0 + '@cipherstash/protect-ffi-win32-x64-msvc': 0.20.0 '@clerk/backend@2.28.0(react-dom@19.2.3(react@19.2.3))(react@19.2.3)': dependencies: @@ -7763,6 +7769,11 @@ snapshots: eslint: 9.39.2(jiti@2.6.1) eslint-visitor-keys: 3.4.3 + '@eslint-community/eslint-utils@4.9.1(eslint@9.39.2(jiti@2.6.1))': + dependencies: + eslint: 9.39.2(jiti@2.6.1) + eslint-visitor-keys: 3.4.3 + '@eslint-community/regexpp@4.12.2': {} '@eslint/config-array@0.21.1': @@ -10402,7 +10413,7 @@ snapshots: eslint@9.39.2(jiti@2.6.1): dependencies: - '@eslint-community/eslint-utils': 4.9.0(eslint@9.39.2(jiti@2.6.1)) + '@eslint-community/eslint-utils': 4.9.1(eslint@9.39.2(jiti@2.6.1)) '@eslint-community/regexpp': 4.12.2 '@eslint/config-array': 0.21.1 '@eslint/config-helpers': 0.4.2 @@ -10422,7 +10433,7 @@ snapshots: eslint-scope: 8.4.0 eslint-visitor-keys: 4.2.1 espree: 10.4.0 - esquery: 1.6.0 + esquery: 1.7.0 esutils: 2.0.3 fast-deep-equal: 3.1.3 file-entry-cache: 8.0.0 @@ -10449,7 +10460,7 @@ snapshots: esprima@4.0.1: {} - esquery@1.6.0: + esquery@1.7.0: dependencies: estraverse: 5.3.0