From 967905f4f0cd167da4626a1f55148957914b32fc Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Mon, 9 Sep 2024 15:48:30 +0530 Subject: [PATCH 01/24] feat(oracle): initial commit for vectors --- src/data-types.js | 59 ++++++++++- src/dialects/oracle/data-types.js | 21 +++- src/dialects/oracle/index.js | 5 +- src/dialects/oracle/query-generator.js | 130 ++++++++++++++++++++++++- 4 files changed, 210 insertions(+), 5 deletions(-) diff --git a/src/data-types.js b/src/data-types.js index f75ee2124ba7..04007927cc86 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -11,6 +11,7 @@ const { logger } = require('./utils/logger'); const warnings = {}; const { classToInvokable } = require('./utils/class-to-invokable'); const { joinSQLFragments } = require('./utils/join-sql-fragments'); +const Utils = require('./utils'); class ABSTRACT { toString(options) { @@ -957,6 +958,61 @@ class TSVECTOR extends ABSTRACT { } } +/** + * The VECTOR type stores vectors. + * + * Only available for Oracle Database >= 23ai + * + */ +class VECTOR extends ABSTRACT { + constructor(dimension, format) { + const options = typeof dimension === 'object' && dimension || { dimension, format }; + super(); + this.options = options; + this._length = typeof dimension === 'object' && dimension.dimension || dimension; + this._format = typeof dimension === 'object' && dimension.format || format; + // this.cosineDistance = cosineDistance; + // this.vectorDistance = vectorDistance; + // this.innerProduct = innerProduct; + // this.l1Distance = l1Distance; + // this.l2Distance = l2Distance; + // this.vectorDistance = vectorDistance; + } + validate(value) { + if (!Array.isArray(value)) { + throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', value)); + } + return true; + } + + cosineDistance(column, value, sequelize) { + return distance('COSINE_DISTANCE', column, value, sequelize); + } + + + innerProduct(column, value, sequelize) { + return distance('INNER_PRODUCT', column, value, 
sequelize); + } + + l1Distance(column, value, sequelize) { + return distance('L1_DISTANCE', column, value, sequelize); + } + + l2Distance(column, value, sequelize) { + return distance('L2_DISTANCE', column, value, sequelize); + } + + vectorDistance(column, value, sequelize) { + return distance('vector_distance', column, value, sequelize); + } +} + +function distance(distanceType, column, value, sequelize) { + const quotedColumn = column instanceof Utils.Literal ? column.val : sequelize.dialect.queryGenerator.quoteIdentifier(column); + const val = `VECTOR('[${value}]', ${value.length})`; + return `${distanceType}(${quotedColumn}, ${val})`; +} + /** * A convenience class holding commonly used data types. The data types are used when defining a new model using `Sequelize.define`, like this: * ```js @@ -1041,7 +1097,8 @@ const DataTypes = module.exports = { INET, MACADDR, CITEXT, - TSVECTOR + TSVECTOR, + VECTOR }; _.each(DataTypes, (dataType, name) => { diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index 2bb99a00debc..35a66bc728f4 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -30,6 +30,7 @@ module.exports = BaseTypes => { BaseTypes.REAL.types.oracle = ['BINARY_DOUBLE']; BaseTypes.DOUBLE.types.oracle = ['BINARY_DOUBLE']; BaseTypes.JSON.types.oracle = ['BLOB']; + BaseTypes.VECTOR.types.oracle = ['VECTOR']; BaseTypes.GEOMETRY.types.oracle = false; class STRING extends BaseTypes.STRING { @@ -459,6 +460,23 @@ module.exports = BaseTypes => { DATEONLY.prototype.escape = false; + class VECTOR extends BaseTypes.VECTOR { + toSql() { + if (this._length && this._format) { + return `VECTOR(${this._length}, ${this._format})`; + } if (this._length) { + return `VECTOR(${this._length}, *)`; + } + + return 'VECTOR(*, *)'; + } + + _getBindDef(oracledb) { + return { type: oracledb.DB_TYPE_VECTOR }; + } + + } + return { BOOLEAN, 'DOUBLE PRECISION': DOUBLE, @@ -481,6 +499,7 @@ module.exports = BaseTypes => { 
CHAR, JSON: JSONTYPE, REAL, - DECIMAL + DECIMAL, + VECTOR }; }; diff --git a/src/dialects/oracle/index.js b/src/dialects/oracle/index.js index 2e03d9f1585b..80a30b11d78a 100644 --- a/src/dialects/oracle/index.js +++ b/src/dialects/oracle/index.js @@ -36,8 +36,9 @@ OracleDialect.prototype.supports = _.merge(_.cloneDeep(AbstractDialect.prototype collate: false, length: false, parser: false, - type: false, - using: false + type: true, + operator: false, + using: true }, constraints: { restrict: false diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 4392d722b003..2d3a134c2ee9 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -7,6 +7,7 @@ const DataTypes = require('../../data-types'); const AbstractQueryGenerator = require('../abstract/query-generator'); const _ = require('lodash'); const util = require('util'); +const Model = require('../../model'); const Transaction = require('../../transaction'); /** @@ -387,7 +388,134 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { if (typeof tableName !== 'string' && attributes.name) { attributes.name = `${tableName.schema}.${attributes.name}`; } - return super.addIndexQuery(tableName, attributes, options, rawTablename); + + options = options || {}; + + if (!Array.isArray(attributes)) { + options = attributes; + attributes = undefined; + } else { + options.fields = attributes; + } + + options.prefix = options.prefix || rawTablename || tableName; + if (options.prefix && typeof options.prefix === 'string') { + options.prefix = options.prefix.replace(/\./g, '_'); + options.prefix = options.prefix.replace(/("|')/g, ''); + } + + const fieldsSql = options.fields.map(field => { + if (field instanceof Utils.SequelizeMethod) { + return this.handleSequelizeMethod(field); + } + if (typeof field === 'string') { + field = { + name: field + }; + } + let result = ''; + + if (field.attribute) { + field.name = field.attribute; + 
} + + if (!field.name) { + throw new Error(`The following index field has no name: ${util.inspect(field)}`); + } + + result += this.quoteIdentifier(field.name); + + // if (this._dialect.supports.index.collate && field.collate) { + // result += ` COLLATE ${this.quoteIdentifier(field.collate)}`; + // } + + // if (this._dialect.supports.index.operator) { + // const operator = field.operator || options.operator; + // if (operator) { + // result += ` ${operator}`; + // } + // } + + if (this._dialect.supports.index.length && field.length) { + result += `(${field.length})`; + } + + if (field.order) { + result += ` ${field.order}`; + } + + return result; + }); + + if (!options.name) { + // Mostly for cases where addIndex is called directly by the user without an options object (for example in migrations) + // All calls that go through sequelize should already have a name + options = Utils.nameIndex(options, options.prefix); + } + + options = Model._conformIndex(options); + + // if (!this._dialect.supports.index.type) { + // delete options.type; + // } + + // if (options.where) { + // options.where = this.whereQuery(options.where); + // } + + if (typeof tableName === 'string') { + tableName = this.quoteIdentifiers(tableName); + } else { + tableName = this.quoteTable(tableName); + } + + let ind = ['CREATE']; + + if (options.type === 'VECTOR') { + let idxParameter = 'PARAMETERS (type '; + if (options.parameter) { + if (options.using === 'hnsw') { + idxParameter += 'hnsw'; + if (options.parameter.neighbor) { + idxParameter += `, neighbor ${options.parameter.neighbor}`; + } + if (options.parameter.efconstruction) { + idxParameter += `, efconstruction ${options.parameter.efconstruction}`; + } + } else { + idxParameter += 'ivf'; + if (options.parameter.partitions) { + idxParameter += `, NEIGHBOR PARTITION ${options.parameter.partitions}`; + } else if (options.parameter.samplesPerPartition) { + idxParameter += `, SAMPLES_PER_PARTITION ${options.parameter.samplesPerPartition}`; + } 
else if (options.parameter.minVectors) { + idxParameter += `, MIN_VECORS_PER_PARTITIONS ${options.parameter.minVectors}`; + } + } + idxParameter += ')'; + } + ind = ind.concat( + options.type, 'INDEX', + this.quoteIdentifiers(options.name), + `ON ${tableName}`, + `(${fieldsSql.join(', ')})`, + 'ORAGANIZATION ', + options.using === 'hnsw' ? 'INMEMORY NEIGHBOR GRAPH ' : 'NEIGHBOR PARTITION GRAPH ', + options.distance ? `WITH DISTANCE ${options.distance}` : 'WITH DISTANCE COSINE', + //with target accuracy + options.parameter ? idxParameter : '' + ); + } else { + ind = ind.concat( + options.unique ? 'UNIQUE' : '', + 'INDEX', + this.quoteIdentifiers(options.name), + `ON ${tableName}`, + `(${fieldsSql.join(', ')})` + ); + } + + return _.compact(ind).join(' '); } addConstraintQuery(tableName, options) { From 97453781d74c622be55f9bda1079172e29eefb00 Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Mon, 16 Sep 2024 14:58:25 +0530 Subject: [PATCH 02/24] feat(oracle): remove commented code --- src/data-types.js | 6 ------ 1 file changed, 6 deletions(-) diff --git a/src/data-types.js b/src/data-types.js index 04007927cc86..dd2d3777927a 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -971,12 +971,6 @@ class VECTOR extends ABSTRACT { this.options = options; this._length = typeof dimension === 'object' && dimension.dimension || dimension; this._format = typeof dimension === 'object' && dimension.format || format; - // this.cosineDistance = cosineDistance; - // this.vectorDistance = vectorDistance; - // this.innerProduct = innerProduct; - // this.l1Distance = l1Distance; - // this.l2Distance = l2Distance; - // this.vectorDistance = vectorDistance; } validate(value) { if (!Array.isArray(value)) { From f912f9b8be35e5af4a7b7a2725a0357269d56ac4 Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Tue, 24 Sep 2024 13:03:35 +0530 Subject: [PATCH 03/24] feat(oracle): add validation --- package.json | 2 +- src/data-types.js | 2 +- src/dialects/oracle/data-types.js | 3 +++ 3 files 
changed, 5 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index bc69e634ae69..442216d09504 100644 --- a/package.json +++ b/package.json @@ -109,7 +109,7 @@ "mysql2": "^2.3.3", "node-hook": "^1.0.0", "nyc": "^15.1.0", - "oracledb": "^5.5.0", + "oracledb": "^6.6.0", "p-map": "^4.0.0", "p-props": "^4.0.0", "p-settle": "^4.1.1", diff --git a/src/data-types.js b/src/data-types.js index dd2d3777927a..3cdde92f573a 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -973,7 +973,7 @@ class VECTOR extends ABSTRACT { this._format = typeof dimension === 'object' && dimension.format || format; } validate(value) { - if (!Array.isArray(value)) { + if (!ArrayBuffer.isView(value)) { throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', value)); } return true; diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index 35a66bc728f4..b5d49167c9b8 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -461,6 +461,9 @@ module.exports = BaseTypes => { DATEONLY.prototype.escape = false; class VECTOR extends BaseTypes.VECTOR { + constructor(dimension, format) { + super(dimension, format); + } toSql() { if (this._length && this._format) { return `VECTOR(${this._length}, ${this._format})`; From 4b91c3d16a5151dd8472814ac4691cf22395720e Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Tue, 24 Sep 2024 15:57:17 +0530 Subject: [PATCH 04/24] feat(oracle): move code to racle layer --- src/data-types.js | 28 ---------------------------- src/dialects/oracle/data-types.js | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 28 deletions(-) diff --git a/src/data-types.js b/src/data-types.js index 3cdde92f573a..ec6d01a2bda5 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -11,7 +11,6 @@ const { logger } = require('./utils/logger'); const warnings = {}; const { classToInvokable } = require('./utils/class-to-invokable'); const { joinSQLFragments } = 
require('./utils/join-sql-fragments'); -const Utils = require('./utils'); class ABSTRACT { toString(options) { @@ -978,33 +977,6 @@ class VECTOR extends ABSTRACT { } return true; } - - cosineDistance(column, value, sequelize) { - return distance('COSINE_DISTANCE', column, value, sequelize); - } - - - innerProduct(column, value, sequelize) { - return distance('INNER_PRODUCT', column, value, sequelize); - } - - l1Distance(column, value, sequelize) { - return distance('L1_DISTANCE', column, value, sequelize); - } - - l2Distance(column, value, sequelize) { - return distance('L2_DISTANCE', column, value, sequelize); - } - - vectorDistance(column, value, sequelize) { - return distance('vector_distance', column, value, sequelize); - } -} - -function distance(distanceType, column, value, sequelize) { - const quotedColumn = column instanceof Utils.Literal ? column.val : sequelize.dialect.queryGenerator.quoteIdentifier(column); - const val = `VECTOR('[${value}]', ${value.length})`; - return `${distanceType}(${quotedColumn}, ${val})`; } /** diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index b5d49167c9b8..5cca73e6e9b6 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -4,6 +4,7 @@ const moment = require('moment'); const momentTz = require('moment-timezone'); +const Utils = require('../../utils'); module.exports = BaseTypes => { const warn = BaseTypes.ABSTRACT.warn.bind( @@ -463,6 +464,8 @@ module.exports = BaseTypes => { class VECTOR extends BaseTypes.VECTOR { constructor(dimension, format) { super(dimension, format); + this._length = typeof dimension === 'object' && dimension.dimension || dimension; + this._format = typeof dimension === 'object' && dimension.format || format; } toSql() { if (this._length && this._format) { @@ -478,6 +481,33 @@ module.exports = BaseTypes => { return { type: oracledb.DB_TYPE_VECTOR }; } + + } + + BaseTypes.VECTOR.prototype.cosineDistance = function 
cosineDistance(column, value, sequelize) { + return distance('COSINE_DISTANCE', column, value, sequelize); + }; + + BaseTypes.VECTOR.prototype.innerProduct = function innerProduct(column, value, sequelize) { + return distance('INNER_PRODUCT', column, value, sequelize); + }; + + BaseTypes.VECTOR.prototype.l1Distance = function l1Distance(column, value, sequelize) { + return distance('L1_DISTANCE', column, value, sequelize); + }; + + BaseTypes.VECTOR.prototype.l2Distance = function l2Distance(column, value, sequelize) { + return distance('L2_DISTANCE', column, value, sequelize); + }; + + BaseTypes.VECTOR.prototype.vectorDistance = function vectorDistance(column, value, sequelize) { + return distance('vector_distance', column, value, sequelize); + }; + + function distance(distanceType, column, value, sequelize) { + const quotedColumn = column instanceof Utils.Literal ? column.val : sequelize.dialect.queryGenerator.quoteIdentifier(column); + const val = `VECTOR('[${value}]', ${value.length})`; + return `${distanceType}(${quotedColumn}, ${val})`; } return { From c7b887f2ebe4b3e6018717226bc0534012a4a2df Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Fri, 27 Sep 2024 12:22:12 +0530 Subject: [PATCH 05/24] feat(oracle): add hnsw by default for indexing --- src/dialects/oracle/query-generator.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 2d3a134c2ee9..ce1d30bffcd6 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -473,6 +473,7 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { if (options.type === 'VECTOR') { let idxParameter = 'PARAMETERS (type '; + options.using = options.using || 'hnsw'; if (options.parameter) { if (options.using === 'hnsw') { idxParameter += 'hnsw'; From ce2b4effab95da485805a99d5b24d5a1c737b8e9 Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Mon, 30 Sep 2024 11:51:59 +0530 Subject: [PATCH 
06/24] feat(oracle): add test cases, type definition --- src/data-types.d.ts | 5 +++ src/data-types.js | 6 +-- src/dialects/oracle/data-types.js | 10 ++--- test/unit/dialects/oracle/vector.test.js | 54 ++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 10 deletions(-) create mode 100644 test/unit/dialects/oracle/vector.test.js diff --git a/src/data-types.d.ts b/src/data-types.d.ts index 2eb62626a331..083f9b7959e5 100644 --- a/src/data-types.d.ts +++ b/src/data-types.d.ts @@ -614,5 +614,10 @@ export const CITEXT: AbstractDataTypeConstructor; */ export const TSVECTOR: AbstractDataTypeConstructor; +/** + * VECTOR. Only available in Oracle Database. + */ +export const VECTOR: AbstractDataTypeConstructor; + // umzug compatibility export type DataTypeAbstract = AbstractDataTypeConstructor; diff --git a/src/data-types.js b/src/data-types.js index ec6d01a2bda5..406e2237b2ff 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -965,11 +965,11 @@ class TSVECTOR extends ABSTRACT { */ class VECTOR extends ABSTRACT { constructor(dimension, format) { - const options = typeof dimension === 'object' && dimension || { dimension, format }; super(); + const options = typeof dimension === 'object' && dimension || { dimension, format }; this.options = options; - this._length = typeof dimension === 'object' && dimension.dimension || dimension; - this._format = typeof dimension === 'object' && dimension.format || format; + this._format = options.format; + this._length = options.dimension; } validate(value) { if (!ArrayBuffer.isView(value)) { diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index 5cca73e6e9b6..4fc92c99d617 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -462,15 +462,11 @@ module.exports = BaseTypes => { DATEONLY.prototype.escape = false; class VECTOR extends BaseTypes.VECTOR { - constructor(dimension, format) { - super(dimension, format); - this._length = typeof dimension === 
'object' && dimension.dimension || dimension; - this._format = typeof dimension === 'object' && dimension.format || format; - } toSql() { if (this._length && this._format) { - return `VECTOR(${this._length}, ${this._format})`; - } if (this._length) { + return `VECTOR(${this._length}, ${this._format.toUpperCase()})`; + } + if (this._length) { return `VECTOR(${this._length}, *)`; } diff --git a/test/unit/dialects/oracle/vector.test.js b/test/unit/dialects/oracle/vector.test.js new file mode 100644 index 000000000000..7772ab2bb7c1 --- /dev/null +++ b/test/unit/dialects/oracle/vector.test.js @@ -0,0 +1,54 @@ +'use strict'; + +const Support = require('../../support'); +const DataTypes = require('sequelize/lib/data-types'); +const expectsql = Support.expectsql; +const current = Support.sequelize; +const sql = current.dialect.queryGenerator; + +if (current.dialect.name === 'oracle') { + describe('VECTOR datatype', () => { + const FooUser = current.define('user', { + vecCol: { + type: DataTypes.VECTOR, + allowNull: false + } + }); + + it('creates table with vector datatype', () => { + expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(*, *) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + }); + + }); + + describe('VECTOR datatype with dimension and format', () => { + const FooUser = current.define('user', { + vecCol: { + type: DataTypes.VECTOR(3, 'float32'), + allowNull: false + } + }); + + it('creates table with vector datatype', () => { + expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) 
GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(3, FLOAT32) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + }); + + }); + + describe('VECTOR datatype(binary)', () => { + const FooUser = current.define('user', { + vecCol: { + type: DataTypes.VECTOR(16, 'binary'), + allowNull: false + } + }); + + it('creates table with vector datatype', () => { + expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(16, BINARY) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + }); + + }); +} \ No newline at end of file From 27a33c66ae5f4e13f50d2870442f184b5b209e96 Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Mon, 30 Sep 2024 15:57:27 +0530 Subject: [PATCH 07/24] feat(oracle): add index test-cases --- src/dialects/oracle/query-generator.js | 8 ++++--- test/unit/dialects/oracle/vector.test.js | 28 +++++++++++++++++++++++- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index ce1d30bffcd6..60ab3c4631dd 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -487,9 +487,11 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { idxParameter += 'ivf'; if (options.parameter.partitions) { idxParameter += `, NEIGHBOR PARTITION ${options.parameter.partitions}`; - } else if (options.parameter.samplesPerPartition) { + } + if (options.parameter.samplesPerPartition) { idxParameter += `, SAMPLES_PER_PARTITION 
${options.parameter.samplesPerPartition}`; - } else if (options.parameter.minVectors) { + } + if (options.parameter.minVectors) { idxParameter += `, MIN_VECORS_PER_PARTITIONS ${options.parameter.minVectors}`; } } @@ -502,7 +504,7 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { `(${fieldsSql.join(', ')})`, 'ORAGANIZATION ', options.using === 'hnsw' ? 'INMEMORY NEIGHBOR GRAPH ' : 'NEIGHBOR PARTITION GRAPH ', - options.distance ? `WITH DISTANCE ${options.distance}` : 'WITH DISTANCE COSINE', + options.distance ? `WITH DISTANCE ${options.distance}` : '', //with target accuracy options.parameter ? idxParameter : '' ); diff --git a/test/unit/dialects/oracle/vector.test.js b/test/unit/dialects/oracle/vector.test.js index 7772ab2bb7c1..b4352c1cbc8f 100644 --- a/test/unit/dialects/oracle/vector.test.js +++ b/test/unit/dialects/oracle/vector.test.js @@ -1,5 +1,4 @@ 'use strict'; - const Support = require('../../support'); const DataTypes = require('sequelize/lib/data-types'); const expectsql = Support.expectsql; @@ -51,4 +50,31 @@ if (current.dialect.name === 'oracle') { }); }); + + describe('Vector Index', () => { + it('default', () => { + expectsql(sql.addIndexQuery('Foo', ['vec1'], { type: 'VECTOR' }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "Foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH' }); + }); + + it('type and using(hnsw)', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'hnsw' }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH' }); + }); + + it('type and using(ivf)', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'ivf' }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION NEIGHBOR PARTITION GRAPH' }); + }); + + it('hnsw parameter', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'hnsw', parameter: { neighbor: 10, efconstruction: 10 } }), { + default: 
'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH PARAMETERS (type hnsw, neighbor 10, efconstruction 10)' }); + }); + + it('ivf parameter', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'ivf', parameter: { partitions: 5, samplesPerPartition: 10, minVectors: 10 } }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION NEIGHBOR PARTITION GRAPH PARAMETERS (type ivf, NEIGHBOR PARTITION 5, SAMPLES_PER_PARTITION 10, MIN_VECORS_PER_PARTITIONS 10)' }); + }); + }); } \ No newline at end of file From c461d72abfd44a93a19c7c645aae5e1f85c3e904 Mon Sep 17 00:00:00 2001 From: Hasan Jamil Date: Thu, 24 Oct 2024 11:32:48 +0530 Subject: [PATCH 08/24] feat(oracle): add test cases for where and orderby --- test/unit/dialects/oracle/vector.test.js | 164 ++++++++++++++++------- 1 file changed, 113 insertions(+), 51 deletions(-) diff --git a/test/unit/dialects/oracle/vector.test.js b/test/unit/dialects/oracle/vector.test.js index b4352c1cbc8f..7e50aa82d40a 100644 --- a/test/unit/dialects/oracle/vector.test.js +++ b/test/unit/dialects/oracle/vector.test.js @@ -1,80 +1,142 @@ 'use strict'; + +const util = require('util'); const Support = require('../../support'); const DataTypes = require('sequelize/lib/data-types'); const expectsql = Support.expectsql; const current = Support.sequelize; const sql = current.dialect.queryGenerator; +const Op = Support.Sequelize.Op; if (current.dialect.name === 'oracle') { - describe('VECTOR datatype', () => { - const FooUser = current.define('user', { - vecCol: { - type: DataTypes.VECTOR, - allowNull: false - } - }); + describe('VECTORS', () => { + describe('VECTOR datatype', () => { + const FooUser = current.define('user', { + vecCol: { + type: DataTypes.VECTOR, + allowNull: false + } + }); + + it('creates table with vector datatype', () => { + expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { + default: 
'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(*, *) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + }); - it('creates table with vector datatype', () => { - expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { - default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(*, *) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); }); - }); + describe('VECTOR datatype with dimension and format', () => { + const FooUser = current.define('user', { + vecCol: { + type: DataTypes.VECTOR(3, 'float32'), + allowNull: false + } + }); - describe('VECTOR datatype with dimension and format', () => { - const FooUser = current.define('user', { - vecCol: { - type: DataTypes.VECTOR(3, 'float32'), - allowNull: false - } - }); + it('creates table with vector datatype', () => { + expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(3, FLOAT32) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + }); - it('creates table with vector datatype', () => { - expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { - default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" 
("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(3, FLOAT32) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); }); - }); + describe('VECTOR datatype(binary)', () => { + const FooUser = current.define('user', { + vecCol: { + type: DataTypes.VECTOR(16, 'binary'), + allowNull: false + } + }); - describe('VECTOR datatype(binary)', () => { - const FooUser = current.define('user', { - vecCol: { - type: DataTypes.VECTOR(16, 'binary'), - allowNull: false - } - }); + it('creates table with vector datatype', () => { + expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(16, BINARY) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + }); - it('creates table with vector datatype', () => { - expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { - default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(16, BINARY) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); }); - }); + describe('Vector Index', () => { + it('default', () => { + expectsql(sql.addIndexQuery('Foo', ['vec1'], { type: 'VECTOR' }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "Foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH' }); + }); - describe('Vector Index', () => { - it('default', 
() => { - expectsql(sql.addIndexQuery('Foo', ['vec1'], { type: 'VECTOR' }), { - default: 'CREATE VECTOR INDEX "foo_vec1" ON "Foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH' }); - }); + it('type and using(hnsw)', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'hnsw' }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH' }); + }); - it('type and using(hnsw)', () => { - expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'hnsw' }), { - default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH' }); - }); + it('type and using(ivf)', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'ivf' }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION NEIGHBOR PARTITION GRAPH' }); + }); - it('type and using(ivf)', () => { - expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'ivf' }), { - default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION NEIGHBOR PARTITION GRAPH' }); + it('hnsw parameter', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'hnsw', parameter: { neighbor: 10, efconstruction: 10 } }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH PARAMETERS (type hnsw, neighbor 10, efconstruction 10)' }); + }); + + it('ivf parameter', () => { + expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'ivf', parameter: { partitions: 5, samplesPerPartition: 10, minVectors: 10 } }), { + default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION NEIGHBOR PARTITION GRAPH PARAMETERS (type ivf, NEIGHBOR PARTITION 5, SAMPLES_PER_PARTITION 10, MIN_VECORS_PER_PARTITIONS 10)' }); + }); }); - it('hnsw parameter', () => { - expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'hnsw', parameter: { neighbor: 10, efconstruction: 10 } 
}), { - default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION INMEMORY NEIGHBOR GRAPH PARAMETERS (type hnsw, neighbor 10, efconstruction 10)' }); + describe('Vector where clause', () => { + const val = [1, 2, 3]; + + const testsql = function(key, value, options, expectation) { + if (expectation === undefined) { + expectation = options; + options = undefined; + } + + it(`${String(key)}: ${util.inspect(value, { depth: 10 })}${options && `, ${util.inspect(options)}` || ''}`, () => { + return expectsql(sql.whereItemQuery(key, value, options), expectation); + }); + }; + + testsql(current.literal(`${DataTypes.VECTOR().vectorDistance('embedding', val, current)}`), { + [Op.lt]: 2 + }, { + oracle: 'vector_distance("embedding", VECTOR(\'[1,2,3]\', 3)) < 2' + }); }); - it('ivf parameter', () => { - expectsql(sql.addIndexQuery('foo', ['vec1'], { type: 'VECTOR', using: 'ivf', parameter: { partitions: 5, samplesPerPartition: 10, minVectors: 10 } }), { - default: 'CREATE VECTOR INDEX "foo_vec1" ON "foo" ("vec1") ORAGANIZATION NEIGHBOR PARTITION GRAPH PARAMETERS (type ivf, NEIGHBOR PARTITION 5, SAMPLES_PER_PARTITION 10, MIN_VECORS_PER_PARTITIONS 10)' }); + describe('order by distances', () => { + const val = [1, 2, 3, 4]; + const testsql = (options, expectation) => { + const model = options.model; + + it(util.inspect(options, { depth: 2 }), () => { + return expectsql( + sql.selectQuery( + options.table || model && model.getTableName(), + options, + options.model + ), + expectation + ); + }); + }; + + const User = Support.sequelize.define('User', { + embedding: { + type: DataTypes.VECTOR(4) + } + }, { + tableName: 'user' + }); + + testsql({ + model: User, + attributes: ['embedding'], + order: [ + current.literal(`${DataTypes.VECTOR().vectorDistance('embedding', val, current)}`) + ] + }, { + oracle: 'SELECT "embedding" FROM "user" "User" ORDER BY vector_distance("embedding", VECTOR(\'[1,2,3,4]\', 4));' + }); }); }); } \ No newline at end of file From 
d0a62a3bbc6679faf42208d352dd13fe3ca3ad48 Mon Sep 17 00:00:00 2001 From: Hasan Date: Tue, 19 Nov 2024 02:22:03 +0530 Subject: [PATCH 09/24] feat(oracle): add exports --- src/index.mjs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/index.mjs b/src/index.mjs index 5752a6422721..406364477ada 100644 --- a/src/index.mjs +++ b/src/index.mjs @@ -56,6 +56,7 @@ export const INET = Pkg.INET; export const MACADDR = Pkg.MACADDR; export const CITEXT = Pkg.CITEXT; export const TSVECTOR = Pkg.TSVECTOR; +export const VECTOR = Pkg.VECTOR; // export * from './lib/model'; export const Model = Pkg.Model; From eeac603581bb61883fca6e419987a1b17e2814c4 Mon Sep 17 00:00:00 2001 From: Hasan Date: Wed, 20 Nov 2024 14:57:20 +0530 Subject: [PATCH 10/24] feat(oracle): move similarity search functions to QueryGenerator --- src/dialects/oracle/data-types.js | 30 +---------- src/dialects/oracle/query-generator.js | 21 ++++++++ src/sequelize.js | 75 ++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 29 deletions(-) diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index 4fc92c99d617..022f26932b62 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -470,40 +470,12 @@ module.exports = BaseTypes => { return `VECTOR(${this._length}, *)`; } - return 'VECTOR(*, *)'; + return 'VECTOR'; } _getBindDef(oracledb) { return { type: oracledb.DB_TYPE_VECTOR }; } - - - } - - BaseTypes.VECTOR.prototype.cosineDistance = function cosineDistance(column, value, sequelize) { - return distance('COSINE_DISTANCE', column, value, sequelize); - }; - - BaseTypes.VECTOR.prototype.innerProduct = function innerProduct(column, value, sequelize) { - return distance('INNER_PRODUCT', column, value, sequelize); - }; - - BaseTypes.VECTOR.prototype.l1Distance = function l1Distance(column, value, sequelize) { - return distance('L1_DISTANCE', column, value, sequelize); - }; - - BaseTypes.VECTOR.prototype.l2Distance = function l2Distance(column, 
value, sequelize) { - return distance('L2_DISTANCE', column, value, sequelize); - }; - - BaseTypes.VECTOR.prototype.vectorDistance = function vectorDistance(column, value, sequelize) { - return distance('vector_distance', column, value, sequelize); - }; - - function distance(distanceType, column, value, sequelize) { - const quotedColumn = column instanceof Utils.Literal ? column.val : sequelize.dialect.queryGenerator.quoteIdentifier(column); - const val = `VECTOR('[${value}]', ${value.length})`; - return `${distanceType}(${quotedColumn}, ${val})`; } return { diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 60ab3c4631dd..cf5286488328 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -1414,6 +1414,27 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { authTestQuery() { return 'SELECT 1+1 AS result FROM DUAL'; } + + cosineDistance(column, value) { + return `COSINE_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; + } + + innerProduct(column, value) { + return `INNER_PRODUCT(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; + } + + l1Distance(column, value) { + return `L1_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; + } + + l2Distance(column, value) { + return `L2_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; + } + + vectorDistance(column, value) { + return `VECTOR_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; + } + } /* istanbul ignore next */ diff --git a/src/sequelize.js b/src/sequelize.js index 3098dd0c81ea..d5dd531b2378 100644 --- a/src/sequelize.js +++ b/src/sequelize.js @@ -990,6 +990,81 @@ class Sequelize { return this.fn('RAND'); } + /** + * Generates the cosineDistance clause for Vector Columns + * + * @param {string} column + * @param {Array} value + * + * @returns {string} + */ + cosineDistance(column, value) { + if (['oracle'].includes(this.getDialect())) { 
+ return this.dialect.queryGenerator.cosineDistance(column, value); + } + return ''; + } + + /** + * Generates the innerProduct clause for Vector Columns + * + * @param {string} column + * @param {Array} value + * + * @returns {string} + */ + innerProduct(column, value) { + if (['oracle'].includes(this.getDialect())) { + return this.dialect.queryGenerator.innerProduct(column, value); + } + return ''; + } + + /** + * Generates the l1Distance clause for Vector Columns + * + * @param {string} column + * @param {Array} value + * + * @returns {string} + */ + l1Distance(column, value) { + if (['oracle'].includes(this.getDialect())) { + return this.dialect.queryGenerator.l1Distance(column, value); + } + return ''; + } + + /** + * Generates the l2Distance clause for Vector Columns + * + * @param {string} column + * @param {Array} value + * + * @returns {string} + */ + l2Distance(column, value) { + if (['oracle'].includes(this.getDialect())) { + return this.dialect.queryGenerator.l2Distance(column, value); + } + return ''; + } + + /** + * Generates the vectorDistance clause for Vector Columns + * + * @param {string} column + * @param {Array} value + * + * @returns {string} + */ + vectorDistance(column, value) { + if (['oracle'].includes(this.getDialect())) { + return this.dialect.queryGenerator.vectorDistance(column, value); + } + return ''; + } + /** * Creates an object representing a database function. This can be used in search queries, both in where and order parts, and as default values in column definitions. * If you want to refer to columns in your function, you should use `sequelize.col`, so that the columns are properly interpreted as columns and not a strings. 
From 432cfb5fbaa6614f0094f1b416c6208859753d01 Mon Sep 17 00:00:00 2001 From: Hasan Date: Wed, 20 Nov 2024 14:58:37 +0530 Subject: [PATCH 11/24] feat(oracle): remove imports in datatype --- src/dialects/oracle/data-types.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index 022f26932b62..f708d32ff1cf 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -4,7 +4,6 @@ const moment = require('moment'); const momentTz = require('moment-timezone'); -const Utils = require('../../utils'); module.exports = BaseTypes => { const warn = BaseTypes.ABSTRACT.warn.bind( From 72b0373f9c14d0736a28b7927341e564448b1490 Mon Sep 17 00:00:00 2001 From: Hasan Date: Fri, 22 Nov 2024 15:33:06 +0530 Subject: [PATCH 12/24] feat(oracle): move vector functions to use sequelize.fn() --- src/dialects/oracle/query-generator.js | 62 +++++++++--------------- src/sequelize.js | 10 ++-- test/unit/dialects/oracle/vector.test.js | 20 ++++---- 3 files changed, 37 insertions(+), 55 deletions(-) diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index cf5286488328..1635399df48b 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -425,17 +425,6 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { result += this.quoteIdentifier(field.name); - // if (this._dialect.supports.index.collate && field.collate) { - // result += ` COLLATE ${this.quoteIdentifier(field.collate)}`; - // } - - // if (this._dialect.supports.index.operator) { - // const operator = field.operator || options.operator; - // if (operator) { - // result += ` ${operator}`; - // } - // } - if (this._dialect.supports.index.length && field.length) { result += `(${field.length})`; } @@ -455,14 +444,6 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { options = Model._conformIndex(options); - // if 
(!this._dialect.supports.index.type) { - // delete options.type; - // } - - // if (options.where) { - // options.where = this.whereQuery(options.where); - // } - if (typeof tableName === 'string') { tableName = this.quoteIdentifiers(tableName); } else { @@ -1268,6 +1249,28 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { } } } + const vectorFunctions = [ + 'COSINE_DISTANCE', + 'INNER_PRODUCT', + 'L1_DISTANCE', + 'L2_DISTANCE', + 'VECTOR_DISTANCE' + ]; + if (smth instanceof Utils.Fn && vectorFunctions.includes(smth.fn)) { + smth.args[0] = this.quoteIdentifier(smth.args[0]); + smth.args[1] = `VECTOR('[${smth.args[1]}]')`; + return `${smth.fn}(${ + smth.args.map(arg => { + if (arg instanceof Utils.SequelizeMethod) { + return this.handleSequelizeMethod(arg, tableName, factory, options, prepend); + } + if (_.isPlainObject(arg)) { + return this.whereItemsQuery(arg); + } + return arg; + }).join(', ') + })`; + } return super.handleSequelizeMethod(smth, tableName, factory, options, prepend); } @@ -1414,27 +1417,6 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { authTestQuery() { return 'SELECT 1+1 AS result FROM DUAL'; } - - cosineDistance(column, value) { - return `COSINE_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; - } - - innerProduct(column, value) { - return `INNER_PRODUCT(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; - } - - l1Distance(column, value) { - return `L1_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; - } - - l2Distance(column, value) { - return `L2_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; - } - - vectorDistance(column, value) { - return `VECTOR_DISTANCE(${this.quoteIdentifier(column)}, VECTOR('[${value}]'))`; - } - } /* istanbul ignore next */ diff --git a/src/sequelize.js b/src/sequelize.js index d5dd531b2378..e66acb198627 100644 --- a/src/sequelize.js +++ b/src/sequelize.js @@ -1000,7 +1000,7 @@ class Sequelize { */ cosineDistance(column, 
value) { if (['oracle'].includes(this.getDialect())) { - return this.dialect.queryGenerator.cosineDistance(column, value); + return this.fn('COSINE_DISTANCE', column, value); } return ''; } @@ -1015,7 +1015,7 @@ class Sequelize { */ innerProduct(column, value) { if (['oracle'].includes(this.getDialect())) { - return this.dialect.queryGenerator.innerProduct(column, value); + return this.fn('INNER_PRODUCT', column, value); } return ''; } @@ -1030,7 +1030,7 @@ class Sequelize { */ l1Distance(column, value) { if (['oracle'].includes(this.getDialect())) { - return this.dialect.queryGenerator.l1Distance(column, value); + return this.fn('L1_DISTANCE', column, value); } return ''; } @@ -1045,7 +1045,7 @@ class Sequelize { */ l2Distance(column, value) { if (['oracle'].includes(this.getDialect())) { - return this.dialect.queryGenerator.l2Distance(column, value); + return this.fn('L2_DISTANCE', column, value); } return ''; } @@ -1060,7 +1060,7 @@ class Sequelize { */ vectorDistance(column, value) { if (['oracle'].includes(this.getDialect())) { - return this.dialect.queryGenerator.vectorDistance(column, value); + return this.fn('VECTOR_DISTANCE', column, value); } return ''; } diff --git a/test/unit/dialects/oracle/vector.test.js b/test/unit/dialects/oracle/vector.test.js index 7e50aa82d40a..289c11238c8d 100644 --- a/test/unit/dialects/oracle/vector.test.js +++ b/test/unit/dialects/oracle/vector.test.js @@ -12,7 +12,7 @@ if (current.dialect.name === 'oracle') { describe('VECTORS', () => { describe('VECTOR datatype', () => { const FooUser = current.define('user', { - vecCol: { + embedding: { type: DataTypes.VECTOR, allowNull: false } @@ -20,14 +20,14 @@ if (current.dialect.name === 'oracle') { it('creates table with vector datatype', () => { expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { - default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" 
VECTOR(*, *) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "embedding" VECTOR NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); }); }); describe('VECTOR datatype with dimension and format', () => { const FooUser = current.define('user', { - vecCol: { + embedding: { type: DataTypes.VECTOR(3, 'float32'), allowNull: false } @@ -35,14 +35,14 @@ if (current.dialect.name === 'oracle') { it('creates table with vector datatype', () => { expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { - default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(3, FLOAT32) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "embedding" VECTOR(3, FLOAT32) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); }); }); describe('VECTOR datatype(binary)', () => { const FooUser = current.define('user', { - vecCol: { + embedding: { type: DataTypes.VECTOR(16, 'binary'), allowNull: false } @@ -50,7 +50,7 @@ if (current.dialect.name === 'oracle') { it('creates table with vector datatype', () => 
{ expectsql(sql.createTableQuery(FooUser.getTableName(), sql.attributesToSQL(FooUser.rawAttributes), { }), { - default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "vecCol" VECTOR(16, BINARY) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); + default: 'BEGIN EXECUTE IMMEDIATE \'CREATE TABLE "users" ("id" NUMBER(*,0) GENERATED BY DEFAULT ON NULL AS IDENTITY, "embedding" VECTOR(16, BINARY) NOT NULL, "createdAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL, "updatedAt" TIMESTAMP WITH LOCAL TIME ZONE NOT NULL,PRIMARY KEY ("id"))\'; EXCEPTION WHEN OTHERS THEN IF SQLCODE != -955 THEN RAISE; END IF; END;' }); }); }); @@ -96,10 +96,10 @@ if (current.dialect.name === 'oracle') { }); }; - testsql(current.literal(`${DataTypes.VECTOR().vectorDistance('embedding', val, current)}`), { + testsql(current.fn('VECTOR_DISTANCE', 'embedding', val), { [Op.lt]: 2 }, { - oracle: 'vector_distance("embedding", VECTOR(\'[1,2,3]\', 3)) < 2' + oracle: 'VECTOR_DISTANCE("embedding", VECTOR(\'[1,2,3]\')) < 2' }); }); @@ -132,10 +132,10 @@ if (current.dialect.name === 'oracle') { model: User, attributes: ['embedding'], order: [ - current.literal(`${DataTypes.VECTOR().vectorDistance('embedding', val, current)}`) + current.fn('VECTOR_DISTANCE', 'embedding', val) ] }, { - oracle: 'SELECT "embedding" FROM "user" "User" ORDER BY vector_distance("embedding", VECTOR(\'[1,2,3,4]\', 4));' + oracle: 'SELECT "embedding" FROM "user" "User" ORDER BY VECTOR_DISTANCE("embedding", VECTOR(\'[1,2,3,4]\'));' }); }); }); From d58b1a4c65fc8f24dc2fbb6ab2b217b348d9badf Mon Sep 17 00:00:00 2001 From: Hasan Date: Mon, 25 Nov 2024 15:36:46 +0530 Subject: [PATCH 13/24] feat(oracle): update validations --- src/data-types.js | 6 ------ src/dialects/oracle/data-types.js | 11 +++++++++++ 
src/dialects/oracle/query-generator.js | 16 +++++++--------- src/sequelize.js | 20 ++++++++++---------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/data-types.js b/src/data-types.js index 406e2237b2ff..6f9fd98e9662 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -971,12 +971,6 @@ class VECTOR extends ABSTRACT { this._format = options.format; this._length = options.dimension; } - validate(value) { - if (!ArrayBuffer.isView(value)) { - throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', value)); - } - return true; - } } /** diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index f708d32ff1cf..e69a4ec82bf9 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -2,9 +2,12 @@ 'use strict'; +const util = require('util'); const moment = require('moment'); const momentTz = require('moment-timezone'); +const sequelizeErrors = require('../../errors'); + module.exports = BaseTypes => { const warn = BaseTypes.ABSTRACT.warn.bind( undefined, @@ -472,6 +475,14 @@ module.exports = BaseTypes => { return 'VECTOR'; } + validate(value) { + // BYTES_PER_ELEMENT is static property only available in typedArrays. 
+ if (!value.constructor.BYTE_PER_ELEMENT) { + throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', value)); + } + return true; + } + _getBindDef(oracledb) { return { type: oracledb.DB_TYPE_VECTOR }; } diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 1635399df48b..9285e6f6aa1f 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -8,6 +8,7 @@ const AbstractQueryGenerator = require('../abstract/query-generator'); const _ = require('lodash'); const util = require('util'); const Model = require('../../model'); +const sequelizeErrors = require('../../errors'); const Transaction = require('../../transaction'); /** @@ -1257,18 +1258,15 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { 'VECTOR_DISTANCE' ]; if (smth instanceof Utils.Fn && vectorFunctions.includes(smth.fn)) { + // The first argument is expected to be column name + // The second argument is expected to be array. 
smth.args[0] = this.quoteIdentifier(smth.args[0]); + if (!Array.isArray(smth.args[1])) { + throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', smth.args[1])); + } smth.args[1] = `VECTOR('[${smth.args[1]}]')`; return `${smth.fn}(${ - smth.args.map(arg => { - if (arg instanceof Utils.SequelizeMethod) { - return this.handleSequelizeMethod(arg, tableName, factory, options, prepend); - } - if (_.isPlainObject(arg)) { - return this.whereItemsQuery(arg); - } - return arg; - }).join(', ') + smth.args.join(', ') })`; } return super.handleSequelizeMethod(smth, tableName, factory, options, prepend); diff --git a/src/sequelize.js b/src/sequelize.js index e66acb198627..03d49b30c703 100644 --- a/src/sequelize.js +++ b/src/sequelize.js @@ -996,13 +996,13 @@ class Sequelize { * @param {string} column * @param {Array} value * - * @returns {string} + * @returns {Sequelize.fn} */ cosineDistance(column, value) { if (['oracle'].includes(this.getDialect())) { return this.fn('COSINE_DISTANCE', column, value); } - return ''; + throw new Error(`cosineDistance for Dialect "${this.getDialect()}" is not implemented`); } /** @@ -1011,13 +1011,13 @@ class Sequelize { * @param {string} column * @param {Array} value * - * @returns {string} + * @returns {Sequelize.fn} */ innerProduct(column, value) { if (['oracle'].includes(this.getDialect())) { return this.fn('INNER_PRODUCT', column, value); } - return ''; + throw new Error(`innerProduct for Dialect "${this.getDialect()}" is not implemented`); } /** @@ -1026,13 +1026,13 @@ class Sequelize { * @param {string} column * @param {Array} value * - * @returns {string} + * @returns {Sequelize.fn} */ l1Distance(column, value) { if (['oracle'].includes(this.getDialect())) { return this.fn('L1_DISTANCE', column, value); } - return ''; + throw new Error(`l1Distance for Dialect "${this.getDialect()}" is not implemented`); } /** @@ -1041,13 +1041,13 @@ class Sequelize { * @param {string} column * @param {Array} value * - * @returns 
{string} + * @returns {Sequelize.fn} */ l2Distance(column, value) { if (['oracle'].includes(this.getDialect())) { return this.fn('L2_DISTANCE', column, value); } - return ''; + throw new Error(`l2Distance for Dialect "${this.getDialect()}" is not implemented`); } /** @@ -1056,13 +1056,13 @@ class Sequelize { * @param {string} column * @param {Array} value * - * @returns {string} + * @returns {Sequelize.fn} */ vectorDistance(column, value) { if (['oracle'].includes(this.getDialect())) { return this.fn('VECTOR_DISTANCE', column, value); } - return ''; + throw new Error(`vectorDistance for Dialect "${this.getDialect()}" is not implemented`); } /** From 59be00f91c8173baf7ea119d153673b05996d4bf Mon Sep 17 00:00:00 2001 From: Hasan Date: Wed, 27 Nov 2024 01:53:21 +0530 Subject: [PATCH 14/24] feat(oracle): fix review comments --- src/dialects/oracle/data-types.js | 10 +++++++++- src/dialects/oracle/query-generator.js | 3 +++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index e69a4ec82bf9..1c5933426142 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -477,12 +477,20 @@ module.exports = BaseTypes => { validate(value) { // BYTES_PER_ELEMENT is static property only available in typedArrays. 
- if (!value.constructor.BYTE_PER_ELEMENT) { + if (!value.constructor.BYTE_PER_ELEMENT || !Array.isArray(value)) { throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', value)); } return true; } + _stringify(value, options) { + if (Array.isArray(value)) { + return Float64Array.from(value, val => val); + } + + return value; + } + _getBindDef(oracledb) { return { type: oracledb.DB_TYPE_VECTOR }; } diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 9285e6f6aa1f..53532b9c36fd 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -1258,6 +1258,9 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { 'VECTOR_DISTANCE' ]; if (smth instanceof Utils.Fn && vectorFunctions.includes(smth.fn)) { + if (smth.args.length > 2) { + throw new Error('Too many arguments passed to similarity search function'); + } // The first argument is expected to be column name // The second argument is expected to be array. smth.args[0] = this.quoteIdentifier(smth.args[0]); From 86b6d02fdee332cba9546a80b21b488f35ab0ad1 Mon Sep 17 00:00:00 2001 From: Hasan Date: Wed, 27 Nov 2024 02:07:16 +0530 Subject: [PATCH 15/24] feat(oracle): fix review comments --- src/data-types.js | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/data-types.js b/src/data-types.js index 6f9fd98e9662..aacf05a67979 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -964,9 +964,18 @@ class TSVECTOR extends ABSTRACT { * */ class VECTOR extends ABSTRACT { - constructor(dimension, format) { + /** + * @param {object} options type options + * @param {number} [options.dimension] dimension of vector column + * @param {string} [options.format] format of vector, i.e. 
INT8, FLOAT32, FLOAT64, BINARY + */ + constructor(options = {}) { super(); - const options = typeof dimension === 'object' && dimension || { dimension, format }; + if (typeof options === 'number') { + options = { + dimension: options + }; + } this.options = options; this._format = options.format; this._length = options.dimension; From 2418ca59a53ec84707f2c8bb8b4114c3b55ac767 Mon Sep 17 00:00:00 2001 From: Hasan Date: Mon, 2 Dec 2024 22:35:45 +0530 Subject: [PATCH 16/24] feat(oracle): fix validation condition and limit test case --- src/dialects/oracle/query-generator.js | 4 ++- test/unit/dialects/oracle/vector.test.js | 45 +++++++++++++++++++++--- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 53532b9c36fd..bcdada0cc3c0 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -1265,7 +1265,9 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { // The second argument is expected to be array. 
smth.args[0] = this.quoteIdentifier(smth.args[0]); if (!Array.isArray(smth.args[1])) { - throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', smth.args[1])); + if (typeof smth.args[1] === 'string' && !smth.args[1].startsWith('VECTOR')) { + throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', smth.args[1])); + } } smth.args[1] = `VECTOR('[${smth.args[1]}]')`; return `${smth.fn}(${ diff --git a/test/unit/dialects/oracle/vector.test.js b/test/unit/dialects/oracle/vector.test.js index 289c11238c8d..22f677225dcd 100644 --- a/test/unit/dialects/oracle/vector.test.js +++ b/test/unit/dialects/oracle/vector.test.js @@ -83,7 +83,7 @@ if (current.dialect.name === 'oracle') { }); describe('Vector where clause', () => { - const val = [1, 2, 3]; + const queryVector = [1, 2, 3]; const testsql = function(key, value, options, expectation) { if (expectation === undefined) { @@ -96,7 +96,7 @@ if (current.dialect.name === 'oracle') { }); }; - testsql(current.fn('VECTOR_DISTANCE', 'embedding', val), { + testsql(current.fn('VECTOR_DISTANCE', 'embedding', queryVector), { [Op.lt]: 2 }, { oracle: 'VECTOR_DISTANCE("embedding", VECTOR(\'[1,2,3]\')) < 2' @@ -104,7 +104,7 @@ if (current.dialect.name === 'oracle') { }); describe('order by distances', () => { - const val = [1, 2, 3, 4]; + const queryVector = [1, 2, 3, 4]; const testsql = (options, expectation) => { const model = options.model; @@ -132,11 +132,48 @@ if (current.dialect.name === 'oracle') { model: User, attributes: ['embedding'], order: [ - current.fn('VECTOR_DISTANCE', 'embedding', val) + current.fn('VECTOR_DISTANCE', 'embedding', queryVector) ] }, { oracle: 'SELECT "embedding" FROM "user" "User" ORDER BY VECTOR_DISTANCE("embedding", VECTOR(\'[1,2,3,4]\'));' }); }); + + describe('limit', () => { + const queryVector = [1, 2, 3, 4]; + const testsql = (options, expectation) => { + const model = options.model; + + it(util.inspect(options, { depth: 2 }), () => { + return 
expectsql( + sql.selectQuery( + options.table || model && model.getTableName(), + options, + options.model + ), + expectation + ); + }); + }; + + const User = Support.sequelize.define('User', { + embedding: { + type: DataTypes.VECTOR(4) + } + }, { + tableName: 'user' + }); + + testsql({ + model: User, + attributes: ['embedding'], + order: [ + current.fn('VECTOR_DISTANCE', 'embedding', queryVector) + ], + limit: 5 + }, { + oracle: 'SELECT "embedding" FROM "user" "User" ORDER BY VECTOR_DISTANCE("embedding", VECTOR(\'[1,2,3,4]\')) OFFSET 0 ROWS FETCH NEXT 5 ROWS ONLY;' + }); + }); }); } \ No newline at end of file From bf09c80bd570c93cddf9849a36b53020a33ad6d0 Mon Sep 17 00:00:00 2001 From: Hasan Date: Tue, 3 Dec 2024 17:16:22 +0530 Subject: [PATCH 17/24] feat(oracle): revert breaking changes --- src/data-types.js | 13 ++----------- src/dialects/oracle/query-generator.js | 5 ----- test/unit/dialects/oracle/vector.test.js | 2 ++ 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/data-types.js b/src/data-types.js index aacf05a67979..6f9fd98e9662 100644 --- a/src/data-types.js +++ b/src/data-types.js @@ -964,18 +964,9 @@ class TSVECTOR extends ABSTRACT { * */ class VECTOR extends ABSTRACT { - /** - * @param {object} options type options - * @param {number} [options.dimension] dimension of vector column - * @param {string} [options.format] format of vector, i.e. 
INT8, FLOAT32, FLOAT64, BINARY - */ - constructor(options = {}) { + constructor(dimension, format) { super(); - if (typeof options === 'number') { - options = { - dimension: options - }; - } + const options = typeof dimension === 'object' && dimension || { dimension, format }; this.options = options; this._format = options.format; this._length = options.dimension; diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index bcdada0cc3c0..5a80cbfd41e4 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -1264,11 +1264,6 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { // The first argument is expected to be column name // The second argument is expected to be array. smth.args[0] = this.quoteIdentifier(smth.args[0]); - if (!Array.isArray(smth.args[1])) { - if (typeof smth.args[1] === 'string' && !smth.args[1].startsWith('VECTOR')) { - throw new sequelizeErrors.ValidationError(util.format('%j is not a valid array', smth.args[1])); - } - } smth.args[1] = `VECTOR('[${smth.args[1]}]')`; return `${smth.fn}(${ smth.args.join(', ') diff --git a/test/unit/dialects/oracle/vector.test.js b/test/unit/dialects/oracle/vector.test.js index 22f677225dcd..e220912c3b62 100644 --- a/test/unit/dialects/oracle/vector.test.js +++ b/test/unit/dialects/oracle/vector.test.js @@ -1,3 +1,5 @@ +// Copyright (c) 2024, Oracle and/or its affiliates. 
All rights reserved + 'use strict'; const util = require('util'); From 1e8f81518fb778a5c042a59a06432d1aa4eb1e2e Mon Sep 17 00:00:00 2001 From: Hasan Date: Mon, 9 Dec 2024 14:28:18 +0530 Subject: [PATCH 18/24] feat(oracle): fix review comments --- src/dialects/oracle/data-types.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/dialects/oracle/data-types.js b/src/dialects/oracle/data-types.js index 1c5933426142..5328bc1943ba 100644 --- a/src/dialects/oracle/data-types.js +++ b/src/dialects/oracle/data-types.js @@ -5,7 +5,6 @@ const util = require('util'); const moment = require('moment'); const momentTz = require('moment-timezone'); - const sequelizeErrors = require('../../errors'); module.exports = BaseTypes => { @@ -487,7 +486,6 @@ module.exports = BaseTypes => { if (Array.isArray(value)) { return Float64Array.from(value, val => val); } - return value; } From d7d0735bf28e2083eebe7bf4c2b8c76fc7e03976 Mon Sep 17 00:00:00 2001 From: Hasan Date: Tue, 10 Dec 2024 15:25:13 +0530 Subject: [PATCH 19/24] feat(oracle): add integration test --- src/dialects/oracle/query-generator.js | 17 ++- .../dialects/oracle/vector.test.js | 112 ++++++++++++++++++ 2 files changed, 123 insertions(+), 6 deletions(-) create mode 100644 test/integration/dialects/oracle/vector.test.js diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index 5a80cbfd41e4..babe10463dc9 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -8,7 +8,6 @@ const AbstractQueryGenerator = require('../abstract/query-generator'); const _ = require('lodash'); const util = require('util'); const Model = require('../../model'); -const sequelizeErrors = require('../../errors'); const Transaction = require('../../transaction'); /** @@ -426,10 +425,6 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { result += this.quoteIdentifier(field.name); - if (this._dialect.supports.index.length && field.length) { - result += 
`(${field.length})`; - } - if (field.order) { result += ` ${field.order}`; } @@ -1261,10 +1256,20 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { if (smth.args.length > 2) { throw new Error('Too many arguments passed to similarity search function'); } + + if (typeof smth.args[1] === 'string') { + if (!smth.args[1].startsWith('VECTOR')) { + throw new Error('Unexpected second argument'); + } + } else if (!Array.isArray(smth.args[1])) { + throw new Error('Unexpected second argument'); + } // The first argument is expected to be column name // The second argument is expected to be array. smth.args[0] = this.quoteIdentifier(smth.args[0]); - smth.args[1] = `VECTOR('[${smth.args[1]}]')`; + if (Array.isArray(smth.args[1])) { + smth.args[1] = `VECTOR('[${smth.args[1]}]')`; + } return `${smth.fn}(${ smth.args.join(', ') })`; diff --git a/test/integration/dialects/oracle/vector.test.js b/test/integration/dialects/oracle/vector.test.js new file mode 100644 index 000000000000..d7942a9bd6f5 --- /dev/null +++ b/test/integration/dialects/oracle/vector.test.js @@ -0,0 +1,112 @@ +// Copyright (c) 2024, Oracle and/or its affiliates. 
All rights reserved + +'use strict'; + +const chai = require('chai'), + Sequelize = require('sequelize'), + Op = Sequelize.Op, + expect = chai.expect, + Support = require('../../support'), + current = Support.sequelize, + DataTypes = require('sequelize/lib/data-types'), + dialect = Support.getTestDialect(), + semver = require('semver'); + +if (dialect === 'oracle') { + + describe('vectors', () => { + before(async function() { + const version = await current.queryInterface.databaseVersion(); + const supportedVersion = '23.4.0'; + if (semver.gte(version, supportedVersion) === false) { + this.skip(); + } + }); + + describe('findAll', () => { + beforeEach(async function() { + this.Item = this.sequelize.define('Item', { + embeddings: DataTypes.VECTOR(4) + }); + + await this.Item.sync({ force: true }); + + await this.Item.create({ embeddings: new Float32Array([1, 1, 1, 1]) }); + await this.Item.create({ embeddings: new Float32Array([1, 2, 3, 3]) }); + }); + + it('fetches the rows from database', async function() { + const Item = this.sequelize.define('Item', { embeddings: Sequelize.VECTOR(4) }); + const result = await Item.findAll(); + expect(result.length).to.equal(2); + }); + + it('returns typed array for vector column', async function() { + const Item = this.sequelize.define('Item', { embeddings: Sequelize.VECTOR(4) }); + const result = await Item.findAll(); + // typed array property that differentiate it from other buffer view. 
+ expect(result[0].getDataValue('embeddings').BYTES_PER_ELEMENT).to.equal(4); + }); + }); + + describe('similarity search functions', () => { + beforeEach(async function() { + this.Item = this.sequelize.define('Item', { + embeddings: DataTypes.VECTOR(3) + }); + + await this.Item.sync({ force: true }); + + await this.Item.create({ embeddings: new Float32Array([1, 1, 1]) }); + await this.Item.create({ embeddings: new Float32Array([5, 5, 5]) }); + await this.Item.create({ embeddings: new Float32Array([10, 10, 10]) }); + await this.Item.create({ embeddings: new Float32Array([1, 2, 3]) }); + }); + + it('l1 distance', async function() { + const Item = this.sequelize.define('Item', { embeddings: Sequelize.VECTOR(3) }); + const queryVector = [1, 2, 3]; + const result = await Item.findAll({ + where: current.where(current.fn('L1_DISTANCE', 'embeddings', queryVector), { + [Op.lt]: 2 + }) + }); + expect(result.length).to.equal(1); + }); + + it('l2 distance', async function() { + const Item = this.sequelize.define('Item', { embeddings: Sequelize.VECTOR(3) }); + const queryVector = [1, 2, 3]; + const result = await Item.findAll({ + where: current.where(current.fn('L2_DISTANCE', 'embeddings', queryVector), { + [Op.lt]: 3 + }) + }); + expect(result.length).to.equal(2); + }); + + it('inner product', async function() { + const Item = this.sequelize.define('Item', { embeddings: Sequelize.VECTOR(3) }); + const queryVector = [1, 2, 3]; + const result = await Item.findAll({ + where: current.where(current.fn('INNER_PRODUCT', 'embeddings', queryVector), { + [Op.lt]: 3 + }) + }); + expect(result.length).to.equal(0); + }); + + it('cosine distance', async function() { + const Item = this.sequelize.define('Item', { embeddings: Sequelize.VECTOR(3) }); + const queryVector = [1, 2, 3]; + const result = await Item.findAll({ + where: current.where(current.fn('COSINE_DISTANCE', 'embeddings', queryVector), { + [Op.gt]: 1 + }) + }); + expect(result.length).to.equal(0); + }); + }); + + }); +} From 
18d4ecdbc867129d07020e20591c1e8c599bdebb Mon Sep 17 00:00:00 2001 From: Hasan Date: Thu, 16 Jan 2025 12:32:48 +0530 Subject: [PATCH 20/24] feat(oracle): add target accuracy --- src/dialects/oracle/query-generator.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dialects/oracle/query-generator.js b/src/dialects/oracle/query-generator.js index babe10463dc9..1b58d2222cec 100644 --- a/src/dialects/oracle/query-generator.js +++ b/src/dialects/oracle/query-generator.js @@ -482,7 +482,7 @@ export class OracleQueryGenerator extends AbstractQueryGenerator { 'ORAGANIZATION ', options.using === 'hnsw' ? 'INMEMORY NEIGHBOR GRAPH ' : 'NEIGHBOR PARTITION GRAPH ', options.distance ? `WITH DISTANCE ${options.distance}` : '', - //with target accuracy + options.accuracy ? `WITH TARGET ACCURACY ${options.accuracy}` : '', options.parameter ? idxParameter : '' ); } else { From 0e9392009d55016a0c86252b69defbfa993a8b2b Mon Sep 17 00:00:00 2001 From: Hasan Date: Tue, 4 Feb 2025 22:45:48 +0530 Subject: [PATCH 21/24] feat(oracle): bump up oracledb --- yarn.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yarn.lock b/yarn.lock index df0630d12ab0..9d0cdf4fcd78 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7047,10 +7047,10 @@ optionator@^0.9.1: type-check "^0.4.0" word-wrap "^1.2.3" -oracledb@^5.5.0: - version "5.5.0" - resolved "https://registry.yarnpkg.com/oracledb/-/oracledb-5.5.0.tgz#0cf9af5d0c0815f74849ae9ed56aee823514d71b" - integrity sha512-i5cPvMENpZP8nnqptB6l0pjiOyySj1IISkbM4Hr3yZEDdANo2eezarwZb9NQ8fTh5pRjmgpZdSyIbnn9N3AENw== +oracledb@^6.6.0: + version "6.7.1" + resolved "https://registry.yarnpkg.com/oracledb/-/oracledb-6.7.1.tgz#a014132257db91f8de11010fc89c917d53b7e1d6" + integrity sha512-dOXqgJ8re9kw5Nv9zLlljhBcjsGCUELDD/VEecQlXl2R8IMNxJqdceR+VeKS4gAyVNzq0MOz45lXOiC8OqG1qw== ordered-read-streams@^1.0.0: version "1.0.1" From c3f71e50d66c1af80135f0530aba86c395e3d174 Mon Sep 17 00:00:00 2001 From: Hasan Date: Tue, 4 Feb 2025 23:18:11 
+0530 Subject: [PATCH 22/24] fix(oracle): pin oracledb to '6.6.0' to avoid failures in node 10 --- package.json | 2 +- yarn.lock | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/package.json b/package.json index 442216d09504..7bc27e082919 100644 --- a/package.json +++ b/package.json @@ -109,7 +109,7 @@ "mysql2": "^2.3.3", "node-hook": "^1.0.0", "nyc": "^15.1.0", - "oracledb": "^6.6.0", + "oracledb": "6.6.0", "p-map": "^4.0.0", "p-props": "^4.0.0", "p-settle": "^4.1.1", diff --git a/yarn.lock b/yarn.lock index 9d0cdf4fcd78..c8eb408f3804 100644 --- a/yarn.lock +++ b/yarn.lock @@ -7047,10 +7047,10 @@ optionator@^0.9.1: type-check "^0.4.0" word-wrap "^1.2.3" -oracledb@^6.6.0: - version "6.7.1" - resolved "https://registry.yarnpkg.com/oracledb/-/oracledb-6.7.1.tgz#a014132257db91f8de11010fc89c917d53b7e1d6" - integrity sha512-dOXqgJ8re9kw5Nv9zLlljhBcjsGCUELDD/VEecQlXl2R8IMNxJqdceR+VeKS4gAyVNzq0MOz45lXOiC8OqG1qw== +oracledb@6.6.0: + version "6.6.0" + resolved "https://registry.yarnpkg.com/oracledb/-/oracledb-6.6.0.tgz#bb40adbe81a84a1e544c48af9f120c61f030e936" + integrity sha512-T3dx+o3j+tVN53wQyr4yGTmoPHLy+a2V8yb1T2PmWrsj3ZlSt2Yu1BgV2yTDqnmBZYpRi/I3yJXRCOHHD7PiyA== ordered-read-streams@^1.0.0: version "1.0.1" From d70af7a47c51560beaa03771edd9ad974052ab3a Mon Sep 17 00:00:00 2001 From: Hasan Date: Wed, 5 Feb 2025 10:52:04 +0530 Subject: [PATCH 23/24] feat(oracle): bump up the node version --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da4cf40d6a91..897bb8501e9c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: fail-fast: false matrix: oracle-version: [18, 23] - node-version: [10, 18] + node-version: [14.9, 18] name: Oracle DB ${{ matrix.oracle-version }} (Node ${{ matrix.node-version }}) runs-on: ubuntu-latest env: From 598a2264a022b2fd9a6da6079617d7f7ec287ec3 Mon Sep 17 00:00:00 2001 From: Hasan Date: 
Wed, 5 Feb 2025 11:18:55 +0530 Subject: [PATCH 24/24] fix(oracle): revert backnode version --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 897bb8501e9c..da4cf40d6a91 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: fail-fast: false matrix: oracle-version: [18, 23] - node-version: [14.9, 18] + node-version: [10, 18] name: Oracle DB ${{ matrix.oracle-version }} (Node ${{ matrix.node-version }}) runs-on: ubuntu-latest env: