diff --git a/sql/go.mod b/sql/go.mod index f54163cc9..9fe476739 100644 --- a/sql/go.mod +++ b/sql/go.mod @@ -5,6 +5,7 @@ go 1.25.0 replace github.com/getsentry/sentry-go => ../ require ( + github.com/DataDog/go-sqllexer v0.2.1 github.com/getsentry/sentry-go v0.46.0 github.com/stretchr/testify v1.11.1 ) diff --git a/sql/go.sum b/sql/go.sum index 6ffb7eff9..e98ac4eff 100644 --- a/sql/go.sum +++ b/sql/go.sum @@ -1,3 +1,5 @@ +github.com/DataDog/go-sqllexer v0.2.1 h1:al1RMRTxPoAa1P/RTu7D5yVXC6wCLwTRYLVAVH5MLjQ= +github.com/DataDog/go-sqllexer v0.2.1/go.mod h1:3xTFXBU69vUikYpESggScvC0RKYA7ZIdVrIkLwUOWdE= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= diff --git a/sql/integration_test.go b/sql/integration_test.go index 445992e28..1c6191e5d 100644 --- a/sql/integration_test.go +++ b/sql/integration_test.go @@ -142,13 +142,13 @@ func TestIntegration_EmitsQueryAndExecSpans(t *testing.T) { assert.Equal(t, sentrysql.SpanOrigin, gotExec.Origin) assert.Equal(t, sentrysql.SpanOrigin, gotQuery.Origin) - assert.Equal(t, "INSERT INTO t VALUES (1)", gotExec.Description) + assert.Equal(t, "INSERT INTO t VALUES (?)", gotExec.Description) assert.Equal(t, "SELECT * FROM t", gotQuery.Description) assert.Equal(t, sentry.SpanStatusOK, gotExec.Status) assert.Equal(t, sentry.SpanStatusOK, gotQuery.Status) assert.NotEmpty(t, gotExec.Data["db.system.name"]) - assert.Equal(t, "INSERT INTO t VALUES (1)", gotExec.Data["db.query.text"]) + assert.Equal(t, "INSERT INTO t VALUES (?)", gotExec.Data["db.query.text"]) assert.Equal(t, "SELECT * FROM t", gotQuery.Data["db.query.text"]) }, tracingOpts()) }) diff --git a/sql/internal/dbsystem/dbsystem.go b/sql/internal/dbsystem/dbsystem.go new file mode 100644 index 000000000..0814f7b7b --- /dev/null +++ b/sql/internal/dbsystem/dbsystem.go @@ -0,0 +1,42 
@@ +package dbsystem + +// Name identifies the DBMS for the db.system span attribute. Use one of the +// provided constants, or a custom value that matches the Sentry Queries module +// expectations. +type Name string + +// Known system names. This is not exhaustive; pass a custom string via +// Name("…") for databases not listed here. +const ( + PostgreSQL Name = "postgresql" + MySQL Name = "mysql" + MariaDB Name = "mariadb" + SQLite Name = "sqlite" + MSSQL Name = "mssql" + Oracle Name = "oracle" + Clickhouse Name = "clickhouse" + Snowflake Name = "snowflake" +) + +var driverNameToSystem = map[string]Name{ + "postgres": PostgreSQL, + "pgx": PostgreSQL, + "cloudsqlpostgres": PostgreSQL, + "mysql": MySQL, + "mariadb": MariaDB, + "sqlite": SQLite, + "sqlite3": SQLite, + "sqlserver": MSSQL, + "mssql": MSSQL, + "oracle": Oracle, + "godror": Oracle, + "goora": Oracle, + "oci8": Oracle, + "clickhouse": Clickhouse, + "snowflake": Snowflake, +} + +func FromDriverName(name string) (Name, bool) { + sys, ok := driverNameToSystem[name] + return sys, ok +} diff --git a/sql/obfuscation_integration_test.go b/sql/obfuscation_integration_test.go new file mode 100644 index 000000000..f9a76bcca --- /dev/null +++ b/sql/obfuscation_integration_test.go @@ -0,0 +1,44 @@ +package sentrysql + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestObfuscationDatabaseSystemMappings(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + system DatabaseSystem + input string + want string + }{ + { + name: "mariadb uses mysql lexer mode", + system: SystemMariaDB, + input: `SELECT * FROM t WHERE name = "alice"`, + want: "SELECT * FROM t WHERE name = ?", + }, + { + name: "sqlite treats double-quoted tokens as values", + system: SystemSQLite, + input: `SELECT "users"."name" FROM "users" WHERE id = 1`, + want: `SELECT ? FROM ? 
WHERE id = ?`, + }, + { + name: "clickhouse uses generic lexer mode", + system: SystemClickhouse, + input: `SELECT "users"."name" FROM "users" WHERE id = 1`, + want: `SELECT "users"."name" FROM "users" WHERE id = ?`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := (&config{system: tt.system, obfuscatorDBMS: obfuscatorDBMS(tt.system)}).obfuscateQuery(tt.input) + assert.Equal(t, tt.want, got) + }) + } +} diff --git a/sql/options.go b/sql/options.go index f601bc8db..66aa1fac6 100644 --- a/sql/options.go +++ b/sql/options.go @@ -3,68 +3,42 @@ package sentrysql import ( "errors" "fmt" + "strings" + + sqllexer "github.com/DataDog/go-sqllexer" + "github.com/getsentry/sentry-go/sql/internal/dbsystem" ) -// DatabaseSystem identifies the DBMS for the db.system span attribute. Use one -// of the provided constants, or a custom value that matches the Sentry Queries -// module expectations. -type DatabaseSystem string +type DatabaseSystem = dbsystem.Name -// Known DatabaseSystem values. This is not exhaustive; pass a custom string -// via DatabaseSystem("…") for databases not listed here. const ( - SystemPostgreSQL DatabaseSystem = "postgresql" - SystemMySQL DatabaseSystem = "mysql" - SystemMariaDB DatabaseSystem = "mariadb" - SystemSQLite DatabaseSystem = "sqlite" - SystemMSSQL DatabaseSystem = "mssql" - SystemOracle DatabaseSystem = "oracle" - SystemClickhouse DatabaseSystem = "clickhouse" - SystemSnowflake DatabaseSystem = "snowflake" + SystemPostgreSQL = dbsystem.PostgreSQL + SystemMySQL = dbsystem.MySQL + SystemMariaDB = dbsystem.MariaDB + SystemSQLite = dbsystem.SQLite + SystemMSSQL = dbsystem.MSSQL + SystemOracle = dbsystem.Oracle + SystemClickhouse = dbsystem.Clickhouse + SystemSnowflake = dbsystem.Snowflake ) -// driverNameToSystem is a best effort map of common Go SQL driver registration names. 
-var driverNameToSystem = map[string]DatabaseSystem{ - // PostgreSQL and flavors - "postgres": SystemPostgreSQL, - "pgx": SystemPostgreSQL, - "cloudsqlpostgres": SystemPostgreSQL, - // MySQL / MariaDB - "mysql": SystemMySQL, - "mariadb": SystemMariaDB, - // SQLite - "sqlite": SystemSQLite, - "sqlite3": SystemSQLite, - // MS SQL Server - "sqlserver": SystemMSSQL, - "mssql": SystemMSSQL, - // Oracle - "oracle": SystemOracle, - "godror": SystemOracle, - "goora": SystemOracle, - "oci8": SystemOracle, - // Others - "clickhouse": SystemClickhouse, - "snowflake": SystemSnowflake, -} - func systemFromDriverName(name string) (DatabaseSystem, bool) { - sys, ok := driverNameToSystem[name] - return sys, ok + return dbsystem.FromDriverName(name) } // Option configures sql wrappers. type Option func(*config) type config struct { - system DatabaseSystem - dbName string - dbUser string - driverName string - host string - port int - socketAddress string - socketPort int + system DatabaseSystem + dbName string + dbUser string + driverName string + host string + port int + socketAddress string + socketPort int + obfuscatorDBMS sqllexer.DBMSType } // WithDatabaseSystem sets the db.system span attribute. 
Prefer one of the @@ -130,5 +104,116 @@ func newConfig(opts []Option) *config { opt(c) } } + c.obfuscatorDBMS = obfuscatorDBMS(c.system) return c } + +func (c *config) obfuscateQuery(query string) string { + if c == nil { + return query + } + + w := queryObfuscator{ + lexer: newQueryLexer(query, c.obfuscatorDBMS), + sqlite: c.system == SystemSQLite, + capacity: len(query), + } + return w.run() +} + +func newQueryLexer(query string, dbms sqllexer.DBMSType) *sqllexer.Lexer { + if dbms == "" { + return sqllexer.New(query) + } + return sqllexer.New(query, sqllexer.WithDBMS(dbms)) +} + +type queryObfuscator struct { + lexer *sqllexer.Lexer + sqlite bool + capacity int + out strings.Builder + prevPlaceholder bool + prevSQLiteQuoted bool + pendingSQLiteDot bool +} + +func (o *queryObfuscator) run() string { + o.out.Grow(o.capacity) + + for { + tok := o.lexer.Scan() + switch tok.Type { + case sqllexer.EOF: + o.flushSQLiteDot() + return strings.TrimSpace(o.out.String()) + case sqllexer.COMMENT, sqllexer.MULTILINE_COMMENT: + continue + case sqllexer.NUMBER, sqllexer.STRING, sqllexer.INCOMPLETE_STRING, sqllexer.DOLLAR_QUOTED_STRING, sqllexer.DOLLAR_QUOTED_FUNCTION: + o.writePlaceholder(o.sqlite && tok.Type == sqllexer.STRING) + case sqllexer.QUOTED_IDENT: + if o.sqlite { + o.writePlaceholder(true) + continue + } + o.writeValue(tok.Value) + case sqllexer.PUNCTUATION: + if o.sqlite && tok.Value == "." 
&& o.prevSQLiteQuoted { + o.pendingSQLiteDot = true + continue + } + o.writeValue(tok.Value) + default: + o.writeValue(tok.Value) + } + } +} + +func (o *queryObfuscator) writePlaceholder(sqliteQuoted bool) { + if o.pendingSQLiteDot { + if sqliteQuoted && o.prevSQLiteQuoted { + o.pendingSQLiteDot = false + return + } + o.out.WriteByte('.') + o.pendingSQLiteDot = false + } + if o.prevPlaceholder { + return + } + o.out.WriteByte('?') + o.prevPlaceholder = true + o.prevSQLiteQuoted = sqliteQuoted +} + +func (o *queryObfuscator) writeValue(value string) { + o.flushSQLiteDot() + o.out.WriteString(value) + o.prevPlaceholder = false + o.prevSQLiteQuoted = false +} + +func (o *queryObfuscator) flushSQLiteDot() { + if !o.pendingSQLiteDot { + return + } + o.out.WriteByte('.') + o.pendingSQLiteDot = false +} + +func obfuscatorDBMS(system DatabaseSystem) sqllexer.DBMSType { + switch system { + case SystemPostgreSQL: + return sqllexer.DBMSPostgres + case SystemMySQL, SystemMariaDB, SystemSQLite: + return sqllexer.DBMSMySQL + case SystemMSSQL: + return sqllexer.DBMSSQLServer + case SystemOracle: + return sqllexer.DBMSOracle + case SystemSnowflake: + return sqllexer.DBMSSnowflake + default: + return "" + } +} diff --git a/sql/span.go b/sql/span.go index ff08ab428..f1bed868b 100644 --- a/sql/span.go +++ b/sql/span.go @@ -21,12 +21,16 @@ func startSpan(ctx context.Context, cfg *config, op, query string) *sentry.Span return nil } + description := query + if cfg != nil { + description = cfg.obfuscateQuery(query) + } + span := parent.StartChild(op, - sentry.WithDescription(query), + sentry.WithDescription(description), sentry.WithSpanOrigin(SpanOrigin), ) - - span.SetData("db.query.text", query) + span.SetData("db.query.text", description) if cfg != nil { if cfg.system != "" { @@ -55,11 +59,12 @@ func startSpan(ctx context.Context, cfg *config, op, query string) *sentry.Span } } - // TODO: on the next PR we add the query parser, we then need to set: + // TODO: add the remaining db span 
attributes once we have a proper query + // analyzer for them: // - db.operation.name // - db.query.summary // - db.collection.name - // - db.query.parameter. PII gate + // - db.query.parameter.<key>, behind the PII gate return span } diff --git a/sql/span_test.go b/sql/span_test.go index 22a17b3f9..8fbf3e6d1 100644 --- a/sql/span_test.go +++ b/sql/span_test.go @@ -35,8 +35,8 @@ func TestStartSpan_SpanData(t *testing.T) { span := startSpan(parent.Context(), cfg, opQuery, "SELECT 1") require.NotNil(t, span) - assert.Equal(t, "SELECT 1", span.Description) - assert.Equal(t, "SELECT 1", span.Data["db.query.text"]) + assert.Equal(t, "SELECT ?", span.Description) + assert.Equal(t, "SELECT ?", span.Data["db.query.text"]) assert.Equal(t, "postgresql", span.Data["db.system.name"]) assert.Equal(t, "pgx", span.Data["db.driver.name"]) assert.Equal(t, "appdb", span.Data["db.namespace"])