Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import java.util.HashMap;
import java.util.Map;
import lombok.Getter;

/**
* Holds the params that need to be set in the PreparedStatement for constructing the final SQL
Expand Down Expand Up @@ -29,6 +30,18 @@ public static Builder newBuilder() {
return new Builder();
}

/**
 * Wrapper class to hold array parameter metadata for PostgreSQL array binding.
 *
 * <p>Pairs the values of one array-valued parameter with the SQL type name of its elements, so
 * that both travel together through the parameter map. Accessors are generated by Lombok's
 * {@code @Getter}.
 */
@Getter
public static class ArrayParam {
// Elements of the array parameter. The caller's array is stored as-is (no defensive copy).
private final Object[] values;
// SQL type name of the array elements, e.g. "text", "float8" or "bool".
// NOTE(review): presumably handed to Connection.createArrayOf() when binding - the binding code
// is not visible here, confirm.
private final String sqlType;

public ArrayParam(Object[] values, String sqlType) {
this.values = values;
this.sqlType = sqlType;
}
}

public static class Builder {

private int nextIndex;
Expand All @@ -44,6 +57,11 @@ public Builder addObjectParam(Object paramValue) {
return this;
}

/**
 * Registers an array-valued parameter under the next free parameter index.
 *
 * <p>The values and their element type name are wrapped in an {@link ArrayParam} and stored in
 * the same index-keyed map as the plain object parameters, so the array occupies exactly one
 * placeholder position.
 *
 * @param values the elements of the array parameter
 * @param sqlType the SQL type name of the elements
 * @return this builder, to allow call chaining
 */
public Builder addArrayParam(Object[] values, String sqlType) {
  // Claim the index first, then build the wrapper: same evaluation order as a single put(...) call.
  final int index = nextIndex++;
  final ArrayParam arrayParam = new ArrayParam(values, sqlType);
  objectParams.put(index, arrayParam);
  return this;
}

/**
 * Creates the {@link Params} instance from the parameters added to this builder so far.
 *
 * @return a new {@code Params} constructed with this builder's parameter map
 */
public Params build() {
return new Params(objectParams);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public String parse(

/**
* Generates SQL for scalar IN operator (used when JSONB array field has been unnested). Example:
* "props_dot_source-loc" IN (?::jsonb, ?::jsonb)
* "props_dot_source-loc" = ANY(ARRAY[?::jsonb, ?::jsonb])
*
* <p>Note: After unnesting with jsonb_array_elements(), each row contains a JSONB scalar value.
* We cast the parameters to jsonb for direct JSONB-to-JSONB comparison, which works for all JSONB
Expand All @@ -86,7 +86,7 @@ private String prepareFilterStringForScalarInOperator(
.collect(Collectors.joining(", "));

// Direct JSONB comparison - no text conversion needed
return String.format("%s IN (%s)", parsedLhs, placeholders);
return String.format("%s = ANY(ARRAY[%s])", parsedLhs, placeholders);
}

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
package org.hypertrace.core.documentstore.postgres.query.v1.parser.filter;

import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import org.hypertrace.core.documentstore.expression.impl.JsonFieldType;
import org.hypertrace.core.documentstore.expression.impl.JsonIdentifierExpression;
Expand All @@ -11,12 +10,13 @@
* Optimized parser for IN operations on JSON primitive fields (string, number, boolean) with proper
* type casting.
*
* <p>Generates efficient SQL using {@code ->>} operator with appropriate PostgreSQL casting:
* <p>Generates efficient SQL using {@code ->>} operator with {@code = ANY(?)}, where all values are
* bound as a single typed array parameter, and appropriate PostgreSQL casting:
*
* <ul>
* <li><b>STRING:</b> {@code "document" ->> 'item' IN ('Soap', 'Shampoo')}
* <li><b>NUMBER:</b> {@code CAST("document" ->> 'price' AS NUMERIC) IN (10, 20)}
* <li><b>BOOLEAN:</b> {@code CAST("document" ->> 'active' AS BOOLEAN) IN (true, false)}
* <li><b>STRING:</b> {@code "document" ->> 'item' = ANY(?)} with a {@code text} array, e.g. 'Soap', 'Shampoo'
* <li><b>NUMBER:</b> {@code CAST("document" ->> 'price' AS NUMERIC) = ANY(?)} with a {@code float8} array, e.g. 10, 20
* <li><b>BOOLEAN:</b> {@code CAST("document" ->> 'active' AS BOOLEAN) = ANY(?)} with a {@code bool} array, e.g. true, false
* </ul>
*
* <p>This is much more efficient than the defensive approach that checks both array and scalar
Expand Down Expand Up @@ -61,14 +61,15 @@ private String prepareFilterStringForInOperator(
final JsonFieldType fieldType,
final Params.Builder paramsBuilder) {

String placeholders =
StreamSupport.stream(parsedRhs.spliterator(), false)
.map(
value -> {
paramsBuilder.addObjectParam(value);
return "?";
})
.collect(Collectors.joining(", "));
Object[] values = StreamSupport.stream(parsedRhs.spliterator(), false).toArray();

if (values.length == 0) {
// return FALSE
return "1 = 0";
}

String sqlType = mapJsonFieldTypeToSqlType(fieldType);
paramsBuilder.addArrayParam(values, sqlType);

// Apply appropriate casting based on field type
String lhsWithCast = parsedLhs;
Expand All @@ -77,8 +78,18 @@ private String prepareFilterStringForInOperator(
} else if (fieldType == JsonFieldType.BOOLEAN) {
lhsWithCast = String.format("CAST(%s AS BOOLEAN)", parsedLhs);
}
// STRING or null fieldType: no casting needed
return String.format("%s = ANY(?)", lhsWithCast);
}

return String.format("%s IN (%s)", lhsWithCast, placeholders);
/**
 * Maps a JSON field type to the PostgreSQL element type name used for the bound array parameter.
 *
 * <p>A {@code null} field type is handled like {@code STRING}: the left-hand side is compared
 * without any cast in that case, so the values are bound as text.
 *
 * @param fieldType the declared JSON field type; may be {@code null}
 * @return {@code "float8"} for NUMBER, {@code "bool"} for BOOLEAN, {@code "text"} otherwise
 */
private String mapJsonFieldTypeToSqlType(JsonFieldType fieldType) {
  if (fieldType == null) {
    // A switch on a null enum reference throws NullPointerException; treat null as text.
    return "text";
  }
  switch (fieldType) {
    case NUMBER:
      return "float8";
    case BOOLEAN:
      return "bool";
    case STRING:
    default:
      return "text";
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import java.util.stream.StreamSupport;
import org.hypertrace.core.documentstore.expression.impl.ArrayIdentifierExpression;
import org.hypertrace.core.documentstore.expression.impl.RelationalExpression;
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresArrayTypeExtractor;
import org.hypertrace.core.documentstore.postgres.query.v1.parser.filter.nonjson.field.PostgresTypeExtractor;

/**
* Handles EQ/NEQ operations on top-level array columns when RHS is also an array, using exact
Expand Down Expand Up @@ -43,7 +43,7 @@ public String parse(
.collect(Collectors.joining(", "));

ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs();
String arrayTypeCast = arrayExpr.accept(new PostgresArrayTypeExtractor());
String arrayTypeCast = arrayExpr.accept(PostgresTypeExtractor.arrayType());

// Generate: tags = ARRAY[?, ?]::text[]
if (arrayTypeCast != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,11 @@ public String parse(
}

// Field is NOT unnested - use array containment operator
String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor());
String arrayType = expression.getLhs().accept(PostgresTypeExtractor.arrayType());
// Fallback to text[] if type is unknown
String typeCast = (arrayType != null) ? arrayType : "text[]";

// Use ARRAY[?, ?, ...] syntax with appropriate type cast
if (arrayTypeCast != null && arrayTypeCast.equals("text[]")) {
return String.format("%s @> ARRAY[%s]::text[]", parsedLhs, placeholders);
} else if (arrayTypeCast != null) {
// INTEGER/BOOLEAN/DOUBLE arrays: Use the correct type cast
return String.format("%s @> ARRAY[%s]::%s", parsedLhs, placeholders, arrayTypeCast);
} else {
// Fallback: use text[] cast
return String.format("%s @> ARRAY[%s]::text[]", parsedLhs, placeholders);
}
return String.format("%s @> ARRAY[%s]::%s", parsedLhs, placeholders, typeCast);
}

private Iterable<Object> normalizeToIterable(final Object value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,36 @@
/**
* PostgreSQL-specific data types with their SQL type strings.
*
* <p>This enum maps generic {@link DataType} values to PostgreSQL-specific type strings used in SQL
* queries for type casting.
* <p>This enum maps generic {@link DataType} values to PostgreSQL internal type names, which work
* for both JDBC's {@code Connection.createArrayOf()} and SQL type casting.
*/
public enum PostgresDataType {
TEXT("text"),
INTEGER("integer"),
BIGINT("bigint"),
REAL("real"),
DOUBLE_PRECISION("double precision"),
BOOLEAN("boolean"),
INTEGER("int4"),
BIGINT("int8"),
REAL("float4"),
DOUBLE_PRECISION("float8"),
BOOLEAN("bool"),
TIMESTAMPTZ("timestamptz"),
DATE("date"),
UNKNOWN("unknown");
UNKNOWN(null);

private final String sqlType;

/**
 * Associates an enum constant with its PostgreSQL type name.
 *
 * @param sqlType the type name stored for this constant; {@code null} when the constant has no
 *     PostgreSQL type name
 */
PostgresDataType(String sqlType) {
this.sqlType = sqlType;
}

/**
 * Returns the PostgreSQL type name for use with JDBC's createArrayOf() and SQL casting.
 *
 * @return The type name (e.g., "int4", "float8", "text"), or {@code null} for a constant that
 *     was declared without a type name (such as {@code UNKNOWN(null)})
 */
public String getSqlType() {
return sqlType;
}

/**
 * Returns the array type for SQL casting (e.g., "int4[]", "text[]").
 *
 * @return the element type name followed by {@code []}, or {@code null} when this constant has
 *     no type name, so that callers can apply their own fallback instead of receiving the
 *     meaningless type string {@code "null[]"}
 */
public String getArraySqlType() {
  // String concatenation renders a null reference as "null", which would produce "null[]".
  return sqlType == null ? null : sqlType + "[]";
}
Expand All @@ -38,7 +44,6 @@ public String getArraySqlType() {
*
* @param dataType the generic data type
* @return the corresponding PostgresDataType, or null if UNSPECIFIED
* @throws IllegalArgumentException if the DataType is unknown
*/
public static PostgresDataType fromDataType(DataType dataType) {
switch (dataType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,79 +31,101 @@ public String parse(
final String parsedLhs = expression.getLhs().accept(context.lhsParser());
final Iterable<Object> parsedRhs = expression.getRhs().accept(context.rhsParser());

// Extract element type from expression metadata for type-safe query generation
String sqlType = expression.getLhs().accept(PostgresTypeExtractor.scalarType());

// Check if this field has been unnested - if so, treat it as a scalar
ArrayIdentifierExpression arrayExpr = (ArrayIdentifierExpression) expression.getLhs();
String fieldName = arrayExpr.getName();
if (context.getPgColumnNames().containsKey(fieldName)) {
// Field is unnested - each element is now a scalar, not an array
// Use scalar IN operator instead of array overlap
return prepareFilterStringForScalarInOperator(
parsedLhs, parsedRhs, context.getParamsBuilder());
parsedLhs, parsedRhs, sqlType, context.getParamsBuilder());
}

// Field is NOT unnested - use array overlap logic
String arrayTypeCast = expression.getLhs().accept(new PostgresArrayTypeExtractor());
return prepareFilterStringForArrayInOperator(
parsedLhs, parsedRhs, arrayTypeCast, context.getParamsBuilder());
parsedLhs, parsedRhs, sqlType, context.getParamsBuilder());
}

/**
* Generates SQL for scalar IN operator (used when array field has been unnested). Example:
* "tags_unnested" IN (?, ?, ?)
* "tags_unnested" = ANY(?)
*/
private String prepareFilterStringForScalarInOperator(
final String parsedLhs,
final Iterable<Object> parsedRhs,
final String sqlType,
final Params.Builder paramsBuilder) {
// If type is specified, use optimized ANY(ARRAY[]) syntax
// Otherwise, fall back to traditional IN (?, ?, ?) for backward compatibility
if (sqlType != null) {
Object[] values = StreamSupport.stream(parsedRhs.spliterator(), false).toArray();

String placeholders =
StreamSupport.stream(parsedRhs.spliterator(), false)
.map(
value -> {
paramsBuilder.addObjectParam(value);
return "?";
})
.collect(Collectors.joining(", "));
if (values.length == 0) {
return "1 = 0";
}

// Scalar IN operator for unnested array elements
return String.format("%s IN (%s)", parsedLhs, placeholders);
paramsBuilder.addArrayParam(values, sqlType);
return String.format("%s = ANY(?)", parsedLhs);
} else {
return prepareFilterStringFallback(parsedLhs, parsedRhs, paramsBuilder, "%s IN (%s)");
}
}

/**
* Generates SQL for array overlap operator (used for non-unnested array fields). Example: "tags"
* && ARRAY[?, ?]::text[]
* && ?
*
* <p>Uses a single array parameter.
*/
private String prepareFilterStringForArrayInOperator(
final String parsedLhs,
final Iterable<Object> parsedRhs,
final String arrayType,
final String sqlType,
final Params.Builder paramsBuilder) {
// If type is specified, use optimized array overlap with typed array
// Otherwise, fall back to jsonb-based approach for backward compatibility
if (sqlType != null) {
Object[] values = StreamSupport.stream(parsedRhs.spliterator(), false).toArray();

String placeholders =
if (values.length == 0) {
return "1 = 0";

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

curious, what is this for?

}

paramsBuilder.addArrayParam(values, sqlType);
return String.format("%s && ?", parsedLhs);
} else {
// Fallback: cast both sides to text[] for backward compatibility with any array type
return prepareFilterStringFallback(
parsedLhs, parsedRhs, paramsBuilder, "%s::text[] && ARRAY[%s]::text[]");
}
}

/**
* Fallback method using traditional (?, ?, ?) syntax for backward compatibility when type
* information is not available.
*/
private String prepareFilterStringFallback(
final String parsedLhs,
final Iterable<Object> parsedRhs,
final Params.Builder paramsBuilder,
final String formatPattern) {

String collect =
StreamSupport.stream(parsedRhs.spliterator(), false)
.map(
value -> {
paramsBuilder.addObjectParam(value);
val -> {
paramsBuilder.addObjectParam(val);
return "?";
})
.collect(Collectors.joining(", "));

// Use array overlap operator for array fields
if (arrayType != null) {
// Type-aware optimization
if (arrayType.equals("text[]")) {
// cast RHS to text[] otherwise JDBC binds it as character varying[].
return String.format("%s && ARRAY[%s]::text[]", parsedLhs, placeholders);
} else {
// INTEGER/BOOLEAN arrays: No casting needed, JDBC binds them correctly
// "numbers" && ARRAY[?, ?] (PostgreSQL infers integer[])
// "flags" && ARRAY[?, ?] (PostgreSQL infers boolean[])
return String.format("%s && ARRAY[%s]", parsedLhs, placeholders);
}
} else {
// Fallback: Cast both LHS and RHS to text[] to avoid type mismatch issues. This has the worst
// performance because casting LHS doesn't let PG use indexes on this col
return String.format("%s::text[] && ARRAY[%s]::text[]", parsedLhs, placeholders);
if (collect.isEmpty()) {
return "1 = 0";
}

return String.format(formatPattern, parsedLhs, collect);
}
}
Loading
Loading