Skip to content
Merged

v9.5.0 #2572

Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ac6c823
fix: Update benchmark module version and add to release checklist
piotrszul Mar 6, 2026
d631d48
feat: Implement FHIRPath select() function for collection projection
piotrszul Feb 25, 2026
5417071
feat: Implement FHIRPath repeatAll() function for recursive traversal
piotrszul Feb 26, 2026
a571f76
fix: Address code review issues in repeatAll() implementation
piotrszul Mar 2, 2026
0f787fc
fix: Preserve extension map in ResourceCollection.copyWith()
piotrszul Mar 2, 2026
29cdb3b
docs: Archiving openspec change for repeat-all-function
piotrszul Mar 6, 2026
49344f9
feat: Add configurable depth limit and infinite recursion detection f…
piotrszul Mar 6, 2026
0c6c68f
fix: Detect self-referential primitive traversal in repeatAll() stati…
piotrszul Mar 6, 2026
3cb38aa
feat: Implement FHIRPath repeat() function with equality-based dedupl…
piotrszul Mar 6, 2026
42c72e3
fix: Narrow exception handling scope in UnresolvedTransformTree and i…
piotrszul Mar 9, 2026
112ea9a
fix: Add indeterminate type guard and resource handling to repeatAll/…
piotrszul Mar 9, 2026
818e347
refactor: Rename maxExtensionDepth to maxUnboundTraversalDepth
piotrszul Mar 9, 2026
a0be725
feat: Unify repeat depth configuration across FHIRPath and ViewDefini…
piotrszul Mar 9, 2026
6e73a06
test: Add unit tests for UnresolvedVariantUnwrap expression
piotrszul Mar 10, 2026
8743210
test: Add unit tests for variantTransformTree and variantUnwrap
piotrszul Mar 10, 2026
4c2d750
fix: Resolve SonarQube issues reported on PR #2566
piotrszul Mar 10, 2026
7d7e241
fix: Prevent Catalyst optimizer from corrupting collected view results
piotrszul Mar 11, 2026
99be9fe
fix: Pass correct dataType in StructProduct to support UnsafeRow data
piotrszul Mar 13, 2026
f9c1512
fix: Use recursive element type context for repeat() column transform
piotrszul Mar 16, 2026
e0bc25f
Merge branch 'issue/2388' into release/9.5.0
johngrimes Mar 16, 2026
b4594e2
Merge branch 'issue/2568' into release/9.5.0
johngrimes Mar 16, 2026
8613197
chore: Archive fix-search-result-full-width change
johngrimes Mar 16, 2026
7c9de99
docs: Document select, repeat, and repeatAll FHIRPath functions
johngrimes Mar 16, 2026
299e4fb
chore: Update SQL on FHIR pointer
johngrimes Mar 16, 2026
86155a0
chore: Update Trivy action to v0.35.0 and CLI to v0.69.3
johngrimes Mar 16, 2026
ca7098e
chore: Suppress non-exploitable Trivy findings
johngrimes Mar 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/actions/trivy-scan/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ runs:
steps:
- name: Run security scan (SARIF output)
id: sarif-scan
uses: aquasecurity/trivy-action@0.33.1
uses: aquasecurity/trivy-action@0.35.0
continue-on-error: true
env:
TRIVY_SKIP_DB_UPDATE: ${{ inputs.skip-db-update }}
Expand All @@ -57,11 +57,11 @@ runs:
skip-dirs: ${{ inputs.skip-dirs }}
exit-code: "1"
severity: MEDIUM,HIGH,CRITICAL
version: "v0.69.0"
version: "v0.69.3"

- name: Run security scan (table output on failure)
if: steps.sarif-scan.outcome == 'failure'
uses: aquasecurity/trivy-action@0.33.1
uses: aquasecurity/trivy-action@0.35.0
env:
TRIVY_SKIP_DB_UPDATE: ${{ inputs.skip-db-update }}
TRIVY_SKIP_JAVA_DB_UPDATE: ${{ inputs.skip-db-update }}
Expand All @@ -76,4 +76,4 @@ runs:
skip-setup-trivy: true
exit-code: "1"
severity: MEDIUM,HIGH,CRITICAL
version: "v0.69.0"
version: "v0.69.3"
5 changes: 5 additions & 0 deletions .trivyignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ CVE-2022-46751
CVE-2023-44981
CVE-2024-23944
CVE-2025-58457
CVE-2026-24281
CVE-2026-24308

# Jersey Client is provided via Spark, not bundled in distribution.
CVE-2025-12383
Expand All @@ -41,3 +43,6 @@ CVE-2025-33042
# Not applicable in library context, user controls all compressed input.
# See: https://github.com/aehrc/pathling/issues/2540
CVE-2025-67721

# jackson-core async parser DoS — Pathling uses only synchronous parsing via HAPI FHIR.
GHSA-72hv-8253-57qq
194 changes: 192 additions & 2 deletions encoders/src/main/java/au/csiro/pathling/encoders/ValueFunctions.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@
import org.apache.spark.sql.Column;
import org.apache.spark.sql.catalyst.expressions.Expression;
import org.apache.spark.sql.classic.ColumnConversions$;
import org.apache.spark.sql.functions;
import org.apache.spark.sql.types.DataType;
import scala.Function1;
import scala.collection.immutable.Seq;
import scala.jdk.javaapi.CollectionConverters;
Expand Down Expand Up @@ -204,14 +206,17 @@ public static Column emptyArrayIfMissingField(@Nonnull final Column value) {
* @param extractor An extraction operation to apply at each node that must return an array type
* @param traversals A list of traversal operations to apply recursively to reach child nodes
* @param maxDepth The maximum recursion depth for same-type traversals to prevent infinite loops
* @param errorOnDepthExhaustion If true, throws an error when same-type depth is exhausted
* instead of returning an empty array
* @return A Column containing an array of all extracted values from the tree traversal
*/
@Nonnull
public static Column transformTree(
@Nonnull final Column value,
@Nonnull final UnaryOperator<Column> extractor,
@Nonnull final List<UnaryOperator<Column>> traversals,
final int maxDepth) {
final int maxDepth,
final boolean errorOnDepthExhaustion) {

final List<Function1<Expression, Expression>> x =
traversals.stream()
Expand All @@ -222,7 +227,192 @@ public static Column transformTree(
final Seq<Function1<Expression, Expression>> scalaSeq = CollectionConverters.asScala(x).toSeq();
return column(
new UnresolvedTransformTree(
expression(value), liftToExpression(extractor)::apply, scalaSeq, maxDepth));
expression(value),
liftToExpression(extractor)::apply,
scalaSeq,
scala.Option.empty(),
maxDepth,
errorOnDepthExhaustion));
}

/**
 * Performs a recursive tree traversal that extracts values at each level of a hierarchical
 * structure, concatenating all extracted results into a single array.
 *
 * <p>Convenience overload that stops silently (producing an empty array) when the same-type
 * recursion depth is exhausted.
 *
 * @param value The starting value column to traverse
 * @param extractor An extraction operation to apply at each node that must return an array type
 * @param traversals A list of traversal operations to apply recursively to reach child nodes
 * @param maxDepth The maximum recursion depth for same-type traversals to prevent infinite loops
 * @return A Column containing an array of all extracted values from the tree traversal
 */
@Nonnull
public static Column transformTree(
    @Nonnull final Column value,
    @Nonnull final UnaryOperator<Column> extractor,
    @Nonnull final List<UnaryOperator<Column>> traversals,
    final int maxDepth) {
  // Depth exhaustion is not treated as an error in this overload.
  final boolean errorOnDepthExhaustion = false;
  return transformTree(value, extractor, traversals, maxDepth, errorOnDepthExhaustion);
}

/**
 * Wraps a column transform so that each element of its array result is converted to Variant.
 * The returned transform first applies the original transform, then maps every element of the
 * resulting array through {@code to_variant_object()}, yielding {@code Array[Variant]}.
 *
 * <p>Because all Variant arrays share a single element type, this makes arrays produced at
 * different nesting levels type-compatible regardless of their original struct schemas.
 *
 * @param transform The original column transform that produces an array
 * @return A new transform that produces {@code Array[Variant]}
 */
@Nonnull
public static UnaryOperator<Column> wrapWithVariant(
    @Nonnull final UnaryOperator<Column> transform) {
  return input -> {
    final Column extracted = transform.apply(input);
    return functions.transform(extracted, element -> functions.to_variant_object(element));
  };
}

/**
 * Converts an {@code Array[Variant]} column back to an array of a specific target schema using
 * {@code variant_get()}. Every Variant element is decoded from the JSON root path ({@code "$"})
 * into the supplied target schema string; fields absent at shallower nesting levels come back
 * as null.
 *
 * @param variantArray The column containing {@code Array[Variant]}
 * @param targetSchema The Spark SQL schema string for the target element type (e.g., {@code
 *     "STRUCT<linkId: STRING, type: STRING, item: ARRAY<STRUCT<...>>>"})
 * @return A column containing an array with elements decoded to the target schema
 */
@Nonnull
public static Column unwrapVariantArray(
    @Nonnull final Column variantArray, @Nonnull final String targetSchema) {
  // Decode each Variant element at the root path into the requested schema.
  return functions.transform(
      variantArray, element -> functions.variant_get(element, "$", targetSchema));
}

/**
 * Converts an {@code Array[Variant]} column back to an array of a specific target data type
 * using {@code variant_get()}. The supplied {@link DataType} is rendered to its DDL string form
 * and handed to the schema-string overload.
 *
 * @param variantArray The column containing {@code Array[Variant]}
 * @param targetType The Spark {@link DataType} for the target element type
 * @return A column containing an array with elements decoded to the target type
 */
@Nonnull
public static Column unwrapVariantArray(
    @Nonnull final Column variantArray, @Nonnull final DataType targetType) {
  final String schemaDdl = targetType.sql();
  return unwrapVariantArray(variantArray, schemaDdl);
}

/**
 * Performs a recursive tree traversal with Variant-based schema unification, collecting all
 * extracted values across nesting levels regardless of structural differences.
 *
 * <p>This method addresses schema divergence when traversing self-referential FHIR structures:
 * the extractor may yield arrays with different struct schemas at each nesting level (deeper
 * levels carry fewer fields due to encoding truncation). To make those arrays concatenable,
 * every extracted element is first converted to Spark's Variant type as an intermediate
 * representation. Once the traversal has collected all results, the combined
 * {@code Array[Variant]} is decoded back to the target schema — the fullest, level-0 schema —
 * which is determined lazily at Catalyst analysis time.
 *
 * @param value The starting value column to traverse
 * @param extractor An extraction operation that produces an array at each node
 * @param traversals A list of traversal operations for reaching child nodes
 * @param maxDepth The maximum recursion depth for same-type traversals
 * @param errorOnDepthExhaustion If true, throws an error when same-type depth is exhausted
 *     instead of returning an empty array
 * @return A Column containing an array of extracted values, all conforming to the level-0 schema
 */
@Nonnull
public static Column variantTransformTree(
    @Nonnull final Column value,
    @Nonnull final UnaryOperator<Column> extractor,
    @Nonnull final List<UnaryOperator<Column>> traversals,
    final int maxDepth,
    final boolean errorOnDepthExhaustion) {

  // Applying the raw extractor to the input value yields the level-0 array type; the target
  // element type is derived from it during Catalyst analysis.
  final Expression schemaReference = liftToExpression(extractor).apply(expression(value));

  // Traverse the tree with an extractor whose results are wrapped as Variant, leaving the
  // traversal operations untouched.
  final Column variantResults =
      transformTree(
          value, wrapWithVariant(extractor), traversals, maxDepth, errorOnDepthExhaustion);

  // Defer the Variant unwrapping: UnresolvedVariantUnwrap resolves once both the traversal
  // result and the schema reference are resolved, decoding each element to the target type.
  return variantUnwrap(variantResults, column(schemaReference));
}

/**
 * Performs a recursive tree traversal with Variant-based schema unification, collecting all
 * extracted values across nesting levels regardless of structural differences.
 *
 * <p>Convenience overload that stops silently when the same-type recursion depth is exhausted.
 *
 * @param value The starting value column to traverse
 * @param extractor An extraction operation that produces an array at each node
 * @param traversals A list of traversal operations for reaching child nodes
 * @param maxDepth The maximum recursion depth for same-type traversals
 * @return A Column containing an array of extracted values, all conforming to the level-0 schema
 */
@Nonnull
public static Column variantTransformTree(
    @Nonnull final Column value,
    @Nonnull final UnaryOperator<Column> extractor,
    @Nonnull final List<UnaryOperator<Column>> traversals,
    final int maxDepth) {
  // Depth exhaustion is not treated as an error in this overload.
  final boolean errorOnDepthExhaustion = false;
  return variantTransformTree(value, extractor, traversals, maxDepth, errorOnDepthExhaustion);
}

/**
 * Converts an {@code Array[Variant]} column back to a typed array using deferred schema
 * resolution. The target element type is not known up front; it is taken at Catalyst analysis
 * time from a schema reference expression, which should resolve to the fullest (level-0) array
 * type.
 *
 * <p>Used by {@code repeatAll()} to decode the concatenated Variant results from all nesting
 * levels back to the target struct type, with fields missing at shallower levels filled with
 * null.
 *
 * @param variantArray The column containing {@code Array[Variant]}
 * @param schemaRef A column whose resolved type determines the target element schema (should be
 *     an array type; the element type will be extracted)
 * @param failOnError Whether to throw an error when a Variant element cannot be decoded to the
 *     target schema. When {@code true}, decoding failures cause a runtime exception; when {@code
 *     false}, they produce {@code null}.
 * @return A column containing an array with elements decoded to the schema reference's type
 */
@Nonnull
public static Column variantUnwrap(
    @Nonnull final Column variantArray,
    @Nonnull final Column schemaRef,
    final boolean failOnError) {
  final Expression arrayExpression = expression(variantArray);
  final Expression schemaExpression = expression(schemaRef);
  return column(new UnresolvedVariantUnwrap(arrayExpression, schemaExpression, failOnError));
}

/**
 * Converts an {@code Array[Variant]} column back to a typed array using deferred schema
 * resolution with strict error handling. Equivalent to {@link #variantUnwrap(Column, Column,
 * boolean)} with {@code failOnError = true}.
 *
 * @param variantArray The column containing {@code Array[Variant]}
 * @param schemaRef A column whose resolved type determines the target element schema (should be
 *     an array type; the element type will be extracted)
 * @return A column containing an array with elements decoded to the schema reference's type
 */
@Nonnull
public static Column variantUnwrap(
    @Nonnull final Column variantArray, @Nonnull final Column schemaRef) {
  // Strict decoding: failures surface as runtime exceptions rather than nulls.
  final boolean failOnError = true;
  return variantUnwrap(variantArray, schemaRef, failOnError);
}

/**
Expand Down
Loading
Loading