From 360bbf88de962acaaf6a63f457c49f0bb73e6e86 Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Thu, 5 Mar 2026 11:48:07 -0500 Subject: [PATCH 1/5] Add missing extension YAMLs and test for completeness Closes #722 --- core/build.gradle.kts | 1 + .../extension/DefaultExtensionCatalog.java | 11 +++ .../substrait/extension/SimpleExtension.java | 5 ++ .../DefaultExtensionCatalogTest.java | 81 ++++++++++++++++++- 4 files changed, 97 insertions(+), 1 deletion(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index 421cee46c..c36987d00 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -88,6 +88,7 @@ dependencies { testImplementation(platform(libs.junit.bom)) testImplementation(libs.protobuf.java.util) testImplementation(libs.guava) + testImplementation(libs.bundles.jackson) testImplementation(libs.junit.jupiter) testRuntimeOnly(libs.junit.platform.launcher) diff --git a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java index d8b6b1fa6..f14256d85 100644 --- a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java +++ b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java @@ -14,6 +14,10 @@ public class DefaultExtensionCatalog { public static final String FUNCTIONS_AGGREGATE_APPROX = "extension:io.substrait:functions_aggregate_approx"; + /** Extension identifier for aggregate functions with decimal output. */ + public static final String FUNCTIONS_AGGREGATE_DECIMAL_OUTPUT = + "extension:io.substrait:functions_aggregate_decimal_output"; + /** Extension identifier for generic aggregate functions. */ public static final String FUNCTIONS_AGGREGATE_GENERIC = "extension:io.substrait:functions_aggregate_generic"; @@ -37,6 +41,9 @@ public class DefaultExtensionCatalog { /** Extension identifier for geometry functions. */ public static final String FUNCTIONS_GEOMETRY = "extension:io.substrait:functions_geometry"; + /** Extension identifier for list functions. */ + public static final String FUNCTIONS_LIST = "extension:io.substrait:functions_list"; + /** Extension identifier for logarithmic functions. */ public static final String FUNCTIONS_LOGARITHMIC = "extension:io.substrait:functions_logarithmic"; @@ -78,11 +85,15 @@ private static SimpleExtension.ExtensionCollection loadDefaultCollection() { "arithmetic", "comparison", "datetime", + "geometry", "list", "logarithmic", "rounding", "rounding_decimal", + "set", "string") + // TODO(#688): functions_list.yaml is not loaded here because it uses lambda type + // expressions (e.g. func any2>) that are not yet supported by the type parser. .stream() .map(c -> String.format("/functions_%s.yaml", c)) .collect(Collectors.toList()); diff --git a/core/src/main/java/io/substrait/extension/SimpleExtension.java b/core/src/main/java/io/substrait/extension/SimpleExtension.java index d98c632f6..1b3f05820 100644 --- a/core/src/main/java/io/substrait/extension/SimpleExtension.java +++ b/core/src/main/java/io/substrait/extension/SimpleExtension.java @@ -718,6 +718,11 @@ public ScalarFunctionVariant getScalarFunction(FunctionAnchor anchor) { anchor.key(), anchor.urn())); } + /** Returns true if the given URN has any functions or types loaded in this collection. */ + public boolean containsUrn(String urn) { + return urnSupplier.get().contains(urn) || types().stream().anyMatch(t -> t.urn().equals(urn)); + } + private void checkUrn(String name) { if (urnSupplier.get().contains(name)) { return; diff --git a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java index 7aed4e961..ed5919e1d 100644 --- a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java +++ b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java @@ -1,13 +1,92 @@ package io.substrait.extension; +import static io.substrait.extension.DefaultExtensionCatalog.DEFAULT_COLLECTION; import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import java.io.IOException; +import java.io.InputStream; +import java.net.URISyntaxException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.junit.jupiter.api.Test; +/** + * Verifies that every extension YAML in substrait/extensions is loaded by {@link + * DefaultExtensionCatalog}. + */ class DefaultExtensionCatalogTest { + private static final Set UNSUPPORTED_FILES = + Set.of( + // TODO: aggregate_decimal_output defines count and approx_count_distinct with + // decimal<38,0> return types instead of i64. When loaded alongside aggregate_generic, + // the same function key (e.g. count:any) maps to the same Calcite operator twice, + // which breaks the reverse lookup in FunctionConverter.getSqlOperatorFromSubstraitFunc. + // Fixing this requires either deduplicating the operator map or adding type-based + // disambiguation for aggregate functions. + "functions_aggregate_decimal_output.yaml", + "functions_list.yaml", // TODO(#688): remove once lambda types are supported + "type_variations.yaml", // type variations not yet supported by extension loader + "unknown.yaml" // unknown type extension not yet loaded + ); + + private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory()); + @Test void defaultCollectionLoads() { - assertNotNull(DefaultExtensionCatalog.DEFAULT_COLLECTION); + assertNotNull(DEFAULT_COLLECTION); + } + + @Test + void allExtensionYamlFilesAreLoaded() throws IOException, URISyntaxException { + List yamlFiles = getExtensionYamlFiles(); + + for (String fileName : yamlFiles) { + if (UNSUPPORTED_FILES.contains(fileName)) { + continue; + } + String urn = parseUrn(fileName); + assertNotNull(urn, fileName + " does not contain a URN field"); + assertTrue( + DEFAULT_COLLECTION.containsUrn(urn), + fileName + " not loaded by DefaultExtensionCatalog (urn: " + urn + ")"); + } + } + + private static String parseUrn(String resourceName) throws IOException { + try (InputStream is = + DefaultExtensionCatalogTest.class.getClassLoader().getResourceAsStream(resourceName)) { + assertNotNull(is, "Resource not found on classpath: " + resourceName); + JsonNode doc = YAML_MAPPER.readTree(is); + JsonNode urnNode = doc.get("urn"); + return urnNode == null ? null : urnNode.asText(); + } + } + + /** Discovers extension YAML files on the classpath by locating a known resource. */ + private static List getExtensionYamlFiles() throws URISyntaxException, IOException { + var knownResource = + DefaultExtensionCatalogTest.class.getClassLoader().getResource("functions_boolean.yaml"); + if (knownResource == null) { + fail("Could not locate functions_boolean.yaml on classpath"); + } + Path resourceDir = Paths.get(knownResource.toURI()).getParent(); + try (Stream files = Files.list(resourceDir)) { + return files + .filter(p -> p.toString().endsWith(".yaml")) + .map(p -> p.getFileName().toString()) + .sorted() + .collect(Collectors.toList()); + } } } From 2e8ab6912226e9478682ef03fd8b7c78c0775f1b Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 8 Apr 2026 15:51:28 -0400 Subject: [PATCH 2/5] refactor: improve extension catalog test and load more YAMLs - Switch test to classpath-based discovery instead of filesystem path - Load geometry and aggregate_decimal_output extensions - Remove duplicate FUNCTIONS_LIST constant and stale TODO - Verify unsupported files are NOT in catalog (prevents stale skip list) - Reduce skip list to only type_variations and unknown Co-Authored-By: Claude Opus 4.6 (1M context) --- .../extension/DefaultExtensionCatalog.java | 6 +-- .../DefaultExtensionCatalogTest.java | 41 +++++++++++-------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java index f14256d85..bcf851380 100644 --- a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java +++ b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java @@ -57,9 +57,6 @@ public class DefaultExtensionCatalog { /** Extension identifier for set functions. */ public static final String FUNCTIONS_SET = "extension:io.substrait:functions_set"; - /** Extension identifier for list functions. */ - public static final String FUNCTIONS_LIST = "extension:io.substrait:functions_list"; - /** Extension identifier for string functions. */ public static final String FUNCTIONS_STRING = "extension:io.substrait:functions_string"; @@ -79,6 +76,7 @@ private static SimpleExtension.ExtensionCollection loadDefaultCollection() { List defaultFiles = Arrays.asList( "boolean", + "aggregate_decimal_output", "aggregate_generic", "aggregate_approx", "arithmetic_decimal", @@ -92,8 +90,6 @@ private static SimpleExtension.ExtensionCollection loadDefaultCollection() { "rounding_decimal", "set", "string") - // TODO(#688): functions_list.yaml is not loaded here because it uses lambda type - // expressions (e.g. func any2>) that are not yet supported by the type parser. .stream() .map(c -> String.format("/functions_%s.yaml", c)) .collect(Collectors.toList()); diff --git a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java index ed5919e1d..ec10faf00 100644 --- a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java +++ b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java @@ -1,6 +1,7 @@ package io.substrait.extension; import static io.substrait.extension.DefaultExtensionCatalog.DEFAULT_COLLECTION; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -26,19 +27,18 @@ */ class DefaultExtensionCatalogTest { + /** + * Extension YAML files that are intentionally not loaded by {@link DefaultExtensionCatalog}. + * Adding a file here requires a comment explaining why it cannot be loaded. + */ private static final Set UNSUPPORTED_FILES = Set.of( - // TODO: aggregate_decimal_output defines count and approx_count_distinct with - // decimal<38,0> return types instead of i64. When loaded alongside aggregate_generic, - // the same function key (e.g. count:any) maps to the same Calcite operator twice, - // which breaks the reverse lookup in FunctionConverter.getSqlOperatorFromSubstraitFunc. - // Fixing this requires either deduplicating the operator map or adding type-based - // disambiguation for aggregate functions. - "functions_aggregate_decimal_output.yaml", - "functions_list.yaml", // TODO(#688): remove once lambda types are supported - "type_variations.yaml", // type variations not yet supported by extension loader - "unknown.yaml" // unknown type extension not yet loaded - ); + // type_variations.yaml only defines type variations, which are not tracked by + // ExtensionCollection (no functions or types), so containsUrn cannot verify it. + "type_variations.yaml", + // unknown.yaml uses an "unknown" type that is not a recognized type literal or + // parameterized type, causing Function.constructKey to fail at load time. + "unknown.yaml"); private static final ObjectMapper YAML_MAPPER = new ObjectMapper(new YAMLFactory()); @@ -48,18 +48,23 @@ void defaultCollectionLoads() { } @Test - void allExtensionYamlFilesAreLoaded() throws IOException, URISyntaxException { + void allExtensionYamlFilesAccountedFor() throws IOException, URISyntaxException { List yamlFiles = getExtensionYamlFiles(); for (String fileName : yamlFiles) { - if (UNSUPPORTED_FILES.contains(fileName)) { - continue; - } String urn = parseUrn(fileName); assertNotNull(urn, fileName + " does not contain a URN field"); - assertTrue( - DEFAULT_COLLECTION.containsUrn(urn), - fileName + " not loaded by DefaultExtensionCatalog (urn: " + urn + ")"); + if (UNSUPPORTED_FILES.contains(fileName)) { + assertFalse( + DEFAULT_COLLECTION.containsUrn(urn), + fileName + + " is in UNSUPPORTED_FILES but is loaded by DefaultExtensionCatalog" + + " — remove it from UNSUPPORTED_FILES"); + } else { + assertTrue( + DEFAULT_COLLECTION.containsUrn(urn), + fileName + " not loaded by DefaultExtensionCatalog (urn: " + urn + ")"); + } } } From cf133370377d485374c9d396413aef7c18243d8c Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Wed, 8 Apr 2026 16:07:26 -0400 Subject: [PATCH 3/5] refactor: relocate extension YAMLs to substrait/extensions/ on classpath Move extension YAML files from the classpath root into a substrait/extensions/ subdirectory for better organization. Revert geometry and aggregate_decimal_output additions that break isthmus tests. Co-Authored-By: Claude Opus 4.6 (1M context) --- core/build.gradle.kts | 7 ++++++- .../extension/DefaultExtensionCatalog.java | 6 ++---- .../DefaultExtensionCatalogTest.java | 20 +++++++++++++++---- 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index c36987d00..ea4707e2c 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -230,13 +230,18 @@ sourceSets { main { antlr { setSrcDirs(listOf(file("${rootProject.projectDir}/substrait/grammar"))) } proto.srcDir("../substrait/proto") - resources.srcDir("../substrait/extensions") + // Extension YAMLs are relocated into substrait/extensions/ on the classpath + // via processResources below, rather than landing at the classpath root. resources.srcDir("build/generated/sources/manifest/") java.srcDir(file("build/generated/sources/antlr/main/java/")) java.srcDir("build/generated/sources/version/") } } +tasks.named("processResources") { + from("../substrait/extensions") { into("substrait/extensions") } +} + project.configure { module { resourceDirs.addAll( diff --git a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java index bcf851380..d5fe6a8cf 100644 --- a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java +++ b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java @@ -76,14 +76,12 @@ private static SimpleExtension.ExtensionCollection loadDefaultCollection() { List defaultFiles = Arrays.asList( "boolean", - "aggregate_decimal_output", "aggregate_generic", "aggregate_approx", "arithmetic_decimal", "arithmetic", "comparison", "datetime", - "geometry", "list", "logarithmic", "rounding", @@ -91,10 +89,10 @@ private static SimpleExtension.ExtensionCollection loadDefaultCollection() { "set", "string") .stream() - .map(c -> String.format("/functions_%s.yaml", c)) + .map(c -> String.format("/substrait/extensions/functions_%s.yaml", c)) .collect(Collectors.toList()); - defaultFiles.add("/extension_types.yaml"); + defaultFiles.add("/substrait/extensions/extension_types.yaml"); return SimpleExtension.load(defaultFiles); } diff --git a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java index ec10faf00..d8fcf578e 100644 --- a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java +++ b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -33,6 +34,14 @@ class DefaultExtensionCatalogTest { */ private static final Set UNSUPPORTED_FILES = Set.of( + // aggregate_decimal_output defines count and approx_count_distinct with decimal<38,0> + // return types instead of i64. When loaded alongside aggregate_generic, the same + // function key (e.g. count:any) maps to the same Calcite operator twice, which breaks + // the reverse lookup in FunctionConverter.getSqlOperatorFromSubstraitFunc. + "functions_aggregate_decimal_output.yaml", + // functions_geometry.yaml defines user-defined types (u!geometry) that are not + // supported by Calcite's type conversion in isthmus. + "functions_geometry.yaml", // type_variations.yaml only defines type variations, which are not tracked by // ExtensionCollection (no functions or types), so containsUrn cannot verify it. "type_variations.yaml", @@ -69,9 +78,10 @@ void allExtensionYamlFilesAccountedFor() throws IOException, URISyntaxException } private static String parseUrn(String resourceName) throws IOException { + String resourcePath = "substrait/extensions/" + resourceName; try (InputStream is = - DefaultExtensionCatalogTest.class.getClassLoader().getResourceAsStream(resourceName)) { - assertNotNull(is, "Resource not found on classpath: " + resourceName); + DefaultExtensionCatalogTest.class.getClassLoader().getResourceAsStream(resourcePath)) { + assertNotNull(is, "Resource not found on classpath: " + resourcePath); JsonNode doc = YAML_MAPPER.readTree(is); JsonNode urnNode = doc.get("urn"); return urnNode == null ? null : urnNode.asText(); @@ -80,8 +90,10 @@ private static String parseUrn(String resourceName) throws IOException { /** Discovers extension YAML files on the classpath by locating a known resource. */ private static List getExtensionYamlFiles() throws URISyntaxException, IOException { - var knownResource = - DefaultExtensionCatalogTest.class.getClassLoader().getResource("functions_boolean.yaml"); + URL knownResource = + DefaultExtensionCatalogTest.class + .getClassLoader() + .getResource("substrait/extensions/functions_boolean.yaml"); if (knownResource == null) { fail("Could not locate functions_boolean.yaml on classpath"); } From 0e5b9ba531bda3df14a7986c857d2bf942055f92 Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Thu, 9 Apr 2026 09:23:46 -0400 Subject: [PATCH 4/5] refactor: use ClassGraph for classpath-based extension YAML discovery Replace the filesystem hack (locating a known resource and walking its parent directory) with ClassGraph's classpath scanning API. Co-Authored-By: Claude Opus 4.6 (1M context) --- core/build.gradle.kts | 1 + .../DefaultExtensionCatalogTest.java | 36 +++++++------------ 2 files changed, 13 insertions(+), 24 deletions(-) diff --git a/core/build.gradle.kts b/core/build.gradle.kts index ea4707e2c..5e84490ae 100644 --- a/core/build.gradle.kts +++ b/core/build.gradle.kts @@ -89,6 +89,7 @@ dependencies { testImplementation(libs.protobuf.java.util) testImplementation(libs.guava) testImplementation(libs.bundles.jackson) + testImplementation(libs.classgraph) testImplementation(libs.junit.jupiter) testRuntimeOnly(libs.junit.platform.launcher) diff --git a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java index d8fcf578e..c98637de7 100644 --- a/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java +++ b/core/src/test/java/io/substrait/extension/DefaultExtensionCatalogTest.java @@ -4,22 +4,17 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; -import static org.junit.jupiter.api.Assertions.fail; import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; +import io.github.classgraph.ClassGraph; +import io.github.classgraph.ResourceList; +import io.github.classgraph.ScanResult; import java.io.IOException; import java.io.InputStream; -import java.net.URISyntaxException; -import java.net.URL; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; import java.util.List; import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; import org.junit.jupiter.api.Test; /** @@ -57,8 +52,9 @@ void defaultCollectionLoads() { } @Test - void allExtensionYamlFilesAccountedFor() throws IOException, URISyntaxException { + void allExtensionYamlFilesAccountedFor() throws IOException { List yamlFiles = getExtensionYamlFiles(); + assertFalse(yamlFiles.isEmpty(), "No YAML files found in substrait/extensions/"); for (String fileName : yamlFiles) { String urn = parseUrn(fileName); @@ -88,22 +84,14 @@ private static String parseUrn(String resourceName) throws IOException { } } - /** Discovers extension YAML files on the classpath by locating a known resource. */ - private static List getExtensionYamlFiles() throws URISyntaxException, IOException { - URL knownResource = - DefaultExtensionCatalogTest.class - .getClassLoader() - .getResource("substrait/extensions/functions_boolean.yaml"); - if (knownResource == null) { - fail("Could not locate functions_boolean.yaml on classpath"); - } - Path resourceDir = Paths.get(knownResource.toURI()).getParent(); - try (Stream files = Files.list(resourceDir)) { - return files - .filter(p -> p.toString().endsWith(".yaml")) - .map(p -> p.getFileName().toString()) + private static List getExtensionYamlFiles() { + try (ScanResult scan = + new ClassGraph().acceptPathsNonRecursive("substrait/extensions").scan()) { + ResourceList resources = scan.getResourcesWithExtension(".yaml"); + return resources.stream() + .map(r -> r.getPath().substring("substrait/extensions/".length())) .sorted() - .collect(Collectors.toList()); + .toList(); } } } From 2cc212ce5aeffe8f14c43e10212af8fedb018e5b Mon Sep 17 00:00:00 2001 From: Ben Bellick Date: Thu, 9 Apr 2026 09:31:01 -0400 Subject: [PATCH 5/5] fix: restore FUNCTIONS_LIST/SET ordering to match main Co-Authored-By: Claude Sonnet 4.6 (1M context) --- .../io/substrait/extension/DefaultExtensionCatalog.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java index d5fe6a8cf..25dc76cad 100644 --- a/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java +++ b/core/src/main/java/io/substrait/extension/DefaultExtensionCatalog.java @@ -41,9 +41,6 @@ public class DefaultExtensionCatalog { /** Extension identifier for geometry functions. */ public static final String FUNCTIONS_GEOMETRY = "extension:io.substrait:functions_geometry"; - /** Extension identifier for list functions. */ - public static final String FUNCTIONS_LIST = "extension:io.substrait:functions_list"; - /** Extension identifier for logarithmic functions. */ public static final String FUNCTIONS_LOGARITHMIC = "extension:io.substrait:functions_logarithmic"; @@ -57,6 +54,9 @@ public class DefaultExtensionCatalog { /** Extension identifier for set functions. */ public static final String FUNCTIONS_SET = "extension:io.substrait:functions_set"; + /** Extension identifier for list functions. */ + public static final String FUNCTIONS_LIST = "extension:io.substrait:functions_list"; + /** Extension identifier for string functions. */ public static final String FUNCTIONS_STRING = "extension:io.substrait:functions_string";