From 55e17988d86bc272bff84ac45b8a4bd48e9bde5a Mon Sep 17 00:00:00 2001 From: Thejdeep Gudivada Date: Tue, 10 Feb 2026 12:54:29 -0800 Subject: [PATCH 1/4] LIHADOOP-86205: Add view dependency tracking to capture full view lineage chain Adds a ThreadLocal ViewDependencyTracker that records the view dependency chain during Calcite's recursive view expansion. This enables downstream consumers (e.g., Spark lineage) to emit the full view-to-view-to-table dependency graph instead of just flattened base tables. - Add ViewDependency data class and ViewDependencyTracker in coral-common - Instrument ToRelConverter.convertView() to record top-level view entry - Instrument HiveViewExpander.expandView() to record nested view enter/exit - Instrument HiveDbSchema and CoralDatabaseSchema to record base table deps - Add getViewDependencies() API to CoralSpark - Add tests for nested, simple, and base table view dependency scenarios - Add __pycache__/ to .gitignore --- .gitignore | 3 +- .../coral/common/CoralDatabaseSchema.java | 2 + .../linkedin/coral/common/HiveDbSchema.java | 1 + .../linkedin/coral/common/ToRelConverter.java | 9 ++- .../linkedin/coral/common/ViewDependency.java | 55 +++++++++++++ .../coral/common/ViewDependencyTracker.java | 80 +++++++++++++++++++ .../coral/hive/hive2rel/HiveViewExpander.java | 14 +++- .../com/linkedin/coral/spark/CoralSpark.java | 37 ++++++++- .../linkedin/coral/spark/CoralSparkTest.java | 56 +++++++++++++ 9 files changed, 246 insertions(+), 11 deletions(-) create mode 100644 coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java create mode 100644 coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java diff --git a/.gitignore b/.gitignore index 9264fd73c..bf74bac9b 100644 --- a/.gitignore +++ b/.gitignore @@ -18,4 +18,5 @@ ligradle .DS_Store *.patch */metastore_db -.pyc \ No newline at end of file +.pyc +__pycache__/ \ No newline at end of file diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java index 37b967a63..9860d3b92 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java @@ -96,6 +96,7 @@ public Table getTable(String name) { // Dispatch based on CoralTable implementation type if (coralTable instanceof IcebergTable) { + ViewDependencyTracker.get().recordBaseDependency(dbName, name); return new IcebergCalciteTableAdapter((IcebergTable) coralTable); } else if (coralTable instanceof HiveTable) { HiveTable hiveTable = (HiveTable) coralTable; @@ -103,6 +104,7 @@ public Table getTable(String name) { if (hiveTable.tableType() == VIEW) { return new HiveCalciteViewAdapter(hiveTable, ImmutableList.of(CoralRootSchema.ROOT_SCHEMA, dbName)); } else { + ViewDependencyTracker.get().recordBaseDependency(dbName, name); return new HiveCalciteTableAdapter(hiveTable); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java index 689d40c81..c57f99bac 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java @@ -76,6 +76,7 @@ public Table getTable(String name) { case VIRTUAL_VIEW: return new HiveCalciteViewAdapter(table, ImmutableList.of(HiveSchema.ROOT_SCHEMA, dbName)); default: + ViewDependencyTracker.get().recordBaseDependency(dbName, name); return new HiveCalciteTableAdapter(table); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index f76150684..c8a934009 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -162,8 +162,13 @@ public RelNode convertSql(String sql) { * @return Calcite {@link RelNode} representation of hive view definition */ public RelNode convertView(String hiveDbName, String hiveViewName) { - SqlNode sqlNode = processView(hiveDbName, hiveViewName); - return toRel(sqlNode); + ViewDependencyTracker.get().enterView(hiveDbName, hiveViewName); + try { + SqlNode sqlNode = processView(hiveDbName, hiveViewName); + return toRel(sqlNode); + } finally { + ViewDependencyTracker.get().exitView(); + } } // TODO change back to protected once the relevant tests move to the common package diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java new file mode 100644 index 000000000..34542e2ac --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java @@ -0,0 +1,55 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common; + +import java.util.List; +import java.util.Objects; + + +/** + * Represents a single node in the view dependency chain. + * For a view "db.v1" that depends on "db.v2" and "db.t1", + * this would be: ViewDependency("db.v1", ["db.v2", "db.t1"]) + */ +public class ViewDependency { + private final String viewName; + private final List dependencies; + + public ViewDependency(String viewName, List dependencies) { + this.viewName = viewName; + this.dependencies = dependencies; + } + + public String getViewName() { + return viewName; + } + + public List getDependencies() { + return dependencies; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + ViewDependency that = (ViewDependency) o; + return Objects.equals(viewName, that.viewName) && Objects.equals(dependencies, that.dependencies); + } + + @Override + public int hashCode() { + return Objects.hash(viewName, dependencies); + } + + @Override + public String toString() { + return "ViewDependency{view=" + viewName + ", deps=" + dependencies + "}"; + } +} diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java new file mode 100644 index 000000000..56bd52b7f --- /dev/null +++ b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java @@ -0,0 +1,80 @@ +/** + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. + * Licensed under the BSD-2 Clause license. + * See LICENSE in the project root for license information. + */ +package com.linkedin.coral.common; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + + +/** + * Thread-local tracker that records view to [immediate dependencies] mappings + * during Calcite view expansion. + */ +public class ViewDependencyTracker { + private static final ThreadLocal INSTANCE = + ThreadLocal.withInitial(ViewDependencyTracker::new); + + private final Map> viewDeps = new LinkedHashMap<>(); + + private final Deque expansionStack = new ArrayDeque<>(); + + public static ViewDependencyTracker get() { + return INSTANCE.get(); + } + + public static void reset() { + INSTANCE.remove(); + } + + /** + * Called at the START of expanding a view. + */ + public void enterView(String dbName, String tableName) { + String qualifiedName = dbName + "." + tableName; + // Record this view as a dependency of the current parent + if (!expansionStack.isEmpty()) { + String parent = expansionStack.peek(); + viewDeps.computeIfAbsent(parent, k -> new ArrayList<>()).add(qualifiedName); + } + expansionStack.push(qualifiedName); + } + + /** + * Called when a base table (non-view) is encountered during view expansion. + */ + public void recordBaseDependency(String dbName, String tableName) { + String qualifiedName = dbName + "." + tableName; + if (!expansionStack.isEmpty()) { + String parent = expansionStack.peek(); + viewDeps.computeIfAbsent(parent, k -> new ArrayList<>()).add(qualifiedName); + } + } + + /** + * Called at the END of expanding a view. + */ + public void exitView() { + if (!expansionStack.isEmpty()) { + expansionStack.pop(); + } + } + + /** + * Returns the collected view dependency chain. + * Each entry represents a view and its immediate dependencies (both views and base tables). + */ + public List getViewDependencies() { + List result = new ArrayList<>(); + for (Map.Entry> entry : viewDeps.entrySet()) { + result.add(new ViewDependency(entry.getKey(), entry.getValue())); + } + return result; + } +} diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index ac81842a9..3b359c12e 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -1,5 +1,5 @@ /** - * Copyright 2017-2022 LinkedIn Corporation. All rights reserved. + * Copyright 2017-2026 LinkedIn Corporation. All rights reserved. * Licensed under the BSD-2 Clause license. * See LICENSE in the project root for license information. */ @@ -18,6 +18,7 @@ import org.apache.calcite.util.Util; import com.linkedin.coral.common.FuzzyUnionSqlRewriter; +import com.linkedin.coral.common.ViewDependencyTracker; /** @@ -44,8 +45,13 @@ public RelRoot expandView(RelDataType rowType, String queryString, List String dbName = Util.last(schemaPath); String tableName = viewPath.get(0); - SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) - .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); - return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + ViewDependencyTracker.get().enterView(dbName, tableName); + try { + SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) + .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); + return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); + } finally { + ViewDependencyTracker.get().exitView(); + } } } diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index b1694a600..7f7ab93b5 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -20,6 +20,8 @@ import com.linkedin.coral.com.google.common.collect.ImmutableList; import com.linkedin.coral.common.HiveMetastoreClient; +import com.linkedin.coral.common.ViewDependency; +import com.linkedin.coral.common.ViewDependencyTracker; import com.linkedin.coral.spark.containers.SparkRelInfo; import com.linkedin.coral.spark.containers.SparkUDFInfo; import com.linkedin.coral.spark.dialect.SparkSqlDialect; @@ -43,14 +45,16 @@ public class CoralSpark { private final List baseTables; + private final List viewDependencies; private final List sparkUDFInfoList; private final HiveMetastoreClient hiveMetastoreClient; private final SqlNode sqlNode; private final String sparkSql; - private CoralSpark(List baseTables, List sparkUDFInfoList, String sparkSql, - HiveMetastoreClient hmsClient, SqlNode sqlNode) { + private CoralSpark(List baseTables, List viewDependencies, + List sparkUDFInfoList, String sparkSql, HiveMetastoreClient hmsClient, SqlNode sqlNode) { this.baseTables = baseTables; + this.viewDependencies = viewDependencies; this.sparkUDFInfoList = sparkUDFInfoList; this.sparkSql = sparkSql; this.hiveMetastoreClient = hmsClient; @@ -74,13 +78,16 @@ private CoralSpark(List baseTables, List sparkUDFInfoList, * @return [[CoralSpark]] */ public static CoralSpark create(RelNode irRelNode, HiveMetastoreClient hmsClient) { + // Capture view dependencies that were collected during convertView -> expandView chain + List viewDeps = ViewDependencyTracker.get().getViewDependencies(); + SparkRelInfo sparkRelInfo = IRRelToSparkRelTransformer.transform(irRelNode); Set sparkUDFInfos = sparkRelInfo.getSparkUDFInfos(); RelNode sparkRelNode = sparkRelInfo.getSparkRelNode(); SqlNode sparkSqlNode = constructSparkSqlNode(sparkRelNode, sparkUDFInfos, hmsClient); String sparkSQL = constructSparkSQL(sparkSqlNode); List baseTables = constructBaseTables(sparkRelNode); - return new CoralSpark(baseTables, ImmutableList.copyOf(sparkUDFInfos), sparkSQL, hmsClient, sparkSqlNode); + return new CoralSpark(baseTables, viewDeps, ImmutableList.copyOf(sparkUDFInfos), sparkSQL, hmsClient, sparkSqlNode); } /** @@ -99,6 +106,9 @@ public static CoralSpark create(RelNode irRelNode, Schema schema, HiveMetastoreC } private static CoralSpark createWithAlias(RelNode irRelNode, List aliases, HiveMetastoreClient hmsClient) { + // Capture view dependencies that were collected during convertView -> expandView chain + List viewDeps = ViewDependencyTracker.get().getViewDependencies(); + SparkRelInfo sparkRelInfo = IRRelToSparkRelTransformer.transform(irRelNode); Set sparkUDFInfos = sparkRelInfo.getSparkUDFInfos(); RelNode sparkRelNode = sparkRelInfo.getSparkRelNode(); @@ -111,7 +121,7 @@ private static CoralSpark createWithAlias(RelNode irRelNode, List aliase } String sparkSQL = constructSparkSQL(sparkSqlNode); List baseTables = constructBaseTables(sparkRelNode); - return new CoralSpark(baseTables, ImmutableList.copyOf(sparkUDFInfos), sparkSQL, hmsClient, sparkSqlNode); + return new CoralSpark(baseTables, viewDeps, ImmutableList.copyOf(sparkUDFInfos), sparkSQL, hmsClient, sparkSqlNode); } private static SqlNode constructSparkSqlNode(RelNode sparkRelNode, Set sparkUDFInfos, @@ -165,6 +175,25 @@ public List getBaseTables() { return baseTables; } + /** + * Getter for the view dependency chain collected during view expansion. + * Each {@link ViewDependency} represents a view and its immediate dependencies + * (which can be other views or base tables) in "database_name.table_name" format. + * + *

For example, if view "db.v1" depends on view "db.v2" and table "db.t1", + * and "db.v2" depends on tables "db.t3" and "db.t4", this returns: + *

+   * [ViewDependency("db.v1", ["db.v2", "db.t1"]),
+   *  ViewDependency("db.v2", ["db.t3", "db.t4"])]
+   * 
+ * + * @return List of {@link ViewDependency} representing the view dependency chain, + * or an empty list if no views were involved. + */ + public List getViewDependencies() { + return viewDependencies; + } + /** * Getter for Spark UDF information list: * Additional information required to use an UDF (for details, read [[SparkUDFInfo]]) diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java index c95f6d7c6..64077cb6f 100644 --- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java @@ -26,6 +26,8 @@ import org.testng.annotations.Test; import com.linkedin.coral.com.google.common.collect.ImmutableList; +import com.linkedin.coral.common.ViewDependency; +import com.linkedin.coral.common.ViewDependencyTracker; import com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry; import com.linkedin.coral.spark.containers.SparkUDFInfo; import com.linkedin.coral.spark.exceptions.UnsupportedUDFException; @@ -74,6 +76,60 @@ public void testGetBaseTablesFromView() { assertTrue(base_tables.contains("default.bar")); } + @Test + public void testGetViewDependenciesFromNestedView() { + // foo_bar_view depends on foo_view (a view) and bar (a table) + // foo_view depends on foo (a table) + // Expected chain: + // foo_bar_view -> [foo_view, bar] + // foo_view -> [foo] + ViewDependencyTracker.reset(); + RelNode relNode = TestUtils.toRelNode("default", "foo_bar_view"); + CoralSpark coralSpark = createCoralSpark(relNode); + List viewDeps = coralSpark.getViewDependencies(); + + assertFalse(viewDeps.isEmpty(), "Expected view dependencies for nested view"); + + ViewDependency fooBarViewDep = + viewDeps.stream().filter(vd -> vd.getViewName().equals("default.foo_bar_view")).findFirst().orElse(null); + assertNotNull(fooBarViewDep, "Expected dependency entry for foo_bar_view"); + assertTrue(fooBarViewDep.getDependencies().contains("default.foo_view"), "foo_bar_view should depend on foo_view"); + assertTrue(fooBarViewDep.getDependencies().contains("default.bar"), "foo_bar_view should depend on bar"); + + ViewDependency fooViewDep = + viewDeps.stream().filter(vd -> vd.getViewName().equals("default.foo_view")).findFirst().orElse(null); + assertNotNull(fooViewDep, "Expected dependency entry for foo_view"); + assertTrue(fooViewDep.getDependencies().contains("default.foo"), "foo_view should depend on foo"); + } + + @Test + public void testGetViewDependenciesFromSimpleView() { + // foo_view depends only on foo (a base table) + // Expected: single entry foo_view -> [foo] + ViewDependencyTracker.reset(); + RelNode relNode = TestUtils.toRelNode("default", "foo_view"); + CoralSpark coralSpark = createCoralSpark(relNode); + List viewDeps = coralSpark.getViewDependencies(); + + assertFalse(viewDeps.isEmpty(), "Expected view dependencies for simple view"); + assertEquals(viewDeps.size(), 1, "Expected exactly one view dependency entry"); + + ViewDependency dep = viewDeps.get(0); + assertEquals(dep.getViewName(), "default.foo_view"); + assertTrue(dep.getDependencies().contains("default.foo"), "foo_view should depend on foo"); + } + + @Test + public void testGetViewDependenciesFromBaseTable() { + // A base table should have no view dependencies + ViewDependencyTracker.reset(); + RelNode relNode = TestUtils.toRelNode("default", "foo"); + CoralSpark coralSpark = createCoralSpark(relNode); + List viewDeps = coralSpark.getViewDependencies(); + + assertTrue(viewDeps.isEmpty(), "Expected no view dependencies for a base table"); + } + @Test public void testQuotingKeywords() { RelNode relNode = TestUtils.toRelNode("default", "baz_view"); From 588161f702d13022c557d0dfa5d93e31c27ce338 Mon Sep 17 00:00:00 2001 From: Thejdeep Gudivada Date: Thu, 12 Feb 2026 07:09:00 -0800 Subject: [PATCH 2/4] Address comments - add validation for exited view --- .../com/linkedin/coral/common/CoralDatabaseSchema.java | 4 ++-- .../main/java/com/linkedin/coral/common/HiveDbSchema.java | 2 +- .../java/com/linkedin/coral/common/ToRelConverter.java | 6 +++++- .../com/linkedin/coral/common/ViewDependencyTracker.java | 7 ++++--- .../com/linkedin/coral/hive/hive2rel/HiveViewExpander.java | 5 ++++- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java index 9860d3b92..d496dd320 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java @@ -96,7 +96,7 @@ public Table getTable(String name) { // Dispatch based on CoralTable implementation type if (coralTable instanceof IcebergTable) { - ViewDependencyTracker.get().recordBaseDependency(dbName, name); + ViewDependencyTracker.get().recordBaseTableDependency(dbName, name); return new IcebergCalciteTableAdapter((IcebergTable) coralTable); } else if (coralTable instanceof HiveTable) { HiveTable hiveTable = (HiveTable) coralTable; @@ -104,7 +104,7 @@ public Table getTable(String name) { if (hiveTable.tableType() == VIEW) { return new HiveCalciteViewAdapter(hiveTable, ImmutableList.of(CoralRootSchema.ROOT_SCHEMA, dbName)); } else { - ViewDependencyTracker.get().recordBaseDependency(dbName, name); + ViewDependencyTracker.get().recordBaseTableDependency(dbName, name); return new HiveCalciteTableAdapter(hiveTable); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java index c57f99bac..bed4ba50e 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java @@ -76,7 +76,7 @@ public Table getTable(String name) { case VIRTUAL_VIEW: return new HiveCalciteViewAdapter(table, ImmutableList.of(HiveSchema.ROOT_SCHEMA, dbName)); default: - ViewDependencyTracker.get().recordBaseDependency(dbName, name); + ViewDependencyTracker.get().recordBaseTableDependency(dbName, name); return new HiveCalciteTableAdapter(table); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index c8a934009..1d86ead0c 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -11,6 +11,7 @@ import javax.annotation.Nonnull; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.apache.calcite.config.CalciteConnectionConfig; @@ -167,7 +168,10 @@ public RelNode convertView(String hiveDbName, String hiveViewName) { SqlNode sqlNode = processView(hiveDbName, hiveViewName); return toRel(sqlNode); } finally { - ViewDependencyTracker.get().exitView(); + String exitedView = ViewDependencyTracker.get().exitView(); + String expectedView = hiveDbName + "." + hiveViewName; + Preconditions.checkState(expectedView.equals(exitedView), + "View mismatch: expected %s but exited with %s", expectedView, exitedView); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java index 56bd52b7f..c1eeeaff5 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java @@ -49,7 +49,7 @@ public void enterView(String dbName, String tableName) { /** * Called when a base table (non-view) is encountered during view expansion. */ - public void recordBaseDependency(String dbName, String tableName) { + public void recordBaseTableDependency(String dbName, String tableName) { String qualifiedName = dbName + "." + tableName; if (!expansionStack.isEmpty()) { String parent = expansionStack.peek(); @@ -60,10 +60,11 @@ public void recordBaseDependency(String dbName, String tableName) { /** * Called at the END of expanding a view. */ - public void exitView() { + public String exitView() { if (!expansionStack.isEmpty()) { - expansionStack.pop(); + return expansionStack.pop(); } + return null; } /** diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index 3b359c12e..5587a2721 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -51,7 +51,10 @@ public RelRoot expandView(RelDataType rowType, String queryString, List .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); } finally { - ViewDependencyTracker.get().exitView(); + String exitedView = ViewDependencyTracker.get().exitView(); + String expectedView = dbName + "." + tableName; + Preconditions.checkState(expectedView.equals(exitedView), + "View mismatch: expected %s but exited with %s", expectedView, exitedView); } } } From b502f1c4c85ae48ea44c5badb4dd33b1a4b782b2 Mon Sep 17 00:00:00 2001 From: Thejdeep Gudivada Date: Fri, 13 Feb 2026 06:56:01 -0800 Subject: [PATCH 3/4] Prevent too many caller interfaces, introduce withViewExpansion --- .../linkedin/coral/common/ToRelConverter.java | 11 ++---- .../coral/common/ViewDependencyTracker.java | 35 ++++++++++--------- .../coral/hive/hive2rel/HiveViewExpander.java | 10 ++---- 3 files changed, 23 insertions(+), 33 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index 1d86ead0c..2bb3419d8 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -11,7 +11,6 @@ import javax.annotation.Nonnull; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.apache.calcite.config.CalciteConnectionConfig; @@ -163,16 +162,10 @@ public RelNode convertSql(String sql) { * @return Calcite {@link RelNode} representation of hive view definition */ public RelNode convertView(String hiveDbName, String hiveViewName) { - ViewDependencyTracker.get().enterView(hiveDbName, hiveViewName); - try { + return ViewDependencyTracker.get().withViewExpansion(hiveDbName, hiveViewName, () -> { SqlNode sqlNode = processView(hiveDbName, hiveViewName); return toRel(sqlNode); - } finally { - String exitedView = ViewDependencyTracker.get().exitView(); - String expectedView = hiveDbName + "." + hiveViewName; - Preconditions.checkState(expectedView.equals(exitedView), - "View mismatch: expected %s but exited with %s", expectedView, exitedView); - } + }); } // TODO change back to protected once the relevant tests move to the common package diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java index c1eeeaff5..72c0f03a3 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java @@ -11,6 +11,7 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.function.Supplier; /** @@ -34,37 +35,37 @@ public static void reset() { } /** - * Called at the START of expanding a view. + * Tracks a view expansion, recording the view as a dependency of any parent view + * currently being expanded, then executing the supplied work within the scope of + * this view. The expansion stack is managed automatically. */ - public void enterView(String dbName, String tableName) { + public T withViewExpansion(String dbName, String tableName, Supplier work) { String qualifiedName = dbName + "." + tableName; - // Record this view as a dependency of the current parent if (!expansionStack.isEmpty()) { String parent = expansionStack.peek(); viewDeps.computeIfAbsent(parent, k -> new ArrayList<>()).add(qualifiedName); } expansionStack.push(qualifiedName); + try { + return work.get(); + } finally { + expansionStack.pop(); + } } /** - * Called when a base table (non-view) is encountered during view expansion. + * Records a base table (non-view) as a dependency of the view currently being expanded. */ public void recordBaseTableDependency(String dbName, String tableName) { String qualifiedName = dbName + "." + tableName; if (!expansionStack.isEmpty()) { String parent = expansionStack.peek(); - viewDeps.computeIfAbsent(parent, k -> new ArrayList<>()).add(qualifiedName); - } - } - - /** - * Called at the END of expanding a view. - */ - public String exitView() { - if (!expansionStack.isEmpty()) { - return expansionStack.pop(); + // Skip self-dependency: this occurs when convertView is called on a base table, + // which pushes the table name onto the stack and then resolves itself. + if (!qualifiedName.equals(parent)) { + viewDeps.computeIfAbsent(parent, k -> new ArrayList<>()).add(qualifiedName); + } } - return null; } /** @@ -74,7 +75,9 @@ public String exitView() { public List getViewDependencies() { List result = new ArrayList<>(); for (Map.Entry> entry : viewDeps.entrySet()) { - result.add(new ViewDependency(entry.getKey(), entry.getValue())); + if (!entry.getValue().isEmpty()) { + result.add(new ViewDependency(entry.getKey(), entry.getValue())); + } } return result; } diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index 5587a2721..01399f8a8 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -45,16 +45,10 @@ public RelRoot expandView(RelDataType rowType, String queryString, List String dbName = Util.last(schemaPath); String tableName = viewPath.get(0); - ViewDependencyTracker.get().enterView(dbName, tableName); - try { + return ViewDependencyTracker.get().withViewExpansion(dbName, tableName, () -> { SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); - } finally { - String exitedView = ViewDependencyTracker.get().exitView(); - String expectedView = dbName + "." + tableName; - Preconditions.checkState(expectedView.equals(exitedView), - "View mismatch: expected %s but exited with %s", expectedView, exitedView); - } + }); } } From 991908e98b00ff9f263c54c5342113c37546f88c Mon Sep 17 00:00:00 2001 From: Thejdeep Gudivada Date: Fri, 13 Feb 2026 07:29:49 -0800 Subject: [PATCH 4/4] Store catalog information in deps --- .../coral/common/CoralDatabaseSchema.java | 4 ++-- .../linkedin/coral/common/HiveDbSchema.java | 2 +- .../linkedin/coral/common/ToRelConverter.java | 2 +- .../linkedin/coral/common/ViewDependency.java | 5 +++-- .../coral/common/ViewDependencyTracker.java | 11 ++++++---- .../coral/hive/hive2rel/HiveViewExpander.java | 2 +- .../com/linkedin/coral/spark/CoralSpark.java | 11 +++++----- .../linkedin/coral/spark/CoralSparkTest.java | 20 +++++++++---------- 8 files changed, 31 insertions(+), 26 deletions(-) diff --git a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java index d496dd320..5735209a9 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/CoralDatabaseSchema.java @@ -96,7 +96,7 @@ public Table getTable(String name) { // Dispatch based on CoralTable implementation type if (coralTable instanceof IcebergTable) { - ViewDependencyTracker.get().recordBaseTableDependency(dbName, name); + ViewDependencyTracker.get().recordBaseTableDependency(ViewDependencyTracker.OPENHOUSE_CATALOG, dbName, name); return new IcebergCalciteTableAdapter((IcebergTable) coralTable); } else if (coralTable instanceof HiveTable) { HiveTable hiveTable = (HiveTable) coralTable; @@ -104,7 +104,7 @@ public Table getTable(String name) { if (hiveTable.tableType() == VIEW) { return new HiveCalciteViewAdapter(hiveTable, ImmutableList.of(CoralRootSchema.ROOT_SCHEMA, dbName)); } else { - ViewDependencyTracker.get().recordBaseTableDependency(dbName, name); + ViewDependencyTracker.get().recordBaseTableDependency(ViewDependencyTracker.HIVE_CATALOG, dbName, name); return new HiveCalciteTableAdapter(hiveTable); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java b/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java index bed4ba50e..e1d9368b5 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/HiveDbSchema.java @@ -76,7 +76,7 @@ public Table getTable(String name) { case VIRTUAL_VIEW: return new HiveCalciteViewAdapter(table, ImmutableList.of(HiveSchema.ROOT_SCHEMA, dbName)); default: - ViewDependencyTracker.get().recordBaseTableDependency(dbName, name); + ViewDependencyTracker.get().recordBaseTableDependency(ViewDependencyTracker.HIVE_CATALOG, dbName, name); return new HiveCalciteTableAdapter(table); } } diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java index 2bb3419d8..eb2739c7b 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ToRelConverter.java @@ -162,7 +162,7 @@ public RelNode convertSql(String sql) { * @return Calcite {@link RelNode} representation of hive view definition */ public RelNode convertView(String hiveDbName, String hiveViewName) { - return ViewDependencyTracker.get().withViewExpansion(hiveDbName, hiveViewName, () -> { + return ViewDependencyTracker.get().withViewExpansion(ViewDependencyTracker.HIVE_CATALOG, hiveDbName, hiveViewName, () -> { SqlNode sqlNode = processView(hiveDbName, hiveViewName); return toRel(sqlNode); }); diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java index 34542e2ac..1b5e8deaf 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependency.java @@ -11,8 +11,9 @@ /** * Represents a single node in the view dependency chain. - * For a view "db.v1" that depends on "db.v2" and "db.t1", - * this would be: ViewDependency("db.v1", ["db.v2", "db.t1"]) + * Names are in "catalog.db.table" format (e.g. "hive.db.v1", "openhouse.db.t1"). + * For a view "hive.db.v1" that depends on "hive.db.v2" and "openhouse.db.t1", + * this would be: ViewDependency("hive.db.v1", ["hive.db.v2", "openhouse.db.t1"]) */ public class ViewDependency { private final String viewName; diff --git a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java index 72c0f03a3..3ec8092b4 100644 --- a/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java +++ b/coral-common/src/main/java/com/linkedin/coral/common/ViewDependencyTracker.java @@ -19,6 +19,9 @@ * during Calcite view expansion. */ public class ViewDependencyTracker { + public static final String HIVE_CATALOG = "hive"; + public static final String OPENHOUSE_CATALOG = "openhouse"; + private static final ThreadLocal INSTANCE = ThreadLocal.withInitial(ViewDependencyTracker::new); @@ -39,8 +42,8 @@ public static void reset() { * currently being expanded, then executing the supplied work within the scope of * this view. The expansion stack is managed automatically. */ - public T withViewExpansion(String dbName, String tableName, Supplier work) { - String qualifiedName = dbName + "." + tableName; + public T withViewExpansion(String catalog, String dbName, String tableName, Supplier work) { + String qualifiedName = catalog + "." + dbName + "." + tableName; if (!expansionStack.isEmpty()) { String parent = expansionStack.peek(); viewDeps.computeIfAbsent(parent, k -> new ArrayList<>()).add(qualifiedName); @@ -56,8 +59,8 @@ public T withViewExpansion(String dbName, String tableName, Supplier work /** * Records a base table (non-view) as a dependency of the view currently being expanded. */ - public void recordBaseTableDependency(String dbName, String tableName) { - String qualifiedName = dbName + "." + tableName; + public void recordBaseTableDependency(String catalog, String dbName, String tableName) { + String qualifiedName = catalog + "." + dbName + "." + tableName; if (!expansionStack.isEmpty()) { String parent = expansionStack.peek(); // Skip self-dependency: this occurs when convertView is called on a base table, diff --git a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java index 01399f8a8..38015bb33 100644 --- a/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java +++ b/coral-hive/src/main/java/com/linkedin/coral/hive/hive2rel/HiveViewExpander.java @@ -45,7 +45,7 @@ public RelRoot expandView(RelDataType rowType, String queryString, List String dbName = Util.last(schemaPath); String tableName = viewPath.get(0); - return ViewDependencyTracker.get().withViewExpansion(dbName, tableName, () -> { + return ViewDependencyTracker.get().withViewExpansion(ViewDependencyTracker.HIVE_CATALOG, dbName, tableName, () -> { SqlNode sqlNode = hiveToRelConverter.processView(dbName, tableName) .accept(new FuzzyUnionSqlRewriter(tableName, hiveToRelConverter)); return hiveToRelConverter.getSqlToRelConverter().convertQuery(sqlNode, true, true); diff --git a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java index 7f7ab93b5..a9706bb97 100644 --- a/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java +++ b/coral-spark/src/main/java/com/linkedin/coral/spark/CoralSpark.java @@ -178,13 +178,14 @@ public List getBaseTables() { /** * Getter for the view dependency chain collected during view expansion. * Each {@link ViewDependency} represents a view and its immediate dependencies - * (which can be other views or base tables) in "database_name.table_name" format. + * (which can be other views or base tables) in "catalog.database_name.table_name" format + * (e.g. "hive.db.v1", "openhouse.db.t1"). * - *

For example, if view "db.v1" depends on view "db.v2" and table "db.t1", - * and "db.v2" depends on tables "db.t3" and "db.t4", this returns: + *

For example, if view "hive.db.v1" depends on view "hive.db.v2" and table "openhouse.db.t1", + * and "hive.db.v2" depends on tables "hive.db.t3" and "openhouse.db.t4", this returns: *

-   * [ViewDependency("db.v1", ["db.v2", "db.t1"]),
-   *  ViewDependency("db.v2", ["db.t3", "db.t4"])]
+   * [ViewDependency("hive.db.v1", ["hive.db.v2", "openhouse.db.t1"]),
+   *  ViewDependency("hive.db.v2", ["hive.db.t3", "openhouse.db.t4"])]
    * 
* * @return List of {@link ViewDependency} representing the view dependency chain, diff --git a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java index 64077cb6f..050da49aa 100644 --- a/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java +++ b/coral-spark/src/test/java/com/linkedin/coral/spark/CoralSparkTest.java @@ -81,8 +81,8 @@ public void testGetViewDependenciesFromNestedView() { // foo_bar_view depends on foo_view (a view) and bar (a table) // foo_view depends on foo (a table) // Expected chain: - // foo_bar_view -> [foo_view, bar] - // foo_view -> [foo] + // hive.default.foo_bar_view -> [hive.default.foo_view, hive.default.bar] + // hive.default.foo_view -> [hive.default.foo] ViewDependencyTracker.reset(); RelNode relNode = TestUtils.toRelNode("default", "foo_bar_view"); CoralSpark coralSpark = createCoralSpark(relNode); @@ -91,21 +91,21 @@ public void testGetViewDependenciesFromNestedView() { assertFalse(viewDeps.isEmpty(), "Expected view dependencies for nested view"); ViewDependency fooBarViewDep = - viewDeps.stream().filter(vd -> vd.getViewName().equals("default.foo_bar_view")).findFirst().orElse(null); + viewDeps.stream().filter(vd -> vd.getViewName().equals("hive.default.foo_bar_view")).findFirst().orElse(null); assertNotNull(fooBarViewDep, "Expected dependency entry for foo_bar_view"); - assertTrue(fooBarViewDep.getDependencies().contains("default.foo_view"), "foo_bar_view should depend on foo_view"); - assertTrue(fooBarViewDep.getDependencies().contains("default.bar"), "foo_bar_view should depend on bar"); + assertTrue(fooBarViewDep.getDependencies().contains("hive.default.foo_view"), "foo_bar_view should depend on foo_view"); + assertTrue(fooBarViewDep.getDependencies().contains("hive.default.bar"), "foo_bar_view should depend on bar"); ViewDependency fooViewDep = - viewDeps.stream().filter(vd -> vd.getViewName().equals("default.foo_view")).findFirst().orElse(null); + viewDeps.stream().filter(vd -> vd.getViewName().equals("hive.default.foo_view")).findFirst().orElse(null); assertNotNull(fooViewDep, "Expected dependency entry for foo_view"); - assertTrue(fooViewDep.getDependencies().contains("default.foo"), "foo_view should depend on foo"); + assertTrue(fooViewDep.getDependencies().contains("hive.default.foo"), "foo_view should depend on foo"); } @Test public void testGetViewDependenciesFromSimpleView() { // foo_view depends only on foo (a base table) - // Expected: single entry foo_view -> [foo] + // Expected: single entry hive.default.foo_view -> [hive.default.foo] ViewDependencyTracker.reset(); RelNode relNode = TestUtils.toRelNode("default", "foo_view"); CoralSpark coralSpark = createCoralSpark(relNode); @@ -115,8 +115,8 @@ public void testGetViewDependenciesFromSimpleView() { assertEquals(viewDeps.size(), 1, "Expected exactly one view dependency entry"); ViewDependency dep = viewDeps.get(0); - assertEquals(dep.getViewName(), "default.foo_view"); - assertTrue(dep.getDependencies().contains("default.foo"), "foo_view should depend on foo"); + assertEquals(dep.getViewName(), "hive.default.foo_view"); + assertTrue(dep.getDependencies().contains("hive.default.foo"), "foo_view should depend on foo"); } @Test