datahub-project · treff7es · Jun 2, 2026 · May 27, 2026 · May 27, 2026 · May 27, 2026
diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py b/metadata-ingestion/src/datahub/ingestion/source/sql/teradata.py
@@ -651,6 +651,39 @@ def optimized_get_pk_constraint(
     return {"constrained_columns": index_columns, "name": index_name}
 
 
+def _read_padded_char_field(row: Any, field: str) -> Optional[str]:
+    """Read a CHAR(N) field that Teradata returns space-padded, return it stripped.
+
+    Returns None when the field is missing or not a string — callers should
+    leave any existing value alone in that case."""
+    raw = getattr(row, field, None) if not isinstance(row, dict) else row.get(field)
+    return raw.strip() if isinstance(raw, str) else None
+
+
+def _strip_padded_nullable(row: Any, col_info: Dict[str, Any]) -> None:
+    """Re-derive col_info['nullable'] by stripping CHAR(1) padding from row.Nullable.
+
+    teradatasqlalchemy does a strict `row['Nullable'] == 'Y'` check, but
+    Teradata returns CHAR(1) values space-padded ('Y '/'N '), so the check
+    evaluates False for every nullable column. No-op when the row's Nullable
+    isn't a string (e.g. None for view columns without QVCI enabled)."""
+    val = _read_padded_char_field(row, "Nullable")
+    if val is not None:
+        col_info["nullable"] = val == "Y"
+
+
+def _strip_padded_autoincrement(row: Any, col_info: Dict[str, Any]) -> None:
+    """Re-derive col_info['autoincrement'] by stripping padding from row.IdColType.
+
+    Same root cause as _strip_padded_nullable: teradatasqlalchemy checks
+    `row['IdColType'] in ('GA', 'GD')` strictly, but Teradata returns IdColType
+    space-padded ('GA  '), so the check evaluates False for every identity
+    column. No-op when the raw value isn't a string."""
+    val = _read_padded_char_field(row, "IdColType")
+    if val is not None:
+        col_info["autoincrement"] = val in ("GA", "GD")
+
+
 def optimized_get_columns(
     self: Any,
     connection: Connection,
@@ -749,6 +782,8 @@ def optimized_get_columns(
     for row in res:
         try:
             col_info = self._get_column_info(row)
+            _strip_padded_nullable(row, col_info)
+            _strip_padded_autoincrement(row, col_info)
 
             # Add CommentString as comment field for column description
             if hasattr(row, "CommentString") and row.CommentString:

diff --git a/metadata-ingestion/tests/unit/test_teradata_source.py b/metadata-ingestion/tests/unit/test_teradata_source.py
@@ -4108,3 +4108,100 @@ def test_disabled_cursor_skips_streaming_entirely(self) -> None:
 
         conn.execution_options.assert_not_called()
         assert result is expected
+
+
+class TestCharPaddingFixes:
+    """Teradata returns CHAR(N) values space-padded over the wire. The library's
+    strict comparisons (`row['Nullable'] == 'Y'`, `row['IdColType'] in ('GA',
+    'GD')`) then evaluate False for every column, so genuinely nullable columns
+    hydrate as nullable=False and identity columns hydrate as
+    autoincrement=False. Exercise the fix through optimized_get_columns so we
+    catch a regression at the actual code path."""
+
+    def _call_with_row(self, row, library_col_info=None):
+        """Drive optimized_get_columns with a single mocked row.
+
+        library_col_info simulates what teradatasqlalchemy's _get_column_info
+        would return; the test then asserts our padding fixes override it."""
+        mock_dialect = MagicMock()
+        mock_dialect.default_schema_name = "mydb"
+        mock_dialect._get_column_info.return_value = library_col_info or {
+            "name": "col1",
+            "nullable": False,
+            "autoincrement": False,
+        }
+        mock_dialect.get_schema_columns.return_value = {"my_table": [row]}
+
+        tables_cache: Dict[str, List[TeradataTable]] = {
+            "mydb": [
+                TeradataTable(
+                    database="mydb",
+                    name="my_table",
+                    description=None,
+                    object_type="Table",
+                    create_timestamp=datetime.now(),
+                    last_alter_name=None,
+                    last_alter_timestamp=None,
+                    request_text=None,
+                )
+            ]
+        }
+        return optimized_get_columns(
+            mock_dialect,
+            MagicMock(),
+            "my_table",
+            "mydb",
+            tables_cache=tables_cache,
+        )
+
+    @staticmethod
+    def _row(**fields):
+        """SQLAlchemy Row-like object with attribute access for given fields."""
+        row = MagicMock(spec=list(fields.keys()) + ["CommentString"])
+        for k, v in fields.items():
+            setattr(row, k, v)
+        row.CommentString = None
+        return row
+
+    # Nullable: customer-reported bug
+    def test_padded_nullable_y_hydrates_as_true(self):
+        cols = self._call_with_row(self._row(Nullable="Y "))
+        assert cols[0]["nullable"] is True
+
+    def test_padded_nullable_n_hydrates_as_false(self):
+        cols = self._call_with_row(self._row(Nullable="N "))
+        assert cols[0]["nullable"] is False
+
+    def test_clean_nullable_y_unchanged(self):
+        cols = self._call_with_row(self._row(Nullable="Y"))
+        assert cols[0]["nullable"] is True
+
+    def test_dict_row_padded_nullable(self):
+        """HELP-derived dict rows must work alongside SQLAlchemy Row objects."""
+        cols = self._call_with_row({"Nullable": "Y ", "ColumnName": "col1"})
+        assert cols[0]["nullable"] is True
+
+    # IdColType: latent sibling bug, same root cause
+    def test_padded_idcoltype_hydrates_as_autoincrement_true(self):
+        """The library does `row['IdColType'] in ('GA','GD')`; Teradata returns
+        'GA  ' padded, so the check fails for every identity column."""
+        cols = self._call_with_row(
+            self._row(Nullable="Y", IdColType="GA  "),
+            library_col_info={"name": "col1", "nullable": True, "autoincrement": False},
+        )
+        assert cols[0]["autoincrement"] is True
+
+    def test_padded_idcoltype_gd_hydrates_as_autoincrement_true(self):
+        cols = self._call_with_row(
+            self._row(Nullable="Y", IdColType="GD  "),
+            library_col_info={"name": "col1", "nullable": True, "autoincrement": False},
+        )
+        assert cols[0]["autoincrement"] is True
+
+    def test_no_idcoltype_leaves_autoincrement_alone(self):
+        """Non-identity columns have IdColType=None; we must not overwrite."""
+        cols = self._call_with_row(
+            self._row(Nullable="Y", IdColType=None),
+            library_col_info={"name": "col1", "nullable": True, "autoincrement": False},
+        )
+        assert cols[0]["autoincrement"] is False