From fa5f898f60ce26e7c60464f8e07e4d07514ce5ce Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Tue, 16 Jul 2024 10:06:00 +0200 Subject: [PATCH 1/3] Allow same names for dimensions and time dimension groups in generated views --- generator/views/lookml_utils.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/generator/views/lookml_utils.py b/generator/views/lookml_utils.py index e4a37c4e..dc32393c 100644 --- a/generator/views/lookml_utils.py +++ b/generator/views/lookml_utils.py @@ -118,6 +118,11 @@ def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, dimensions = {} for dimension in _generate_dimensions_helper(client.get_table(table).schema): name = dimension["name"] + + # This prevents `time` dimension groups from overwriting other dimensions below + if dimension.get("type") == "time": + name += "_time" + # overwrite duplicate "submission", "end", "start" dimension group, thus picking the # last value sorted by field name, which is submission_timestamp # See also https://github.com/mozilla/lookml-generator/issues/471 @@ -126,9 +131,6 @@ def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, and name != "submission" and not name.endswith("end") and not name.endswith("start") - and not (name == "event" and dimension["type"] == "time") - # workaround for `mozdata.firefox_desktop.desktop_installs` - and not (name == "attribution_dltoken" and dimension["type"] == "time") ): raise click.ClickException( f"duplicate dimension {name!r} for table {table!r}" @@ -146,6 +148,11 @@ def _generate_dimensions_from_query( dimensions = {} for dimension in _generate_dimensions_helper(schema or []): name = dimension["name"] + + # This prevents `time` dimension groups from overwriting other dimensions below + if dimension.get("type") == "time": + name += "_time" + # overwrite duplicate "submission", "end", "start" dimension group, thus picking the # last value sorted by field name, which is submission_timestamp # See also https://github.com/mozilla/lookml-generator/issues/471 @@ -154,9 +161,6 @@ def _generate_dimensions_from_query( and name != "submission" and not name.endswith("end") and not name.endswith("start") - and not (name == "event" and dimension["type"] == "time") - # workaround for `mozdata.firefox_desktop.desktop_installs` - and not (name == "attribution_dltoken" and dimension["type"] == "time") ): raise click.ClickException(f"duplicate dimension {name!r} in query") dimensions[name] = dimension From 6f18afdb111a108219bba9b25fc150e69a308628 Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Tue, 16 Jul 2024 14:04:34 +0200 Subject: [PATCH 2/3] add test --- generator/views/lookml_utils.py | 32 ++++++------- tests/test_lookml.py | 79 +++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 16 deletions(-) diff --git a/generator/views/lookml_utils.py b/generator/views/lookml_utils.py index dc32393c..2bf6f7b8 100644 --- a/generator/views/lookml_utils.py +++ b/generator/views/lookml_utils.py @@ -117,25 +117,25 @@ def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, """ dimensions = {} for dimension in _generate_dimensions_helper(client.get_table(table).schema): - name = dimension["name"] + name_key = dimension["name"] # This prevents `time` dimension groups from overwriting other dimensions below if dimension.get("type") == "time": - name += "_time" + name_key += "_time" # overwrite duplicate "submission", "end", "start" dimension group, thus picking the # last value sorted by field name, which is submission_timestamp # See also https://github.com/mozilla/lookml-generator/issues/471 if ( - name in dimensions - and name != "submission" - and not name.endswith("end") - and not name.endswith("start") + name_key in dimensions + and dimension["name"] != "submission" + and not dimension["name"].endswith("end") + and not dimension["name"].endswith("start") ): raise click.ClickException( - f"duplicate dimension {name!r} for table {table!r}" + f"duplicate dimension {name_key!r} for table {table!r}" ) - dimensions[name] = dimension + dimensions[name_key] = dimension return list(dimensions.values()) @@ -147,23 +147,23 @@ def _generate_dimensions_from_query( schema = client.query(query, job_config=job_config).schema dimensions = {} for dimension in _generate_dimensions_helper(schema or []): - name = dimension["name"] + name_key = dimension["name"] # This prevents `time` dimension groups from overwriting other dimensions below if dimension.get("type") == "time": - name += "_time" + name_key += "_time" # overwrite duplicate "submission", "end", "start" dimension group, thus picking the # last value sorted by field name, which is submission_timestamp # See also https://github.com/mozilla/lookml-generator/issues/471 if ( - name in dimensions - and name != "submission" - and not name.endswith("end") - and not name.endswith("start") + name_key in dimensions + and dimension["name"] != "submission" + and not dimension["name"].endswith("end") + and not dimension["name"].endswith("start") ): - raise click.ClickException(f"duplicate dimension {name!r} in query") - dimensions[name] = dimension + raise click.ClickException(f"duplicate dimension {name_key!r} in query") + dimensions[name_key] = dimension return list(dimensions.values()) diff --git a/tests/test_lookml.py b/tests/test_lookml.py index 8732c8f7..0ba4f939 100644 --- a/tests/test_lookml.py +++ b/tests/test_lookml.py @@ -473,6 +473,15 @@ def get_table(self, table_ref): SchemaField(name="parsed_date", field_type="DATE"), ], ) + if table_ref == "mozdata.pass.duplicate_event_dimension": + return bigquery.Table( + table_ref, + schema=[ + SchemaField(name="submission_timestamp", field_type="TIMESTAMP"), + SchemaField(name="event_timestamp", field_type="TIMESTAMP"), + SchemaField(name="event", field_type="STRING"), + ], + ) if table_ref == "mozdata.fail.duplicate_client": return bigquery.Table( table_ref, @@ -1922,6 +1931,76 @@ def test_duplicate_dimension(runner, glean_apps, tmp_path): _lookml(open(namespaces), glean_apps, "looker-hub/") +def test_duplicate_dimension_event(runner, glean_apps, tmp_path): + namespaces = tmp_path / "namespaces.yaml" + namespaces.write_text( + dedent( + """ + custom: + pretty_name: Custom + glean_app: false + views: + events_stream: + type: table_view + tables: + - channel: release + table: mozdata.pass.duplicate_event_dimension + """ + ) + ) + with runner.isolated_filesystem(): + with patch("google.cloud.bigquery.Client", MockClient): + _lookml(open(namespaces), glean_apps, "looker-hub/") + expected = { + "views": [ + { + "dimension_groups": [ + { + "name": "event", + "sql": "${TABLE}.event_timestamp", + "timeframes": [ + "raw", + "time", + "date", + "week", + "month", + "quarter", + "year", + ], + "type": "time", + }, + { + "sql": "${TABLE}.submission_timestamp", + "type": "time", + "timeframes": [ + "raw", + "time", + "date", + "week", + "month", + "quarter", + "year", + ], + "name": "submission", + }, + ], + "dimensions": [ + {"name": "event", "sql": "${TABLE}.event", "type": "string"} + ], + "name": "events_stream", + "sql_table_name": "`mozdata.pass.duplicate_event_dimension`", + } + ] + } + + print_and_test( + lkml.load(lkml.dump(expected)), + lkml.load( + Path("looker-hub/custom/views/events_stream.view.lkml").read_text() + ), + ) + + def test_duplicate_client_id(runner, glean_apps, tmp_path): namespaces = tmp_path / "namespaces.yaml" namespaces.write_text( From dd8d5b524e888eabb5f0a1874ed7ec21baa557a6 Mon Sep 17 00:00:00 2001 From: Arkadiusz Komarzewski Date: Wed, 17 Jul 2024 11:33:24 +0200 Subject: [PATCH 3/3] narrow down check --- generator/views/lookml_utils.py | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/generator/views/lookml_utils.py b/generator/views/lookml_utils.py index 2bf6f7b8..92f60956 100644 --- a/generator/views/lookml_utils.py +++ b/generator/views/lookml_utils.py @@ -126,11 +126,13 @@ def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str, # overwrite duplicate "submission", "end", "start" dimension group, thus picking the # last value sorted by field name, which is submission_timestamp # See also https://github.com/mozilla/lookml-generator/issues/471 - if ( - name_key in dimensions - and dimension["name"] != "submission" - and not dimension["name"].endswith("end") - and not dimension["name"].endswith("start") + if name_key in dimensions and not ( + dimension.get("type") == "time" + and ( + dimension["name"] == "submission" + or dimension["name"].endswith("end") + or dimension["name"].endswith("start") + ) ): raise click.ClickException( f"duplicate dimension {name_key!r} for table {table!r}" @@ -156,11 +158,13 @@ def _generate_dimensions_from_query( # overwrite duplicate "submission", "end", "start" dimension group, thus picking the # last value sorted by field name, which is submission_timestamp # See also https://github.com/mozilla/lookml-generator/issues/471 - if ( - name_key in dimensions - and dimension["name"] != "submission" - and not dimension["name"].endswith("end") - and not dimension["name"].endswith("start") + if name_key in dimensions and not ( + dimension.get("type") == "time" + and ( + dimension["name"] == "submission" + or dimension["name"].endswith("end") + or dimension["name"].endswith("start") + ) ): raise click.ClickException(f"duplicate dimension {name_key!r} in query") dimensions[name_key] = dimension