Key `time` dimension groups separately when de-duplicating generated dimensions.

Both `_generate_dimensions` and `_generate_dimensions_from_query` collect
dimensions in a dict keyed by name. With this change a dimension whose type is
"time" is stored under `<name>_time`, so a time dimension group and a plain
dimension that share a name (the new test uses `event_timestamp`, which yields
the group `event`, next to a STRING column `event`) no longer collide.

A repeated key still raises `click.ClickException`, except for time groups
named `submission` or ending in `end` / `start`, which are overwritten by the
later entry. The name-specific exemptions for `event` and
`attribution_dltoken` are removed.

NOTE(review): the exception text now interpolates the internal key, so a
clashing time group `foo` is reported as 'foo_time'.
NOTE(review): leading whitespace below was reconstructed from the hunk line
counts and Python structure; confirm the context lines against the tree
before applying.

diff --git a/generator/views/lookml_utils.py b/generator/views/lookml_utils.py
index e4a37c4e..92f60956 100644
--- a/generator/views/lookml_utils.py
+++ b/generator/views/lookml_utils.py
@@ -117,23 +117,27 @@ def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str,
     """
     dimensions = {}
     for dimension in _generate_dimensions_helper(client.get_table(table).schema):
-        name = dimension["name"]
+        name_key = dimension["name"]
+
+        # This prevents `time` dimension groups from overwriting other dimensions below
+        if dimension.get("type") == "time":
+            name_key += "_time"
+
         # overwrite duplicate "submission", "end", "start" dimension group, thus picking the
         # last value sorted by field name, which is submission_timestamp
         # See also https://github.com/mozilla/lookml-generator/issues/471
-        if (
-            name in dimensions
-            and name != "submission"
-            and not name.endswith("end")
-            and not name.endswith("start")
-            and not (name == "event" and dimension["type"] == "time")
-            # workaround for `mozdata.firefox_desktop.desktop_installs`
-            and not (name == "attribution_dltoken" and dimension["type"] == "time")
+        if name_key in dimensions and not (
+            dimension.get("type") == "time"
+            and (
+                dimension["name"] == "submission"
+                or dimension["name"].endswith("end")
+                or dimension["name"].endswith("start")
+            )
         ):
             raise click.ClickException(
-                f"duplicate dimension {name!r} for table {table!r}"
+                f"duplicate dimension {name_key!r} for table {table!r}"
             )
-        dimensions[name] = dimension
+        dimensions[name_key] = dimension
     return list(dimensions.values())
 
 
@@ -145,21 +149,25 @@ def _generate_dimensions_from_query(
     schema = client.query(query, job_config=job_config).schema
     dimensions = {}
     for dimension in _generate_dimensions_helper(schema or []):
-        name = dimension["name"]
+        name_key = dimension["name"]
+
+        # This prevents `time` dimension groups from overwriting other dimensions below
+        if dimension.get("type") == "time":
+            name_key += "_time"
+
         # overwrite duplicate "submission", "end", "start" dimension group, thus picking the
         # last value sorted by field name, which is submission_timestamp
         # See also https://github.com/mozilla/lookml-generator/issues/471
-        if (
-            name in dimensions
-            and name != "submission"
-            and not name.endswith("end")
-            and not name.endswith("start")
-            and not (name == "event" and dimension["type"] == "time")
-            # workaround for `mozdata.firefox_desktop.desktop_installs`
-            and not (name == "attribution_dltoken" and dimension["type"] == "time")
+        if name_key in dimensions and not (
+            dimension.get("type") == "time"
+            and (
+                dimension["name"] == "submission"
+                or dimension["name"].endswith("end")
+                or dimension["name"].endswith("start")
+            )
         ):
-            raise click.ClickException(f"duplicate dimension {name!r} in query")
-        dimensions[name] = dimension
+            raise click.ClickException(f"duplicate dimension {name_key!r} in query")
+        dimensions[name_key] = dimension
     return list(dimensions.values())
 
 
diff --git a/tests/test_lookml.py b/tests/test_lookml.py
index 8732c8f7..0ba4f939 100644
--- a/tests/test_lookml.py
+++ b/tests/test_lookml.py
@@ -473,6 +473,15 @@ def get_table(self, table_ref):
                     SchemaField(name="parsed_date", field_type="DATE"),
                 ],
             )
+        if table_ref == "mozdata.pass.duplicate_event_dimension":
+            return bigquery.Table(
+                table_ref,
+                schema=[
+                    SchemaField(name="submission_timestamp", field_type="TIMESTAMP"),
+                    SchemaField(name="event_timestamp", field_type="TIMESTAMP"),
+                    SchemaField(name="event", field_type="STRING"),
+                ],
+            )
         if table_ref == "mozdata.fail.duplicate_client":
             return bigquery.Table(
                 table_ref,
@@ -1922,6 +1931,76 @@ def test_duplicate_dimension(runner, glean_apps, tmp_path):
                 _lookml(open(namespaces), glean_apps, "looker-hub/")
 
 
+def test_duplicate_dimension_event(runner, glean_apps, tmp_path):
+    namespaces = tmp_path / "namespaces.yaml"
+    namespaces.write_text(
+        dedent(
+            """
+            custom:
+              pretty_name: Custom
+              glean_app: false
+              views:
+                events_stream:
+                  type: table_view
+                  tables:
+                  - channel: release
+                    table: mozdata.pass.duplicate_event_dimension
+            """
+        )
+    )
+    with runner.isolated_filesystem():
+        with patch("google.cloud.bigquery.Client", MockClient):
+            _lookml(open(namespaces), glean_apps, "looker-hub/")
+        expected = {
+            "views": [
+                {
+                    "dimension_groups": [
+                        {
+                            "name": "event",
+                            "sql": "${TABLE}.event_timestamp",
+                            "timeframes": [
+                                "raw",
+                                "time",
+                                "date",
+                                "week",
+                                "month",
+                                "quarter",
+                                "year",
+                            ],
+                            "type": "time",
+                        },
+                        {
+                            "sql": "${TABLE}.submission_timestamp",
+                            "type": "time",
+                            "timeframes": [
+                                "raw",
+                                "time",
+                                "date",
+                                "week",
+                                "month",
+                                "quarter",
+                                "year",
+                            ],
+                            "name": "submission",
+                        },
+                    ],
+                    "dimensions": [
+                        {"name": "event", "sql": "${TABLE}.event", "type": "string"}
+                    ],
+                    "name": "events_stream",
+                    "sql_table_name": "`mozdata.pass.duplicate_event_dimension`",
+                }
+            ]
+        }
+
+        print_and_test(
+            lkml.load(lkml.dump(expected)),
+            lkml.load(
+                Path("looker-hub/custom/views/events_stream.view.lkml").read_text()
+            ),
+        )
+
+
 def test_duplicate_client_id(runner, glean_apps, tmp_path):
     namespaces = tmp_path / "namespaces.yaml"
     namespaces.write_text(