Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 30 additions & 22 deletions generator/views/lookml_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,23 +117,27 @@ def _generate_dimensions(client: bigquery.Client, table: str) -> List[Dict[str,
"""
dimensions = {}
for dimension in _generate_dimensions_helper(client.get_table(table).schema):
name = dimension["name"]
name_key = dimension["name"]

# This prevents `time` dimension groups from overwriting other dimensions below
if dimension.get("type") == "time":
name_key += "_time"

# overwrite duplicate "submission", "end", "start" dimension group, thus picking the
# last value sorted by field name, which is submission_timestamp
# See also https://github.com/mozilla/lookml-generator/issues/471
if (
name in dimensions
and name != "submission"
and not name.endswith("end")
and not name.endswith("start")
and not (name == "event" and dimension["type"] == "time")
# workaround for `mozdata.firefox_desktop.desktop_installs`
and not (name == "attribution_dltoken" and dimension["type"] == "time")
Comment on lines -129 to -131

@akkomar akkomar Jul 16, 2024

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

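We no longer need these workarounds (the special cases for the `event` and `attribution_dltoken` time dimension groups): `time` dimension groups are now tracked under a separate `<name>_time` key, so they no longer collide with a regular dimension of the same name.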

if name_key in dimensions and not (
dimension.get("type") == "time"
and (
dimension["name"] == "submission"
or dimension["name"].endswith("end")
or dimension["name"].endswith("start")
)
):
raise click.ClickException(
f"duplicate dimension {name!r} for table {table!r}"
f"duplicate dimension {name_key!r} for table {table!r}"
)
dimensions[name] = dimension
dimensions[name_key] = dimension
return list(dimensions.values())


Expand All @@ -145,21 +149,25 @@ def _generate_dimensions_from_query(
schema = client.query(query, job_config=job_config).schema
dimensions = {}
for dimension in _generate_dimensions_helper(schema or []):
name = dimension["name"]
name_key = dimension["name"]

# This prevents `time` dimension groups from overwriting other dimensions below
if dimension.get("type") == "time":
name_key += "_time"

# overwrite duplicate "submission", "end", "start" dimension group, thus picking the
# last value sorted by field name, which is submission_timestamp
# See also https://github.com/mozilla/lookml-generator/issues/471
if (
name in dimensions
and name != "submission"
and not name.endswith("end")
and not name.endswith("start")
and not (name == "event" and dimension["type"] == "time")
# workaround for `mozdata.firefox_desktop.desktop_installs`
and not (name == "attribution_dltoken" and dimension["type"] == "time")
if name_key in dimensions and not (
dimension.get("type") == "time"
and (
dimension["name"] == "submission"
or dimension["name"].endswith("end")
or dimension["name"].endswith("start")
)
):
raise click.ClickException(f"duplicate dimension {name!r} in query")
dimensions[name] = dimension
raise click.ClickException(f"duplicate dimension {name_key!r} in query")
dimensions[name_key] = dimension
return list(dimensions.values())


Expand Down
79 changes: 79 additions & 0 deletions tests/test_lookml.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,15 @@ def get_table(self, table_ref):
SchemaField(name="parsed_date", field_type="DATE"),
],
)
if table_ref == "mozdata.pass.duplicate_event_dimension":
return bigquery.Table(
table_ref,
schema=[
SchemaField(name="submission_timestamp", field_type="TIMESTAMP"),
SchemaField(name="event_timestamp", field_type="TIMESTAMP"),
SchemaField(name="event", field_type="STRING"),
],
)
if table_ref == "mozdata.fail.duplicate_client":
return bigquery.Table(
table_ref,
Expand Down Expand Up @@ -1922,6 +1931,76 @@ def test_duplicate_dimension(runner, glean_apps, tmp_path):
_lookml(open(namespaces), glean_apps, "looker-hub/")


def test_duplicate_dimension_event(runner, glean_apps, tmp_path):
# Checks that a table containing both an `event_timestamp` TIMESTAMP column and an
# `event` STRING column generates successfully, producing an `event` dimension group
# and an `event` dimension side by side in the same view.
namespaces = tmp_path / "namespaces.yaml"
# Namespace definition with a single `table_view` backed by the mocked table
# `mozdata.pass.duplicate_event_dimension`.
namespaces.write_text(
dedent(
"""
custom:
pretty_name: Custom
glean_app: false
views:
events_stream:
type: table_view
tables:
- channel: release
table: mozdata.pass.duplicate_event_dimension
"""
)
)
# Run the generator in an isolated working directory with the BigQuery client patched
# by MockClient, so the table schema comes from the mock.
with runner.isolated_filesystem():
with patch("google.cloud.bigquery.Client", MockClient):
# NOTE(review): the handle returned by open() is never explicitly closed here.
_lookml(open(namespaces), glean_apps, "looker-hub/")
# Expected view: two `time` dimension groups (`event`, `submission`) plus a plain
# string dimension that is also named `event`.
expected = {
"views": [
{
"dimension_groups": [
{
"name": "event",
"sql": "${TABLE}.event_timestamp",
"timeframes": [
"raw",
"time",
"date",
"week",
"month",
"quarter",
"year",
],
"type": "time",
},
{
"sql": "${TABLE}.submission_timestamp",
"type": "time",
"timeframes": [
"raw",
"time",
"date",
"week",
"month",
"quarter",
"year",
],
"name": "submission",
},
],
"dimensions": [
{"name": "event", "sql": "${TABLE}.event", "type": "string"}
],
"name": "events_stream",
"sql_table_name": "`mozdata.pass.duplicate_event_dimension`",
}
]
}

# Round-trip the expected dict through lkml dump/load and compare it with the parsed
# contents of the generated view file.
print_and_test(
lkml.load(lkml.dump(expected)),
lkml.load(
Path("looker-hub/custom/views/events_stream.view.lkml").read_text()
),
)


def test_duplicate_client_id(runner, glean_apps, tmp_path):
namespaces = tmp_path / "namespaces.yaml"
namespaces.write_text(
Expand Down