diff --git a/server/frontend/src/api.js b/server/frontend/src/api.js index 3d543a7b..9c76e941 100644 --- a/server/frontend/src/api.js +++ b/server/frontend/src/api.js @@ -55,3 +55,6 @@ export const dismissAllNotifications = async () => export const getCurrentBucketSpikes = async (params) => (await mainAxios.get("/reportmanager/rest/bucket-spikes/", { params })).data; + +export const listCountryRankColumns = async () => + (await mainAxios.get("/reportmanager/rest/country-rank-columns/")).data; diff --git a/server/frontend/src/bucket_filter.js b/server/frontend/src/bucket_filter.js index d59c3c9a..486ef7aa 100644 --- a/server/frontend/src/bucket_filter.js +++ b/server/frontend/src/bucket_filter.js @@ -54,11 +54,14 @@ const nodeToQuery = (node) => { "Free-text search isn't supported yet. Use field:value without a space in between, e.g. domain:example.com.", ); } - // We only support equality (`field:value`) for now. liqe also parses comparison - // operators (`field:>=10`, `field:<5`, …); reject them. - if (node.operator?.operator !== ":") { + // liqe parses comparison operators as composite operator strings like + // ":<", ":<=", ":>", ":>=". Allow them for filters that opt in via + // supportsComparison; reject them for everything else. + const op = node.operator?.operator; + const isComparison = op !== ":"; + if (isComparison && op !== ":<" && op !== ":<=" && op !== ":>" && op !== ":>=") { return fail( - `Unsupported operator "${node.operator?.operator}". Only field:value is supported.`, + `Unsupported operator "${op}". 
Only field:value and comparisons such as field:<=10 are supported.`, ); } const key = node.field.name.toLowerCase(); @@ -66,7 +69,13 @@ const nodeToQuery = (node) => { if (!def) { return fail(`Unknown field "${node.field.name}".`); } - return ok(def.toQuery(node.expression.value)); + if (isComparison && !def.supportsComparison) { + return fail( + `Comparison operators are not supported for field "${node.field.name}".`, + ); + } + // Strip the leading ":" so toQuery receives "", "<", "<=", ">", ">=" + return ok(def.toQuery(node.expression.value, op.slice(1))); } case "UnaryOperator": { // both '-' and 'NOT' negate the operand diff --git a/server/frontend/src/bucket_filter_config.js b/server/frontend/src/bucket_filter_config.js index b596d324..749a5c15 100644 --- a/server/frontend/src/bucket_filter_config.js +++ b/server/frontend/src/bucket_filter_config.js @@ -130,6 +130,32 @@ class TriagedBucketState extends BucketState { } } +class CountryRankBucketFilter extends BucketFilter { + supportsComparison = true; + + constructor(countryColumn) { + super(countryColumn, `Rank (${countryColumn})`); + this.countryColumn = countryColumn; + } + + // `op` is the operator with its leading ":" removed; a plain field:value + // arrives as "" and is mapped to the same lookup as "<=". + toQuery(value, op = "<=") { + if (typeof value !== "number" || Number.isNaN(value)) { + throw new Error( + `Invalid rank value: ${value}. Use a number, e.g. poland_rank:<=1000.`, + ); + } + const opMap = { "<=": "lte", "<": "lt", ">=": "gte", ">": "gt", "": "lte" }; + const djangoOp = opMap[op] ?? 
"lte"; + return { + op: "AND", + country_ranks__country: this.countryColumn, + [`country_ranks__rank__${djangoOp}`]: value, + }; + } +} + const bucketFilterList = [ new CountryBucketFilter(), new LabelBucketFilter(), @@ -147,6 +173,13 @@ export const BUCKET_FILTERS = Object.fromEntries( bucketFilterList.map((filter) => [filter.key, filter]), ); +export function registerCountryRankFilters(countryColumns) { + for (const col of countryColumns) { + const filter = new CountryRankBucketFilter(col); + BUCKET_FILTERS[filter.key] = filter; + } +} + export const BUCKET_STATES = Object.fromEntries( bucketStateList.map((state) => [state.key, state]), ); diff --git a/server/frontend/src/main.js b/server/frontend/src/main.js index 4a52f0a8..96ec5c24 100644 --- a/server/frontend/src/main.js +++ b/server/frontend/src/main.js @@ -2,6 +2,8 @@ import { createApp } from "vue"; import FloatingVue from "floating-vue"; import router from "./router.js"; +import { listCountryRankColumns } from "./api.js"; +import { registerCountryRankFilters } from "./bucket_filter_config.js"; import ActivityGraph from "./components/ActivityGraph.vue"; import AssignBtn from "./components/Buckets/AssignBtn.vue"; import BugPublicationForm from "./components/Bugs/PublicationForm.vue"; @@ -43,6 +45,11 @@ const app = createApp({ app.use(router); app.use(FloatingVue); +// Fetch country rank columns before mounting so rank filters (poland_rank:<=1000) +// are available immediately when the user interacts with the filter bar. 
document.addEventListener("DOMContentLoaded", function () { - app.mount("#app"); + listCountryRankColumns() + .then((columns) => registerCountryRankFilters(columns)) + .catch((e) => console.error("Failed to load country rank columns:", e)) + .finally(() => app.mount("#app")); }); diff --git a/server/frontend/tests/country_rank_filter.test.js b/server/frontend/tests/country_rank_filter.test.js new file mode 100644 index 00000000..814692cd --- /dev/null +++ b/server/frontend/tests/country_rank_filter.test.js @@ -0,0 +1,96 @@ +import { + BUCKET_FILTERS, + registerCountryRankFilters, +} from "../src/bucket_filter_config.js"; + +describe("CountryRankBucketFilter", () => { + beforeEach(() => { + registerCountryRankFilters(["poland_rank"]); + }); + + afterEach(() => { + delete BUCKET_FILTERS["poland_rank"]; + }); + + test("toQuery with <= produces lte lookup", () => { + const filter = BUCKET_FILTERS["poland_rank"]; + expect(filter.toQuery(1000, "<=")).toEqual({ + op: "AND", + country_ranks__country: "poland_rank", + country_ranks__rank__lte: 1000, + }); + }); + + test("toQuery with < produces lt lookup", () => { + const filter = BUCKET_FILTERS["poland_rank"]; + expect(filter.toQuery(1000, "<")).toEqual({ + op: "AND", + country_ranks__country: "poland_rank", + country_ranks__rank__lt: 1000, + }); + }); + + test("toQuery with >= produces gte lookup", () => { + const filter = BUCKET_FILTERS["poland_rank"]; + expect(filter.toQuery(500, ">=")).toEqual({ + op: "AND", + country_ranks__country: "poland_rank", + country_ranks__rank__gte: 500, + }); + }); + + test("toQuery with > produces gt lookup", () => { + const filter = BUCKET_FILTERS["poland_rank"]; + expect(filter.toQuery(500, ">")).toEqual({ + op: "AND", + country_ranks__country: "poland_rank", + country_ranks__rank__gt: 500, + }); + }); + + test("toQuery with no op defaults to lte", () => { + const filter = BUCKET_FILTERS["poland_rank"]; + expect(filter.toQuery(1000)).toEqual({ + op: "AND", + country_ranks__country: 
"poland_rank", + country_ranks__rank__lte: 1000, + }); + }); + + test("toQuery throws for a non-numeric value", () => { + const filter = BUCKET_FILTERS["poland_rank"]; + expect(() => filter.toQuery("notanumber", "<=")).toThrow( + /Invalid rank value: notanumber/, + ); + }); + + test("supportsComparison is true", () => { + expect(BUCKET_FILTERS["poland_rank"].supportsComparison).toBe(true); + }); +}); + +describe("registerCountryRankFilters", () => { + afterEach(() => { + delete BUCKET_FILTERS["germany_rank"]; + delete BUCKET_FILTERS["global_rank"]; + }); + + test("adds filter keys for each supplied column", () => { + registerCountryRankFilters(["germany_rank", "global_rank"]); + expect(BUCKET_FILTERS["germany_rank"]).toBeDefined(); + expect(BUCKET_FILTERS["global_rank"]).toBeDefined(); + }); + + test("registered filter has the correct key and countryColumn", () => { + registerCountryRankFilters(["germany_rank"]); + const filter = BUCKET_FILTERS["germany_rank"]; + expect(filter.key).toBe("germany_rank"); + expect(filter.countryColumn).toBe("germany_rank"); + }); + + test("calling with an empty array adds nothing", () => { + const keysBefore = Object.keys(BUCKET_FILTERS).length; + registerCountryRankFilters([]); + expect(Object.keys(BUCKET_FILTERS).length).toBe(keysBefore); + }); +}); diff --git a/server/reportmanager/management/commands/import_country_ranks.py b/server/reportmanager/management/commands/import_country_ranks.py new file mode 100644 index 00000000..7a7102bf --- /dev/null +++ b/server/reportmanager/management/commands/import_country_ranks.py @@ -0,0 +1,159 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+from logging import getLogger + +from django.conf import settings +from django.utils import timezone +from google.cloud import bigquery +from google.oauth2 import service_account + +from django.core.management import BaseCommand + +from reportmanager.models import Bucket, BucketCountryRank +from reportmanager.utils import normalize_domain + +LOG = getLogger("reportmanager.import_country_ranks") + + +class Command(BaseCommand): + help = "Import CrUX country rank data from BigQuery into BucketCountryRank" + + def add_arguments(self, parser): + parser.add_argument( + "--bq-project", + default=None, + help="Override the BigQuery project (default: settings.BIGQUERY_PROJECT)", + ) + parser.add_argument( + "--domains", + nargs="+", + default=None, + help="Limit import to these specific domains. If omitted, imports for all bucket domains.", + ) + + def handle(self, bq_project: str | None, domains: list[str] | None, **options: object) -> None: + project = bq_project or settings.BIGQUERY_PROJECT + + params: dict = {"project": project} + if svc_acct := getattr(settings, "BIGQUERY_SERVICE_ACCOUNT", None): + params["credentials"] = ( + service_account.Credentials.from_service_account_info( + svc_acct, + scopes=[ + "https://www.googleapis.com/auth/bigquery", + "https://www.googleapis.com/auth/drive", + ], + ) + ) + + client = bigquery.Client(**params) + crux_dataset_fn = f"`{project}.webcompat_knowledge_base.CRUX_DATASET`" + + partial = domains is not None + if partial: + # Normalize incoming domains to match the domain_normalized field + domains = list({nd for d in domains if (nd := normalize_domain(d))}) + # Only import for buckets that don't already have rank data. The check 
+ # is made per bucket rather than per domain: a bucket without ranks + # must not be skipped just because another bucket on the same domain + # already has them, otherwise it stays unranked until the next full + # import. + buckets = list( + Bucket.objects.filter( + domain_normalized__in=domains, + country_ranks__isnull=True, + ).only("id", "domain_normalized") + ) + domains = [b.domain_normalized for b in buckets] + else: + # Default: import for all bucket domains. + buckets = list( + Bucket.objects.exclude(domain_normalized__isnull=True) + .exclude(domain_normalized="") + .only("id", "domain_normalized") + ) + domains = [b.domain_normalized for b in buckets] + + if not domains: + LOG.info("No buckets with a normalized domain — nothing to import") + return + + LOG.info("Querying ranks for %d bucket domains", len(domains)) + + query = ( + f"SELECT * EXCEPT (yyyymm) " + f"FROM `{project}.crux_imported.host_min_ranks` " + f"WHERE yyyymm = {crux_dataset_fn}() " + f"AND host IN UNNEST(@domains)" + ) + job_config = bigquery.QueryJobConfig( + query_parameters=[ + bigquery.ArrayQueryParameter("domains", "STRING", domains) + ] + ) + rows = client.query(query, job_config=job_config).result() + + # Discover rank columns from result schema (anything ending in _rank) + rank_cols = [f.name for f in rows.schema if f.name.endswith("_rank")] + + if not rank_cols: + LOG.warning("No rank columns found in host_min_ranks result schema") + return + + LOG.info("Found %d rank columns: %s", len(rank_cols), rank_cols) + + # Build host -> {country_col: rank} dict (skip NULL ranks) + host_ranks: dict[str, dict[str, int]] = {} + for row in rows: + host = row["host"] + ranks = {col: row[col] for col in rank_cols if row[col] is not None} + if ranks: + host_ranks[host] = ranks + + LOG.info("Loaded rank data for %d hosts", len(host_ranks)) + + now = timezone.now() + to_upsert: list[BucketCountryRank] = [] + + for bucket in buckets: + ranks = host_ranks.get(bucket.domain_normalized) + if ranks: + for country, rank in ranks.items(): + 
to_upsert.append( + BucketCountryRank( + bucket_id=bucket.id, + country=country, + rank=rank, + updated_at=now, + ) + ) + + # Upsert in batches of 1000 + upserted_count = 0 + batch_size = 1000 + for i in range(0, len(to_upsert), batch_size): + batch = to_upsert[i : i + batch_size] + BucketCountryRank.objects.bulk_create( + batch, + update_conflicts=True, + update_fields=["rank", "updated_at"], + unique_fields=["bucket", "country"], + ) + upserted_count += len(batch) + + # Only clean up stale rows on a full import. The partial (--domains) + # path only fills in missing data, so there's nothing to clean up. + if not partial: + deleted_count, _ = BucketCountryRank.objects.exclude(updated_at=now).delete() + else: + deleted_count = 0 + + LOG.info( + "import_country_ranks complete: %d rank columns, %d buckets processed, " + "%d rows upserted, %d stale rows deleted", + len(rank_cols), + len(buckets), + upserted_count, + deleted_count, + ) diff --git a/server/reportmanager/management/commands/triage_new_reports.py b/server/reportmanager/management/commands/triage_new_reports.py index 6545154e..5aabefa3 100644 --- a/server/reportmanager/management/commands/triage_new_reports.py +++ b/server/reportmanager/management/commands/triage_new_reports.py @@ -2,7 +2,7 @@ from itertools import batched from logging import getLogger -from django.core.management import BaseCommand +from django.core.management import BaseCommand, call_command from django.utils import timezone from reportmanager.clustering.ClusterBucketManager import ( @@ -246,6 +246,9 @@ def run_triage(job: ClusteringJob) -> None: f"Triage completed successfully. Created {buckets_created} cluster buckets and {fallback_buckets} domain buckets." 
# noqa ) + if domains: + call_command("import_country_ranks", domains=list(domains)) + except Exception as e: complete_job(job, success=False, error=str(e)) raise diff --git a/server/reportmanager/migrations/0024_bucketcountryrank.py b/server/reportmanager/migrations/0024_bucketcountryrank.py new file mode 100644 index 00000000..d2d2dc9a --- /dev/null +++ b/server/reportmanager/migrations/0024_bucketcountryrank.py @@ -0,0 +1,27 @@ +# Generated by Django 6.0.5 on 2026-06-17 09:51 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('reportmanager', '0023_backfill_domain_normalized'), + ] + + operations = [ + migrations.CreateModel( + name='BucketCountryRank', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('country', models.CharField(max_length=50)), + ('rank', models.IntegerField()), + ('updated_at', models.DateTimeField(auto_now=True)), + ('bucket', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='country_ranks', to='reportmanager.bucket')), + ], + options={ + 'constraints': [models.UniqueConstraint(fields=('bucket', 'country'), name='unique_bucket_country_rank')], + }, + ), + ] diff --git a/server/reportmanager/migrations/0026_remove_auto_now_from_bucketcountryrank.py b/server/reportmanager/migrations/0026_remove_auto_now_from_bucketcountryrank.py new file mode 100644 index 00000000..830da4ee --- /dev/null +++ b/server/reportmanager/migrations/0026_remove_auto_now_from_bucketcountryrank.py @@ -0,0 +1,19 @@ +# Generated by Django 6.0.5 on 2026-06-17 11:19 + +import django.utils.timezone +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('reportmanager', '0024_bucketcountryrank'), + ] + + operations = [ + migrations.AlterField( + model_name='bucketcountryrank', + name='updated_at', + 
field=models.DateTimeField(default=django.utils.timezone.now), + ), + ] diff --git a/server/reportmanager/models.py b/server/reportmanager/models.py index 0b1b256a..e2770f1e 100644 --- a/server/reportmanager/models.py +++ b/server/reportmanager/models.py @@ -934,6 +934,22 @@ class Meta(TypedModelMeta): ) +class BucketCountryRank(models.Model): + bucket = models.ForeignKey( + Bucket, on_delete=models.CASCADE, related_name="country_ranks" + ) + country = models.CharField(max_length=50) # BQ column name, e.g. "poland_rank" + rank = models.IntegerField() + updated_at = models.DateTimeField(default=timezone.now) + + class Meta: + constraints = [ + models.UniqueConstraint( + fields=["bucket", "country"], name="unique_bucket_country_rank" + ) + ] + + @receiver(post_save, sender=DjangoUser) def add_default_perms(sender, instance, created, **kwargs): if created: diff --git a/server/reportmanager/urls.py b/server/reportmanager/urls.py index a6d08316..dbd3af8c 100644 --- a/server/reportmanager/urls.py +++ b/server/reportmanager/urls.py @@ -150,4 +150,9 @@ r"^usersettings/$", views.UserSettingsEditView.as_view(), name="usersettings" ), re_path(r"^rest/", include(router.urls)), + re_path( + r"^rest/country-rank-columns/$", + views.CountryRankColumnsView.as_view(), + name="country-rank-columns", + ), ] diff --git a/server/reportmanager/views.py b/server/reportmanager/views.py index b860c1c9..db28bd40 100644 --- a/server/reportmanager/views.py +++ b/server/reportmanager/views.py @@ -34,6 +34,7 @@ from django.views.generic.edit import CreateView, DeleteView, UpdateView from django.views.generic.list import ListView from rest_framework import mixins, status, viewsets +from rest_framework.views import APIView from rest_framework.authentication import SessionAuthentication, TokenAuthentication from rest_framework.decorators import action from rest_framework.exceptions import MethodNotAllowed, ValidationError @@ -50,6 +51,7 @@ ) from .models import ( Bucket, + BucketCountryRank, 
BucketHit, BucketWatch, Bug, @@ -1762,3 +1764,20 @@ class ClusteringJobViewSet(mixins.ListModelMixin, viewsets.GenericViewSet): authentication_classes = (TokenAuthentication, SessionAuthentication) queryset = ClusteringJob.objects.all().order_by("-started_at") serializer_class = ClusteringJobSerializer + + +class CountryRankColumnsView(APIView): + """Returns the distinct country rank column names present in BucketCountryRank. + + Used by the frontend to register rank filters dynamically (e.g. poland_rank:<=1000). + """ + + authentication_classes = (TokenAuthentication, SessionAuthentication) + + def get(self, request): + columns = list( + BucketCountryRank.objects.values_list("country", flat=True) + .distinct() + .order_by("country") + ) + return Response(columns) diff --git a/tests/test_country_rank_columns_api.py b/tests/test_country_rank_columns_api.py new file mode 100644 index 00000000..513faa65 --- /dev/null +++ b/tests/test_country_rank_columns_api.py @@ -0,0 +1,84 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+import pytest +from django.contrib.auth.models import Permission +from django.contrib.auth.models import User as DjangoUser +from django.contrib.contenttypes.models import ContentType +from django.urls import reverse +from rest_framework.authtoken.models import Token +from rest_framework.test import APIClient + +from reportmanager.models import Bucket, BucketCountryRank +from reportmanager.models import User as ReportManagerUser + + +def make_bucket(domain="example.com"): + return Bucket.objects.create(signature='{"symptoms": []}', domain=domain) + + +@pytest.fixture +def authed_client(db): + """Create a user with read permissions and return an authenticated APIClient.""" + user = DjangoUser.objects.create_user( + username="testuser", password="testpass", email="testuser@example.com" + ) + ct = ContentType.objects.get_for_model(ReportManagerUser) + for codename in ("reportmanager_visible", "reportmanager_read"): + perm = Permission.objects.get(content_type=ct, codename=codename) + user.user_permissions.add(perm) + + token, _ = Token.objects.get_or_create(user=user) + client = APIClient() + client.credentials(HTTP_AUTHORIZATION=f"Token {token.key}") + return client + + +@pytest.mark.django_db +class TestCountryRankColumnsEndpoint: + URL = "/reportmanager/rest/country-rank-columns/" + + def test_returns_distinct_country_columns(self, authed_client): + bucket = make_bucket() + BucketCountryRank.objects.create(bucket=bucket, country="poland_rank", rank=10) + BucketCountryRank.objects.create(bucket=bucket, country="us_rank", rank=20) + + response = authed_client.get(self.URL) + + assert response.status_code == 200 + assert response.json() == ["poland_rank", "us_rank"] + + def test_returns_empty_list_when_no_ranks(self, authed_client): + response = authed_client.get(self.URL) + + assert response.status_code == 200 + assert response.json() == [] + + def test_deduplicates_country_values(self, authed_client): + bucket1 = make_bucket(domain="example.com") + bucket2 = 
make_bucket(domain="other.com") + BucketCountryRank.objects.create(bucket=bucket1, country="germany_rank", rank=1) + BucketCountryRank.objects.create(bucket=bucket2, country="germany_rank", rank=2) + BucketCountryRank.objects.create(bucket=bucket1, country="us_rank", rank=3) + + response = authed_client.get(self.URL) + + assert response.status_code == 200 + assert response.json() == ["germany_rank", "us_rank"] + + def test_returns_columns_sorted_alphabetically(self, authed_client): + bucket = make_bucket() + BucketCountryRank.objects.create(bucket=bucket, country="us_rank", rank=1) + BucketCountryRank.objects.create(bucket=bucket, country="global_rank", rank=2) + BucketCountryRank.objects.create(bucket=bucket, country="poland_rank", rank=3) + + response = authed_client.get(self.URL) + + assert response.status_code == 200 + assert response.json() == ["global_rank", "poland_rank", "us_rank"] + + def test_unauthenticated_is_rejected(self, db): + client = APIClient() + response = client.get(self.URL) + + assert response.status_code in (401, 403) diff --git a/tests/test_import_country_ranks.py b/tests/test_import_country_ranks.py new file mode 100644 index 00000000..9fc65d2e --- /dev/null +++ b/tests/test_import_country_ranks.py @@ -0,0 +1,130 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +from unittest.mock import MagicMock, patch + +import pytest +from django.core.management import call_command + +from reportmanager.models import Bucket, BucketCountryRank + + +def make_bucket(domain: str | None = None) -> Bucket: + return Bucket.objects.create(signature='{"symptoms": []}', domain=domain) + + +def make_bq_client(host_rows: list[dict], rank_cols: list[str]) -> MagicMock: + """Build a mock BigQuery client for the single CRUX_DATASET() query. + + The command now issues one query (SELECT * EXCEPT (yyyymm) ... 
WHERE yyyymm = + CRUX_DATASET()) whose result carries both .schema and row iteration. + """ + schema_fields = [] + for col in ["host"] + rank_cols: + f = MagicMock() + f.name = col + schema_fields.append(f) + + def make_row(data: dict) -> MagicMock: + row = MagicMock() + row.__getitem__ = lambda self, key: data.get(key) + return row + + result = MagicMock() + result.schema = schema_fields + result.__iter__ = lambda self: iter([make_row(r) for r in host_rows]) + + client = MagicMock() + # The command calls client.query(sql, job_config=...).result() + client.query.return_value.result.return_value = result + return client + + +@pytest.mark.django_db +class TestImportCountryRanks: + def _run_command(self, client_mock): + with patch( + "reportmanager.management.commands.import_country_ranks.bigquery.Client", + return_value=client_mock, + ): + call_command("import_country_ranks") + + def test_creates_ranks_for_matching_bucket(self): + bucket = make_bucket(domain="example.com") + client = make_bq_client( + host_rows=[{"host": "example.com", "poland_rank": 100, "us_rank": 200}], + rank_cols=["poland_rank", "us_rank"], + ) + self._run_command(client) + + ranks = BucketCountryRank.objects.filter(bucket=bucket) + assert ranks.count() == 2 + poland = ranks.get(country="poland_rank") + assert poland.rank == 100 + us = ranks.get(country="us_rank") + assert us.rank == 200 + + def test_no_rank_created_for_unmatched_bucket(self): + make_bucket(domain="other.com") + client = make_bq_client( + host_rows=[{"host": "example.com", "poland_rank": 100, "us_rank": 200}], + rank_cols=["poland_rank", "us_rank"], + ) + self._run_command(client) + + assert BucketCountryRank.objects.count() == 0 + + def test_stale_rows_are_deleted(self): + bucket = make_bucket(domain="example.com") + # Pre-create a stale row for a domain that is no longer in BQ + BucketCountryRank.objects.create( + bucket=bucket, country="germany_rank", rank=50 + ) + + # BQ data no longer contains example.com at all + 
client = make_bq_client( + host_rows=[{"host": "other.com", "germany_rank": 1}], + rank_cols=["germany_rank"], + ) + self._run_command(client) + + assert not BucketCountryRank.objects.filter(bucket=bucket).exists() + + def test_rank_is_updated_when_changed(self): + bucket = make_bucket(domain="example.com") + existing = BucketCountryRank.objects.create( + bucket=bucket, country="us_rank", rank=999 + ) + old_updated_at = existing.updated_at + + client = make_bq_client( + host_rows=[{"host": "example.com", "us_rank": 42}], + rank_cols=["us_rank"], + ) + self._run_command(client) + + existing.refresh_from_db() + assert existing.rank == 42 + assert existing.updated_at >= old_updated_at + + def test_null_ranks_are_skipped(self): + bucket = make_bucket(domain="example.com") + client = make_bq_client( + host_rows=[{"host": "example.com", "poland_rank": None, "us_rank": 10}], + rank_cols=["poland_rank", "us_rank"], + ) + self._run_command(client) + + ranks = BucketCountryRank.objects.filter(bucket=bucket) + assert ranks.count() == 1 + assert ranks.get().country == "us_rank" + + def test_bucket_without_domain_is_skipped(self): + make_bucket(domain=None) + client = make_bq_client( + host_rows=[{"host": "example.com", "us_rank": 10}], + rank_cols=["us_rank"], + ) + self._run_command(client) + + assert BucketCountryRank.objects.count() == 0