Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions server/frontend/src/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,6 @@ export const dismissAllNotifications = async () =>

/**
 * Fetch the current bucket spikes.
 *
 * Issues a GET to `/reportmanager/rest/bucket-spikes/` through `mainAxios`,
 * forwarding `params` as the request's `params` option.
 *
 * @param params - Passed through unchanged as the request `params`.
 * @returns The `data` property of the response.
 */
export const getCurrentBucketSpikes = async (params) => {
  const response = await mainAxios.get("/reportmanager/rest/bucket-spikes/", {
    params,
  });
  return response.data;
};

/**
 * List the country rank columns known to the server.
 *
 * Issues a GET to `/reportmanager/rest/country-rank-columns/` through
 * `mainAxios`.
 *
 * @returns The `data` property of the response.
 */
export const listCountryRankColumns = async () => {
  const response = await mainAxios.get(
    "/reportmanager/rest/country-rank-columns/",
  );
  return response.data;
};
19 changes: 14 additions & 5 deletions server/frontend/src/bucket_filter.js
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,28 @@ const nodeToQuery = (node) => {
"Free-text search isn't supported yet. Use field:value without a space in between, e.g. domain:example.com.",
);
}
// We only support equality (`field:value`) for now. liqe also parses comparison
// operators (`field:>=10`, `field:<5`, …); reject them.
if (node.operator?.operator !== ":") {
// liqe parses comparison operators as composite operator strings like
// ":<", ":<=", ":>", ":>=". Allow them for filters that opt in via
// supportsComparison; reject them for everything else.
const op = node.operator?.operator;
const isComparison = op !== ":";
if (isComparison && op !== ":<" && op !== ":<=" && op !== ":>" && op !== ":>=") {
return fail(
`Unsupported operator "${node.operator?.operator}". Only field:value is supported.`,
`Unsupported operator "${op}". Only field:value is supported.`,
);
}
const key = node.field.name.toLowerCase();
const def = BUCKET_FILTERS[key];
if (!def) {
return fail(`Unknown field "${node.field.name}".`);
}
return ok(def.toQuery(node.expression.value));
if (isComparison && !def.supportsComparison) {
return fail(
`Comparison operators are not supported for field "${node.field.name}".`,
);
}
// Strip the leading ":" so toQuery receives "", "<", "<=", ">", ">="
return ok(def.toQuery(node.expression.value, op.slice(1)));
}
case "UnaryOperator": {
// both '-' and 'NOT' negate the operand
Expand Down
31 changes: 31 additions & 0 deletions server/frontend/src/bucket_filter_config.js
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,30 @@ class TriagedBucketState extends BucketState {
}
}

/**
 * Bucket filter that matches on a bucket's rank within one country rank
 * column (for example `poland_rank`).
 *
 * The column name is passed to the base class as the filter key, together
 * with a `Rank (<column>)` label.
 */
class CountryRankBucketFilter extends BucketFilter {
  // Opt-in flag: this filter accepts comparison operators, not just equality.
  supportsComparison = true;

  /**
   * @param countryColumn - Name of the rank column this filter targets.
   */
  constructor(countryColumn) {
    super(countryColumn, `Rank (${countryColumn})`);
    this.countryColumn = countryColumn;
  }

  /**
   * Build the query fragment for a rank comparison.
   *
   * @param value - Rank to compare against; must be a number and not NaN.
   * @param op - One of "<=", "<", ">=", ">" or "". The empty string and any
   *   operator missing from the table are treated as "<=".
   * @returns An AND clause restricting `country_ranks__country` to this
   *   filter's column and `country_ranks__rank__<lookup>` to `value`.
   * @throws Error when `value` is not a usable number.
   */
  toQuery(value, op = "<=") {
    const isUsableNumber = typeof value === "number" && !isNaN(value);
    if (!isUsableNumber) {
      throw new Error(
        `Invalid rank value: ${value}. Use a number, e.g. poland_rank:<=1000.`,
      );
    }

    // Comparison operator -> Django field lookup suffix.
    const lookupByOperator = {
      "<=": "lte",
      "<": "lt",
      ">=": "gte",
      ">": "gt",
      "": "lte",
    };
    const lookup = lookupByOperator[op] ?? "lte";

    const query = {
      op: "AND",
      country_ranks__country: this.countryColumn,
    };
    query[`country_ranks__rank__${lookup}`] = value;
    return query;
  }
}

const bucketFilterList = [
new CountryBucketFilter(),
new LabelBucketFilter(),
Expand All @@ -147,6 +171,13 @@ export const BUCKET_FILTERS = Object.fromEntries(
bucketFilterList.map((filter) => [filter.key, filter]),
);

/**
 * Register one CountryRankBucketFilter per supplied column name.
 *
 * Each filter is stored in BUCKET_FILTERS under its own `key`, replacing any
 * entry already present for that key.
 *
 * @param countryColumns - Iterable of rank column names.
 */
export function registerCountryRankFilters(countryColumns) {
  for (const columnName of countryColumns) {
    const rankFilter = new CountryRankBucketFilter(columnName);
    const { key } = rankFilter;
    BUCKET_FILTERS[key] = rankFilter;
  }
}

export const BUCKET_STATES = Object.fromEntries(
bucketStateList.map((state) => [state.key, state]),
);
9 changes: 8 additions & 1 deletion server/frontend/src/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ import { createApp } from "vue";
import FloatingVue from "floating-vue";

import router from "./router.js";
import { listCountryRankColumns } from "./api.js";
import { registerCountryRankFilters } from "./bucket_filter_config.js";
import ActivityGraph from "./components/ActivityGraph.vue";
import AssignBtn from "./components/Buckets/AssignBtn.vue";
import BugPublicationForm from "./components/Bugs/PublicationForm.vue";
Expand Down Expand Up @@ -43,6 +45,11 @@ const app = createApp({
app.use(router);
app.use(FloatingVue);

// Fetch country rank columns before mounting so rank filters (poland_rank:<=1000)
// are available immediately when the user interacts with the filter bar.
document.addEventListener("DOMContentLoaded", function () {
  // Mount exactly once, and only after the rank-column request has settled.
  // Mounting eagerly as well would call app.mount() twice on the same
  // container and would let the UI render before the rank filters exist.
  listCountryRankColumns()
    .then((columns) => registerCountryRankFilters(columns))
    // Best effort: a failed lookup must not prevent the app from starting,
    // it only means the rank filters are unavailable.
    .catch((e) => console.debug("Failed to load country rank columns:", e))
    .finally(() => app.mount("#app"));
});
96 changes: 96 additions & 0 deletions server/frontend/tests/country_rank_filter.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import {
BUCKET_FILTERS,
registerCountryRankFilters,
} from "../src/bucket_filter_config.js";

// Unit tests for the filter registered for a single rank column.
describe("CountryRankBucketFilter", () => {
  const COLUMN = "poland_rank";

  // The filter under test is looked up fresh because it is re-registered
  // before every test.
  const rankFilter = () => BUCKET_FILTERS[COLUMN];

  // Query expected for a given Django lookup suffix and rank value.
  const expectedQuery = (lookup, rank) => ({
    op: "AND",
    country_ranks__country: COLUMN,
    [`country_ranks__rank__${lookup}`]: rank,
  });

  beforeEach(() => {
    registerCountryRankFilters([COLUMN]);
  });

  afterEach(() => {
    delete BUCKET_FILTERS[COLUMN];
  });

  // One test per explicit comparison operator.
  const operatorCases = [
    { op: "<=", lookup: "lte", rank: 1000 },
    { op: "<", lookup: "lt", rank: 1000 },
    { op: ">=", lookup: "gte", rank: 500 },
    { op: ">", lookup: "gt", rank: 500 },
  ];
  for (const { op, lookup, rank } of operatorCases) {
    test(`toQuery with ${op} produces ${lookup} lookup`, () => {
      expect(rankFilter().toQuery(rank, op)).toEqual(
        expectedQuery(lookup, rank),
      );
    });
  }

  test("toQuery with no op defaults to lte", () => {
    expect(rankFilter().toQuery(1000)).toEqual(expectedQuery("lte", 1000));
  });

  test("toQuery throws for a non-numeric value", () => {
    const callWithText = () => rankFilter().toQuery("notanumber", "<=");
    expect(callWithText).toThrow(/Invalid rank value: notanumber/);
  });

  test("supportsComparison is true", () => {
    expect(rankFilter().supportsComparison).toBe(true);
  });
});

// Unit tests for the registration helper itself.
describe("registerCountryRankFilters", () => {
  // Keys these tests may add; removed again after every test.
  const TEMPORARY_KEYS = ["germany_rank", "global_rank"];

  afterEach(() => {
    for (const key of TEMPORARY_KEYS) {
      delete BUCKET_FILTERS[key];
    }
  });

  test("adds filter keys for each supplied column", () => {
    registerCountryRankFilters(["germany_rank", "global_rank"]);
    expect(BUCKET_FILTERS["germany_rank"]).toBeDefined();
    expect(BUCKET_FILTERS["global_rank"]).toBeDefined();
  });

  test("registered filter has the correct key and countryColumn", () => {
    registerCountryRankFilters(["germany_rank"]);
    const { key, countryColumn } = BUCKET_FILTERS["germany_rank"];
    expect(key).toBe("germany_rank");
    expect(countryColumn).toBe("germany_rank");
  });

  test("calling with an empty array adds nothing", () => {
    const countKeys = () => Object.keys(BUCKET_FILTERS).length;
    const keysBefore = countKeys();
    registerCountryRankFilters([]);
    expect(countKeys()).toBe(keysBefore);
  });
});
159 changes: 159 additions & 0 deletions server/reportmanager/management/commands/import_country_ranks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from logging import getLogger

from django.conf import settings
from django.utils import timezone
from google.cloud import bigquery
from google.oauth2 import service_account

from django.core.management import BaseCommand

from reportmanager.models import Bucket, BucketCountryRank
from reportmanager.utils import normalize_domain

LOG = getLogger("reportmanager.import_country_ranks")


class Command(BaseCommand):
    """Import CrUX country rank data from BigQuery into ``BucketCountryRank``.

    Without ``--domains`` this is a full import: ranks are refreshed for every
    bucket with a non-empty ``domain_normalized``, and rows not refreshed by
    this run are deleted afterwards. With ``--domains`` it is a partial import
    that only fills in ranks for matching buckets that have none yet, and it
    never deletes anything.
    """

    help = "Import CrUX country rank data from BigQuery into BucketCountryRank"

    def add_arguments(self, parser):
        """Register the ``--bq-project`` and ``--domains`` options."""
        parser.add_argument(
            "--bq-project",
            default=None,
            help="Override the BigQuery project (default: settings.BIGQUERY_PROJECT)",
        )
        parser.add_argument(
            "--domains",
            nargs="+",
            default=None,
            help="Limit import to these specific domains. If omitted, imports for all bucket domains.",
        )

    def handle(
        self, bq_project: str | None, domains: list[str] | None, **options: object
    ) -> None:
        """Run the import.

        Args:
            bq_project: BigQuery project to query; falls back to
                ``settings.BIGQUERY_PROJECT`` when falsy.
            domains: Domains to restrict the import to (partial import), or
                ``None`` for a full import over all bucket domains.
            **options: Remaining command options; unused here.
        """
        project = bq_project or settings.BIGQUERY_PROJECT

        # Explicit credentials are only built when a service account is
        # configured; otherwise the client is created with the project alone.
        params: dict = {"project": project}
        if svc_acct := getattr(settings, "BIGQUERY_SERVICE_ACCOUNT", None):
            params["credentials"] = (
                service_account.Credentials.from_service_account_info(
                    svc_acct,
                    scopes=[
                        "https://www.googleapis.com/auth/bigquery",
                        "https://www.googleapis.com/auth/drive",
                    ],
                )
            )

        client = bigquery.Client(**params)
        crux_dataset_fn = f"`{project}.webcompat_knowledge_base.CRUX_DATASET`"

        partial = domains is not None
        if partial:
            # Normalize incoming domains to match the domain_normalized field
            domains = list({nd for d in domains if (nd := normalize_domain(d))})
            # Only import for buckets that don't already have rank data.
            # The exclusion is keyed on bucket id, not on domain: several
            # buckets can share one domain, and a new bucket must still get
            # its ranks when a sibling bucket on that domain is already ranked.
            ranked_bucket_ids = set(
                BucketCountryRank.objects.filter(
                    bucket__domain_normalized__in=domains
                ).values_list("bucket_id", flat=True)
            )
            buckets = list(
                Bucket.objects.filter(domain_normalized__in=domains)
                .exclude(id__in=ranked_bucket_ids)
                .only("id", "domain_normalized")
            )
        else:
            # Default: import for all bucket domains.
            buckets = list(
                Bucket.objects.exclude(domain_normalized__isnull=True)
                .exclude(domain_normalized="")
                .only("id", "domain_normalized")
            )

        # Buckets may share a domain; send each host to BigQuery only once.
        domains = sorted({b.domain_normalized for b in buckets})

        if not domains:
            LOG.info("No buckets with a normalized domain — nothing to import")
            return

        LOG.info("Querying ranks for %d bucket domains", len(domains))

        # The host list is passed as a query parameter rather than being
        # interpolated into the SQL text.
        query = (
            f"SELECT * EXCEPT (yyyymm) "
            f"FROM `{project}.crux_imported.host_min_ranks` "
            f"WHERE yyyymm = {crux_dataset_fn}() "
            f"AND host IN UNNEST(@domains)"
        )
        job_config = bigquery.QueryJobConfig(
            query_parameters=[
                bigquery.ArrayQueryParameter("domains", "STRING", domains)
            ]
        )
        rows = client.query(query, job_config=job_config).result()

        # Discover rank columns from result schema (anything ending in _rank)
        rank_cols = [f.name for f in rows.schema if f.name.endswith("_rank")]

        if not rank_cols:
            LOG.warning("No rank columns found in host_min_ranks result schema")
            return

        LOG.info("Found %d rank columns: %s", len(rank_cols), rank_cols)

        # Build host -> {country_col: rank} dict (skip NULL ranks)
        host_ranks: dict[str, dict[str, int]] = {}
        for row in rows:
            host = row["host"]
            ranks = {col: row[col] for col in rank_cols if row[col] is not None}
            if ranks:
                host_ranks[host] = ranks

        LOG.info("Loaded rank data for %d hosts", len(host_ranks))

        # A single timestamp marks every row written by this run, so the
        # stale-row cleanup below can tell refreshed rows from old ones.
        now = timezone.now()
        to_upsert: list[BucketCountryRank] = []

        for bucket in buckets:
            ranks = host_ranks.get(bucket.domain_normalized)
            if ranks:
                for country, rank in ranks.items():
                    to_upsert.append(
                        BucketCountryRank(
                            bucket_id=bucket.id,
                            country=country,
                            rank=rank,
                            updated_at=now,
                        )
                    )

        # Upsert in batches of 1000
        upserted_count = 0
        batch_size = 1000
        for i in range(0, len(to_upsert), batch_size):
            batch = to_upsert[i : i + batch_size]
            BucketCountryRank.objects.bulk_create(
                batch,
                update_conflicts=True,
                update_fields=["rank", "updated_at"],
                unique_fields=["bucket", "country"],
            )
            upserted_count += len(batch)

        # Only clean up stale rows on a full import. The partial (--domains)
        # path only fills in missing data, so there's nothing to clean up.
        # NOTE(review): when the query returns no usable rows, nothing carries
        # this run's timestamp and every existing row is deleted. Confirm that
        # this is the intended outcome.
        if not partial:
            deleted_count, _ = BucketCountryRank.objects.exclude(
                updated_at=now
            ).delete()
        else:
            deleted_count = 0

        LOG.info(
            "import_country_ranks complete: %d rank columns, %d buckets processed, "
            "%d rows upserted, %d stale rows deleted",
            len(rank_cols),
            len(buckets),
            upserted_count,
            deleted_count,
        )
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from itertools import batched
from logging import getLogger

from django.core.management import BaseCommand
from django.core.management import BaseCommand, call_command
from django.utils import timezone

from reportmanager.clustering.ClusterBucketManager import (
Expand Down Expand Up @@ -246,6 +246,9 @@ def run_triage(job: ClusteringJob) -> None:
f"Triage completed successfully. Created {buckets_created} cluster buckets and {fallback_buckets} domain buckets." # noqa
)

if domains:
call_command("import_country_ranks", domains=list(domains))

except Exception as e:
complete_job(job, success=False, error=str(e))
raise
Expand Down
Loading