diff --git a/jobs/webcompat-kb/data/sql/interventions/meta.toml b/jobs/webcompat-kb/data/sql/interventions/meta.toml new file mode 100644 index 00000000..86b05d6a --- /dev/null +++ b/jobs/webcompat-kb/data/sql/interventions/meta.toml @@ -0,0 +1,2 @@ +name = "interventions" +description="Data about shipping interventions" \ No newline at end of file diff --git a/jobs/webcompat-kb/data/sql/interventions/tables/interventions/meta.toml b/jobs/webcompat-kb/data/sql/interventions/tables/interventions/meta.toml new file mode 100644 index 00000000..52759608 --- /dev/null +++ b/jobs/webcompat-kb/data/sql/interventions/tables/interventions/meta.toml @@ -0,0 +1,2 @@ +name = "interventions" +etl = ["interventions"] \ No newline at end of file diff --git a/jobs/webcompat-kb/data/sql/interventions/tables/interventions/table.toml b/jobs/webcompat-kb/data/sql/interventions/tables/interventions/table.toml new file mode 100644 index 00000000..fdc81e5c --- /dev/null +++ b/jobs/webcompat-kb/data/sql/interventions/tables/interventions/table.toml @@ -0,0 +1,59 @@ +[bug] +type = "INTEGER" +mode = "REQUIRED" + +[issue] +type = "STRING" +mode = "REQUIRED" + +[label] +type = "STRING" +mode = "REQUIRED" + +[css] +type = "RECORD" +mode = "OPTIONAL" + +[css.id] +type = "STRING" +mode = "REQUIRED" + +[css.src] +type = "STRING" +mode = "REQUIRED" + +[max_version] +type = "FLOAT" +mode = "OPTIONAL" + +[min_version] +type = "FLOAT" +mode = "OPTIONAL" + +[not_channels] +type = "STRING" +mode = "REPEATED" + +[not_platforms] +type = "STRING" +mode = "REPEATED" + +[only_channels] +type = "STRING" +mode = "REPEATED" + +[platforms] +type = "STRING" +mode = "REPEATED" + +[ua_string] +type = "RECORD" +mode = "OPTIONAL" + +[ua_string.change] +type = "STRING" +mode = "REQUIRED" + +[ua_string.version] +type = "STRING" +mode = "OPTIONAL" diff --git a/jobs/webcompat-kb/webcompat_kb/github.py b/jobs/webcompat-kb/webcompat_kb/github.py new file mode 100644 index 00000000..3ca35588 --- /dev/null +++ 
class GitHub:
    """Small client for the GitHub REST API endpoints used by the ETL jobs.

    Requests go through the ``get_json`` / ``get_paginated_json`` helpers and
    every returned item is validated into one of this module's pydantic
    models.
    """

    def __init__(self, token: Optional[str]):
        """Remember the API token.

        Args:
            token: Value sent as a bearer credential, or ``None`` to send no
                ``Authorization`` header at all.
        """
        self.token = token

    def headers(self) -> Mapping[str, str]:
        """Return the HTTP headers attached to every request.

        The API version header is always present; ``Authorization`` is only
        added when a token was supplied.
        """
        headers = {"X-GitHub-Api-Version": "2022-11-28"}
        if self.token is not None:
            headers["Authorization"] = f"Bearer {self.token}"
        return headers

    def issues(
        self,
        repo: str,
        labels: Iterable[str],
        last_updated: Optional[datetime],
        state: Optional[str] = "all",
    ) -> Sequence[GitHubIssue]:
        """List the issues of ``repo``, following pagination.

        Args:
            repo: Repository in ``org/repo`` form.
            labels: Label names, sent as one comma-separated filter. ``None``
                is tolerated and omits the filter.
            last_updated: When given, sent as the ``since`` filter in ISO
                format.
            state: Issue state filter. ``None`` omits the parameter so the
                server-side default applies.

        Returns:
            The validated issues from all result pages.
        """
        query: dict[str, str] = {}
        if state is not None:
            # urlencode() would serialise None as the literal text "None",
            # which is not a valid state, so leave the parameter out instead.
            query["state"] = state
        if labels is not None:
            query["labels"] = ",".join(labels)
        if last_updated is not None:
            query["since"] = last_updated.isoformat()

        url = f"https://api.github.com/repos/{repo}/issues?{urlencode(query)}"
        return [
            GitHubIssue.model_validate(item)
            for item in get_paginated_json(url, self.headers())
        ]

    def issue_comments(
        self, issue: GitHubIssue, all_pages: bool = False
    ) -> Sequence[GitHubComment]:
        """Return the comments of ``issue``.

        Args:
            issue: Issue whose ``comments_url`` is fetched.
            all_pages: When false only a single request is made; when true
                pagination is followed.
        """
        if not all_pages:
            comments = get_json(issue.comments_url, self.headers())
            # Narrow the generic JSON type: a comment listing must be a list.
            assert isinstance(comments, list)
        else:
            comments = get_paginated_json(issue.comments_url, self.headers())
        return [GitHubComment.model_validate(item) for item in comments]

    def repository_contents(self, repo: str, path: str) -> Sequence[GitHubContentTree]:
        """List the entries found at ``path`` inside ``repo``.

        Args:
            repo: Repository in ``org/repo`` form.
            path: Path inside the repository, with or without a leading
                slash. An empty string addresses the repository root.
        """
        # startswith() is safe for an empty path, unlike indexing path[0].
        if not path.startswith("/"):
            path = f"/{path}"
        url = f"https://api.github.com/repos/{repo}/contents{path}"
        return [
            GitHubContentTree.model_validate(item)
            for item in get_paginated_json(url, self.headers())
        ]
None - color: Optional[str] = None - default: Optional[bool] = None - - -class GitHubIssue(BaseModel): - assignee: Optional[GitHubUser] = None - body: str - closed_at: Optional[datetime] = None - comments: int - comments_url: str - draft: Optional[bool] = None - events_url: str - html_url: str - id: int - labels: list[str | GitHubLabel] - labels_url: str - number: int - repository_url: str - state: str - title: str - url: str - user: Optional[GitHubUser] = None - created_at: datetime - updated_at: datetime - - -class GitHubComment(BaseModel): - id: int - body: str - user: GitHubUser - created_at: datetime - updated_at: datetime - - class InteropRow(BaseModel): year: int issue: int @@ -87,46 +41,6 @@ class InteropYear: interop_years = [InteropYear(2026, datetime(2025, 9, 4))] -class GitHub: - def __init__(self, token: Optional[str]): - self.token = token - - def headers(self) -> Mapping[str, str]: - headers = {"X-GitHub-Api-Version": "2022-11-28"} - if self.token is not None: - headers["Authorization"] = f"Bearer {self.token}" - return headers - - def issues( - self, - repo: str, - labels: Iterable[str], - last_updated: Optional[datetime], - state: Optional[str] = "all", - ) -> Sequence[GitHubIssue]: - query = {"state": state} - if labels is not None: - query["labels"] = ",".join(labels) - if last_updated is not None: - query["since"] = last_updated.isoformat() - - url = f"https://api.github.com/repos/{repo}/issues?{urlencode(query)}" - return [ - GitHubIssue.model_validate(item) - for item in get_paginated_json(url, self.headers()) - ] - - def issue_comments( - self, issue: GitHubIssue, all_pages: bool = False - ) -> Sequence[GitHubComment]: - if not all_pages: - comments = get_json(issue.comments_url, self.headers()) - assert isinstance(comments, list) - else: - comments = get_paginated_json(issue.comments_url, self.headers()) - return [GitHubComment.model_validate(item) for item in comments] - - def get_last_import( client: BigQuery, import_runs_table: 
class InterventionBug(BaseModel):
    """One entry of an intervention's ``bugs`` mapping.

    Carries the issue string plus the URL pattern lists; at least one of the
    four pattern lists has to be present and non-empty.
    """

    issue: str
    matches: Optional[MatchesOrBlocks] = None
    exclude_matches: Optional[MatchesOrBlocks] = None
    blocks: Optional[MatchesOrBlocks] = None
    exclude_blocks: Optional[MatchesOrBlocks] = None

    @model_validator(mode="after")
    def check_required(self) -> "InterventionBug":
        """Reject entries in which every pattern list is missing or empty."""
        pattern_lists = (
            self.matches,
            self.exclude_matches,
            self.blocks,
            self.exclude_blocks,
        )
        if any(pattern_lists):
            return self
        raise ValueError(
            "at least one of matches, exclude_matches, blocks, "
            "exclude_blocks is required"
        )
Optional[bool] = None + match_origin_as_fallback: Optional[bool] = None + + +class HideAlertsConfig(BaseModel): + alerts: str | list[str] + all_frames: Optional[bool] = None + match_origin_as_fallback: Optional[bool] = None + + +HideAlerts = str | list[str] | HideAlertsConfig + + +class HiddenMessage(BaseModel): + message: str + container: str + click_adjacent: Optional[str] = None + + +class HideMessagesConfig(BaseModel): + messages: list[HiddenMessage] + all_frames: Optional[bool] = None + match_origin_as_fallback: Optional[bool] = None + + +HideMessages = HiddenMessage | list[HiddenMessage] | HideMessagesConfig + + +class ModifyMetaViewportChangeSpecObject(BaseModel): + value: Optional[str] + only_if_equals: Optional[str | list[str]] = None + only_if_not_equals: Optional[str | list[str]] = None + + +ModifyMetaViewportChangeSpec = str | ModifyMetaViewportChangeSpecObject | None + + +class ModifyMetaViewportChanges(BaseModel): + height: Optional[ModifyMetaViewportChangeSpec] = None + initial_scale: Optional[ModifyMetaViewportChangeSpec] = Field( + default=None, alias="initial-scale" + ) + interactive_widget: Optional[ModifyMetaViewportChangeSpec] = Field( + default=None, alias="interactive-widget" + ) + maximum_scale: Optional[ModifyMetaViewportChangeSpec] = Field( + default=None, alias="maximum-scale" + ) + minimum_scale: Optional[ModifyMetaViewportChangeSpec] = Field( + default=None, alias="minimum-scale" + ) + user_scalable: Optional[ModifyMetaViewportChangeSpec] = Field( + default=None, alias="user-scalable" + ) + viewport_fit: Optional[ModifyMetaViewportChangeSpec] = Field( + default=None, alias="viewport-fit" + ) + width: Optional[ModifyMetaViewportChangeSpec] = None + + +class ModifyMetaViewportConfig(BaseModel): + modify: ModifyMetaViewportChanges + all_frames: Optional[bool] = None + match_origin_as_fallback: Optional[bool] = None + + +ModifyMetaViewport = ModifyMetaViewportChanges | ModifyMetaViewportConfig + + +class 
ReplaceStringInRequestEntry(BaseModel): + find: str + replace: str + urls: list[str] + types: Optional[list[str]] = None + + +class RunScriptBeforeRequest(BaseModel): + message: str + script: str + urls: list[str] + + +class UAString(BaseModel): + change: str + version: Optional[str] + + +class InterventionData(BaseModel): + alter_request_headers: Optional[list[AlterHeader]] = None + alter_response_headers: Optional[list[AlterHeader]] = None + content_scripts: Optional[ContentScripts] = None + css: Optional[list[str] | CssIntervention] = None + hide_alerts: Optional[HideAlerts] = None + hide_messages: Optional[HideMessages] = None + modify_meta_viewport: Optional[ModifyMetaViewport] = None + max_version: Optional[float] = None + min_version: Optional[float] = None + not_channels: Optional[list[str]] = None + not_platforms: Optional[list[str]] = None + only_channels: Optional[list[str]] = None + platforms: Optional[list[str]] = None + pref_check: Optional[dict[str, bool]] = None + replace_string_in_request: Optional[list[ReplaceStringInRequestEntry]] = None + run_script_before_request: Optional[RunScriptBeforeRequest] = None + skip_if: Optional[list[str]] = None + ua_string: Optional[list[str | UAString]] = None + + @model_validator(mode="after") + def check_platforms(self) -> "InterventionData": + if self.platforms is None and self.not_platforms is None: + raise ValueError("at least one of platforms, not_platforms is required") + return self + + +class Intervention(BaseModel): + bugs: dict[str, InterventionBug] + interventions: list[InterventionData] + label: str + css: Optional[dict[str, str]] = None + + +@dataclass +class CSSStruct: + id: str + src: str + + +@dataclass +class UAStringStruct: + change: str + version: Optional[str] = None + + +@dataclass +class MatchTypeStruct: + pass + + +@dataclass +class InterventionRow: + bug: int + issue: str + + label: str + css: Optional[list[CSSStruct]] = None + + # match_type: MatchTypeStruct + max_version: Optional[float] 
= None + min_version: Optional[float] = None + not_channels: Optional[list[str]] = None + not_platforms: Optional[list[str]] = None + only_channels: Optional[list[str]] = None + platforms: Optional[list[str]] = None + + ua_string: Optional[list[UAStringStruct]] = None + + @classmethod + def from_intervention(cls, src: Intervention) -> list[Self]: + rv = [] + for bug_id, bug in src.bugs.items(): + for intervention in src.interventions: + rv.append( + cls( + bug=int(bug_id), + issue=bug.issue, + label=src.label, + css=[CSSStruct(id, src) for id, src in src.css.items()] + if src.css is not None + else None, + # match_type=MatchTypeStruct(), + max_version=intervention.max_version, + min_version=intervention.min_version, + not_platforms=intervention.not_platforms, + only_channels=intervention.only_channels, + platforms=intervention.platforms, + ua_string=[ + UAStringStruct(item.change, item.version) + if isinstance(item, UAString) + else UAStringStruct(item) + for item in intervention.ua_string + ] + if intervention.ua_string is not None + else None, + ) + ) + return rv + + +def get_all_interventions(gh_client: GitHub, repo: str) -> Sequence[Intervention]: + rv = [] + for item in gh_client.repository_contents( + repo, "/browser/extensions/webcompat/data/interventions" + ): + if item.type == "file": + rv.append(Intervention.model_validate(get_json(item.download_url))) + return rv + + +def update_interventions( + project: Project, client: BigQuery, gh_client: GitHub, repo: str +) -> None: + rows = [] + for intervention in get_all_interventions(gh_client, repo): + rows.extend(InterventionRow.from_intervention(intervention)) + + table = project["interventions"]["interventions"].table() + client.write_table( + table, table.schema, [asdict(row) for row in rows], overwrite=True + ) + + +class InterventionsJob(EtlJob): + name = "interventions" + + @classmethod + def add_arguments(cls, parser: argparse.ArgumentParser) -> None: + group = parser.add_argument_group( + 
class InterventionsJob(EtlJob):
    """ETL job that rewrites the interventions table from a Firefox repository."""

    name = "interventions"

    @classmethod
    def add_arguments(cls, parser: argparse.ArgumentParser) -> None:
        """Register this job's command line options on ``parser``."""
        group = parser.add_argument_group(
            title="Interventions", description="Interventions arguments"
        )
        group.add_argument(
            "--firefox-repo",
            type=repo_arg,
            default="mozilla-firefox/firefox",
            help="Firefox repository in the format org/repo",
        )

    def default_dataset(self, context: Context) -> str:
        """Return the name of the dataset this job writes to."""
        return "interventions"

    def main(self, context: Context) -> None:
        """Fetch the intervention definitions and overwrite the table."""
        # NOTE(review): ``github_token`` is not registered by add_arguments()
        # above; presumably another job or the shared parser defines it -
        # confirm before running this job on its own.
        gh_client = GitHub(context.args.github_token)
        update_interventions(
            context.project,
            context.bq_client,
            gh_client,
            context.args.firefox_repo,
        )