From 13587e17c23bfcabac5868eb81fb3ff3e3b2c94e Mon Sep 17 00:00:00 2001 From: Tessa Heidkamp Date: Thu, 9 Apr 2026 13:01:55 +0200 Subject: [PATCH] Add validate-logins tool Co-authored-by: Johannes Salas Schmidt --- CHANGELOG.md | 1 + Cargo.lock | 9 ++++ Cargo.toml | 1 + components/logins/src/login.rs | 4 +- tools/validate-logins/Cargo.toml | 12 +++++ tools/validate-logins/README.md | 16 ++++++ tools/validate-logins/src/main.rs | 87 +++++++++++++++++++++++++++++++ 7 files changed, 128 insertions(+), 2 deletions(-) create mode 100644 tools/validate-logins/Cargo.toml create mode 100644 tools/validate-logins/README.md create mode 100644 tools/validate-logins/src/main.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 4b745700fd..f45ed526f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ ### Logins - New `allow_empty_passwords` feature flag to allow storing logins with empty passwords. This feature is intended to be enabled on desktop during the migration. - Add `ignore_form_action_origin_validation_errors` feature flag that allows logins with non-URL `form_action_origin` values (e.g. "email", "UserCode") to be imported without error. URL normalization for valid URLs is still applied. 
+- Add `validate-logins` CLI tool that reads NDJSON login entries from stdin and reports validation/fixup results ### Merino Client - Added a client for the merino suggest endpoint diff --git a/Cargo.lock b/Cargo.lock index e78b07aaa6..b701975e64 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5103,6 +5103,15 @@ dependencies = [ "serde", ] +[[package]] +name = "validate-logins" +version = "0.1.0" +dependencies = [ + "logins", + "serde", + "serde_json", +] + [[package]] name = "vcpkg" version = "0.2.15" diff --git a/Cargo.toml b/Cargo.toml index 9f753f9845..3da22bbdc6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,7 @@ members = [ "tools/generate-rust-dashboards", "tools/start-bindings", "tools/uniffi-bindgen-library-mode", + "tools/validate-logins", "automation/swift-components-docs", "examples/*/", diff --git a/components/logins/src/login.rs b/components/logins/src/login.rs index 6ce20056f4..faa7dae75a 100644 --- a/components/logins/src/login.rs +++ b/components/logins/src/login.rs @@ -285,7 +285,7 @@ use sync_guid::Guid; use url::Url; // LoginEntry fields that are stored in cleartext -#[derive(Debug, Clone, Hash, PartialEq, Eq, Default)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, Default, Deserialize)] pub struct LoginFields { pub origin: String, pub form_action_origin: Option<String>, @@ -368,7 +368,7 @@ pub enum BulkResultEntry { } /// A login handed over to the store API; ie a login not yet persisted -#[derive(Debug, Clone, Hash, PartialEq, Eq, Default)] +#[derive(Debug, Clone, Hash, PartialEq, Eq, Default, Deserialize)] pub struct LoginEntry { // login fields pub origin: String, diff --git a/tools/validate-logins/Cargo.toml b/tools/validate-logins/Cargo.toml new file mode 100644 index 0000000000..bd7d1f28ac --- /dev/null +++ b/tools/validate-logins/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "validate-logins" +version = "0.1.0" +authors = ["sync-team@mozilla.com"] +license = "MPL-2.0" +edition = "2021" +publish = false + +[dependencies] +logins = { path = "../../components/logins", features = 
["ignore_form_action_origin_validation_errors"] } +serde = "1" +serde_json = "1" diff --git a/tools/validate-logins/README.md b/tools/validate-logins/README.md new file mode 100644 index 0000000000..976d5d6882 --- /dev/null +++ b/tools/validate-logins/README.md @@ -0,0 +1,16 @@ +# Validate Logins + +Scratch tooling for origin validation used to investigate how login validation +behaves against real-world data from telemetry. + +## Input format + +The tool reads newline-delimited JSON (NDJSON) from stdin. Each line is one login entry: + +```json +{"origin":"https://example.com","form_action_origin":"example.com","password":"p","username":"u","username_field":"u","password_field":"p"} +``` + +Real-world origins vary widely — bare hostnames (`ftp.example.com`), missing schemes +(`example.com`), FTP/SSH entries from legacy extensions (e.g. FireFTP), and +`chrome://` origins are all common. The validator checks and attempts to fix each one. diff --git a/tools/validate-logins/src/main.rs b/tools/validate-logins/src/main.rs new file mode 100644 index 0000000000..0140c6c660 --- /dev/null +++ b/tools/validate-logins/src/main.rs @@ -0,0 +1,87 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! Reads nd-json (one LoginEntry JSON object per line) from stdin and reports +//! validation/fixup results. Not meant to land; scratch tooling only. +//! +//! Usage: +//! cargo run --bin validate-logins < entries.ndjson +//! 
cat entries.ndjson | cargo run --bin validate-logins
+
+use logins::{LoginEntry, ValidateAndFixup};
+use std::collections::HashMap;
+use std::io::{self, BufRead};
+
+/// Returns `part` as a percentage of `total`.
+///
+/// Yields `0.0` when `total` is zero so that an empty input prints `0.00%`
+/// in the summary instead of `NaN%`.
+fn percent(part: usize, total: usize) -> f64 {
+    if total == 0 {
+        0.0
+    } else {
+        100.0 * part as f64 / total as f64
+    }
+}
+
+/// Reads one JSON-encoded `LoginEntry` per line from stdin, runs
+/// `maybe_fixup` on each and prints a per-line verdict to stdout.
+///
+/// Verdicts are `ok` (`Ok(None)`), `fixed` (`Ok(Some(_))`) or `invalid`
+/// (`Err(_)`). Blank lines are skipped; unreadable or unparsable lines are
+/// reported on stderr and excluded from the totals. A summary, including a
+/// per-message breakdown of the invalid entries, is written to stderr once
+/// the input is exhausted.
+fn main() {
+    let stdin = io::stdin();
+    let mut count = 0usize;
+    let mut ok = 0usize;
+    let mut fixed = 0usize;
+    // Validation error message -> number of entries rejected with it.
+    let mut invalid_counts: HashMap<String, usize> = HashMap::new();
+
+    // `i` is the zero-based index of the input line, used as the report label.
+    for (i, line) in stdin.lock().lines().enumerate() {
+        let line = match line {
+            Ok(l) => l,
+            Err(e) => {
+                eprintln!("[{i}] io error: {e}");
+                // A line that is not valid UTF-8 surfaces as `InvalidData`
+                // and only affects that line, so keep going. Any other error
+                // (e.g. stdin redirected from a directory) would be returned
+                // again on every read, so stop instead of looping forever.
+                if e.kind() == io::ErrorKind::InvalidData {
+                    continue;
+                }
+                break;
+            }
+        };
+        let line = line.trim();
+        if line.is_empty() {
+            continue;
+        }
+
+        let entry: LoginEntry = match serde_json::from_str(line) {
+            Ok(e) => e,
+            Err(e) => {
+                eprintln!("[{i}] parse error: {e}");
+                continue;
+            }
+        };
+
+        match entry.maybe_fixup() {
+            Ok(None) => {
+                println!("[{i}] ok");
+                ok += 1;
+            }
+            Ok(Some(fixed_entry)) => {
+                println!("[{i}] fixed: {fixed_entry:?}");
+                fixed += 1;
+            }
+            Err(e) => {
+                println!("[{i}] invalid: {e}");
+                *invalid_counts.entry(e.to_string()).or_default() += 1;
+            }
+        }
+
+        // Only successfully parsed entries contribute to the totals.
+        count += 1;
+    }
+
+    let invalid_total: usize = invalid_counts.values().sum();
+    let valid_total = ok + fixed;
+
+    eprintln!("\n--- summary ({count} entries) ---");
+    eprintln!("  ok:      {ok} ({:.2}%)", percent(ok, count));
+    eprintln!("  fixed:   {fixed} ({:.2}%)", percent(fixed, count));
+    eprintln!(
+        "  valid:   {valid_total} ({:.2}%)",
+        percent(valid_total, count)
+    );
+    eprintln!(
+        "  invalid: {invalid_total} ({:.2}%)",
+        percent(invalid_total, count)
+    );
+    if !invalid_counts.is_empty() {
+        eprintln!("\n  breakdown:");
+        let mut breakdown: Vec<_> = invalid_counts.iter().collect();
+        // Most frequent first; ties are ordered by message so the report is
+        // stable across runs despite HashMap's arbitrary iteration order.
+        breakdown.sort_by(|a, b| b.1.cmp(a.1).then_with(|| a.0.cmp(b.0)));
+        for (msg, n) in breakdown {
+            eprintln!("    {n:>6}  {msg}");
+        }
+    }
+}