From 724a1264e22763c24c145d4dcd8805563ba0fa1b Mon Sep 17 00:00:00 2001 From: Alon Kochba Date: Mon, 23 Mar 2026 10:43:29 +0200 Subject: [PATCH 01/14] feat: add /v1/geo-breakdown endpoint for geographic CWV breakdown (#94) * feat: add /v1/geo-breakdown endpoint for geographic CWV breakdown Adds a new controller and route that returns core_web_vitals data for all geographies for a given technology. Unlike /cwv, this endpoint omits the geo filter so callers can build a geographic breakdown chart without issuing one request per country. * refactor: merge geo-breakdown into reportController factory Add crossGeo option to createReportController; delete standalone geoBreakdownController.js. Endpoint now returns a single-month snapshot (latest by default, or the month specified by the end param). --------- Co-authored-by: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> --- src/controllers/reportController.js | 42 +++++++++++++++++------------ src/index.js | 7 +++++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/controllers/reportController.js b/src/controllers/reportController.js index 3fd4b53..248cd44 100644 --- a/src/controllers/reportController.js +++ b/src/controllers/reportController.js @@ -38,9 +38,11 @@ const REPORT_CONFIGS = { /** * Generic report data controller factory - * Creates controllers for adoption, pageWeight, lighthouse, and cwv data + * Creates controllers for adoption, pageWeight, lighthouse, and cwv data. + * Pass { crossGeo: true } to get a cross-geography snapshot (omits geo filter, + * includes geo in projection, returns a single month of data). */ -const createReportController = (reportType) => { +const createReportController = (reportType, { crossGeo = false } = {}) => { const config = REPORT_CONFIGS[reportType]; if (!config) { throw new Error(`Unknown report type: ${reportType}`); @@ -79,20 +81,10 @@ const createReportController = (reportType) => { // Validate and process technology array const techArray = validateArrayParameter(technologyParam, 'technology'); - // Handle 'latest' date substitution - let startDate = params.start; - if (startDate === 'latest') { - startDate = await getLatestDate(firestore, config.table); - } - // Build Firestore query let query = firestore.collection(config.table); - // Apply required filters - query = query.where('geo', '==', geoParam); query = query.where('rank', '==', rankParam); - - // Apply technology filter with batch processing query = query.where('technology', 'in', techArray); // Apply version filter with special handling for 'ALL' case @@ -102,12 +94,27 @@ const createReportController = (reportType) => { //query = query.where('version', '==', 'ALL'); } - // Apply date filters - if (startDate) query = query.where('date', '>=', startDate); - if (params.end) query = query.where('date', '<=', params.end); + if (crossGeo) { + // Cross-geo: single-month snapshot, all geographies included. + // Use 'end' param if provided, otherwise default to latest available date. + const snapshotDate = params.end || await getLatestDate(firestore, config.table); + query = query.where('date', '==', snapshotDate); + query = query.select('date', 'technology', 'geo', config.dataField); + } else { + // Normal time-series: filter by geo, apply date range, no geo in projection. + query = query.where('geo', '==', geoParam); - // Apply field projection to optimize query - query = query.select('date', 'technology', config.dataField); + // Handle 'latest' date substitution + let startDate = params.start; + if (startDate === 'latest') { + startDate = await getLatestDate(firestore, config.table); + } + + if (startDate) query = query.where('date', '>=', startDate); + if (params.end) query = query.where('date', '<=', params.end); + + query = query.select('date', 'technology', config.dataField); + } // Execute query const snapshot = await query.get(); @@ -132,5 +139,6 @@ export const listAdoptionData = createReportController('adoption'); export const listCWVTechData = createReportController('cwv'); export const listLighthouseData = createReportController('lighthouse'); export const listPageWeightData = createReportController('pageWeight'); +export const listGeoBreakdownData = createReportController('cwv', { crossGeo: true }); diff --git a/src/index.js b/src/index.js index a5831b9..cfe2b32 100644 --- a/src/index.js +++ b/src/index.js @@ -13,6 +13,7 @@ const controllers = { ranks: null, geos: null, versions: null, + geoBreakdown: null, static: null }; @@ -42,6 +43,9 @@ const getController = async (name) => { case 'versions': controllers[name] = await import('./controllers/versionsController.js'); break; + case 'geoBreakdown': + controllers[name] = await import('./controllers/reportController.js'); + break; case 'static': controllers[name] = await import('./controllers/cdnController.js'); break; @@ -140,6 +144,9 @@ const handleRequest = async (req, res) => { } else if (pathname === '/v1/versions' && req.method === 'GET') { const { listVersions } = await getController('versions'); await listVersions(req, res); + } else if (pathname === '/v1/geo-breakdown' && req.method === 'GET') { + const { listGeoBreakdownData } = await getController('geoBreakdown'); + await listGeoBreakdownData(req, res); } else if (pathname.startsWith('/v1/static/') && req.method === 'GET') { // GCS proxy endpoint for reports files const filePath = decodeURIComponent(pathname.replace('/v1/static/', '')); From 6351949e74670945fbdda1dd6cfd8353044ed2a9 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 23 Mar 2026 23:17:15 +0100 Subject: [PATCH 02/14] test: add tests for /v1/geo-breakdown --- src/tests/routes.test.js | 43 ++++++++++++++++++++++++++++++++++++++++ test-api.sh | 23 +++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/tests/routes.test.js b/src/tests/routes.test.js index 6a627f6..8a93593 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -381,6 +381,49 @@ describe('API Routes', () => { }); }); + describe('GET /v1/geo-breakdown', () => { + it('should return geo breakdown data with default parameters', async () => { + const res = await request(app).get('/v1/geo-breakdown'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should return geo breakdown data for a specific technology', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology=WordPress'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should accept an end date parameter', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology=WordPress&end=2024-01-01'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should accept a rank parameter', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology=WordPress&rank=Top%201M'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should handle empty technology parameter (defaults to ALL)', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology='); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should handle CORS preflight requests', async () => { + const res = await request(app) + .options('/v1/geo-breakdown') + .set('Origin', 'http://example.com') + .set('Access-Control-Request-Method', 'GET') + .set('Access-Control-Request-Headers', 'Content-Type'); + + expect(res.statusCode).toEqual(204); + expect(res.headers['access-control-allow-origin']).toEqual('*'); + }); + }); + describe('Error Handling', () => { it('should return 404 for unknown endpoints', async () => { const res = await request(app).get('/v1/unknown-endpoint'); diff --git a/test-api.sh b/test-api.sh index ead560f..7809094 100755 --- a/test-api.sh +++ b/test-api.sh @@ -34,7 +34,7 @@ test_filter() { echo "Testing filter: ${description}" echo "URL: ${url}" - + response=$(curl -s -w "\n%{http_code}" "${url}") http_code=$(echo "$response" | tail -n1) body=$(echo "$response" | sed '$d') @@ -48,7 +48,7 @@ test_filter() { # Run the verification check using jq # The check should return "true" if it passes check_result=$(echo "$body" | jq "${filter_check}") - + if [[ "$check_result" != "true" ]]; then echo "Error: Filter verification failed for ${description}" echo "Verification expression: ${filter_check}" @@ -176,4 +176,23 @@ test_filter "/v1/categories" "" \ "length > 0" \ "Categories list is not empty" +# Test geo-breakdown endpoint +test_cors_preflight "/v1/geo-breakdown" +test_endpoint "/v1/geo-breakdown" "" +test_endpoint "/v1/geo-breakdown" "?technology=WordPress" +test_endpoint "/v1/geo-breakdown" "?technology=WordPress&rank=Top%201M" + +# Test geo-breakdown filter correspondences +test_filter "/v1/geo-breakdown" "" \ + "all(.[]; .technology == \"ALL\") and length > 0" \ + "Geo breakdown defaults (technology=ALL)" + +test_filter "/v1/geo-breakdown" "?technology=WordPress" \ + "all(.[]; .technology == \"WordPress\") and length > 0" \ + "Geo breakdown specific technology (WordPress)" + +test_filter "/v1/geo-breakdown" "?technology=WordPress" \ + "all(.[]; has(\"geo\")) and length > 0" \ + "Geo breakdown response includes geo field" + echo "API tests complete! All endpoints returned 200 and data corresponds to filters." From 1789b9e802f3aa067cb636ced421fdbf73d37557 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:07:49 +0100 Subject: [PATCH 03/14] fix: update CDN cache duration in setCommonHeaders function --- src/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.js b/src/index.js index cfe2b32..fc3c7f8 100644 --- a/src/index.js +++ b/src/index.js @@ -67,8 +67,8 @@ const setCORSHeaders = (res) => { const setCommonHeaders = (res) => { setCORSHeaders(res); res.setHeader('Content-Type', 'application/json'); - // Browser cache: 1 hour, CDN cache: 30 days - res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=2592000'); + // Browser cache: 1 hour, CDN cache: 1 day + res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); res.setHeader('Cloud-CDN-Cache-Tag', 'report-api'); res.setHeader('Timing-Allow-Origin', '*'); }; From 3b751a40ce94636bb4135cadbc18eeb52989ceaa Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:32:31 +0100 Subject: [PATCH 04/14] fix: update CDN cache tag and duration in response headers --- src/controllers/cdnController.js | 6 +++--- src/index.js | 2 +- src/tests/headers.test.js | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/controllers/cdnController.js b/src/controllers/cdnController.js index d34b9ed..b44576b 100644 --- a/src/controllers/cdnController.js +++ b/src/controllers/cdnController.js @@ -70,9 +70,9 @@ export const proxyReportsFile = async (req, res, filePath) => { // Set response headers res.setHeader('Content-Type', contentType); res.setHeader('Cross-Origin-Resource-Policy', 'cross-origin'); - res.setHeader('Cloud-CDN-Cache-Tag', 'bucket-proxy'); - // Browser cache: 1 hour, CDN cache: 30 days - res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=2592000'); + res.setHeader('Cache-Tag', 'bucket-proxy'); + // Browser cache: 1 hour, CDN cache: 1 days + res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); if (metadata.etag) { res.setHeader('ETag', metadata.etag); diff --git a/src/index.js b/src/index.js index fc3c7f8..91c61f1 100644 --- a/src/index.js +++ b/src/index.js @@ -69,7 +69,7 @@ const setCommonHeaders = (res) => { res.setHeader('Content-Type', 'application/json'); // Browser cache: 1 hour, CDN cache: 1 day res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); - res.setHeader('Cloud-CDN-Cache-Tag', 'report-api'); + res.setHeader('Cache-Tag', 'report-api'); res.setHeader('Timing-Allow-Origin', '*'); }; diff --git a/src/tests/headers.test.js b/src/tests/headers.test.js index d930d75..6673387 100644 --- a/src/tests/headers.test.js +++ b/src/tests/headers.test.js @@ -12,8 +12,8 @@ jest.unstable_mockModule('../controllers/cdnController.js', () => ({ proxyReportsFile: jest.fn((req, res) => { res.setHeader('Content-Type', 'application/json'); res.setHeader('Cross-Origin-Resource-Policy', 'cross-origin'); - res.setHeader('Cloud-CDN-Cache-Tag', 'bucket-proxy'); - res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=2592000'); + res.setHeader('Cache-Tag', 'bucket-proxy'); + res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); res.statusCode = 200; res.end(JSON.stringify({ mocked: true })); }) @@ -34,8 +34,8 @@ describe('CDN Headers', () => { const res = await request(app).get('/v1/technologies'); expect(res.statusCode).toEqual(200); - expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=2592000'); - expect(res.headers['cloud-cdn-cache-tag']).toBe('report-api'); + expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=86400'); + expect(res.headers['cache-tag']).toBe('report-api'); expect(res.headers['access-control-allow-origin']).toBe('*'); expect(res.headers['access-control-allow-headers']).toContain('Content-Type'); expect(res.headers['access-control-allow-headers']).toContain('If-None-Match'); @@ -46,8 +46,8 @@ describe('CDN Headers', () => { const res = await request(app).get('/v1/static/test.json'); expect(res.statusCode).toEqual(200); - expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=2592000'); - expect(res.headers['cloud-cdn-cache-tag']).toBe('bucket-proxy'); + expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=86400'); + expect(res.headers['cache-tag']).toBe('bucket-proxy'); expect(res.headers['cross-origin-resource-policy']).toBe('cross-origin'); }); @@ -55,7 +55,7 @@ describe('CDN Headers', () => { const res = await request(app).get('/'); expect(res.statusCode).toEqual(200); - expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=2592000'); - expect(res.headers['cloud-cdn-cache-tag']).toBe('report-api'); + expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=86400'); + expect(res.headers['cache-tag']).toBe('report-api'); }); }); From 0f7fe75dc3b0f401af1cd4b5bea361678b3324b1 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:34:36 +0100 Subject: [PATCH 05/14] feat: add ETag support for caching in report responses --- src/controllers/reportController.js | 16 ++++++++++--- src/index.js | 23 +------------------ src/utils/controllerHelpers.js | 35 ++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/controllers/reportController.js b/src/controllers/reportController.js index 248cd44..71ea30a 100644 --- a/src/controllers/reportController.js +++ b/src/controllers/reportController.js @@ -7,7 +7,9 @@ import { sendValidationError, getLatestDate, handleControllerError, - validateArrayParameter + validateArrayParameter, + generateETag, + isModified } from '../utils/controllerHelpers.js'; /** @@ -123,9 +125,17 @@ const createReportController = (reportType, { crossGeo = false } = {}) => { data.push(doc.data()); }); - // Send response + // Send response with ETag support + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } res.statusCode = 200; - res.end(JSON.stringify(data)); + res.end(jsonData); } catch (error) { handleControllerError(res, error, `fetching ${reportType} data`); diff --git a/src/index.js b/src/index.js index 91c61f1..ec23e22 100644 --- a/src/index.js +++ b/src/index.js @@ -1,5 +1,5 @@ -import crypto from 'crypto'; import functions from '@google-cloud/functions-framework'; +import { sendJSONResponse, isModified } from './utils/controllerHelpers.js'; // Dynamic imports for better performance - only load when needed const controllers = { @@ -73,27 +73,6 @@ const setCommonHeaders = (res) => { res.setHeader('Timing-Allow-Origin', '*'); }; -// Helper function to generate ETag -const generateETag = (jsonData) => { - return crypto.createHash('md5').update(jsonData).digest('hex'); -}; - -// Helper function to send JSON response with ETag support -const sendJSONResponse = (res, data, statusCode = 200) => { - const jsonData = JSON.stringify(data); - const etag = generateETag(jsonData); - - res.setHeader('ETag', `"${etag}"`); - res.statusCode = statusCode; - res.end(jsonData); -}; - -// Helper function to check if resource is modified -const isModified = (req, etag) => { - const ifNoneMatch = req.headers['if-none-match'] || (req.get && req.get('if-none-match')); - return !ifNoneMatch || ifNoneMatch !== `"${etag}"`; -}; - // Route handler function const handleRequest = async (req, res) => { try { diff --git a/src/utils/controllerHelpers.js b/src/utils/controllerHelpers.js index 03d2372..58583d6 100644 --- a/src/utils/controllerHelpers.js +++ b/src/utils/controllerHelpers.js @@ -1,3 +1,4 @@ +import crypto from 'crypto'; import { convertToArray } from './helpers.js'; /** @@ -99,6 +100,23 @@ const handleControllerError = (res, error, operation) => { })); }; +const generateETag = (jsonData) => { + return crypto.createHash('md5').update(jsonData).digest('hex'); +}; + +const sendJSONResponse = (res, data, statusCode = 200) => { + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + res.statusCode = statusCode; + res.end(jsonData); +}; + +const isModified = (req, etag) => { + const ifNoneMatch = req.headers['if-none-match'] || (req.get && req.get('if-none-match')); + return !ifNoneMatch || ifNoneMatch !== `"${etag}"`; +}; + /** * Generic query executor * Handles query execution and response for simple queries @@ -126,9 +144,17 @@ const executeQuery = async (req, res, collection, queryBuilder, dataProcessor = data = dataProcessor(data, params); } - // Send response + // Send response with ETag support + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } res.statusCode = 200; - res.end(JSON.stringify(data)); + res.end(jsonData); } catch (error) { // Handle validation errors specifically @@ -170,5 +196,8 @@ export { validateArrayParameter, handleControllerError, executeQuery, - validateTechnologyArray + validateTechnologyArray, + generateETag, + sendJSONResponse, + isModified }; From e210b141dcca0f56eb59196c80009f0fdda49bf6 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:36:35 +0100 Subject: [PATCH 06/14] test: add ETag header tests for /v1/technologies and /v1/adoption routes --- src/tests/routes.test.js | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/tests/routes.test.js b/src/tests/routes.test.js index 8a93593..5c065e1 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -455,6 +455,50 @@ describe('API Routes', () => { expect(res.headers).toHaveProperty('etag'); }); + it('should include ETag headers on executeQuery-based routes', async () => { + const res = await request(app).get('/v1/technologies'); + expect(res.statusCode).toEqual(200); + expect(res.headers).toHaveProperty('etag'); + expect(res.headers['etag']).toMatch(/^"[a-f0-9]+"$/); + }); + + it('should include ETag headers on reportController-based routes', async () => { + const res = await request(app).get('/v1/adoption'); + expect(res.statusCode).toEqual(200); + expect(res.headers).toHaveProperty('etag'); + expect(res.headers['etag']).toMatch(/^"[a-f0-9]+"$/); + }); + + it('should return 304 for executeQuery-based routes when ETag matches', async () => { + const first = await request(app).get('/v1/technologies'); + expect(first.statusCode).toEqual(200); + const etag = first.headers['etag']; + + const second = await request(app) + .get('/v1/technologies') + .set('If-None-Match', etag); + expect(second.statusCode).toEqual(304); + }); + + it('should return 304 for reportController-based routes when ETag matches', async () => { + const first = await request(app).get('/v1/adoption'); + expect(first.statusCode).toEqual(200); + const etag = first.headers['etag']; + + const second = await request(app) + .get('/v1/adoption') + .set('If-None-Match', etag); + expect(second.statusCode).toEqual(304); + }); + + it('should return 200 when If-None-Match does not match', async () => { + const res = await request(app) + .get('/v1/technologies') + .set('If-None-Match', '"stale-etag"'); + expect(res.statusCode).toEqual(200); + expect(res.headers).toHaveProperty('etag'); + }); + it('should include timing headers', async () => { const res = await request(app).get('/v1/technologies'); expect(res.headers['timing-allow-origin']).toEqual('*'); From 553a80ececc581f362e4e1bd749e5f2244595d2d Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Fri, 27 Mar 2026 23:44:18 +0100 Subject: [PATCH 07/14] feat: implement CWV distribution endpoint with BigQuery integration and add tests --- src/controllers/cwvDistributionController.js | 126 +++++++++++++++++++ src/index.js | 9 +- src/package-lock.json | 89 +++++++++++++ src/package.json | 1 + src/tests/routes.test.js | 54 +++++++- src/utils/db.js | 8 ++ test-api.sh | 23 ++++ 7 files changed, 308 insertions(+), 2 deletions(-) create mode 100644 src/controllers/cwvDistributionController.js diff --git a/src/controllers/cwvDistributionController.js b/src/controllers/cwvDistributionController.js new file mode 100644 index 0000000..e4c3642 --- /dev/null +++ b/src/controllers/cwvDistributionController.js @@ -0,0 +1,126 @@ +import { bigquery } from '../utils/db.js'; +import { convertToArray } from '../utils/helpers.js'; +import { + handleControllerError, + generateETag, + isModified, + sendValidationError +} from '../utils/controllerHelpers.js'; + +/** + * Build the BigQuery SQL for the CWV distribution histogram. + * rank is applied to p.rank only (no rank column on device_summary). + */ +const buildQuery = (rankFilter) => { + const rankClause = rankFilter ? 'AND p.rank <= @rank' : ''; + + return `WITH metrics AS ( + SELECT + client, + t.technology, + root_page, + ANY_VALUE(p75_lcp) AS lcp, + ANY_VALUE(p75_inp) AS inp, + ANY_VALUE(p75_cls) AS cls, + ANY_VALUE(p75_fcp) AS fcp, + ANY_VALUE(p75_ttfb) AS ttfb + FROM + \`httparchive.crawl.pages\` p, + UNNEST(technologies) t, + \`chrome-ux-report.materialized.device_summary\` c + WHERE + p.date = @date AND + c.date = @date AND + t.technology IN UNNEST(@technologies) AND + root_page = origin || '/' AND + IF(client = 'mobile', 'phone', 'desktop') = device + ${rankClause} + GROUP BY + client, + t.technology, + root_page +) + +SELECT + client, + technology, + bucket AS loading_bucket, + bucket / 4 AS inp_bucket, + bucket / 2000 AS cls_bucket, + COUNT(DISTINCT root_page WHERE lcp = bucket) AS lcp_origins, + COUNT(DISTINCT root_page WHERE inp = bucket / 4) AS inp_origins, + COUNT(DISTINCT root_page WHERE cls = bucket / 2000) AS cls_origins, + COUNT(DISTINCT root_page WHERE fcp = bucket) AS fcp_origins, + COUNT(DISTINCT root_page WHERE ttfb = bucket) AS ttfb_origins +FROM + metrics, + UNNEST(GENERATE_ARRAY(0.0, 10000.0, 100.0)) AS bucket +GROUP BY + client, + technology, + bucket +ORDER BY + client, + technology, + bucket`; +}; + +/** + * GET /v1/cwv-distribution + * + * Query parameters: + * technology (required) - comma-separated list of technologies, e.g. "Wix,WordPress" + * date (required) - crawl date in YYYY-MM-DD format, e.g. "2026-02-01" + * rank (optional) - numeric rank ceiling, e.g. "10000". Omit or set to "ALL" to include all ranks. + */ +export const listCWVDistributionData = async (req, res) => { + try { + const params = req.query; + + const errors = []; + if (!params.technology) errors.push(['technology', 'missing technology parameter']); + if (!params.date) errors.push(['date', 'missing date parameter']); + if (errors.length > 0) { + sendValidationError(res, errors); + return; + } + + const technologies = convertToArray(params.technology); + const date = params.date; + const rankParam = params.rank && params.rank !== 'ALL' ? params.rank : null; + + const queryStr = buildQuery(rankParam !== null); + + const queryOptions = { + query: queryStr, + params: { + technologies, + date, + ...(rankParam !== null && { rank: parseInt(rankParam, 10) }) + }, + types: { + technologies: ['STRING'], + date: 'STRING', + ...(rankParam !== null && { rank: 'INT64' }) + }, + useLegacySql: false + }; + + const [rows] = await bigquery.query(queryOptions); + + const jsonData = JSON.stringify(rows); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } + + res.statusCode = 200; + res.end(jsonData); + + } catch (error) { + handleControllerError(res, error, 'fetching CWV distribution data'); + } +}; diff --git a/src/index.js b/src/index.js index ec23e22..6c508c9 100644 --- a/src/index.js +++ b/src/index.js @@ -14,7 +14,8 @@ const controllers = { geos: null, versions: null, geoBreakdown: null, - static: null + static: null, + cwvDistribution: null }; // Helper function to dynamically import controllers @@ -49,6 +50,9 @@ const getController = async (name) => { case 'static': controllers[name] = await import('./controllers/cdnController.js'); break; + case 'cwvDistribution': + controllers[name] = await import('./controllers/cwvDistributionController.js'); + break; } } return controllers[name]; @@ -126,6 +130,9 @@ const handleRequest = async (req, res) => { } else if (pathname === '/v1/geo-breakdown' && req.method === 'GET') { const { listGeoBreakdownData } = await getController('geoBreakdown'); await listGeoBreakdownData(req, res); + } else if (pathname === '/v1/cwv-distribution' && req.method === 'GET') { + const { listCWVDistributionData } = await getController('cwvDistribution'); + await listCWVDistributionData(req, res); } else if (pathname.startsWith('/v1/static/') && req.method === 'GET') { // GCS proxy endpoint for reports files const filePath = decodeURIComponent(pathname.replace('/v1/static/', '')); diff --git a/src/package-lock.json b/src/package-lock.json index 6b7c05e..9ec7777 100644 --- a/src/package-lock.json +++ b/src/package-lock.json @@ -8,6 +8,7 @@ "name": "report-api", "version": "1.0.0", "dependencies": { + "@google-cloud/bigquery": "^7.9.1", "@google-cloud/firestore": "8.3.0", "@google-cloud/functions-framework": "^5.0.2", "@google-cloud/storage": "7.19.0" @@ -51,6 +52,7 @@ "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==", "dev": true, "license": "MIT", + "peer": true, "dependencies": { "@babel/code-frame": "^7.27.1", "@babel/generator": "^7.28.5", @@ -569,6 +571,61 @@ "tslib": "^2.4.0" } }, + "node_modules/@google-cloud/bigquery": { + "version": "7.9.4", + "resolved": "https://registry.npmjs.org/@google-cloud/bigquery/-/bigquery-7.9.4.tgz", + "integrity": "sha512-C7jeI+9lnCDYK3cRDujcBsPgiwshWKn/f0BiaJmClplfyosCLfWE83iGQ0eKH113UZzjR9c9q7aZQg0nU388sw==", + "license": "Apache-2.0", + "dependencies": { + "@google-cloud/common": "^5.0.0", + "@google-cloud/paginator": "^5.0.2", + "@google-cloud/precise-date": "^4.0.0", + "@google-cloud/promisify": "4.0.0", + "arrify": "^2.0.1", + "big.js": "^6.0.0", + "duplexify": "^4.0.0", + "extend": "^3.0.2", + "is": "^3.3.0", + "stream-events": "^1.0.5", + "uuid": "^9.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@google-cloud/bigquery/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/@google-cloud/common": { + "version": "5.0.2", + "resolved": "https://registry.npmjs.org/@google-cloud/common/-/common-5.0.2.tgz", + "integrity": "sha512-V7bmBKYQyu0eVG2BFejuUjlBt+zrya6vtsKdY+JxMM/dNntPF41vZ9+LhOshEUH01zOHEqBSvI7Dad7ZS6aUeA==", + "license": "Apache-2.0", + "dependencies": { + "@google-cloud/projectify": "^4.0.0", + "@google-cloud/promisify": "^4.0.0", + "arrify": "^2.0.1", + "duplexify": "^4.1.1", + "extend": "^3.0.2", + "google-auth-library": "^9.0.0", + "html-entities": "^2.5.2", + "retry-request": "^7.0.0", + "teeny-request": "^9.0.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@google-cloud/firestore": { "version": "8.3.0", "resolved": "https://registry.npmjs.org/@google-cloud/firestore/-/firestore-8.3.0.tgz", @@ -620,6 +677,15 @@ "node": ">=14.0.0" } }, + "node_modules/@google-cloud/precise-date": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/@google-cloud/precise-date/-/precise-date-4.0.0.tgz", + "integrity": "sha512-1TUx3KdaU3cN7nfCdNf+UVqA/PSX29Cjcox3fZZBtINlRrXVTmUkQnCKv2MbBUbCopbK4olAT1IHl76uZyCiVA==", + "license": "Apache-2.0", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@google-cloud/projectify": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/@google-cloud/projectify/-/projectify-4.0.0.tgz", @@ -2148,6 +2214,19 @@ "baseline-browser-mapping": "dist/cli.js" } }, + "node_modules/big.js": { + "version": "6.2.2", + "resolved": "https://registry.npmjs.org/big.js/-/big.js-6.2.2.tgz", + "integrity": "sha512-y/ie+Faknx7sZA5MfGA2xKlu0GDv8RWrXGsmlteyJQ2lvoKv9GBK/fpRMc2qlSoBAgNxrixICFCBefIq8WCQpQ==", + "license": "MIT", + "engines": { + "node": "*" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/bigjs" + } + }, "node_modules/bignumber.js": { "version": "9.3.1", "resolved": "https://registry.npmjs.org/bignumber.js/-/bignumber.js-9.3.1.tgz", @@ -2210,6 +2289,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.9.0", "caniuse-lite": "^1.0.30001759", @@ -3953,6 +4033,15 @@ "node": ">= 0.10" } }, + "node_modules/is": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/is/-/is-3.3.2.tgz", + "integrity": "sha512-a2xr4E3s1PjDS8ORcGgXpWx6V+liNs+O3JRD2mb9aeugD7rtkkZ0zgLdYgw0tWsKhsdiezGYptSiMlVazCBTuQ==", + "license": "MIT", + "engines": { + "node": ">= 0.4" + } + }, "node_modules/is-arguments": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/is-arguments/-/is-arguments-1.2.0.tgz", diff --git a/src/package.json b/src/package.json index ad4fc68..0eed985 100644 --- a/src/package.json +++ b/src/package.json @@ -15,6 +15,7 @@ "run": "docker run -p 8080:8080 report-api" }, "dependencies": { + "@google-cloud/bigquery": "^7.9.1", "@google-cloud/firestore": "8.3.0", "@google-cloud/functions-framework": "^5.0.2", "@google-cloud/storage": "7.19.0" diff --git a/src/tests/routes.test.js b/src/tests/routes.test.js index 5c065e1..d058bab 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -73,9 +73,14 @@ jest.unstable_mockModule('../utils/db.js', () => { collection: jest.fn().mockImplementation((collectionName) => mockQuery) }; + const mockBigQueryInstance = { + query: jest.fn().mockResolvedValue([[]]) + }; + return { firestore: mockFirestoreInstance, - firestoreOld: mockFirestoreInstance + firestoreOld: mockFirestoreInstance, + bigquery: mockBigQueryInstance }; }); @@ -835,4 +840,51 @@ describe('API Routes', () => { }); }); }); + + describe('GET /v1/cwv-distribution', () => { + it('should return 400 when technology is missing', async () => { + const res = await request(app).get('/v1/cwv-distribution?date=2026-02-01'); + expect(res.statusCode).toEqual(400); + expect(res.body).toHaveProperty('errors'); + }); + + it('should return 400 when date is missing', async () => { + const res = await request(app).get('/v1/cwv-distribution?technology=Wix'); + expect(res.statusCode).toEqual(400); + expect(res.body).toHaveProperty('errors'); + }); + + it('should return 400 when both technology and date are missing', async () => { + const res = await request(app).get('/v1/cwv-distribution'); + expect(res.statusCode).toEqual(400); + expect(res.body).toHaveProperty('errors'); + }); + + it('should return 200 with valid technology and date', async () => { + const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should return 200 with multiple technologies', async () => { + const res = await request(app).get('/v1/cwv-distribution?technology=Wix,WordPress&date=2026-02-01'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should return 200 with rank filter applied', async () => { + const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01&rank=10000'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should handle CORS preflight requests', async () => { + const res = await request(app) + .options('/v1/cwv-distribution') + .set('Origin', 'http://example.com') + .set('Access-Control-Request-Method', 'GET'); + expect(res.statusCode).toEqual(204); + expect(res.headers['access-control-allow-origin']).toEqual('*'); + }); + }); }); diff --git a/src/utils/db.js b/src/utils/db.js index 41eed29..0e6e5ae 100644 --- a/src/utils/db.js +++ b/src/utils/db.js @@ -1,4 +1,5 @@ import { Firestore } from '@google-cloud/firestore'; +import { BigQuery } from '@google-cloud/bigquery'; // Initialize Firestore with basic optimizations (default connection using env variables) const firestore = new Firestore({ @@ -30,3 +31,10 @@ const firestoreOld = new Firestore({ // Export both connections - maintain backward compatibility export { firestore, firestoreOld }; + +// Initialize BigQuery client +const bigquery = new BigQuery({ + projectId: process.env.PROJECT +}); + +export { bigquery }; diff --git a/test-api.sh b/test-api.sh index 7809094..337679d 100755 --- a/test-api.sh +++ b/test-api.sh @@ -195,4 +195,27 @@ test_filter "/v1/geo-breakdown" "?technology=WordPress" \ "all(.[]; has(\"geo\")) and length > 0" \ "Geo breakdown response includes geo field" +# Test cwv-distribution endpoint +test_cors_preflight "/v1/cwv-distribution" +test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" +test_endpoint "/v1/cwv-distribution" "?technology=Wix,WordPress&date=2026-02-01" +test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&rank=10000" + +# Test cwv-distribution filter correspondences +test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ + "all(.[]; .technology == \"Wix\") and length > 0" \ + "CWV distribution single technology (Wix)" + +test_filter "/v1/cwv-distribution" "?technology=Wix,WordPress&date=2026-02-01" \ + "all(.[]; .technology == \"Wix\" or .technology == \"WordPress\") and length > 0" \ + "CWV distribution multiple technologies (Wix, WordPress)" + +test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ + "all(.[]; has(\"loading_bucket\") and has(\"lcp_origins\") and has(\"inp_origins\") and has(\"cls_origins\")) and length > 0" \ + "CWV distribution response includes histogram bucket fields" + +test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ + "[.[].client] | unique | sort == [\"desktop\", \"mobile\"]" \ + "CWV distribution returns both desktop and mobile clients" + echo "API tests complete! All endpoints returned 200 and data corresponds to filters." From 6c18a4f3163c41175d64fd3d5b24f0b9c627901d Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 31 Mar 2026 01:07:11 +0200 Subject: [PATCH 08/14] fix: remove unnecessary useLegacySql option from BigQuery query options --- src/controllers/cwvDistributionController.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/controllers/cwvDistributionController.js b/src/controllers/cwvDistributionController.js index e4c3642..bf654f6 100644 --- a/src/controllers/cwvDistributionController.js +++ b/src/controllers/cwvDistributionController.js @@ -102,8 +102,7 @@ export const listCWVDistributionData = async (req, res) => { technologies: ['STRING'], date: 'STRING', ...(rankParam !== null && { rank: 'INT64' }) - }, - useLegacySql: false + } }; const [rows] = await bigquery.query(queryOptions); From 22b7b444989fccbc29cef94ff424430642a289df Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Apr 2026 01:24:11 +0200 Subject: [PATCH 09/14] feat: add workflow for testing --- .github/workflows/{dependabot.yml => test.yml} | 1 + 1 file changed, 1 insertion(+) rename .github/workflows/{dependabot.yml => test.yml} (96%) diff --git a/.github/workflows/dependabot.yml b/.github/workflows/test.yml similarity index 96% rename from .github/workflows/dependabot.yml rename to .github/workflows/test.yml index 1b650c3..a5df2b5 100644 --- a/.github/workflows/dependabot.yml +++ b/.github/workflows/test.yml @@ -9,6 +9,7 @@ on: jobs: test: runs-on: ubuntu-latest + if: github.head_ref != 'development' steps: - uses: actions/checkout@v6 - run: | From a8d033ff446f5f4ad50952e4e90e42cecb606f31 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 7 Apr 2026 01:28:06 +0200 Subject: [PATCH 10/14] fix: update ingress_settings default value to allow all traffic --- terraform/run-service/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/run-service/variables.tf b/terraform/run-service/variables.tf index d5f77a0..228ed22 100644 --- a/terraform/run-service/variables.tf +++ b/terraform/run-service/variables.tf @@ -35,7 +35,7 @@ variable "available_cpu" { } variable "ingress_settings" { type = string - default = "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" + default = "INGRESS_TRAFFIC_ALL" description = "String value that controls what traffic can reach the function. Check ingress documentation to see the impact of each settings value. Changes to this field will recreate the cloud function." } variable "timeout" { From 31e5765da6c8d06d2d9f07f9ce40f7a188929f13 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:36:20 +0200 Subject: [PATCH 11/14] feat: add geo filter support to CWV distribution endpoint and update tests --- README.md | 49 ++++++++++++- src/controllers/cwvDistributionController.js | 73 ++++++++++++++++---- src/tests/routes.test.js | 12 ++++ test-api.sh | 14 ++++ 4 files changed, 133 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 826c50f..b9ba6d2 100644 --- a/README.md +++ b/README.md @@ -279,6 +279,52 @@ curl --request GET \ ] ``` +### `GET /cwv-distribution` + +Provides per-bucket CWV metric distribution histograms for technologies, optionally filtered by geo and rank. + +#### CWV Distribution Parameters + +- `technology` (required): Technology name(s) - comma-separated list, e.g. `Wix,WordPress` +- `date` (required): Crawl date in `YYYY-MM-DD` format, e.g. `2026-02-01` +- `geo` (optional): Geographic filter (defaults to `ALL`). Use a country name such as `United States of America` for country-level data. +- `rank` (optional): Numeric rank ceiling, e.g. `10000`. Omit or set to `ALL` to include all ranks. + +#### CWV Distribution Response + +```bash +curl --request GET \ + --url 'https://{{HOST}}/v1/cwv-distribution?technology=WordPress&date=2026-02-01&geo=ALL' +``` + +Returns a JSON array where each element represents one histogram bucket for a technology/client/geo combination: + +```json +[ + { + "geo": "ALL", + "client": "mobile", + "technology": "WordPress", + "loading_bucket": 0, + "inp_bucket": 0, + "cls_bucket": 0, + "lcp_origins": 12345, + "inp_origins": 23456, + "cls_origins": 34567, + "fcp_origins": 11111, + "ttfb_origins": 22222 + }, + ... +] +``` + +Bucket semantics: + +- `loading_bucket` / `lcp_bucket` / `fcp_bucket` / `ttfb_bucket`: millisecond value (0–10000 in steps of 100) +- `inp_bucket`: `loading_bucket / 4` (INP scale) +- `cls_bucket`: `loading_bucket / 2000` (CLS scale) +- `*_origins`: count of distinct origins whose p75 value equals that bucket + ### `GET /lighthouse` Provides Lighthouse scores for technologies. @@ -386,7 +432,6 @@ Returns a JSON object with the following schema: ] ``` - ### `GET /audits` Provides Lighthouse audits for technologies. @@ -663,5 +708,3 @@ Response: ... } ``` - - diff --git a/src/controllers/cwvDistributionController.js b/src/controllers/cwvDistributionController.js index bf654f6..32a1c94 100644 --- a/src/controllers/cwvDistributionController.js +++ b/src/controllers/cwvDistributionController.js @@ -10,38 +10,78 @@ import { /** * Build the BigQuery SQL for the CWV distribution histogram. * rank is applied to p.rank only (no rank column on device_summary). + * geo filters are applied in the final SELECT via WHERE geo = @geo. + * Pass geo='ALL' (default) to use the device_summary (all-origins) data. + * Pass a country name to use the country_summary data for that country. */ const buildQuery = (rankFilter) => { - const rankClause = rankFilter ? 'AND p.rank <= @rank' : ''; + const rankClause = rankFilter ? 'AND rank <= @rank' : ''; - return `WITH metrics AS ( + return `WITH pages AS ( SELECT client, - t.technology, + t.technology AS technology, + root_page + FROM + httparchive.crawl.pages, + UNNEST(technologies) AS t + WHERE + date = @date AND + t.technology IN UNNEST(@technologies) + ${rankClause} + ), metrics AS ( + SELECT + 'ALL' AS geo, + client, + technology, root_page, ANY_VALUE(p75_lcp) AS lcp, ANY_VALUE(p75_inp) AS inp, ANY_VALUE(p75_cls) AS cls, ANY_VALUE(p75_fcp) AS fcp, ANY_VALUE(p75_ttfb) AS ttfb - FROM - \`httparchive.crawl.pages\` p, - UNNEST(technologies) t, - \`chrome-ux-report.materialized.device_summary\` c + FROM pages AS p, + \`chrome-ux-report.materialized.device_summary\` d + WHERE + d.date = @date AND + root_page = origin || '/' AND + IF(device = 'desktop', 'desktop', 'mobile') = client AND + @geo = 'ALL' + ${rankClause} + GROUP BY + client, + technology, + root_page + + UNION ALL + + SELECT + \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo, + client, + technology, + root_page, + ANY_VALUE(p75_lcp) AS lcp, + ANY_VALUE(p75_inp) AS inp, + ANY_VALUE(p75_cls) AS cls, + ANY_VALUE(p75_fcp) AS fcp, + ANY_VALUE(p75_ttfb) AS ttfb + FROM pages AS p, + \`chrome-ux-report.materialized.country_summary\` c WHERE - p.date = @date AND - c.date = @date AND - t.technology IN UNNEST(@technologies) AND + yyyymm = CAST(FORMAT_DATE('%Y%m', @date) AS INT64) AND root_page = origin || '/' AND - IF(client = 'mobile', 'phone', 'desktop') = device + IF(device = 'desktop', 'desktop', 'mobile') = client AND + \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) = @geo ${rankClause} GROUP BY + geo, client, - t.technology, + technology, root_page ) SELECT + geo, client, technology, bucket AS loading_bucket, @@ -56,10 +96,12 @@ FROM metrics, UNNEST(GENERATE_ARRAY(0.0, 10000.0, 100.0)) AS bucket GROUP BY + geo, client, technology, bucket ORDER BY + geo, client, technology, bucket`; @@ -72,6 +114,7 @@ ORDER BY * technology (required) - comma-separated list of technologies, e.g. "Wix,WordPress" * date (required) - crawl date in YYYY-MM-DD format, e.g. "2026-02-01" * rank (optional) - numeric rank ceiling, e.g. "10000". Omit or set to "ALL" to include all ranks. + * geo (optional) - geographic filter, e.g. "United States of America". Defaults to "ALL". */ export const listCWVDistributionData = async (req, res) => { try { @@ -88,6 +131,7 @@ export const listCWVDistributionData = async (req, res) => { const technologies = convertToArray(params.technology); const date = params.date; const rankParam = params.rank && params.rank !== 'ALL' ? params.rank : null; + const geo = params.geo || 'ALL'; const queryStr = buildQuery(rankParam !== null); @@ -96,12 +140,17 @@ export const listCWVDistributionData = async (req, res) => { params: { technologies, date, + geo, ...(rankParam !== null && { rank: parseInt(rankParam, 10) }) }, types: { technologies: ['STRING'], date: 'STRING', + geo: 'STRING', ...(rankParam !== null && { rank: 'INT64' }) + }, + labels: { + source: 'cwv-distribution-controller' } }; diff --git a/src/tests/routes.test.js b/src/tests/routes.test.js index d058bab..dde4d5e 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -878,6 +878,18 @@ describe('API Routes', () => { expect(Array.isArray(res.body)).toBe(true); }); + it('should return 200 with geo filter applied', async () => { + const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01&geo=United%20States%20of%20America'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should return 200 with geo=ALL (default behavior)', async () => { + const res = await request(app).get('/v1/cwv-distribution?technology=Wix&date=2026-02-01&geo=ALL'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + it('should handle CORS preflight requests', async () => { const res = await request(app) .options('/v1/cwv-distribution') diff --git a/test-api.sh b/test-api.sh index 337679d..a1bebd4 100755 --- a/test-api.sh +++ b/test-api.sh @@ -200,6 +200,8 @@ test_cors_preflight "/v1/cwv-distribution" test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" test_endpoint "/v1/cwv-distribution" "?technology=Wix,WordPress&date=2026-02-01" test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&rank=10000" +test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&geo=ALL" +test_endpoint "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&geo=United%20States%20of%20America" # Test cwv-distribution filter correspondences test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ @@ -218,4 +220,16 @@ test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ "[.[].client] | unique | sort == [\"desktop\", \"mobile\"]" \ "CWV distribution returns both desktop and mobile clients" +test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ + "all(.[]; has(\"geo\")) and length > 0" \ + "CWV distribution response includes geo field" + +test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01" \ + "all(.[]; .geo == \"ALL\") and length > 0" \ + "CWV distribution defaults to geo=ALL" + +test_filter "/v1/cwv-distribution" "?technology=Wix&date=2026-02-01&geo=United%20States%20of%20America" \ + "all(.[]; .geo == \"United States of America\") and length > 0" \ + "CWV distribution filters by specific geo (United States of America)" + echo "API tests complete! All endpoints returned 200 and data corresponds to filters." From ceda97d09f816d77dd0ea40847ee506197da69b9 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Fri, 10 Apr 2026 01:03:35 +0200 Subject: [PATCH 12/14] feat: add geo breakdown endpoint to readme and CWV distribution to MCP server --- README.md | 44 ++++++ src/controllers/cwvDistributionController.js | 136 +------------------ src/mcpHandler.js | 16 +++ src/utils/reportService.js | 115 +++++++++++++++- 4 files changed, 181 insertions(+), 130 deletions(-) diff --git a/README.md b/README.md index b9ba6d2..3947ac2 100644 --- a/README.md +++ b/README.md @@ -495,6 +495,50 @@ Returns a JSON object with the following schema: ] ``` +### `GET /geo-breakdown` + +Provides Core Web Vitals breakdown by geography for a given technology and rank. Returns a single month snapshot of CWV data (LCP, CLS, INP, TTFB) across all geographies. + +#### Geo Breakdown Parameters + +- `technology` (optional): Technology name(s) - comma-separated list (defaults to `ALL`) +- `rank` (optional): Traffic rank segment, e.g. `top 1000`, `top 10000`. Defaults to `ALL`. +- `end` (optional): Snapshot date in `YYYY-MM-DD` format. Defaults to the latest available date. + +#### Geo Breakdown Response + +```bash +curl --request GET \ + --url 'https://{{HOST}}/v1/geo-breakdown?technology=WordPress&rank=top%2010000' +``` + +Returns a JSON array where each element represents CWV data for a technology on a given date and geographic region: + +```json +[ + { + "date": "2026-02-01", + "geo": "United States of America", + "technology": "WordPress", + "vitals": [ + { + "mobile": { + "good_number": 12345, + "tested": 56789 + }, + "desktop": { + "good_number": 6789, + "tested": 10000 + }, + "name": "lcp" + }, + ... + ] + }, + ... +] +``` + ### `GET /ranks` Lists all available ranks. diff --git a/src/controllers/cwvDistributionController.js b/src/controllers/cwvDistributionController.js index 32a1c94..1995c4d 100644 --- a/src/controllers/cwvDistributionController.js +++ b/src/controllers/cwvDistributionController.js @@ -1,5 +1,4 @@ -import { bigquery } from '../utils/db.js'; -import { convertToArray } from '../utils/helpers.js'; +import { queryCWVDistribution } from '../utils/reportService.js'; import { handleControllerError, generateETag, @@ -7,106 +6,6 @@ import { sendValidationError } from '../utils/controllerHelpers.js'; -/** - * Build the BigQuery SQL for the CWV distribution histogram. - * rank is applied to p.rank only (no rank column on device_summary). - * geo filters are applied in the final SELECT via WHERE geo = @geo. - * Pass geo='ALL' (default) to use the device_summary (all-origins) data. - * Pass a country name to use the country_summary data for that country. - */ -const buildQuery = (rankFilter) => { - const rankClause = rankFilter ? 'AND rank <= @rank' : ''; - - return `WITH pages AS ( - SELECT - client, - t.technology AS technology, - root_page - FROM - httparchive.crawl.pages, - UNNEST(technologies) AS t - WHERE - date = @date AND - t.technology IN UNNEST(@technologies) - ${rankClause} - ), metrics AS ( - SELECT - 'ALL' AS geo, - client, - technology, - root_page, - ANY_VALUE(p75_lcp) AS lcp, - ANY_VALUE(p75_inp) AS inp, - ANY_VALUE(p75_cls) AS cls, - ANY_VALUE(p75_fcp) AS fcp, - ANY_VALUE(p75_ttfb) AS ttfb - FROM pages AS p, - \`chrome-ux-report.materialized.device_summary\` d - WHERE - d.date = @date AND - root_page = origin || '/' AND - IF(device = 'desktop', 'desktop', 'mobile') = client AND - @geo = 'ALL' - ${rankClause} - GROUP BY - client, - technology, - root_page - - UNION ALL - - SELECT - \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo, - client, - technology, - root_page, - ANY_VALUE(p75_lcp) AS lcp, - ANY_VALUE(p75_inp) AS inp, - ANY_VALUE(p75_cls) AS cls, - ANY_VALUE(p75_fcp) AS fcp, - ANY_VALUE(p75_ttfb) AS ttfb - FROM pages AS p, - \`chrome-ux-report.materialized.country_summary\` c - WHERE - yyyymm = CAST(FORMAT_DATE('%Y%m', @date) AS INT64) AND - root_page = origin || '/' AND - IF(device = 'desktop', 'desktop', 'mobile') = client AND - \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) = @geo - ${rankClause} - GROUP BY - geo, - client, - technology, - root_page -) - -SELECT - geo, - client, - technology, - bucket AS loading_bucket, - bucket / 4 AS inp_bucket, - bucket / 2000 AS cls_bucket, - COUNT(DISTINCT root_page WHERE lcp = bucket) AS lcp_origins, - COUNT(DISTINCT root_page WHERE inp = bucket / 4) AS inp_origins, - COUNT(DISTINCT root_page WHERE cls = bucket / 2000) AS cls_origins, - COUNT(DISTINCT root_page WHERE fcp = bucket) AS fcp_origins, - COUNT(DISTINCT root_page WHERE ttfb = bucket) AS ttfb_origins -FROM - metrics, - UNNEST(GENERATE_ARRAY(0.0, 10000.0, 100.0)) AS bucket -GROUP BY - geo, - client, - technology, - bucket -ORDER BY - geo, - client, - technology, - bucket`; -}; - /** * GET /v1/cwv-distribution * @@ -128,33 +27,12 @@ export const listCWVDistributionData = async (req, res) => { return; } - const technologies = convertToArray(params.technology); - const date = params.date; - const rankParam = params.rank && params.rank !== 'ALL' ? params.rank : null; - const geo = params.geo || 'ALL'; - - const queryStr = buildQuery(rankParam !== null); - - const queryOptions = { - query: queryStr, - params: { - technologies, - date, - geo, - ...(rankParam !== null && { rank: parseInt(rankParam, 10) }) - }, - types: { - technologies: ['STRING'], - date: 'STRING', - geo: 'STRING', - ...(rankParam !== null && { rank: 'INT64' }) - }, - labels: { - source: 'cwv-distribution-controller' - } - }; - - const [rows] = await bigquery.query(queryOptions); + const rows = await queryCWVDistribution({ + technology: params.technology, + date: params.date, + geo: params.geo || 'ALL', + rank: params.rank && params.rank !== 'ALL' ? params.rank : null, + }); const jsonData = JSON.stringify(rows); const etag = generateETag(jsonData); diff --git a/src/mcpHandler.js b/src/mcpHandler.js index 02c6db4..4785d53 100644 --- a/src/mcpHandler.js +++ b/src/mcpHandler.js @@ -9,6 +9,7 @@ import { queryRanks, queryGeos, queryVersions, + queryCWVDistribution, } from './utils/reportService.js'; const createMcpServer = () => { @@ -140,6 +141,21 @@ const createMcpServer = () => { } ); + server.tool( + 'get_cwv_distribution', + 'Get Core Web Vitals metric distribution histograms for websites using specific web technologies. Returns per-bucket origin counts for LCP, INP, CLS, FCP, and TTFB, optionally filtered by geography and rank.', + { + technology: z.string().describe('Comma-separated technology names (e.g. "WordPress" or "Wix,WordPress")'), + date: z.string().describe('Crawl date in YYYY-MM-DD format (e.g. "2026-02-01")'), + geo: z.string().optional().describe('Geographic filter — a country name (e.g. "United States of America") or "ALL" for global data. Defaults to "ALL"'), + rank: z.string().optional().describe('Numeric rank ceiling (e.g. "10000"). Omit or set to "ALL" for all ranks'), + }, + async ({ technology, date, geo, rank }) => { + const data = await queryCWVDistribution({ technology, date, geo: geo || 'ALL', rank: rank && rank !== 'ALL' ? rank : null }); + return { content: [{ type: 'text', text: JSON.stringify(data) }] }; + } + ); + server.tool( 'list_ranks', 'List available traffic rank segments for filtering Tech Report data (e.g. "top 1000", "top 10000", "top 100000", "ALL").', diff --git a/src/utils/reportService.js b/src/utils/reportService.js index cec06d1..fc29a98 100644 --- a/src/utils/reportService.js +++ b/src/utils/reportService.js @@ -1,4 +1,5 @@ -import { firestore, firestoreOld } from './db.js'; +import { firestore, firestoreOld, bigquery } from './db.js'; +import { convertToArray } from './helpers.js'; import { getLatestDate, validateArrayParameter, @@ -154,6 +155,118 @@ export const queryReport = async (reportType, params = {}) => { return data; }; +export const queryCWVDistribution = async ({ technology, date, geo = 'ALL', rank = null }) => { + const technologies = convertToArray(technology); + const rankParam = (rank !== null && rank !== 'ALL') ? parseInt(rank, 10) : null; + const rankClause = rankParam !== null ? 'AND rank <= @rank' : ''; + + const query = `WITH pages AS ( + SELECT + client, + t.technology AS technology, + root_page + FROM + httparchive.crawl.pages, + UNNEST(technologies) AS t + WHERE + date = @date AND + t.technology IN UNNEST(@technologies) + ${rankClause} + ), metrics AS ( + SELECT + 'ALL' AS geo, + client, + technology, + root_page, + ANY_VALUE(p75_lcp) AS lcp, + ANY_VALUE(p75_inp) AS inp, + ANY_VALUE(p75_cls) AS cls, + ANY_VALUE(p75_fcp) AS fcp, + ANY_VALUE(p75_ttfb) AS ttfb + FROM pages AS p, + \`chrome-ux-report.materialized.device_summary\` d + WHERE + d.date = @date AND + root_page = origin || '/' AND + IF(device = 'desktop', 'desktop', 'mobile') = client AND + @geo = 'ALL' + GROUP BY + client, + technology, + root_page + + UNION ALL + + SELECT + \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) AS geo, + client, + technology, + root_page, + ANY_VALUE(p75_lcp) AS lcp, + ANY_VALUE(p75_inp) AS inp, + ANY_VALUE(p75_cls) AS cls, + ANY_VALUE(p75_fcp) AS fcp, + ANY_VALUE(p75_ttfb) AS ttfb + FROM pages AS p, + \`chrome-ux-report.materialized.country_summary\` c + WHERE + yyyymm = CAST(FORMAT_DATE('%Y%m', @date) AS INT64) AND + root_page = origin || '/' AND + IF(device = 'desktop', 'desktop', 'mobile') = client AND + \`chrome-ux-report\`.experimental.GET_COUNTRY(country_code) = @geo + GROUP BY + geo, + client, + technology, + root_page +) + +SELECT + geo, + client, + technology, + bucket AS loading_bucket, + bucket / 4 AS inp_bucket, + bucket / 2000 AS cls_bucket, + COUNT(DISTINCT root_page WHERE lcp = bucket) AS lcp_origins, + COUNT(DISTINCT root_page WHERE inp = bucket / 4) AS inp_origins, + COUNT(DISTINCT root_page WHERE cls = bucket / 2000) AS cls_origins, + COUNT(DISTINCT root_page WHERE fcp = bucket) AS fcp_origins, + COUNT(DISTINCT root_page WHERE ttfb = bucket) AS ttfb_origins +FROM + metrics, + UNNEST(GENERATE_ARRAY(0.0, 10000.0, 100.0)) AS bucket +GROUP BY + geo, + client, + technology, + bucket +ORDER BY + geo, + client, + technology, + bucket`; + + const [rows] = await bigquery.query({ + query, + params: { + technologies, + date, + geo, + ...(rankParam !== null && { rank: rankParam }), + }, + types: { + technologies: ['STRING'], + date: 'STRING', + geo: 'STRING', + ...(rankParam !== null && { rank: 'INT64' }), + }, + labels: { source: 'cwv-distribution' }, + }); + + return rows; +}; + export const queryRanks = async () => { const snapshot = await firestore .collection('ranks') From 03e052eafcdeafeda8f1cc60bedeee3ce67c6225 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 13 Apr 2026 14:03:30 +0200 Subject: [PATCH 13/14] feat: update CWV distribution query to handle 'ALL' technology case and refactor related logic --- src/package.json | 4 ++-- src/utils/reportService.js | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/package.json b/src/package.json index 8fb5dd8..82a2529 100644 --- a/src/package.json +++ b/src/package.json @@ -8,11 +8,11 @@ "node": ">=22.0.0" }, "scripts": { - "start": "DATABASE=tech-report-api-prod functions-framework --target=app", + "function": "DATABASE=tech-report-api-prod functions-framework --target=app", "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js", "test:live": "bash ../test-api.sh", "build": "docker build -t report-api .", - "run": "docker run -p 8080:8080 report-api" + "docker": "docker run -p 8080:8080 report-api" }, "dependencies": { "@google-cloud/bigquery": "^7.9.1", diff --git a/src/utils/reportService.js b/src/utils/reportService.js index fc29a98..d7960a8 100644 --- a/src/utils/reportService.js +++ b/src/utils/reportService.js @@ -156,7 +156,9 @@ export const queryReport = async (reportType, params = {}) => { }; export const queryCWVDistribution = async ({ technology, date, geo = 'ALL', rank = null }) => { - const technologies = convertToArray(technology); + const allTechnologies = !technology || technology === 'ALL'; + const technologies = allTechnologies ? [] : convertToArray(technology); + const techClause = allTechnologies ? '' : 'AND t.technology IN UNNEST(@technologies)'; const rankParam = (rank !== null && rank !== 'ALL') ? parseInt(rank, 10) : null; const rankClause = rankParam !== null ? 'AND rank <= @rank' : ''; @@ -169,8 +171,8 @@ export const queryCWVDistribution = async ({ technology, date, geo = 'ALL', rank httparchive.crawl.pages, UNNEST(technologies) AS t WHERE - date = @date AND - t.technology IN UNNEST(@technologies) + date = @date + ${techClause} ${rankClause} ), metrics AS ( SELECT @@ -250,13 +252,13 @@ ORDER BY const [rows] = await bigquery.query({ query, params: { - technologies, + ...(!allTechnologies && { technologies }), date, geo, ...(rankParam !== null && { rank: rankParam }), }, types: { - technologies: ['STRING'], + ...(!allTechnologies && { technologies: ['STRING'] }), date: 'STRING', geo: 'STRING', ...(rankParam !== null && { rank: 'INT64' }), From 030133917f333b631db57c87d9a464c443c2bc04 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Wed, 15 Apr 2026 17:41:42 +0200 Subject: [PATCH 14/14] feat: configure ingress settings and increase Cloud Run service resource limits and concurrency Signed-off-by: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> --- terraform/main.tf | 2 ++ terraform/run-service/variables.tf | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/terraform/main.tf b/terraform/main.tf index 81afba9..f894709 100644 --- a/terraform/main.tf +++ b/terraform/main.tf @@ -28,6 +28,8 @@ module "endpoints" { service_name = "report-api" region = var.region min_instances = var.environment == "prod" ? 1 : 0 + ingress_settings = var.environment == "prod" ? "INGRESS_TRAFFIC_INTERNAL_LOAD_BALANCER" : "INGRESS_TRAFFIC_ALL" + environment_variables = { "PROJECT" = var.project "DATABASE" = "${var.project_database}prod" // TODO: Update this to use ${var.environment} diff --git a/terraform/run-service/variables.tf b/terraform/run-service/variables.tf index 228ed22..997b132 100644 --- a/terraform/run-service/variables.tf +++ b/terraform/run-service/variables.tf @@ -29,7 +29,7 @@ variable "available_memory" { description = "The amount of memory for the Cloud Function" } variable "available_cpu" { - default = "1" + default = "2" type = string description = "The amount of CPU for the Cloud Function" } @@ -39,7 +39,7 @@ variable "ingress_settings" { description = "String value that controls what traffic can reach the function. Check ingress documentation to see the impact of each settings value. Changes to this field will recreate the cloud function." } variable "timeout" { - default = "60s" + default = "120s" type = string description = "Timeout for the service. Default value is 60 seconds. Cannot be more than 540 seconds." } @@ -56,7 +56,7 @@ variable "min_instances" { variable "max_instance_request_concurrency" { description = "(Optional) The limit on the maximum number of requests that an instance can handle simultaneously. This can be used to control costs when scaling. Defaults to 1." type = number - default = 80 + default = 100 } variable "environment_variables" { description = "environment_variables"