From 724a1264e22763c24c145d4dcd8805563ba0fa1b Mon Sep 17 00:00:00 2001 From: Alon Kochba Date: Mon, 23 Mar 2026 10:43:29 +0200 Subject: [PATCH 1/6] feat: add /v1/geo-breakdown endpoint for geographic CWV breakdown (#94) * feat: add /v1/geo-breakdown endpoint for geographic CWV breakdown Adds a new controller and route that returns core_web_vitals data for all geographies for a given technology. Unlike /cwv, this endpoint omits the geo filter so callers can build a geographic breakdown chart without issuing one request per country. * refactor: merge geo-breakdown into reportController factory Add crossGeo option to createReportController; delete standalone geoBreakdownController.js. Endpoint now returns a single-month snapshot (latest by default, or the month specified by the end param). --------- Co-authored-by: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> --- src/controllers/reportController.js | 42 +++++++++++++++++------------ src/index.js | 7 +++++ 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/controllers/reportController.js b/src/controllers/reportController.js index 3fd4b53..248cd44 100644 --- a/src/controllers/reportController.js +++ b/src/controllers/reportController.js @@ -38,9 +38,11 @@ const REPORT_CONFIGS = { /** * Generic report data controller factory - * Creates controllers for adoption, pageWeight, lighthouse, and cwv data + * Creates controllers for adoption, pageWeight, lighthouse, and cwv data. + * Pass { crossGeo: true } to get a cross-geography snapshot (omits geo filter, + * includes geo in projection, returns a single month of data). 
*/ -const createReportController = (reportType) => { +const createReportController = (reportType, { crossGeo = false } = {}) => { const config = REPORT_CONFIGS[reportType]; if (!config) { throw new Error(`Unknown report type: ${reportType}`); @@ -79,20 +81,10 @@ const createReportController = (reportType) => { // Validate and process technology array const techArray = validateArrayParameter(technologyParam, 'technology'); - // Handle 'latest' date substitution - let startDate = params.start; - if (startDate === 'latest') { - startDate = await getLatestDate(firestore, config.table); - } - // Build Firestore query let query = firestore.collection(config.table); - // Apply required filters - query = query.where('geo', '==', geoParam); query = query.where('rank', '==', rankParam); - - // Apply technology filter with batch processing query = query.where('technology', 'in', techArray); // Apply version filter with special handling for 'ALL' case @@ -102,12 +94,27 @@ const createReportController = (reportType) => { //query = query.where('version', '==', 'ALL'); } - // Apply date filters - if (startDate) query = query.where('date', '>=', startDate); - if (params.end) query = query.where('date', '<=', params.end); + if (crossGeo) { + // Cross-geo: single-month snapshot, all geographies included. + // Use 'end' param if provided, otherwise default to latest available date. + const snapshotDate = params.end || await getLatestDate(firestore, config.table); + query = query.where('date', '==', snapshotDate); + query = query.select('date', 'technology', 'geo', config.dataField); + } else { + // Normal time-series: filter by geo, apply date range, no geo in projection. 
+ query = query.where('geo', '==', geoParam); - // Apply field projection to optimize query - query = query.select('date', 'technology', config.dataField); + // Handle 'latest' date substitution + let startDate = params.start; + if (startDate === 'latest') { + startDate = await getLatestDate(firestore, config.table); + } + + if (startDate) query = query.where('date', '>=', startDate); + if (params.end) query = query.where('date', '<=', params.end); + + query = query.select('date', 'technology', config.dataField); + } // Execute query const snapshot = await query.get(); @@ -132,5 +139,6 @@ export const listAdoptionData = createReportController('adoption'); export const listCWVTechData = createReportController('cwv'); export const listLighthouseData = createReportController('lighthouse'); export const listPageWeightData = createReportController('pageWeight'); +export const listGeoBreakdownData = createReportController('cwv', { crossGeo: true }); diff --git a/src/index.js b/src/index.js index a5831b9..cfe2b32 100644 --- a/src/index.js +++ b/src/index.js @@ -13,6 +13,7 @@ const controllers = { ranks: null, geos: null, versions: null, + geoBreakdown: null, static: null }; @@ -42,6 +43,9 @@ const getController = async (name) => { case 'versions': controllers[name] = await import('./controllers/versionsController.js'); break; + case 'geoBreakdown': + controllers[name] = await import('./controllers/reportController.js'); + break; case 'static': controllers[name] = await import('./controllers/cdnController.js'); break; @@ -140,6 +144,9 @@ const handleRequest = async (req, res) => { } else if (pathname === '/v1/versions' && req.method === 'GET') { const { listVersions } = await getController('versions'); await listVersions(req, res); + } else if (pathname === '/v1/geo-breakdown' && req.method === 'GET') { + const { listGeoBreakdownData } = await getController('geoBreakdown'); + await listGeoBreakdownData(req, res); } else if (pathname.startsWith('/v1/static/') && req.method 
=== 'GET') { // GCS proxy endpoint for reports files const filePath = decodeURIComponent(pathname.replace('/v1/static/', '')); From 6351949e74670945fbdda1dd6cfd8353044ed2a9 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Mon, 23 Mar 2026 23:17:15 +0100 Subject: [PATCH 2/6] test: add tests for /v1/geo-breakdown --- src/tests/routes.test.js | 43 ++++++++++++++++++++++++++++++++++++++++ test-api.sh | 23 +++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/src/tests/routes.test.js b/src/tests/routes.test.js index 6a627f6..8a93593 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -381,6 +381,49 @@ describe('API Routes', () => { }); }); + describe('GET /v1/geo-breakdown', () => { + it('should return geo breakdown data with default parameters', async () => { + const res = await request(app).get('/v1/geo-breakdown'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should return geo breakdown data for a specific technology', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology=WordPress'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should accept an end date parameter', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology=WordPress&end=2024-01-01'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should accept a rank parameter', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology=WordPress&rank=Top%201M'); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + }); + + it('should handle empty technology parameter (defaults to ALL)', async () => { + const res = await request(app).get('/v1/geo-breakdown?technology='); + expect(res.statusCode).toEqual(200); + expect(Array.isArray(res.body)).toBe(true); + 
}); + + it('should handle CORS preflight requests', async () => { + const res = await request(app) + .options('/v1/geo-breakdown') + .set('Origin', 'http://example.com') + .set('Access-Control-Request-Method', 'GET') + .set('Access-Control-Request-Headers', 'Content-Type'); + + expect(res.statusCode).toEqual(204); + expect(res.headers['access-control-allow-origin']).toEqual('*'); + }); + }); + describe('Error Handling', () => { it('should return 404 for unknown endpoints', async () => { const res = await request(app).get('/v1/unknown-endpoint'); diff --git a/test-api.sh b/test-api.sh index ead560f..7809094 100755 --- a/test-api.sh +++ b/test-api.sh @@ -34,7 +34,7 @@ test_filter() { echo "Testing filter: ${description}" echo "URL: ${url}" - + response=$(curl -s -w "\n%{http_code}" "${url}") http_code=$(echo "$response" | tail -n1) body=$(echo "$response" | sed '$d') @@ -48,7 +48,7 @@ test_filter() { # Run the verification check using jq # The check should return "true" if it passes check_result=$(echo "$body" | jq "${filter_check}") - + if [[ "$check_result" != "true" ]]; then echo "Error: Filter verification failed for ${description}" echo "Verification expression: ${filter_check}" @@ -176,4 +176,23 @@ test_filter "/v1/categories" "" \ "length > 0" \ "Categories list is not empty" +# Test geo-breakdown endpoint +test_cors_preflight "/v1/geo-breakdown" +test_endpoint "/v1/geo-breakdown" "" +test_endpoint "/v1/geo-breakdown" "?technology=WordPress" +test_endpoint "/v1/geo-breakdown" "?technology=WordPress&rank=Top%201M" + +# Test geo-breakdown filter correspondences +test_filter "/v1/geo-breakdown" "" \ + "all(.[]; .technology == \"ALL\") and length > 0" \ + "Geo breakdown defaults (technology=ALL)" + +test_filter "/v1/geo-breakdown" "?technology=WordPress" \ + "all(.[]; .technology == \"WordPress\") and length > 0" \ + "Geo breakdown specific technology (WordPress)" + +test_filter "/v1/geo-breakdown" "?technology=WordPress" \ + "all(.[]; has(\"geo\")) and length > 
0" \ + "Geo breakdown response includes geo field" + echo "API tests complete! All endpoints returned 200 and data corresponds to filters." From 1789b9e802f3aa067cb636ced421fdbf73d37557 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:07:49 +0100 Subject: [PATCH 3/6] fix: update CDN cache duration in setCommonHeaders function --- src/index.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/index.js b/src/index.js index cfe2b32..fc3c7f8 100644 --- a/src/index.js +++ b/src/index.js @@ -67,8 +67,8 @@ const setCORSHeaders = (res) => { const setCommonHeaders = (res) => { setCORSHeaders(res); res.setHeader('Content-Type', 'application/json'); - // Browser cache: 1 hour, CDN cache: 30 days - res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=2592000'); + // Browser cache: 1 hour, CDN cache: 1 day + res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); res.setHeader('Cloud-CDN-Cache-Tag', 'report-api'); res.setHeader('Timing-Allow-Origin', '*'); }; From 3b751a40ce94636bb4135cadbc18eeb52989ceaa Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:32:31 +0100 Subject: [PATCH 4/6] fix: update CDN cache tag and duration in response headers --- src/controllers/cdnController.js | 6 +++--- src/index.js | 2 +- src/tests/headers.test.js | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/controllers/cdnController.js b/src/controllers/cdnController.js index d34b9ed..b44576b 100644 --- a/src/controllers/cdnController.js +++ b/src/controllers/cdnController.js @@ -70,9 +70,9 @@ export const proxyReportsFile = async (req, res, filePath) => { // Set response headers res.setHeader('Content-Type', contentType); res.setHeader('Cross-Origin-Resource-Policy', 'cross-origin'); - res.setHeader('Cloud-CDN-Cache-Tag', 'bucket-proxy'); - // Browser cache: 1 hour, CDN 
cache: 30 days - res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=2592000'); + res.setHeader('Cache-Tag', 'bucket-proxy'); + // Browser cache: 1 hour, CDN cache: 1 day + res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); if (metadata.etag) { res.setHeader('ETag', metadata.etag); diff --git a/src/index.js b/src/index.js index fc3c7f8..91c61f1 100644 --- a/src/index.js +++ b/src/index.js @@ -69,7 +69,7 @@ const setCommonHeaders = (res) => { res.setHeader('Content-Type', 'application/json'); // Browser cache: 1 hour, CDN cache: 1 day res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); - res.setHeader('Cloud-CDN-Cache-Tag', 'report-api'); + res.setHeader('Cache-Tag', 'report-api'); res.setHeader('Timing-Allow-Origin', '*'); }; diff --git a/src/tests/headers.test.js b/src/tests/headers.test.js index d930d75..6673387 100644 --- a/src/tests/headers.test.js +++ b/src/tests/headers.test.js @@ -12,8 +12,8 @@ jest.unstable_mockModule('../controllers/cdnController.js', () => ({ proxyReportsFile: jest.fn((req, res) => { res.setHeader('Content-Type', 'application/json'); res.setHeader('Cross-Origin-Resource-Policy', 'cross-origin'); - res.setHeader('Cloud-CDN-Cache-Tag', 'bucket-proxy'); - res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=2592000'); + res.setHeader('Cache-Tag', 'bucket-proxy'); + res.setHeader('Cache-Control', 'public, max-age=3600, s-maxage=86400'); res.statusCode = 200; res.end(JSON.stringify({ mocked: true })); }) @@ -34,8 +34,8 @@ describe('CDN Headers', () => { const res = await request(app).get('/v1/technologies'); expect(res.statusCode).toEqual(200); - expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=2592000'); - expect(res.headers['cloud-cdn-cache-tag']).toBe('report-api'); + expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=86400'); + expect(res.headers['cache-tag']).toBe('report-api');
expect(res.headers['access-control-allow-origin']).toBe('*'); expect(res.headers['access-control-allow-headers']).toContain('Content-Type'); expect(res.headers['access-control-allow-headers']).toContain('If-None-Match'); @@ -46,8 +46,8 @@ describe('CDN Headers', () => { const res = await request(app).get('/v1/static/test.json'); expect(res.statusCode).toEqual(200); - expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=2592000'); - expect(res.headers['cloud-cdn-cache-tag']).toBe('bucket-proxy'); + expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=86400'); + expect(res.headers['cache-tag']).toBe('bucket-proxy'); expect(res.headers['cross-origin-resource-policy']).toBe('cross-origin'); }); @@ -55,7 +55,7 @@ describe('CDN Headers', () => { const res = await request(app).get('/'); expect(res.statusCode).toEqual(200); - expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=2592000'); - expect(res.headers['cloud-cdn-cache-tag']).toBe('report-api'); + expect(res.headers['cache-control']).toBe('public, max-age=3600, s-maxage=86400'); + expect(res.headers['cache-tag']).toBe('report-api'); }); }); From 0f7fe75dc3b0f401af1cd4b5bea361678b3324b1 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:34:36 +0100 Subject: [PATCH 5/6] feat: add ETag support for caching in report responses --- src/controllers/reportController.js | 16 ++++++++++--- src/index.js | 23 +------------------ src/utils/controllerHelpers.js | 35 ++++++++++++++++++++++++++--- 3 files changed, 46 insertions(+), 28 deletions(-) diff --git a/src/controllers/reportController.js b/src/controllers/reportController.js index 248cd44..71ea30a 100644 --- a/src/controllers/reportController.js +++ b/src/controllers/reportController.js @@ -7,7 +7,9 @@ import { sendValidationError, getLatestDate, handleControllerError, - validateArrayParameter + validateArrayParameter, + generateETag, + 
isModified } from '../utils/controllerHelpers.js'; /** @@ -123,9 +125,17 @@ const createReportController = (reportType, { crossGeo = false } = {}) => { data.push(doc.data()); }); - // Send response + // Send response with ETag support + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } res.statusCode = 200; - res.end(JSON.stringify(data)); + res.end(jsonData); } catch (error) { handleControllerError(res, error, `fetching ${reportType} data`); diff --git a/src/index.js b/src/index.js index 91c61f1..ec23e22 100644 --- a/src/index.js +++ b/src/index.js @@ -1,5 +1,5 @@ -import crypto from 'crypto'; import functions from '@google-cloud/functions-framework'; +import { sendJSONResponse, isModified } from './utils/controllerHelpers.js'; // Dynamic imports for better performance - only load when needed const controllers = { @@ -73,27 +73,6 @@ const setCommonHeaders = (res) => { res.setHeader('Timing-Allow-Origin', '*'); }; -// Helper function to generate ETag -const generateETag = (jsonData) => { - return crypto.createHash('md5').update(jsonData).digest('hex'); -}; - -// Helper function to send JSON response with ETag support -const sendJSONResponse = (res, data, statusCode = 200) => { - const jsonData = JSON.stringify(data); - const etag = generateETag(jsonData); - - res.setHeader('ETag', `"${etag}"`); - res.statusCode = statusCode; - res.end(jsonData); -}; - -// Helper function to check if resource is modified -const isModified = (req, etag) => { - const ifNoneMatch = req.headers['if-none-match'] || (req.get && req.get('if-none-match')); - return !ifNoneMatch || ifNoneMatch !== `"${etag}"`; -}; - // Route handler function const handleRequest = async (req, res) => { try { diff --git a/src/utils/controllerHelpers.js b/src/utils/controllerHelpers.js index 03d2372..58583d6 100644 --- a/src/utils/controllerHelpers.js +++ 
b/src/utils/controllerHelpers.js @@ -1,3 +1,4 @@ +import crypto from 'crypto'; import { convertToArray } from './helpers.js'; /** @@ -99,6 +100,23 @@ const handleControllerError = (res, error, operation) => { })); }; +const generateETag = (jsonData) => { + return crypto.createHash('md5').update(jsonData).digest('hex'); +}; + +const sendJSONResponse = (res, data, statusCode = 200) => { + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + res.statusCode = statusCode; + res.end(jsonData); +}; + +const isModified = (req, etag) => { + const ifNoneMatch = req.headers['if-none-match'] || (req.get && req.get('if-none-match')); + return !ifNoneMatch || ifNoneMatch !== `"${etag}"`; +}; + /** * Generic query executor * Handles query execution and response for simple queries @@ -126,9 +144,17 @@ const executeQuery = async (req, res, collection, queryBuilder, dataProcessor = data = dataProcessor(data, params); } - // Send response + // Send response with ETag support + const jsonData = JSON.stringify(data); + const etag = generateETag(jsonData); + res.setHeader('ETag', `"${etag}"`); + if (!isModified(req, etag)) { + res.statusCode = 304; + res.end(); + return; + } res.statusCode = 200; - res.end(JSON.stringify(data)); + res.end(jsonData); } catch (error) { // Handle validation errors specifically @@ -170,5 +196,8 @@ export { validateArrayParameter, handleControllerError, executeQuery, - validateTechnologyArray + validateTechnologyArray, + generateETag, + sendJSONResponse, + isModified }; From e210b141dcca0f56eb59196c80009f0fdda49bf6 Mon Sep 17 00:00:00 2001 From: Max Ostapenko <1611259+max-ostapenko@users.noreply.github.com> Date: Tue, 24 Mar 2026 16:36:35 +0100 Subject: [PATCH 6/6] test: add ETag header tests for /v1/technologies and /v1/adoption routes --- src/tests/routes.test.js | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/tests/routes.test.js 
b/src/tests/routes.test.js index 8a93593..5c065e1 100644 --- a/src/tests/routes.test.js +++ b/src/tests/routes.test.js @@ -455,6 +455,50 @@ describe('API Routes', () => { expect(res.headers).toHaveProperty('etag'); }); + it('should include ETag headers on executeQuery-based routes', async () => { + const res = await request(app).get('/v1/technologies'); + expect(res.statusCode).toEqual(200); + expect(res.headers).toHaveProperty('etag'); + expect(res.headers['etag']).toMatch(/^"[a-f0-9]+"$/); + }); + + it('should include ETag headers on reportController-based routes', async () => { + const res = await request(app).get('/v1/adoption'); + expect(res.statusCode).toEqual(200); + expect(res.headers).toHaveProperty('etag'); + expect(res.headers['etag']).toMatch(/^"[a-f0-9]+"$/); + }); + + it('should return 304 for executeQuery-based routes when ETag matches', async () => { + const first = await request(app).get('/v1/technologies'); + expect(first.statusCode).toEqual(200); + const etag = first.headers['etag']; + + const second = await request(app) + .get('/v1/technologies') + .set('If-None-Match', etag); + expect(second.statusCode).toEqual(304); + }); + + it('should return 304 for reportController-based routes when ETag matches', async () => { + const first = await request(app).get('/v1/adoption'); + expect(first.statusCode).toEqual(200); + const etag = first.headers['etag']; + + const second = await request(app) + .get('/v1/adoption') + .set('If-None-Match', etag); + expect(second.statusCode).toEqual(304); + }); + + it('should return 200 when If-None-Match does not match', async () => { + const res = await request(app) + .get('/v1/technologies') + .set('If-None-Match', '"stale-etag"'); + expect(res.statusCode).toEqual(200); + expect(res.headers).toHaveProperty('etag'); + }); + it('should include timing headers', async () => { const res = await request(app).get('/v1/technologies'); expect(res.headers['timing-allow-origin']).toEqual('*');