Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@ All notable changes to CV Manager will be documented in this file.

Format follows [Keep a Changelog](https://keepachangelog.com/), versioning follows [Semantic Versioning](https://semver.org/).

## [1.49.5] - 2026-05-06

### Fixed
- **Googlebot reported `Blocked by robots.txt` for public read-only API endpoints** (e.g. `/api/datasets/id/:id`, `/api/settings/language`). The public site hydrates client-side from `/api/*` JSON, so a JS-rendering crawler that can't fetch those endpoints sees the SSR shell only and skips re-render — degrading indexing. The public server's `robots.txt` previously had a blanket `Disallow: /api/`, which blocked all of it. The public server only ever exposes a curated set of GET-only, rate-limited, sensitive-field-filtered endpoints, so it's safe to expose those to crawlers. `robots.txt` now emits explicit `Allow:` rules for each public read-only API path (`/api/profile`, `/api/sections`, `/api/settings`, `/api/experiences`, `/api/certifications`, `/api/education`, `/api/skills`, `/api/projects`, `/api/timeline`, `/api/custom-sections`, `/api/layout-types`, `/api/social-platforms`, `/api/cv`, `/api/datasets/slug/`, `/api/datasets/id/`) before the trailing `Disallow: /api/`, so the longest-prefix-wins rule keeps anything not on the allow-list blocked by default. Both `robots.txt` handlers (the dual-server and PUBLIC_ONLY paths) now share a single `buildRobotsTxt(req)` helper in `src/server.js` so they can't drift, with regression tests covering both the indexable and `noindex` branches.

## [1.49.4] - 2026-05-06

### Added
Expand Down
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "cv-manager",
"version": "1.49.4",
"version": "1.49.5",
"description": "Professional CV Management System",
"main": "src/server.js",
"scripts": {
Expand Down
49 changes: 38 additions & 11 deletions src/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,42 @@ function buildCanonicalTag(req) {
return ` <link rel="canonical" href="${escapeHtmlServer(url)}">`;
}

// Public read-only API paths the public site fetches client-side. Listed here
// (rather than just dropping `Disallow: /api/`) so that JS-rendering crawlers
// like Googlebot can hydrate the page while any future `/api/*` route that
// isn't on this list stays blocked by the trailing `Disallow: /api/` — most
// specific match wins per Google's robots.txt rules. Keep in sync with the
// `publicApp.get('/api/...')` routes below.
//
// Frozen because this list is shared module-level state: an accidental
// push/splice at runtime would silently change the crawl policy that
// robots.txt advertises.
const PUBLIC_API_ALLOW_PATHS = Object.freeze([
  '/api/profile',
  '/api/sections',
  '/api/settings',
  '/api/experiences',
  '/api/certifications',
  '/api/education',
  '/api/skills',
  '/api/projects',
  '/api/timeline',
  '/api/custom-sections',
  '/api/layout-types',
  '/api/social-platforms',
  '/api/cv',
  '/api/datasets/slug/',
  '/api/datasets/id/',
]);

/**
 * Build the robots.txt body for a request.
 *
 * Reads the `robotsMeta` setting from the database. When it contains
 * `noindex`, the whole site is disallowed. Otherwise the site is allowed,
 * each entry of PUBLIC_API_ALLOW_PATHS gets an explicit `Allow:` rule ahead
 * of the catch-all `Disallow: /api/`, and a Sitemap URL is derived from the
 * request's forwarded (or direct) protocol and host.
 *
 * @param {object} req - Request exposing `headers` and, optionally, `protocol`.
 * @returns {string} The robots.txt contents (no trailing newline).
 */
function buildRobotsTxt(req) {
  // Forwarded headers can carry a comma-separated list when several proxies
  // are chained (e.g. "https, http"). Interpolating the raw value would
  // produce a malformed Sitemap URL, so only the first entry is used.
  const firstForwardedValue = (raw) => {
    const value = Array.isArray(raw) ? raw[0] : raw;
    return typeof value === 'string' ? value.split(',')[0].trim() : '';
  };

  const protocol = firstForwardedValue(req.headers['x-forwarded-proto']) || req.protocol || 'https';
  const host = firstForwardedValue(req.headers['x-forwarded-host']) || req.headers.host || 'localhost';

  const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta');
  const metaValue = robotsMeta?.value || 'index, follow';
  if (metaValue.includes('noindex')) {
    return `User-agent: *\nDisallow: /`;
  }

  const allows = PUBLIC_API_ALLOW_PATHS.map((p) => `Allow: ${p}`).join('\n');
  return `User-agent: *\nAllow: /\n${allows}\nDisallow: /api/\nSitemap: ${protocol}://${host}/sitemap.xml`;
}

// Pull the current live CV into the same shape as a saved-dataset blob so
// the SSR helper has one input format to deal with.
function gatherLiveCvData() {
Expand Down Expand Up @@ -1978,17 +2014,8 @@ if (PUBLIC_ONLY) {
});

publicApp.get('/robots.txt', (req, res) => {
const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https';
const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost';
const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta');
const metaValue = robotsMeta?.value || 'index, follow';
const isNoIndex = metaValue.includes('noindex');
res.setHeader('Content-Type', 'text/plain');
if (isNoIndex) {
res.send(`User-agent: *\nDisallow: /`);
} else {
res.send(`User-agent: *\nAllow: /\nSitemap: ${protocol}://${host}/sitemap.xml\nDisallow: /api/`);
}
res.send(buildRobotsTxt(req));
});

publicApp.use('/shared', express.static(path.join(__dirname, '../public/shared')));
Expand Down Expand Up @@ -4400,7 +4427,7 @@ if (PUBLIC_ONLY) {
next();
});
publicApp.get('/sitemap.xml', (req, res) => { const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https'; const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost'; res.setHeader('Content-Type', 'application/xml'); res.send(`<?xml version="1.0" encoding="UTF-8"?><urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>${protocol}://${host}/</loc><lastmod>${new Date().toISOString().split('T')[0]}</lastmod><changefreq>weekly</changefreq><priority>1.0</priority></url></urlset>`); });
publicApp.get('/robots.txt', (req, res) => { const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https'; const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost'; const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta'); const metaValue = robotsMeta?.value || 'index, follow'; const isNoIndex = metaValue.includes('noindex'); res.setHeader('Content-Type', 'text/plain'); if (isNoIndex) { res.send(`User-agent: *\nDisallow: /`); } else { res.send(`User-agent: *\nAllow: /\nSitemap: ${protocol}://${host}/sitemap.xml\nDisallow: /api/`); } });
publicApp.get('/robots.txt', (req, res) => { res.setHeader('Content-Type', 'text/plain'); res.send(buildRobotsTxt(req)); });
publicApp.use('/shared', express.static(path.join(__dirname, '../public/shared')));
// Favicon and icons (public uses icon-public.png with eye badge)
const publicIconPathB = path.join(__dirname, '../icon-public.png');
Expand Down
61 changes: 61 additions & 0 deletions tests/backend.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2634,6 +2634,67 @@ describe('Backend API', () => {
});
});

describe('robots.txt API allow-list', () => {
  // Sets the robotsMeta setting through the admin API so each test can pick
  // the robots.txt branch it wants to exercise.
  const setRobotsMeta = (value) =>
    fetch(`${BASE_URL}/api/settings/robotsMeta`, {
      method: 'PUT',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({ value }),
    });

  it('does not block public read-only API paths from JS-rendering crawlers', async () => {
    // Make sure the indexable branch is exercised.
    await setRobotsMeta('index, follow');

    const res = await fetch(`${PUBLIC_URL}/robots.txt`);
    assert.strictEqual(res.status, 200);
    const text = await res.text();
    const lines = text.split('\n');

    // Sanity: the rule the public site relies on for hydration must not
    // be a bare blanket block. The Disallow still appears as the catch-all
    // fallback, but every read-only endpoint must have its own explicit
    // Allow rule (longer-prefix Allow wins for Google). This list mirrors
    // the server's allow-list in full, including layout-types and
    // social-platforms.
    const requiredAllows = [
      '/api/profile',
      '/api/sections',
      '/api/settings',
      '/api/experiences',
      '/api/certifications',
      '/api/education',
      '/api/skills',
      '/api/projects',
      '/api/timeline',
      '/api/custom-sections',
      '/api/layout-types',
      '/api/social-platforms',
      '/api/cv',
      '/api/datasets/slug/',
      '/api/datasets/id/',
    ];
    for (const apiPath of requiredAllows) {
      // Match whole lines so `/api/cv` cannot be satisfied by a longer rule.
      assert.ok(
        lines.includes(`Allow: ${apiPath}`),
        `robots.txt is missing Allow rule for ${apiPath}; full body:\n${text}`,
      );
    }

    // Anything not on the allow-list must stay blocked by default.
    assert.ok(
      lines.includes('Disallow: /api/'),
      `robots.txt is missing the catch-all Disallow: /api/; full body:\n${text}`,
    );
  });

  it('still emits a single global Disallow when robotsMeta is noindex', async () => {
    await setRobotsMeta('noindex, nofollow');

    try {
      const res = await fetch(`${PUBLIC_URL}/robots.txt`);
      assert.strictEqual(res.status, 200);
      const text = await res.text();
      assert.match(text, /^User-agent: \*\nDisallow: \/$/);
    } finally {
      // Restore default even when an assertion above fails, so subsequent
      // tests see the indexable branch.
      await setRobotsMeta('index, follow');
    }
  });
});

describe('Canonical link injection', () => {
it('emits canonical from request host on public root', async () => {
// Node's fetch reserves the Host header, so simulate the deployed-host
Expand Down
2 changes: 1 addition & 1 deletion version.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"version": "1.49.4",
"version": "1.49.5",
"changelog": "https://github.com/vincentmakes/cv-manager/blob/main/CHANGELOG.md"
}
Loading