diff --git a/CHANGELOG.md b/CHANGELOG.md
index dbba376..f371d05 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,11 @@ All notable changes to CV Manager will be documented in this file.
Format follows [Keep a Changelog](https://keepachangelog.com/), versioning follows [Semantic Versioning](https://semver.org/).
+## [1.49.5] - 2026-05-06
+
+### Fixed
+- **Googlebot reported `Blocked by robots.txt` for public read-only API endpoints** (e.g. `/api/datasets/id/:id`, `/api/settings/language`). The public site hydrates client-side from `/api/*` JSON, so a JS-rendering crawler that can't fetch those endpoints sees the SSR shell only and skips re-render — degrading indexing. The public server's `robots.txt` previously had a blanket `Disallow: /api/`, which blocked all of it. The public server only ever exposes a curated set of GET-only, rate-limited, sensitive-field-filtered endpoints, so it's safe to expose those to crawlers. `robots.txt` now emits explicit `Allow:` rules for each public read-only API path (`/api/profile`, `/api/sections`, `/api/settings`, `/api/experiences`, `/api/certifications`, `/api/education`, `/api/skills`, `/api/projects`, `/api/timeline`, `/api/custom-sections`, `/api/layout-types`, `/api/social-platforms`, `/api/cv`, `/api/datasets/slug/`, `/api/datasets/id/`) before the trailing `Disallow: /api/`, so the longest-prefix-wins rule keeps anything not on the allow-list blocked by default. Both `robots.txt` handlers (the dual-server and PUBLIC_ONLY paths) now share a single `buildRobotsTxt(req)` helper in `src/server.js` so they can't drift, with regression tests covering both the indexable and `noindex` branches.
+
## [1.49.4] - 2026-05-06
### Added
diff --git a/package-lock.json b/package-lock.json
index 88175a4..4470560 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "cv-manager",
- "version": "1.49.4",
+ "version": "1.49.5",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "cv-manager",
- "version": "1.49.4",
+ "version": "1.49.5",
"dependencies": {
"archiver": "^7.0.1",
"better-sqlite3": "^9.4.3",
diff --git a/package.json b/package.json
index b7ded9d..57d9259 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "cv-manager",
- "version": "1.49.4",
+ "version": "1.49.5",
"description": "Professional CV Management System",
"main": "src/server.js",
"scripts": {
diff --git a/src/server.js b/src/server.js
index f130d81..f2f80df 100644
--- a/src/server.js
+++ b/src/server.js
@@ -307,6 +307,42 @@ function buildCanonicalTag(req) {
return ` `;
}
+// Public read-only API paths the public site fetches client-side. Listed here
+// (rather than just dropping `Disallow: /api/`) so that JS-rendering crawlers
+// like Googlebot can hydrate the page while any future `/api/*` route that
+// isn't on this list stays blocked by the trailing `Disallow: /api/` — most
+// specific match wins per Google's robots.txt rules. Keep in sync with the
+// `publicApp.get('/api/...')` routes below.
+//
+// Each entry is emitted verbatim as an `Allow:` rule, and robots.txt rules are
+// path-prefix matches: `/api/settings` therefore also covers
+// `/api/settings/language`, and the trailing-slash dataset entries cover
+// parameterized routes such as `/api/datasets/id/:id`.
+//
+// Frozen so the shared list cannot be mutated at runtime.
+const PUBLIC_API_ALLOW_PATHS = Object.freeze([
+    '/api/profile',
+    '/api/sections',
+    '/api/settings',
+    '/api/experiences',
+    '/api/certifications',
+    '/api/education',
+    '/api/skills',
+    '/api/projects',
+    '/api/timeline',
+    '/api/custom-sections',
+    '/api/layout-types',
+    '/api/social-platforms',
+    '/api/cv',
+    '/api/datasets/slug/',
+    '/api/datasets/id/'
+]);
+
+
+/**
+ * Build the robots.txt body served by the public server.
+ *
+ * Returns a site-wide `Disallow: /` when the stored `robotsMeta` setting
+ * contains `noindex` (matched case-insensitively). Otherwise allows the site,
+ * emits one `Allow:` line per PUBLIC_API_ALLOW_PATHS entry ahead of the
+ * catch-all `Disallow: /api/`, and advertises the sitemap on the request's
+ * own origin.
+ *
+ * @param {object} req - Incoming request; only `headers` and `protocol` are read.
+ * @returns {string} robots.txt content, without a trailing newline.
+ */
+function buildRobotsTxt(req) {
+    // Proxy chains and duplicated headers arrive as a comma-joined list
+    // ("https, http"); interpolating that verbatim would yield an invalid
+    // Sitemap URL, so keep only the leftmost entry.
+    const firstValue = (value) => String(value ?? '').split(',')[0].trim();
+    const protocol = firstValue(req.headers['x-forwarded-proto']) || req.protocol || 'https';
+    const host = firstValue(req.headers['x-forwarded-host']) || req.headers.host || 'localhost';
+    const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta');
+    const metaValue = robotsMeta?.value || 'index, follow';
+    // Robots directives are case-insensitive, so a stored `NOINDEX` must opt
+    // the site out just like `noindex` does.
+    if (String(metaValue).toLowerCase().includes('noindex')) {
+        return `User-agent: *\nDisallow: /`;
+    }
+    const allows = PUBLIC_API_ALLOW_PATHS.map((p) => `Allow: ${p}`).join('\n');
+    return `User-agent: *\nAllow: /\n${allows}\nDisallow: /api/\nSitemap: ${protocol}://${host}/sitemap.xml`;
+}
+
// Pull the current live CV into the same shape as a saved-dataset blob so
// the SSR helper has one input format to deal with.
function gatherLiveCvData() {
@@ -1978,17 +2014,8 @@ if (PUBLIC_ONLY) {
});
publicApp.get('/robots.txt', (req, res) => {
- const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https';
- const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost';
- const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta');
- const metaValue = robotsMeta?.value || 'index, follow';
- const isNoIndex = metaValue.includes('noindex');
res.setHeader('Content-Type', 'text/plain');
- if (isNoIndex) {
- res.send(`User-agent: *\nDisallow: /`);
- } else {
- res.send(`User-agent: *\nAllow: /\nSitemap: ${protocol}://${host}/sitemap.xml\nDisallow: /api/`);
- }
+ // Body comes from the shared buildRobotsTxt() helper, which the other
+ // public-server `/robots.txt` route also calls, so the two cannot drift.
+ res.send(buildRobotsTxt(req));
});
publicApp.use('/shared', express.static(path.join(__dirname, '../public/shared')));
@@ -4400,7 +4427,7 @@ if (PUBLIC_ONLY) {
next();
});
publicApp.get('/sitemap.xml', (req, res) => { const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https'; const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost'; res.setHeader('Content-Type', 'application/xml'); res.send(`${protocol}://${host}/${new Date().toISOString().split('T')[0]}weekly1.0`); });
- publicApp.get('/robots.txt', (req, res) => { const protocol = req.headers['x-forwarded-proto'] || req.protocol || 'https'; const host = req.headers['x-forwarded-host'] || req.headers.host || 'localhost'; const robotsMeta = db.prepare('SELECT value FROM settings WHERE key = ?').get('robotsMeta'); const metaValue = robotsMeta?.value || 'index, follow'; const isNoIndex = metaValue.includes('noindex'); res.setHeader('Content-Type', 'text/plain'); if (isNoIndex) { res.send(`User-agent: *\nDisallow: /`); } else { res.send(`User-agent: *\nAllow: /\nSitemap: ${protocol}://${host}/sitemap.xml\nDisallow: /api/`); } });
+ // Serves the body built by the shared buildRobotsTxt() helper as plain text; the other `/robots.txt` route calls the same helper.
+ publicApp.get('/robots.txt', (req, res) => { res.setHeader('Content-Type', 'text/plain'); res.send(buildRobotsTxt(req)); });
publicApp.use('/shared', express.static(path.join(__dirname, '../public/shared')));
// Favicon and icons (public uses icon-public.png with eye badge)
const publicIconPathB = path.join(__dirname, '../icon-public.png');
diff --git a/tests/backend.test.js b/tests/backend.test.js
index c50c97a..cf4b04c 100644
--- a/tests/backend.test.js
+++ b/tests/backend.test.js
@@ -2634,6 +2634,67 @@ describe('Backend API', () => {
});
});
+    describe('robots.txt API allow-list', () => {
+        // Mirrors PUBLIC_API_ALLOW_PATHS in src/server.js entry for entry, so
+        // a path dropped from the server's allow-list fails here.
+        const requiredAllows = [
+            '/api/profile',
+            '/api/sections',
+            '/api/settings',
+            '/api/experiences',
+            '/api/certifications',
+            '/api/education',
+            '/api/skills',
+            '/api/projects',
+            '/api/timeline',
+            '/api/custom-sections',
+            '/api/layout-types',
+            '/api/social-platforms',
+            '/api/cv',
+            '/api/datasets/slug/',
+            '/api/datasets/id/',
+        ];
+
+        // Writes the `robotsMeta` setting through the BASE_URL settings endpoint.
+        const setRobotsMeta = (value) => fetch(`${BASE_URL}/api/settings/robotsMeta`, {
+            method: 'PUT',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ value }),
+        });
+
+        it('does not block public read-only API paths from JS-rendering crawlers', async () => {
+            // Make sure the indexable branch is exercised.
+            await setRobotsMeta('index, follow');
+
+            const res = await fetch(`${PUBLIC_URL}/robots.txt`);
+            assert.strictEqual(res.status, 200);
+            const text = await res.text();
+            // Compare whole lines so `Allow: /api/cv` cannot be satisfied by a
+            // longer rule that merely starts with the same text.
+            const lines = text.split('\n');
+
+            // The catch-all must survive as the fallback: anything that is not
+            // explicitly allowed stays blocked.
+            const disallowAt = lines.indexOf('Disallow: /api/');
+            assert.notStrictEqual(
+                disallowAt,
+                -1,
+                `robots.txt is missing the catch-all Disallow: /api/; full body:\n${text}`,
+            );
+
+            // Explicit Allow rules for the read-only endpoints must precede the
+            // catch-all (longer-prefix Allow wins for Google).
+            for (const apiPath of requiredAllows) {
+                const allowAt = lines.indexOf(`Allow: ${apiPath}`);
+                assert.ok(
+                    allowAt !== -1,
+                    `robots.txt is missing Allow rule for ${apiPath}; full body:\n${text}`,
+                );
+                assert.ok(
+                    allowAt < disallowAt,
+                    `Allow rule for ${apiPath} must precede Disallow: /api/; full body:\n${text}`,
+                );
+            }
+        });
+
+        it('still emits a single global Disallow when robotsMeta is noindex', async () => {
+            await setRobotsMeta('noindex, nofollow');
+            try {
+                const res = await fetch(`${PUBLIC_URL}/robots.txt`);
+                assert.strictEqual(res.status, 200);
+                const text = await res.text();
+                assert.match(text, /^User-agent: \*\nDisallow: \/$/);
+            } finally {
+                // Restore the default even when an assertion above fails, so
+                // subsequent tests still see the indexable branch.
+                await setRobotsMeta('index, follow');
+            }
+        });
+    });
+
describe('Canonical link injection', () => {
it('emits canonical from request host on public root', async () => {
// Node's fetch reserves the Host header, so simulate the deployed-host
diff --git a/version.json b/version.json
index 4d635a4..f9350dc 100644
--- a/version.json
+++ b/version.json
@@ -1,4 +1,4 @@
{
- "version": "1.49.4",
+ "version": "1.49.5",
"changelog": "https://github.com/vincentmakes/cv-manager/blob/main/CHANGELOG.md"
}