diff --git a/docs/norgolith.toml b/docs/norgolith.toml
index dc7d6d6..e4c33e0 100644
--- a/docs/norgolith.toml
+++ b/docs/norgolith.toml
@@ -15,6 +15,17 @@ description = "Latest posts"
ttl = 60
image = "/assets/norgolith.svg"
+# SEO: sitemap.xml, OpenGraph meta tags
+[seo]
+sitemap = true
+open_graph = true
+default_image = "/assets/norgolith.svg"
+
+# Robots.txt: controls crawler access
+[robots]
+enable = true
+preset = "allow_all" # options: "allow_all", "no_llms", "block_all"
+
[extra]
license = "GPLv2"
footer_author_link = "https://github.com/NTBBloodbath"
diff --git a/docs/theme/templates/base.html b/docs/theme/templates/base.html
index a7757a7..2c1c1e3 100644
--- a/docs/theme/templates/base.html
+++ b/docs/theme/templates/base.html
@@ -123,6 +123,36 @@
{% endif %}
+ {# Canonical URL #}
+ {% if metadata.permalink %}
+
+ {% endif %}
+
+ {# OpenGraph + Twitter Cards #}
+ {% if config.seo is defined and config.seo.open_graph %}
+
+ {% if metadata.description and not metadata.description == "nil" %}
+
+ {% endif %}
+ {% if metadata.permalink %}
+
+ {% endif %}
+
+
+ {% if metadata.image %}
+
+
+ {% elif config.seo is defined and config.seo.default_image %}
+
+
+ {% endif %}
+
+
+ {% if metadata.description and not metadata.description == "nil" %}
+
+ {% endif %}
+ {% endif %}
+
{% block title %}{% endblock title %} - {{ config.title | title }}
{% endblock head %}
diff --git a/scripts/update-robots-presets.sh b/scripts/update-robots-presets.sh
new file mode 100755
index 0000000..7aaa243
--- /dev/null
+++ b/scripts/update-robots-presets.sh
@@ -0,0 +1,68 @@
+#!/usr/bin/env bash
+#
+# Fetches the latest ai.robots.txt list from GitHub and updates the
+# ROBOTS_NO_LLMS const in src/cmd/seo.rs.
+#
+# Only the `User-agent:` and `Disallow:` lines of the upstream file are kept.
+# The const is located by its opening line and by the first following line
+# that ends with `";`, and everything in between is replaced.
+#
+# Usage: ./scripts/update-robots-presets.sh
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(dirname "$SCRIPT_DIR")"
+SEO_FILE="$REPO_ROOT/src/cmd/seo.rs"
+UPSTREAM_URL="https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt"
+
+# Print an error to stderr and abort.
+die() {
+    echo "Error: $*" >&2
+    exit 1
+}
+
+[ -f "$SEO_FILE" ] || die "$SEO_FILE does not exist"
+
+echo "Fetching ai.robots.txt from GitHub..."
+ROBOTS_RAW=$(curl -fsSL "$UPSTREAM_URL")
+
+# Extract only User-agent and Disallow lines. The `|| true` stops `set -e` +
+# `pipefail` from aborting silently when grep matches nothing, so the check
+# below can report the problem instead.
+ROBOTS_FILTERED=$(printf '%s\n' "$ROBOTS_RAW" | grep -E '^User-agent:|^Disallow:' || true)
+[ -n "$ROBOTS_FILTERED" ] || die "No User-agent/Disallow lines found in $UPSTREAM_URL"
+
+# The const is written as a Rust raw string (r"..."), which ends at the first
+# double quote, so refuse content that would terminate it early.
+case "$ROBOTS_FILTERED" in
+    *'"'*) die "Upstream list contains a double quote and cannot be embedded in r\"...\"" ;;
+esac
+
+# Find line numbers for the const block (`|| true`: see above)
+START_LINE=$(grep -n '^const ROBOTS_NO_LLMS: &str = r"' "$SEO_FILE" | head -1 | cut -d: -f1 || true)
+[ -n "$START_LINE" ] || die "Could not find ROBOTS_NO_LLMS const in $SEO_FILE"
+# The first line at or after START_LINE that ends with `";` closes the const
+END_LINE=$(awk -v start="$START_LINE" 'NR >= start && /";$/ { print NR; exit }' "$SEO_FILE")
+[ -n "$END_LINE" ] || die "Could not find the end of the ROBOTS_NO_LLMS const in $SEO_FILE"
+
+# Build the new file next to the target and remove it on every exit path
+TEMP_FILE=$(mktemp "$SEO_FILE.XXXXXX")
+trap 'rm -f "$TEMP_FILE"' EXIT
+
+{
+    # Lines before the const (1 to START_LINE-1)
+    if [ "$START_LINE" -gt 1 ]; then
+        head -n $((START_LINE - 1)) "$SEO_FILE"
+    fi
+    # The new const
+    printf 'const ROBOTS_NO_LLMS: &str = r"%s";\n' "$ROBOTS_FILTERED"
+    # Lines after the const (END_LINE+1 to end); prints nothing if none remain
+    tail -n +$((END_LINE + 1)) "$SEO_FILE"
+} > "$TEMP_FILE"
+
+# Copy the contents back instead of `mv`, so the source file keeps its own
+# permissions (mktemp creates the temp file with mode 0600)
+cat "$TEMP_FILE" > "$SEO_FILE"
+
+echo "Done. Updated ROBOTS_NO_LLMS const in $SEO_FILE (lines $START_LINE-$END_LINE)"
diff --git a/src/cmd/build.rs b/src/cmd/build.rs
index d588147..31c96e6 100644
--- a/src/cmd/build.rs
+++ b/src/cmd/build.rs
@@ -18,6 +18,7 @@ fn href_root_re() -> &'static regex::Regex {
}
use crate::{cache::BuildCache, config, fs, shared};
+use super::seo;
/// Represents the directory structure of a Norgolith site.
///
@@ -138,16 +139,17 @@ fn generate_xml_feeds(
tera: &Tera,
shared_context: &Context,
public_dir: &Path,
-) -> Result {
+) -> Result<(usize, Vec)> {
let xml_templates = collect_xml_templates(tera);
let count = xml_templates.len();
if count == 0 {
- return Ok(0);
+ return Ok((0, Vec::new()));
}
let mut context = shared_context.clone();
context.insert("now", &chrono::Utc::now());
+ let mut feed_names = Vec::with_capacity(count);
for template_name in &xml_templates {
let rendered = tera
.render(template_name, &context)
@@ -173,9 +175,10 @@ fn generate_xml_feeds(
}
std::fs::write(&output_path, &rendered)
.wrap_err(format!("Failed to write '{}'", output_path.display()))?;
+ feed_names.push(template_name.clone());
}
- Ok(count)
+ Ok((count, feed_names))
}
/// Generates the final public build from intermediate build artifacts
@@ -197,7 +200,7 @@ fn build_contents(
shared_context: &Context,
cache: &mut BuildCache,
minify: bool,
-) -> Result<(usize, BuildTimings)> {
+) -> Result<(usize, Vec, BuildTimings)> {
use rayon::prelude::*;
let entries: Vec<_> = WalkDir::new(&paths.content)
@@ -231,10 +234,12 @@ fn build_contents(
// Collect results and handle errors
let mut buffered_writes = Vec::new();
+ let mut permalinks = Vec::new();
for result in results {
match result {
- Ok(Some((public_path, content, cache_entry))) => {
+ Ok(Some((public_path, content, permalink, cache_entry))) => {
buffered_writes.push((public_path, content));
+ permalinks.push(permalink);
if let Some((key, content_str, metadata)) = cache_entry {
cache.insert(&key, &content_str, metadata);
}
@@ -258,13 +263,13 @@ fn build_contents(
timings.page_write_ms = write_ms;
timings.page_count = built_count;
- Ok((built_count, timings))
+ Ok((built_count, permalinks, timings))
}
/// (cache_key, content, metadata) for cache insertion
type CacheInsert = (PathBuf, String, serde_json::Value);
/// Result of building a single content entry
-type BuildResult = Result