Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions docs/norgolith.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,17 @@ description = "Latest posts"
ttl = 60
image = "/assets/norgolith.svg"

# SEO: sitemap.xml, OpenGraph meta tags
[seo]
sitemap = true
open_graph = true
default_image = "/assets/norgolith.svg"

# Robots.txt: controls crawler access
[robots]
enable = true
preset = "allow_all" # options: "allow_all", "no_llms", "block_all"

[extra]
license = "GPLv2"
footer_author_link = "https://github.com/NTBBloodbath"
Expand Down
30 changes: 30 additions & 0 deletions docs/theme/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,36 @@
<link rel="icon" href="/assets/norgolith.svg" />
{% endif %}

{# Canonical URL #}
{% if metadata.permalink %}
<link rel="canonical" href="{{ metadata.permalink }}" />
{% endif %}

{# OpenGraph + Twitter Cards #}
{% if config.seo is defined and config.seo.open_graph %}
<meta property="og:title" content="{{ metadata.title | default(value=config.title) }}" />
{% if metadata.description and not metadata.description == "nil" %}
<meta property="og:description" content="{{ metadata.description }}" />
{% endif %}
{% if metadata.permalink %}
<meta property="og:url" content="{{ metadata.permalink }}" />
{% endif %}
<meta property="og:site_name" content="{{ config.title }}" />
<meta property="og:locale" content="{{ config.language | replace(from="-", to="_") }}" />
{% if metadata.image %}
<meta property="og:image" content="{{ config.rootUrl }}{{ metadata.image }}" />
<meta name="twitter:image" content="{{ config.rootUrl }}{{ metadata.image }}" />
{% elif config.seo is defined and config.seo.default_image %}
<meta property="og:image" content="{{ config.rootUrl }}{{ config.seo.default_image }}" />
<meta name="twitter:image" content="{{ config.rootUrl }}{{ config.seo.default_image }}" />
{% endif %}
<meta name="twitter:card" content="summary_large_image" />
<meta name="twitter:title" content="{{ metadata.title | default(value=config.title) }}" />
{% if metadata.description and not metadata.description == "nil" %}
<meta name="twitter:description" content="{{ metadata.description }}" />
{% endif %}
{% endif %}

<title>{% block title %}{% endblock title %} - {{ config.title | title }}</title>
{% endblock head %}
</head>
Expand Down
50 changes: 50 additions & 0 deletions scripts/update-robots-presets.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/env bash
#
# Fetches the latest ai.robots.txt list from GitHub and updates the
# ROBOTS_NO_LLMS const in src/cmd/seo.rs.
#
# Only the `User-agent:` and `Disallow:` lines of the upstream file are kept;
# they replace everything from the line starting with
# `const ROBOTS_NO_LLMS: &str = r"` through the first following line that
# ends with `";`.
#
# Usage: ./scripts/update-robots-presets.sh

set -euo pipefail

# Print an error message to stderr and abort.
die() {
    echo "Error: $*" >&2
    exit 1
}

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(dirname "$SCRIPT_DIR")"
SEO_FILE="$REPO_ROOT/src/cmd/seo.rs"
UPSTREAM_URL="https://raw.githubusercontent.com/ai-robots-txt/ai.robots.txt/main/robots.txt"

[ -f "$SEO_FILE" ] || die "$SEO_FILE does not exist"

echo "Fetching ai.robots.txt from GitHub..."
ROBOTS_RAW=$(curl -fsSL "$UPSTREAM_URL")

# Extract only User-agent and Disallow lines.
# grep exits 1 when nothing matches; with `set -e` + `pipefail` that would
# kill the script silently, so neutralise it and report the problem ourselves.
ROBOTS_FILTERED=$(printf '%s\n' "$ROBOTS_RAW" | grep -E '^User-agent:|^Disallow:' || true)
[ -n "$ROBOTS_FILTERED" ] || die "No User-agent/Disallow lines found in $UPSTREAM_URL"

# The const is a Rust raw string delimited by r"..."; a double quote in the
# payload would terminate it early and produce a file that does not compile.
case "$ROBOTS_FILTERED" in
    *\"*) die "Upstream robots.txt contains a double quote; refusing to write a broken raw string" ;;
esac

# Find line numbers for the const block.
# awk (rather than grep | head | cut) simply prints nothing when there is no
# match, so the explicit check below is actually reachable under `set -e`.
START_LINE=$(awk '/^const ROBOTS_NO_LLMS: &str = r"/ { print NR; exit }' "$SEO_FILE")
[ -n "$START_LINE" ] || die "Could not find ROBOTS_NO_LLMS const in $SEO_FILE"

# Find the closing line: the first line at or after START_LINE ending in ";
END_LINE=$(awk -v start="$START_LINE" 'NR >= start && /";$/ { print NR; exit }' "$SEO_FILE")
[ -n "$END_LINE" ] || die "Could not find ROBOTS_NO_LLMS const in $SEO_FILE"

# Build the new file next to the target so the final mv is a same-filesystem
# rename, and make sure the temp file never outlives a failed run.
TEMP_FILE=$(mktemp "$SEO_FILE.XXXXXX")
trap 'rm -f "$TEMP_FILE"' EXIT

# mktemp creates the file with mode 0600; copy the original over it first so
# the replacement keeps the source file's permissions.
cp -p "$SEO_FILE" "$TEMP_FILE"

{
    # Lines before the const (1 to START_LINE-1)
    if [ "$START_LINE" -gt 1 ]; then
        head -n $((START_LINE - 1)) "$SEO_FILE"
    fi

    # The new const
    printf 'const ROBOTS_NO_LLMS: &str = r"%s";\n' "$ROBOTS_FILTERED"

    # Lines after the const (END_LINE+1 to end). tail prints nothing when the
    # start is past EOF, so no line-count guard is needed (a `wc -l` guard
    # would also miss a final line that lacks a trailing newline).
    tail -n +$((END_LINE + 1)) "$SEO_FILE"
} > "$TEMP_FILE"

mv "$TEMP_FILE" "$SEO_FILE"

echo "Done. Updated ROBOTS_NO_LLMS const in $SEO_FILE (lines $START_LINE-$END_LINE)"
133 changes: 123 additions & 10 deletions src/cmd/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ fn href_root_re() -> &'static regex::Regex {
}

use crate::{cache::BuildCache, config, fs, shared};
use super::seo;

/// Represents the directory structure of a Norgolith site.
///
Expand Down Expand Up @@ -138,16 +139,17 @@ fn generate_xml_feeds(
tera: &Tera,
shared_context: &Context,
public_dir: &Path,
) -> Result<usize> {
) -> Result<(usize, Vec<String>)> {
let xml_templates = collect_xml_templates(tera);
let count = xml_templates.len();
if count == 0 {
return Ok(0);
return Ok((0, Vec::new()));
}

let mut context = shared_context.clone();
context.insert("now", &chrono::Utc::now());

let mut feed_names = Vec::with_capacity(count);
for template_name in &xml_templates {
let rendered = tera
.render(template_name, &context)
Expand All @@ -173,9 +175,10 @@ fn generate_xml_feeds(
}
std::fs::write(&output_path, &rendered)
.wrap_err(format!("Failed to write '{}'", output_path.display()))?;
feed_names.push(template_name.clone());
}

Ok(count)
Ok((count, feed_names))
}

/// Generates the final public build from intermediate build artifacts
Expand All @@ -197,7 +200,7 @@ fn build_contents(
shared_context: &Context,
cache: &mut BuildCache,
minify: bool,
) -> Result<(usize, BuildTimings)> {
) -> Result<(usize, Vec<String>, BuildTimings)> {
use rayon::prelude::*;

let entries: Vec<_> = WalkDir::new(&paths.content)
Expand Down Expand Up @@ -231,10 +234,12 @@ fn build_contents(

// Collect results and handle errors
let mut buffered_writes = Vec::new();
let mut permalinks = Vec::new();
for result in results {
match result {
Ok(Some((public_path, content, cache_entry))) => {
Ok(Some((public_path, content, permalink, cache_entry))) => {
buffered_writes.push((public_path, content));
permalinks.push(permalink);
if let Some((key, content_str, metadata)) = cache_entry {
cache.insert(&key, &content_str, metadata);
}
Expand All @@ -258,13 +263,13 @@ fn build_contents(
timings.page_write_ms = write_ms;
timings.page_count = built_count;

Ok((built_count, timings))
Ok((built_count, permalinks, timings))
}

/// (cache_key, content, metadata) for cache insertion
type CacheInsert = (PathBuf, String, serde_json::Value);
/// Result of building a single content entry
type BuildResult = Result<Option<(PathBuf, String, Option<CacheInsert>)>>;
type BuildResult = Result<Option<(PathBuf, String, String, Option<CacheInsert>)>>;

/// Processes a single build entry (HTML file with metadata)
///
Expand Down Expand Up @@ -359,7 +364,14 @@ fn build_content_entry(
rendered
};

Ok(Some((public_path, rendered, cache_insert)))
// Extract permalink for SEO generation
let permalink = metadata
.get("permalink")
.and_then(|v| v.as_str())
.unwrap_or("/")
.to_string();

Ok(Some((public_path, rendered, permalink, cache_insert)))
}

/// Generates category listing pages
Expand Down Expand Up @@ -693,6 +705,7 @@ struct BuildTimings {
content_ms: u128,
categories_ms: u128,
feeds_ms: u128,
seo_ms: u128,
assets_ms: u128,
cache_save_ms: u128,
// Per-page sub-timing (sums across all pages)
Expand Down Expand Up @@ -722,6 +735,7 @@ impl BuildTimings {
content_ms: 0,
categories_ms: 0,
feeds_ms: 0,
seo_ms: 0,
assets_ms: 0,
cache_save_ms: 0,
page_file_ms: 0,
Expand Down Expand Up @@ -750,6 +764,7 @@ impl BuildTimings {
.saturating_sub(self.content_ms)
.saturating_sub(self.categories_ms)
.saturating_sub(self.feeds_ms)
.saturating_sub(self.seo_ms)
.saturating_sub(self.assets_ms)
.saturating_sub(self.cache_save_ms);

Expand All @@ -765,6 +780,7 @@ impl BuildTimings {
println!(" {:<30} {:>6}ms ({:>4.1}%)", "Content build (all pages)", self.content_ms, pct(self.content_ms, total_ms));
println!(" {:<30} {:>6}ms ({:>4.1}%)", "Category pages", self.categories_ms, pct(self.categories_ms, total_ms));
println!(" {:<30} {:>6}ms ({:>4.1}%)", "XML feeds", self.feeds_ms, pct(self.feeds_ms, total_ms));
println!(" {:<30} {:>6}ms ({:>4.1}%)", "SEO (sitemap+robots)", self.seo_ms, pct(self.seo_ms, total_ms));
println!(" {:<30} {:>6}ms ({:>4.1}%)", "Asset copy", self.assets_ms, pct(self.assets_ms, total_ms));
println!(" {:<30} {:>6}ms ({:>4.1}%)", "Cache save", self.cache_save_ms, pct(self.cache_save_ms, total_ms));
println!(" {:<30} {:>6}ms ({:>4.1}%)", "Overhead/other", overhead, pct(overhead, total_ms));
Expand Down Expand Up @@ -908,7 +924,7 @@ pub fn build(minify: bool) -> Result<()> {

// Build content
let t = Instant::now();
let (page_count, content_timings) = build_contents(&tera, &paths, &posts, &site_config, &shared_context, &mut cache, minify)?;
let (page_count, permalinks, content_timings) = build_contents(&tera, &paths, &posts, &site_config, &shared_context, &mut cache, minify)?;
timings.content_ms = t.elapsed().as_millis();
timings.page_count = page_count;
// Copy per-page sub-timings from the concurrent build
Expand Down Expand Up @@ -937,7 +953,7 @@ pub fn build(minify: bool) -> Result<()> {

// XML feeds
let t = Instant::now();
let feed_count = generate_xml_feeds(&tera, &shared_context, &paths.public)?;
let (feed_count, feed_names) = generate_xml_feeds(&tera, &shared_context, &paths.public)?;
timings.feeds_ms = t.elapsed().as_millis();
if feed_count > 0 {
println!(
Expand All @@ -949,6 +965,103 @@ pub fn build(minify: bool) -> Result<()> {
);
}

// SEO generation
let t = Instant::now();
let mut seo_count = 0usize;
let seo_enabled = site_config.seo.is_some() || site_config.robots.is_some();
if seo_enabled {
// Sitemap
let sitemap_enabled = site_config
.seo
.as_ref()
.is_none_or(|s| s.sitemap);
if sitemap_enabled {
// Build date map from posts: permalink → updated/created
use std::collections::HashMap;
let date_map: HashMap<&str, &str> = posts.iter()
.filter_map(|p| {
let permalink = p.get("permalink")?.as_str()?;
let date = p.get("updated")
.or_else(|| p.get("created"))?
.as_str()?;
Some((permalink, date))
})
.collect();

let mut urls = Vec::with_capacity(permalinks.len() + 16);

// Homepage
urls.push(seo::SitemapUrl {
loc: "/".into(),
lastmod: None,
});

// Content pages (with dates from posts where available)
for p in &permalinks {
let lastmod = date_map.get(p.as_str()).map(|s| s.to_string());
urls.push(seo::SitemapUrl {
loc: p.clone(),
lastmod,
});
}

// Category pages
if !posts.is_empty() {
let categories = shared::collect_all_posts_categories(&posts);
let categories_dir = &site_config.categories_dir;
urls.push(seo::SitemapUrl {
loc: format!("/{}/", categories_dir),
lastmod: None,
});
for cat in &categories {
urls.push(seo::SitemapUrl {
loc: format!("/{}/{}/", categories_dir, cat),
lastmod: None,
});
}
}

// Feed URLs
for feed_name in &feed_names {
urls.push(seo::SitemapUrl {
loc: format!("/{}", feed_name),
lastmod: None,
});
}

let xml = seo::generate_sitemap_xml(&urls, &site_config.root_url);
let output_path = paths.public.join("sitemap.xml");
std::fs::write(&output_path, &xml)
.wrap_err("Failed to write sitemap.xml")?;
seo_count += 1;
}

// Robots.txt
if let Some(ref robots_config) = site_config.robots {
if robots_config.enable {
let content = seo::generate_robots_txt(
&site_config,
robots_config,
sitemap_enabled,
);
let output_path = paths.public.join("robots.txt");
std::fs::write(&output_path, &content)
.wrap_err("Failed to write robots.txt")?;
seo_count += 1;
}
}
}
timings.seo_ms = t.elapsed().as_millis();
if seo_count > 0 {
println!(
" {} {} {:<12} {}",
"•".green(),
format!("{:<12}", "SEO").bold(),
format!("{} files", seo_count),
shared::get_elapsed_time(t).dimmed()
);
}

// Assets
let t = Instant::now();
let public_assets_dir = paths.public.join("assets");
Expand Down
1 change: 1 addition & 0 deletions src/cmd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod dev;
mod init;
mod new;
mod preview;
pub mod seo;
mod theme;

pub use build::build;
Expand Down
Loading
Loading