Skip to content
Merged
87 changes: 84 additions & 3 deletions dashboard/osa/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -697,6 +697,7 @@ <h2>Admin Access</h2>
let toolsChartInstance = null;
let adminTokenChartInstance = null;
let adminCostChartInstance = null;
let citationsChartInstance = null;

const COLORS = [
'#2563eb', '#1e3a5f', '#059669', '#d97706', '#dc2626',
Expand Down Expand Up @@ -858,11 +859,12 @@ <h2>Communities</h2>
document.title = `${safeName.toUpperCase()} - OSA Dashboard`;

try {
const [summaryResp, usageResp, syncResp, healthResp] = await Promise.all([
const [summaryResp, usageResp, syncResp, healthResp, citationsResp] = await Promise.all([
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/metrics/public`),
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/metrics/public/usage?period=${activePeriod}`),
fetch(`${API_BASE}/sync/status?community_id=${encodeURIComponent(communityId)}`).catch(err => { console.warn('Sync status fetch failed (non-critical):', err.message); return null; }),
fetch(`${API_BASE}/sync/health?community_id=${encodeURIComponent(communityId)}`).catch(err => { console.warn('Health check fetch failed (non-critical):', err.message); return null; }),
fetch(`${API_BASE}/${encodeURIComponent(communityId)}/citations`).catch(err => { console.warn('Citations fetch failed (non-critical):', err.message); return null; }),
]);

const failedStatus = !summaryResp.ok ? summaryResp.status : (!usageResp.ok ? usageResp.status : null);
Expand All @@ -872,8 +874,10 @@ <h2>Communities</h2>
const usage = await usageResp.json();
const sync = syncResp && syncResp.ok ? await syncResp.json() : null;
const health = healthResp && healthResp.ok ? await healthResp.json() : null;
// Citations feed is opt-in per community; a 404 just means it is off.
const citations = citationsResp && citationsResp.ok ? await citationsResp.json() : null;

renderCommunityView(summary, usage, sync, health, communityId);
renderCommunityView(summary, usage, sync, health, citations, communityId);
document.getElementById('adminCard').style.display = '';

if (adminKey) loadAdminData(communityId);
Expand All @@ -885,7 +889,7 @@ <h2>Communities</h2>
}
}

function renderCommunityView(summary, usage, sync, health, communityId) {
function renderCommunityView(summary, usage, sync, health, citations, communityId) {
const app = document.getElementById('app');
const safeName = escapeHtml(communityId);
const meta = communityMeta[communityId] || {};
Expand Down Expand Up @@ -917,6 +921,19 @@ <h2>Communities</h2>
: '';
const links = linkHtml(meta.links, 'community-detail-links');

// Publication citations card: shown only when the community exposes the
// citations feed and at least one canonical paper has citations.
const hasCitations = citations && citations.by_paper
&& Object.keys(citations.by_paper).length > 0;
const citationsCardHtml = hasCitations ? `
<div class="card">
<h2>Publication Citations</h2>
<p style="color:#64748b; font-size:0.9rem; margin-bottom:0.75rem;">
${Number(citations.total || 0).toLocaleString()} papers citing this community's canonical works, by year.
</p>
<div class="chart-container" style="height:360px;"><canvas id="citationsChart"></canvas></div>
</div>` : '';

app.className = '';
app.innerHTML = `
<div class="card">
Expand Down Expand Up @@ -974,10 +991,12 @@ <h3 style="color:#1e3a5f;margin:1.5rem 0 1rem;font-size:1rem;">Admin: Feedback</
<div id="adminFeedback"><div class="loading">Loading feedback...</div></div>
</div>
</div>
${citationsCardHtml}
`;

renderUsageChart(usage);
renderToolsChart(summary.top_tools);
renderCitationsChart(citations);
}

const SYNC_LABELS = {
Expand Down Expand Up @@ -1154,6 +1173,68 @@ <h3 style="color:#1e3a5f;margin:1.5rem 0 1rem;font-size:1rem;">Admin: Feedback</
});
}

// Qualitative palette for charts that stack many series (the citations chart
// can carry 14+ canonical papers). Tableau 10's saturated hues occupy the
// first ten slots so charts with only a few series stay high-contrast; the
// companion tones follow.
const CITATION_PALETTE = [
    // Saturated hues.
    '#4e79a7', '#f28e2b', '#e15759', '#76b7b2', '#59a14f',
    '#edc948', '#b07aa1', '#ff9da7', '#9c755f', '#bab0ac',
    // Companion tones.
    '#a0cbe8', '#ffbe7d', '#8cd17d', '#f1ce63', '#86bcb6',
    '#ff9d9a', '#d37295', '#fabfd2', '#d4a6c8', '#d7b5a6',
];

// Fill color for the series at position `idx`. Positions covered by
// CITATION_PALETTE use the curated entry; anything past the end gets an HSL
// color whose hue advances by the golden angle (~137.508 degrees) per index,
// which spreads overflow hues around the wheel instead of bunching them.
function seriesColor(idx) {
    const withinPalette = idx < CITATION_PALETTE.length;
    if (!withinPalette) {
        const degrees = (idx * 137.508) % 360;
        return `hsl(${Math.round(degrees)}, 55%, 55%)`;
    }
    return CITATION_PALETTE[idx];
}

// Render the stacked "citations per year" bar chart into #citationsChart.
//
// `citations` is the parsed citations feed, or null when the feed is disabled
// or its fetch failed. Fields read here:
//   by_paper        DOI -> { year -> count }; one stacked dataset per DOI
//   canonical_dois  optional array that fixes the stacking order
//   labels          optional DOI -> legend label map (falls back to the DOI)
// When the canvas is absent or there is no per-paper data, the only effect is
// that any previously created chart is destroyed.
function renderCitationsChart(citations) {
    // Tear down the previous Chart instance before anything else, so it is
    // released even when this call ends up drawing nothing.
    if (citationsChartInstance) {
        citationsChartInstance.destroy();
        citationsChartInstance = null;
    }

    const canvas = document.getElementById('citationsChart');
    const byPaper = citations && citations.by_paper;
    if (!canvas || !byPaper || Object.keys(byPaper).length === 0) return;

    const labels = citations.labels || {};

    // Stacking order: configured canonical_dois first (in config order), then
    // every other DOI that has citations, so nothing is dropped. A Set keeps
    // insertion order and guarantees each DOI appears once; a DOI repeated in
    // the config would otherwise be stacked (and counted) twice. It also
    // replaces the quadratic includes() scan. Entries without a year map are
    // skipped so Object.keys() below cannot throw.
    const ordered = new Set();
    (citations.canonical_dois || []).forEach(doi => {
        if (byPaper[doi]) ordered.add(doi);
    });
    Object.keys(byPaper).forEach(doi => {
        if (byPaper[doi]) ordered.add(doi);
    });
    const dois = Array.from(ordered);

    // X-axis: union of every year present in any paper, ascending numerically
    // (year keys arrive as strings).
    const yearSet = new Set();
    dois.forEach(doi => Object.keys(byPaper[doi]).forEach(year => yearSet.add(year)));
    const years = Array.from(yearSet).sort((a, b) => Number(a) - Number(b));

    // One dataset per DOI; years a paper has no entry for are plotted as 0.
    const datasets = dois.map((doi, idx) => ({
        label: labels[doi] || doi,
        data: years.map(year => byPaper[doi][year] || 0),
        backgroundColor: seriesColor(idx),
        borderWidth: 0,
    }));

    citationsChartInstance = new Chart(canvas, {
        type: 'bar',
        data: { labels: years, datasets },
        options: {
            responsive: true, maintainAspectRatio: false,
            plugins: {
                legend: { position: 'bottom', labels: { boxWidth: 12, font: { size: 11 } } },
                tooltip: { mode: 'index' },
            },
            scales: {
                x: { stacked: true },
                y: { stacked: true, beginAtZero: true, ticks: { precision: 0 } },
            },
        }
    });
}

function changePeriod(period, communityId) {
activePeriod = period;
loadCommunityView(decodeURIComponent(communityId));
Expand Down
188 changes: 187 additions & 1 deletion src/api/routers/community.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from pathlib import Path
from typing import Annotated, Any, Literal

from fastapi import APIRouter, Header, HTTPException, Query, Request
from fastapi import APIRouter, Header, HTTPException, Query, Request, Response
from fastapi.responses import FileResponse, StreamingResponse
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.messages.utils import count_tokens_approximately
Expand All @@ -34,6 +34,7 @@
from src.assistants.registry import AssistantInfo
from src.core.config.community import WidgetConfig
from src.core.services.litellm_llm import create_openrouter_llm
from src.knowledge.search import FAQResult, get_citation_stats, list_faq_entries
from src.metrics.cost import COST_BLOCK_THRESHOLD, COST_WARN_THRESHOLD, MODEL_PRICING, estimate_cost
from src.metrics.db import (
RequestLogEntry,
Expand Down Expand Up @@ -205,6 +206,79 @@ class CommunityConfigResponse(BaseModel):
status: str = Field(..., description="Health status: healthy, degraded, or error")


# Public wire schema for one FAQ entry. Instances are built by
# _faq_result_to_response from a knowledge-layer FAQResult.
class FAQEntryResponse(BaseModel):
"""A single FAQ entry exposed via the public feed."""

# question, answer and each tag are passed through _redact_emails before
# they are stored here, so these three fields never carry raw addresses
# matched by _EMAIL_PATTERN.
question: str = Field(..., description="Synthesized question")
answer: str = Field(..., description="Synthesized answer")
tags: list[str] = Field(default_factory=list, description="Keyword tags")
# The remaining fields are copied from the FAQResult unchanged.
category: str = Field(..., description="Entry category (how-to, troubleshooting, etc.)")
quality_score: float = Field(..., description="LLM quality score (0.0-1.0)")
message_count: int = Field(..., description="Number of source messages in the thread")
first_message_date: str = Field(..., description="Date of the first message in the thread")
thread_url: str = Field(..., description="URL of the source discussion thread")


# Response envelope returned by the GET /faq endpoint (community_faq).
class FAQFeedResponse(BaseModel):
"""Paginated public FAQ feed for a community."""

community_id: str = Field(..., description="Community identifier")
# total is the count returned by list_faq_entries alongside the page; per the
# community_faq docstring it is the full match count before pagination, so it
# can exceed len(entries).
total: int = Field(..., description="Total entries matching the filters")
# limit and offset echo the query parameters the endpoint was called with.
limit: int = Field(..., description="Page size used for this response")
offset: int = Field(..., description="Offset used for this response")
entries: list[FAQEntryResponse] = Field(default_factory=list, description="FAQ entries")


# Response body of the GET /citations endpoint (community_citations). total,
# per_year and by_paper are copied from the object returned by
# get_citation_stats; canonical_dois and labels come from the community's
# citations config and are empty when that config section is absent.
class CitationsFeedResponse(BaseModel):
"""Public citation dashboard data for a community's canonical papers."""

community_id: str = Field(..., description="Community identifier")
total: int = Field(..., description="Total citing papers with a recorded canonical link")
# Year keys are strings here and in by_paper.
# NOTE(review): presumably 4-digit years; confirm against get_citation_stats.
per_year: dict[str, int] = Field(
default_factory=dict, description="Citing-paper count per year across all papers"
)
by_paper: dict[str, dict[str, int]] = Field(
default_factory=dict,
description="Stacked breakdown: canonical DOI -> year -> citing-paper count",
)
canonical_dois: list[str] = Field(
default_factory=list, description="Canonical DOIs tracked for this community"
)
labels: dict[str, str] = Field(
default_factory=dict,
description="Human-readable labels per canonical DOI (DOI -> label), when configured",
)


# Matches bare email addresses so they can be stripped from the public feed.
_EMAIL_PATTERN = re.compile(r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}")


def _redact_emails(text: str) -> str:
"""Replace any email address in ``text`` with a redaction marker.

The FAQ feed is derived from public mailing-list content. The summarizer
strips most personal data, but a handful of entries still embed addresses
(mostly vendor support lines). A public JSON feed should not emit raw
addresses, so they are redacted at serialization time.
"""
return _EMAIL_PATTERN.sub("[email redacted]", text)


def _faq_result_to_response(entry: FAQResult) -> FAQEntryResponse:
    """Build the public response model for one knowledge-layer FAQResult.

    The free-text fields (question, answer, tags) are run through
    ``_redact_emails``; every other field is copied across unchanged.
    """
    # Scrub the free-text fields first, in the order they appear in the model.
    clean_question = _redact_emails(entry.question)
    clean_answer = _redact_emails(entry.answer)
    clean_tags = list(map(_redact_emails, entry.tags))

    return FAQEntryResponse(
        question=clean_question,
        answer=clean_answer,
        tags=clean_tags,
        category=entry.category,
        quality_score=entry.quality_score,
        message_count=entry.message_count,
        first_message_date=entry.first_message_date,
        thread_url=entry.thread_url,
    )


# ---------------------------------------------------------------------------
# Session Management (In-Memory, per-community isolation)
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1502,6 +1576,118 @@ async def community_usage_public(
detail="Metrics database is temporarily unavailable.",
)

@router.get("/faq", response_model=FAQFeedResponse)
async def community_faq(
    response: Response,
    q: str | None = Query(
        default=None,
        description="Optional full-text search phrase. If omitted, browses all entries.",
        max_length=200,
    ),
    category: str | None = Query(
        default=None,
        description="Filter by category (how-to, troubleshooting, reference, etc.)",
        max_length=50,
    ),
    min_quality: float = Query(
        default=0.0, ge=0.0, le=1.0, description="Minimum quality score"
    ),
    limit: int = Query(default=50, ge=1, le=200, description="Page size"),
    offset: int = Query(default=0, ge=0, description="Pagination offset"),
) -> FAQFeedResponse:
    """Public, read-only FAQ feed for this community.

    Returns synthesized question/answer entries generated from the
    community's mailing-list and forum archives. Disabled by default;
    a community opts in via ``public_feeds.faq: true`` in its config.
    Email addresses are redacted from the output. ``total`` is the full
    match count before pagination, in both browse and search modes.
    """
    # Opt-in gate: a missing config, a missing public_feeds section, or a
    # falsy faq flag all mean the feed is off, which is reported as 404.
    community_cfg = info.community_config
    feed_settings = community_cfg.public_feeds if community_cfg is not None else None
    if feed_settings is None or not feed_settings.faq:
        raise HTTPException(
            status_code=404,
            detail="Public FAQ feed is not enabled for this community.",
        )

    # Database failures map to 503; anything else unexpected maps to 500.
    # Both are logged with the traceback before the HTTP error is raised.
    try:
        page, match_count = list_faq_entries(
            project=community_id,
            limit=limit,
            offset=offset,
            query=q,
            category=category,
            min_quality=min_quality,
        )
    except sqlite3.Error:
        logger.exception("Failed to query FAQ feed for community %s", community_id)
        raise HTTPException(
            status_code=503,
            detail="Knowledge database is temporarily unavailable.",
        )
    except Exception:
        logger.exception("Unexpected error serving FAQ feed for community %s", community_id)
        raise HTTPException(
            status_code=500,
            detail="An unexpected error occurred while building the FAQ feed.",
        )

    # Public, read-only data, so let clients and proxies cache it for an hour.
    response.headers["Cache-Control"] = "public, max-age=3600"

    serialized_entries = [_faq_result_to_response(item) for item in page]
    return FAQFeedResponse(
        community_id=community_id,
        total=match_count,
        limit=limit,
        offset=offset,
        entries=serialized_entries,
    )

@router.get("/citations", response_model=CitationsFeedResponse)
async def community_citations(response: Response) -> CitationsFeedResponse:
    """Public, read-only citation dashboard for this community.

    Returns per-year counts of papers citing the community's canonical
    works, plus a stacked breakdown keyed by the cited DOI (the shape
    behind a citations-per-year chart). Disabled by default; a community
    opts in via ``public_feeds.citations: true`` in its config.
    """
    # Opt-in gate: a missing config, a missing public_feeds section, or a
    # falsy citations flag all mean the feed is off, which is reported as 404.
    config = info.community_config
    feed_settings = config.public_feeds if config is not None else None
    if feed_settings is None or not feed_settings.citations:
        raise HTTPException(
            status_code=404,
            detail="Public citations feed is not enabled for this community.",
        )

    # Database failures map to 503; anything else unexpected maps to 500.
    try:
        stats = get_citation_stats(project=community_id)
    except sqlite3.Error:
        logger.exception("Failed to query citations for community %s", community_id)
        raise HTTPException(
            status_code=503,
            detail="Knowledge database is temporarily unavailable.",
        )
    except Exception:
        logger.exception(
            "Unexpected error serving citations feed for community %s", community_id
        )
        raise HTTPException(
            status_code=500,
            detail="An unexpected error occurred while building the citations feed.",
        )

    # Ordering and labels come from the community's citations config; with no
    # such section the feed still works, just without either.
    citation_cfg = config.citations
    if citation_cfg:
        canonical_dois = list(citation_cfg.dois)
        labels = dict(citation_cfg.paper_labels)
    else:
        canonical_dois = []
        labels = {}

    # Public, read-only data, so let clients and proxies cache it for an hour.
    response.headers["Cache-Control"] = "public, max-age=3600"

    return CitationsFeedResponse(
        community_id=community_id,
        total=stats.total,
        per_year=stats.per_year,
        by_paper=stats.by_paper,
        canonical_dois=canonical_dois,
        labels=labels,
    )

return router


Expand Down
1 change: 1 addition & 0 deletions src/api/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ def _run_papers_sync_for_community(community_id: str) -> bool:
project=community_id,
openalex_api_key=settings.openalex_api_key,
openalex_email=settings.openalex_email,
aliases=citations.aliases,
)
total += citing_count

Expand Down
Loading
Loading