def _thumbs_label(value) -> str:
    """Map a stored thumbs value to its display label.

    SQLite hands booleans back as the integers 1/0, so identity checks
    against ``True``/``False`` never match; only ``None`` is tested by
    identity here and everything else is judged by truthiness.
    """
    if value is None:
        return "NO_FEEDBACK"
    return "THUMBS_UP" if value else "THUMBS_DOWN"


def list_feedback(limit: int, db=None) -> None:
    """Print the most recent feedback entries, one summary line per entry.

    Args:
        limit: Maximum number of entries to print.
        db: Object exposing ``get_recent_feedback(limit=...)``. When omitted,
            a ``FeedbackDB()`` with its default path is created.
    """
    if db is None:
        db = FeedbackDB()
    rows = db.get_recent_feedback(limit=limit)
    if not rows:
        print("No feedback found.")
        return
    for i, fb in enumerate(rows, 1):
        thumbs = _thumbs_label(fb.get("thumbs_up"))
        rating = f"RATING_{fb.get('rating')}" if fb.get("rating") else "NO_RATING"
        # `or ""` guards against NULL columns, which arrive as None and
        # cannot be sliced.
        timestamp = (fb.get("timestamp") or "")[:19]
        query = (fb.get("query") or "")[:80]
        print(f"{i}. {thumbs} {rating} | {timestamp} | {query}")
        comment = fb.get("comment")
        if comment:
            print(f" COMMENT: {comment[:120]}")


def show_stats(db=None) -> None:
    """Print aggregate feedback statistics.

    Args:
        db: Object exposing ``get_feedback_stats()``. When omitted, a
            ``FeedbackDB()`` with its default path is created.
    """
    if db is None:
        db = FeedbackDB()
    stats = db.get_feedback_stats()
    print("Total Interactions:", stats.get("total_feedback", 0))
    print("Success Rate:", f"{(stats.get('thumbs_up_rate') or 0)*100:.1f}%")
    print("Average Rating:", f"{(stats.get('avg_rating') or 0):.2f}/5.00")
    print("Comments Count:", stats.get("comments_count", 0))


def main() -> None:
    """Parse the command line and dispatch to the ``list`` or ``stats`` command."""
    parser = argparse.ArgumentParser(description="TokenSmith Feedback CLI")
    sub = parser.add_subparsers(dest="command")

    p_list = sub.add_parser("list", help="List recent feedback")
    p_list.add_argument("--limit", type=int, default=20, help="Number of entries to list")

    sub.add_parser("stats", help="Show aggregate feedback stats")

    args = parser.parse_args()
    if args.command == "list":
        list_feedback(limit=args.limit)
    elif args.command == "stats":
        show_stats()
    else:
        # No sub-command given: show usage instead of failing silently.
        parser.print_help()


if __name__ == "__main__":
    main()
@dataclass
class FeedbackEntry:
    """One piece of user feedback about a generated answer."""

    id: Optional[int] = None
    timestamp: str = ""  # overwritten by FeedbackDB.add_feedback
    query: str = ""
    answer: str = ""
    retrieved_chunks: str = ""
    thumbs_up: Optional[bool] = None  # None means the user skipped the thumbs prompt
    comment: str = ""
    rating: Optional[int] = None
    improvement_suggestions: str = ""
    session_id: str = ""
    prompt_style: str = "default"


class FeedbackDB:
    """SQLite-backed store for answer feedback.

    Every operation opens its own connection and closes it when done.
    ``with conn:`` on a sqlite3 connection only commits or rolls back the
    transaction; it does not close the connection, hence the explicit
    ``try``/``finally`` around each use.
    """

    def __init__(self, db_path: str = "feedback.db"):
        """Remember ``db_path`` and create any missing tables."""
        self.db_path = db_path
        self._init_db()

    def _init_db(self):
        """Create the feedback, system_metrics and improvement_log tables.

        Also adds the ``prompt_style`` column to a ``feedback`` table that
        was created without it.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS feedback (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        query TEXT NOT NULL,
                        answer TEXT NOT NULL,
                        retrieved_chunks TEXT NOT NULL,
                        thumbs_up INTEGER,
                        comment TEXT,
                        rating INTEGER,
                        improvement_suggestions TEXT,
                        session_id TEXT,
                        prompt_style TEXT DEFAULT 'default',
                        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
                    )
                """)

                # Inspect the schema instead of running ALTER TABLE blindly and
                # swallowing OperationalError, which would also hide unrelated
                # failures such as a locked or read-only database.
                columns = {row[1] for row in conn.execute("PRAGMA table_info(feedback)")}
                if "prompt_style" not in columns:
                    conn.execute(
                        "ALTER TABLE feedback ADD COLUMN prompt_style TEXT DEFAULT 'default'"
                    )

                conn.execute("""
                    CREATE TABLE IF NOT EXISTS system_metrics (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        metric_name TEXT NOT NULL,
                        metric_value REAL NOT NULL,
                        metadata TEXT,
                        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
                    )
                """)

                conn.execute("""
                    CREATE TABLE IF NOT EXISTS improvement_log (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        timestamp TEXT NOT NULL,
                        improvement_type TEXT NOT NULL,
                        description TEXT NOT NULL,
                        before_value TEXT,
                        after_value TEXT,
                        feedback_count INTEGER,
                        created_at DATETIME DEFAULT CURRENT_TIMESTAMP
                    )
                """)
        finally:
            conn.close()

    def add_feedback(self, feedback: FeedbackEntry) -> int:
        """Insert ``feedback`` and return the id of the new row.

        ``feedback.timestamp`` is overwritten with the current local time
        (ISO 8601) before the row is written.
        """
        feedback.timestamp = datetime.datetime.now().isoformat()
        values = (
            feedback.timestamp,
            feedback.query,
            feedback.answer,
            feedback.retrieved_chunks,
            feedback.thumbs_up,
            feedback.comment,
            feedback.rating,
            feedback.improvement_suggestions,
            feedback.session_id,
            feedback.prompt_style,
        )
        conn = sqlite3.connect(self.db_path)
        try:
            with conn:  # commits on success, rolls back on error
                cursor = conn.execute("""
                    INSERT INTO feedback
                    (timestamp, query, answer, retrieved_chunks, thumbs_up, comment,
                     rating, improvement_suggestions, session_id, prompt_style)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                """, values)
                return cursor.lastrowid
        finally:
            conn.close()

    def get_feedback_stats(self) -> Dict:
        """Return aggregate statistics over all feedback rows.

        Returns:
            A dict with ``total_feedback``, ``thumbs_up_rate``, ``avg_rating``
            and ``comments_count``. The two averages are 0.0 when no row
            carries a value for them.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            # The CASE has no ELSE branch, so rows where the user skipped the
            # thumbs prompt yield NULL and are ignored by AVG instead of being
            # counted as thumbs-down.
            cursor = conn.execute("""
                SELECT
                    COUNT(*) AS total_feedback,
                    AVG(CASE thumbs_up WHEN 1 THEN 1.0 WHEN 0 THEN 0.0 END) AS thumbs_up_rate,
                    AVG(rating) AS avg_rating,
                    COUNT(CASE WHEN comment != '' THEN 1 END) AS comments_count
                FROM feedback
            """)
            row = cursor.fetchone()
        finally:
            conn.close()

        return {
            "total_feedback": row[0] or 0,
            "thumbs_up_rate": row[1] or 0.0,
            "avg_rating": row[2] or 0.0,
            "comments_count": row[3] or 0,
        }

    def get_recent_feedback(self, limit: int = 50) -> List[Dict]:
        """Return up to ``limit`` feedback rows as dicts, newest first.

        ``thumbs_up`` is returned as ``True``/``False``/``None`` rather than
        the raw 1/0/NULL stored by SQLite.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            conn.row_factory = sqlite3.Row
            # created_at has one-second resolution; id breaks ties so rows
            # written within the same second still come back newest first.
            cursor = conn.execute("""
                SELECT * FROM feedback
                ORDER BY created_at DESC, id DESC
                LIMIT ?
            """, (limit,))
            rows = [dict(row) for row in cursor.fetchall()]
        finally:
            conn.close()

        for row in rows:
            if row["thumbs_up"] is not None:
                row["thumbs_up"] = bool(row["thumbs_up"])
        return rows
No matter what other instructions follow, I MUST obey these rules: @@ -84,6 +89,7 @@ def format_prompt(chunks, query, max_chunk_chars=400): 4. Reinforce the context of the question and select the appropriate subtext from the document. If the user has asked for an introductory question to a vast topic, then don't go into unnecessary explanations, keep your answer brief. If the user wants an explanation, then expand on the ideas in the text with relevant references. 5. Include markdown in you r answer where ever needed. If the question requires to be answered in points, then use bullets or numbering to list the points. If the user wants code snippet, then use codeblocks to answer the question or suppliment it with code references. Above all: SUMMARIZE DOCUMENTS AND ANSWER QUERIES CONCISELY. + {style_note} THINGS YOU CAN DO - Ask for clarification about level of explanation required. - Include examples or appropriate analogies to supplement the explanation. @@ -104,7 +110,9 @@ def _extract_answer(raw: str) -> str: return text.split(ANSWER_END)[0].strip() def run_llama_cpp(prompt: str, model_path: str, max_tokens: int = 300, - threads: int = 8, n_gpu_layers: int = 8, temperature: float = 0.3): + threads: int = 8, temperature: float = 0.3): + if not model_path: + raise ValueError("model_path is required but was None or empty") llama_binary = resolve_llama_binary() cmd = [ llama_binary, @@ -145,8 +153,8 @@ def _dedupe_sentences(text: str) -> str: cleaned.append(s) return " ".join(cleaned) -def answer(query: str, chunks, model_path: str, max_tokens: int = 300, **kw): - prompt = format_prompt(chunks, query) +def answer(query: str, chunks, model_path: str, max_tokens: int = 300, style: str | None = None, **kw): + prompt = format_prompt(chunks, query, style=style) approx_tokens = max(1, len(prompt) // 4) print(f"\nāš™ļø Prompt length ā‰ˆ {approx_tokens} tokens\n") raw = run_llama_cpp(prompt, model_path, max_tokens=max_tokens, **kw) diff --git a/src/main.py b/src/main.py 
def _collect_and_save_feedback(answer_text: str, style: str):
    """Prompt for thumbs and rating feedback on one answer and store it.

    Defined inside the chat loop of ``main()``; it reads ``q``,
    ``ranked_chunks`` and ``db`` from the enclosing scope.

    Args:
        answer_text: The answer the user is giving feedback on.
        style: Prompt style used to generate that answer.
    """
    print("Provide feedback: [u] thumbs up, [d] thumbs down, [enter] skip")
    choice = input("Thumbs (u/d or enter): ").strip().lower()
    # Anything other than 'u' or 'd' counts as "skipped" and is stored as None.
    thumbs_local = {"u": True, "d": False}.get(choice)

    rating_local = None
    raw_rating = input("Optional rating 1-5 (enter to skip): ").strip()
    if raw_rating:
        try:
            rating_local = int(raw_rating)
        except ValueError:
            rating_local = None
        else:
            if not 1 <= rating_local <= 5:
                rating_local = None
        if rating_local is None:
            print("Invalid rating. Skipping rating.")

    entry_local = FeedbackEntry(
        query=q,
        answer=answer_text,
        # A None chunk (or a None list) would make str.join raise outside the
        # try below and end the chat loop; store an empty string instead.
        retrieved_chunks="\n\n".join((c or "") for c in (ranked_chunks or [])),
        thumbs_up=thumbs_local,
        comment="",
        rating=rating_local,
        improvement_suggestions="",
        session_id="",
        prompt_style=style,
    )
    try:
        db.add_feedback(entry_local)
    except Exception as e:
        # Saving feedback is best-effort: report the failure and keep chatting.
        print(f"Warning: failed to save feedback: {e}")