From a2e1a1be7e934f3f849886fd1d9ac863aa14d73d Mon Sep 17 00:00:00 2001 From: anishkabasheerbad Date: Sat, 13 Jun 2026 21:50:10 -0400 Subject: [PATCH] Add mock review data, average rating calculation, and review display in a cleaner format --- data-science/analytics/rating_aggregator.py | 47 ++++++++++++++++++++- data-science/analytics/review_sentiment.py | 19 ++++++++- datasets/agent_reviews.json | 36 ++++++++++++++++ 3 files changed, 100 insertions(+), 2 deletions(-) diff --git a/data-science/analytics/rating_aggregator.py b/data-science/analytics/rating_aggregator.py index 016dfde..9f82a02 100644 --- a/data-science/analytics/rating_aggregator.py +++ b/data-science/analytics/rating_aggregator.py @@ -6,16 +6,61 @@ import pandas as pd from pathlib import Path +import json + DATASETS_DIR = Path(__file__).parent.parent.parent / "datasets" def aggregate_ratings() -> pd.DataFrame: - """Aggregate ratings by agent.""" + """Aggregate ratings by agent. + + Definition: + - num_ratings: number of ratings for a specific agent + - average_rating: review ratings added up / num_ratings + - Higher num_ratings = higher confidence in that agent's average_rating + - Agents ranked by average_rating descending + + """ df = pd.read_csv(DATASETS_DIR / "agent_ratings.csv") # TODO: Add std dev, rating count, recent vs all-time return df.sort_values("avg_rating", ascending=False) +def calculate_average_ratings() -> list[dict]: + + """ + Calculate the average rating of each agent from its review ratings. + Read agent_reviews.json and compute num_ratings and average_rating per agent. + """ + + + with open(DATASETS_DIR / "agent_reviews.json", encoding="utf-8") as f: + reviews = json.load(f) + + totals = {} + for review in reviews: + agent_id = review["agent_id"] + if agent_id not in totals: + totals[agent_id] = {"sum": 0, "count": 0} + totals[agent_id]["sum"] += review["rating"] + totals[agent_id]["count"] += 1 + + results = [] + for agent_id, data in totals.items(): + results.append({ + "agent_id": 
agent_id, + "average_rating": round(data["sum"] / data["count"], 2), + "num_ratings": data["count"], + }) + + return sorted(results, key=lambda x: x["average_rating"], reverse=True) + + if __name__ == "__main__": print(aggregate_ratings().to_string(index=False)) + + print("\nAverage Ratings from Reviews:") + print("-" * 40) + for agent in calculate_average_ratings(): + print(f"{agent['agent_id']}: {agent['average_rating']} ({agent['num_ratings']} reviews)") diff --git a/data-science/analytics/review_sentiment.py b/data-science/analytics/review_sentiment.py index 01aae95..3d6327b 100644 --- a/data-science/analytics/review_sentiment.py +++ b/data-science/analytics/review_sentiment.py @@ -34,5 +34,22 @@ def analyze_sentiment(reviews: list[dict]) -> list[dict]: if __name__ == "__main__": reviews = load_reviews() analyzed = analyze_sentiment(reviews) + + # Group reviews by agent_id + from collections import defaultdict + grouped = defaultdict(list) for r in analyzed: - print(f"[{r['sentiment']}] {r['agent_id']}: {r['review'][:60]}...") + grouped[r["agent_id"]].append(r) + + print("Reviews by Agent:") + print("=" * 50) + for agent_id, agent_reviews in grouped.items(): + print(f"\nAgent: {agent_id}") + print(f"Number of reviews: {len(agent_reviews)}") + print("-" * 40) + for r in agent_reviews: + print(f" Rating: {r['rating']}/5") + print(f" Review: {r['review']}") + print(f" Sentiment: {r['sentiment']}") + print(f" Date: {r['date']}") + print() \ No newline at end of file diff --git a/datasets/agent_reviews.json b/datasets/agent_reviews.json index eb1dd43..0af54e5 100644 --- a/datasets/agent_reviews.json +++ b/datasets/agent_reviews.json @@ -11,12 +11,24 @@ "review": "Good summaries but sometimes misses important emails in long threads.", "date": "2026-05-22" }, + { + "agent_id": "email_summarizer", + "rating": 1, + "review": "Missed critical emails entirely and the summaries were too vague to be useful.", + "date": "2026-06-07" + }, { "agent_id": "github_issue_triage", 
"rating": 5, "review": "Perfect for our open source project. Label suggestions are spot on.", "date": "2026-05-18" }, + { + "agent_id": "github_issue_triage", + "rating": 3, + "review": "Label suggestions are helpful but priority ranking feels off for complex issues.", + "date": "2026-05-27" + }, { "agent_id": "meeting_notes", "rating": 5, @@ -29,22 +41,46 @@ "review": "Works well for standups. Would love better integration with calendar events.", "date": "2026-05-27" }, + { + "agent_id": "meeting_notes", + "rating": 1, + "review": "Transcription was completely inaccurate and the action items were assigned to the wrong people.", + "date": "2026-05-26" + }, { "agent_id": "resume_reviewer", "rating": 5, "review": "Got actionable feedback that helped me land interviews. Highly recommend.", "date": "2026-05-15" }, + { + "agent_id": "resume_reviewer", + "rating": 3, + "review": "Caught formatting issues well but the content suggestions could be more tailored to my industry.", + "date": "2026-06-13" + }, { "agent_id": "travel_deal_finder", "rating": 3, "review": "Found some deals but prices weren't always accurate. Needs improvement.", "date": "2026-05-19" }, + { + "agent_id": "travel_deal_finder", + "rating": 2, + "review": "Found some flights but the price comparisons were outdated and hotel options were very limited.", + "date": "2026-06-12" + }, { "agent_id": "study_buddy", "rating": 4, "review": "Flashcards are great for exam prep. Study plans could be more customizable.", "date": "2026-05-21" + }, + { + "agent_id": "study_buddy", + "rating": 2, + "review": "Study plans were a good starting point but flashcards felt repetitive and lacked variety.", + "date": "2026-06-11" } ]