"""Simple review sentiment labeler.

This script reads mock agent reviews and labels each review as
positive, neutral, or negative using a basic keyword-based approach.

Formula:
    sentiment_score = positive_word_count - negative_word_count

Rules:
    - score > 0  -> positive
    - score < 0  -> negative
    - score == 0 -> neutral
"""

import json
import string
from pathlib import Path

# The datasets directory sits three directory levels above this file.
DATASETS_DIR = Path(__file__).parent.parent.parent / "datasets"

# Lowercase keywords that each add +1 to a review's score.
POSITIVE_WORDS = {
    "great", "good", "excellent", "helpful", "useful", "fast",
    "accurate", "easy", "love", "best", "amazing", "reliable",
}

# Lowercase keywords that each add -1 to a review's score.
NEGATIVE_WORDS = {
    "bad", "poor", "slow", "wrong", "confusing", "buggy",
    "error", "errors", "difficult", "unhelpful", "hate", "worst",
}

# Maximum number of review characters shown per line in the printed report.
_PREVIEW_LENGTH = 60


def load_reviews() -> list[dict]:
    """Load the mock reviews from ``agent_reviews.json`` in ``DATASETS_DIR``.

    Returns:
        The parsed JSON content. NOTE(review): the rest of this module
        assumes a list of objects carrying at least the ``"review"`` and
        ``"agent_id"`` keys -- confirm against the dataset.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(DATASETS_DIR / "agent_reviews.json", encoding="utf-8") as f:
        return json.load(f)


def get_sentiment_score(text: str) -> int:
    """Return the keyword-based sentiment score of *text*.

    The text is lowercased and split on whitespace. Leading and trailing
    ASCII punctuation is removed from every token, so quoted or bracketed
    words such as ``"great"`` or ``(slow)`` still match the lexicons.

    Args:
        text: The review text to score.

    Returns:
        The number of positive keywords minus the number of negative ones.
    """
    # Normalise each token once and reuse the result for both lexicons.
    words = [token.strip(string.punctuation) for token in text.lower().split()]

    positive_count = sum(word in POSITIVE_WORDS for word in words)
    negative_count = sum(word in NEGATIVE_WORDS for word in words)

    return positive_count - negative_count


def get_sentiment_label(score: int) -> str:
    """Map a sentiment score to ``"positive"``, ``"negative"`` or ``"neutral"``.

    Args:
        score: A score as produced by ``get_sentiment_score``.

    Returns:
        ``"positive"`` for scores above zero, ``"negative"`` for scores
        below zero, and ``"neutral"`` for exactly zero.
    """
    if score > 0:
        return "positive"
    if score < 0:
        return "negative"
    return "neutral"


def analyze_sentiment(reviews: list[dict]) -> list[dict]:
    """Annotate every review with its sentiment label and score.

    Args:
        reviews: Review records; each must contain a ``"review"`` text entry.

    Returns:
        A new list of new dicts. Each one holds the original record's
        entries plus ``"sentiment"`` and ``"sentiment_score"``. The input
        records are not modified.

    Raises:
        KeyError: If a record has no ``"review"`` key.
    """
    results = []

    for review in reviews:
        score = get_sentiment_score(review["review"])
        results.append({
            **review,
            "sentiment": get_sentiment_label(score),
            "sentiment_score": score,
        })

    return results


def _preview(text: str, limit: int = _PREVIEW_LENGTH) -> str:
    """Return *text* cut to *limit* characters, with "..." only if it was cut."""
    if len(text) <= limit:
        return text
    return text[:limit] + "..."


def main() -> None:
    """Load the mock reviews, analyze them, and print one line per review."""
    analyzed = analyze_sentiment(load_reviews())

    print("Review Sentiment Results:")
    print("-" * 40)

    for r in analyzed:
        print(
            f"[{r['sentiment']}] "
            f"score={r['sentiment_score']} "
            f"{r['agent_id']}: {_preview(r['review'])}"
        )


if __name__ == "__main__":
    main()