diff --git a/Documentations/DevSync.md b/Documentations/DevSync.md index 09bb8823..5bb481a3 100644 --- a/Documentations/DevSync.md +++ b/Documentations/DevSync.md @@ -7,9 +7,15 @@ into an interactive SSH session running the agent. Run from anywhere inside the repo: ```bash -./devsync.sh +./devsync.sh # default: python -m chatdku.core.agent +./devsync.sh chatdku/core/agent.py # as a file path +./devsync.sh chatdku.core.agent # as a module (runs with python -m) ``` +Arguments containing `/` or ending in `.py` are run as file paths. Everything +else is treated as a module name and run with `python -m`. Absolute local +paths are accepted and automatically stripped to repo-relative before sync. + ## What it does 1. Resolves the remote user (prefers `gh api user`, falls back to `whoami`) and diff --git a/GUIDE.md b/GUIDE.md index 016601a1..f89794c3 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -2,19 +2,20 @@ This is a set of guides intended for you to get ready to contribute to our project. This guide is intended for **newcomers**, as well as, our **members**. -I (Temuulen) will be explaining our core dependencies as well as any other useful stuff you should learn about before getting into coding. +I (Temuulen) will be explaining our core dependencies as well as any other useful stuff you should learn about before getting into coding. > [!IMPORTANT] -> This is a work in progess. Please tell me what you don't understand about this guide and our project and I will add it to this document for future use. +> This is a work in progress. Please tell me what you don't understand about this guide and our project and I will add it to this document for future use. When I was coming into this project, even though it was structured very clearly, it was hard to get my head around everything. I felt like the code was just very messy and there were just a lot of things that did not have clear explanations. -And most of our code is like that even today. 
However, with this guide I hope you will at least have some support and start contributing faster. +And most of our code is like that even today. However, with this guide I hope you will at least have some support and start contributing faster. -> Please remember that at first you will be learning *slow* to **develop** faster in the future by following this guide. +> Please remember that at first you will be learning _slow_ to **develop** faster in the future by following this guide. Here are some list of members and their respective roles they **self-assigned** themselves into: + - Anar: Frontend (React.js), Syllabi SQL agent tool - Munish: Backend (Flask, Django), System health monitoring - Temuulen: Agent logic (DSPy), Document ingestion Logic (Transferring to ZhiWei) @@ -23,11 +24,12 @@ Here are some list of members and their respective roles they **self-assigned** ### 1. Python -First, obviously you need to know python. While we don't require you to be a pythonic expert, a quality code is generally preferred. So, what makes a code ***good code***? +First, obviously you need to know python. While we don't require you to be a pythonic expert, a quality code is generally preferred. So, what makes a code **_good code_**? This is completely subjective, but there are some qualities that you can start from: -- Functions have [docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) -- Account for future contributers to understand the code + +- Functions have [docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) +- Account for future contributors to understand the code - Obvious naming practices and using python naming practices. I mean I can go on and on about coding practices. What you need to understand is that you need to build scalable code, accounting for any other person to review your code and understand it. @@ -39,13 +41,13 @@ I mean I can go on and on about coding practices. 
What you need to understand is > While these things seem very annoying at first, believe me that they will help. > When I come back to DKU next Spring, I plan to give every member a crash course on a new GIT workflow. Please read all the articles I will be linking to. -Git is a version control system that intelligently tracks changes in files. +Git is a version control system that intelligently tracks changes in files. Git is particularly useful when you and a group of people are all making changes to the same files at the same time. Typically, to do this in a Git-based workflow, you would: -- Create a branch to ***show the intent of your work***. -- Create issues ***before*** you do the work/code. +- Create a branch to **_show the intent of your work_**. +- Create issues **_before_** you do the work/code. - Make edits to the files independently and safely on your own personal branch. - Close or update issues [with your commits or Merge Requests](https://docs.gitlab.com/user/project/issues/managing_issues/#closing-issues-automatically) - Let Git intelligently merge your specific changes back into the main copy of files, so that your changes don't impact other people's updates. @@ -55,35 +57,37 @@ Typically, to do this in a Git-based workflow, you would: > Our `Main` branch is a **SACRED** branch. DO NOT PUSH CODE WITHOUT PROPER REVIEW FROM OTHER MEMBERS. Please read these articles: -- [Github Flow](https://docs.github.com/en/get-started/using-github/github-flow) -- [Always start with an issue](https://web.archive.org/web/20230214040753/https://about.gitlab.com/blog/2016/03/03/start-with-an-issue/) - - Try creating an issue now on what you want to do next. - - Also if you don't see our issue board under the projects tab in our repo. Please contact Mingxi and ask to be added to the Project issue board. 
+ +- [GitHub Flow](https://docs.github.com/en/get-started/using-github/github-flow) +- [Always start with an issue](https://web.archive.org/web/20230214040753/https://about.gitlab.com/blog/2016/03/03/start-with-an-issue/) + - Try creating an issue now on what you want to do next. + - Also, if you don't see our issue board under the Projects tab in our repo, please contact Mingxi and ask to be added to the Project issue board. - [Write good commit messages!](https://cbea.ms/git-commit/) - [Issue board](https://about.gitlab.com/blog/announcing-the-gitlab-issue-board/) - - While we are not using Gitlab, Github has the same feature called "Project". -- [It's all connected in Gitlab](https://about.gitlab.com/2016/03/08/gitlab-tutorial-its-all-connected/) - - Again, Github has the equilavent features at [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls) + - While we are not using GitLab, GitHub has the same feature called "Project". +- [It's all connected in GitLab](https://about.gitlab.com/2016/03/08/gitlab-tutorial-its-all-connected/) + - Again, GitHub has equivalent features, described [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls) -As you incorperate these steps into your developer journey, you will be better equipped for real world team-coding. -All the industry experts follow some form of stardards using GIT. You should learn to use it properly while you are here with us. +As you incorporate these steps into your developer journey, you will be better equipped for real-world team coding. +All the industry experts follow some form of standard for using GIT. You should learn to use it properly while you are here with us. -And [here is a longer video](https://www.youtube.com/watch?v=1ffBJ4sVUb4) that gives you more in-depth details on how GIT works. 
+And [here is a longer video](https://www.youtube.com/watch?v=1ffBJ4sVUb4) that gives you more in-depth details on how GIT works. -Here is an [interactive](https://learngitbranching.js.org/?locale=en_US) Git simulator for you to practice. +Here is an [interactive](https://learngitbranching.js.org/?locale=en_US) Git simulator for you to practice. ### 3. Using the Terminal Using the terminal, you can do a lot of stuff with it. I assure you that to get better at it you just have to use it daily. At first you might google a lot of stuff, and that is **okay!**. All of us started out like that. Here are some of the common commands I use when working with CHATDKU: + - `ssh`: Used to connect to our server -- `git`: Working with github +- `git`: Working with GitHub - `sftp`: ssh like file transferring - `nvidia-smi`: Used to inspect GPUs Again, just google these stuff and learn. Good luck! It will be worth it. -## Role-specific guides +## Role-specific Guides Please be careful when interacting with Docker. It hosts our Embedding Model, Vector Database, and Redis Database. @@ -93,9 +97,30 @@ Please be careful when interacting with Docker. It hosts our Embedding Model, Ve - For creating tools: https://github.com/Glitterccc/ChatDKU/issues/122 - Arize Phoenix for observability: https://arize.com/docs/phoenix -### Document ingestion +### Iterating on the agent with `devsync.sh` + +Edit code on your laptop, then push and run it on the shared dev server in one +command. From the repo root: + +```bash +./devsync.sh # runs the agent +``` + +```bash +./devsync.sh chatdku/core/tools/your_file.py # runs any file you're hacking on +``` + +The script rsyncs your working tree, runs `uv sync`, and drops you into a live +session on the remote. Your `.venv/`, `.env`, and `.git/` are left alone. + +See [Documentations/DevSync.md](Documentations/DevSync.md) for configuration, +Windows-specific notes, and troubleshooting. 
If you're new, also skim +[Documentations/Shared-Secrets.md](Documentations/Shared-Secrets.md) — once an +admin adds you to `chatdku_devs`, all project secrets load into your remote +shell automatically, no `.env` copying needed. + +### Document Ingestion - Llamaindex for document ingestion: https://developers.llamaindex.ai/python/framework/getting_started/concepts - ChromaDB for vector store: https://docs.trychroma.com/docs/overview/introduction - Redis for keyword search: https://redis.io/docs/latest/develop/ - diff --git a/chatdku/core/agent.py b/chatdku/core/agent.py index c64a4e54..434eb066 100755 --- a/chatdku/core/agent.py +++ b/chatdku/core/agent.py @@ -12,7 +12,10 @@ from chatdku.core.dspy_classes.synthesizer import Synthesizer from chatdku.core.tools.course_schedule import CourseScheduleLookupOuter from chatdku.core.tools.get_prerequisites import PrerequisiteLookupOuter -from chatdku.core.tools.llama_index import KeywordRetrieverOuter, VectorRetrieverOuter +from chatdku.core.tools.llama_index_tools import ( + KeywordRetrieverOuter, + VectorRetrieverOuter, +) from chatdku.core.tools.major_requirements import MajorRequirementsLookupOuter from chatdku.core.tools.syllabi_tool.query_curriculum_db import QueryCurriculumOuter from chatdku.core.utils import format_trajectory, load_conversation, span_start diff --git a/chatdku/core/tools/llama_index.py b/chatdku/core/tools/llama_index_tools.py similarity index 100% rename from chatdku/core/tools/llama_index.py rename to chatdku/core/tools/llama_index_tools.py diff --git a/chatdku/django/chatdku_django/chat/tools.py b/chatdku/django/chatdku_django/chat/tools.py index 738d5387..98a5bd0a 100644 --- a/chatdku/django/chatdku_django/chat/tools.py +++ b/chatdku/django/chatdku_django/chat/tools.py @@ -1,4 +1,4 @@ -from chatdku.core.tools.llama_index import KeywordRetrieverOuter, VectorRetrieverOuter +from chatdku.core.tools.llama_index_tools import KeywordRetrieverOuter, VectorRetrieverOuter from 
chatdku.core.tools.syllabi_tool.query_curriculum_db import QueryCurriculumOuter diff --git a/chatdku/django/chatdku_django/chat/views.py b/chatdku/django/chatdku_django/chat/views.py index 65af7a08..5b39d547 100644 --- a/chatdku/django/chatdku_django/chat/views.py +++ b/chatdku/django/chatdku_django/chat/views.py @@ -30,7 +30,7 @@ from rest_framework.views import APIView from chatdku.core.agent import Agent -from chatdku.core.tools.llama_index import KeywordRetrieverOuter, VectorRetrieverOuter +from chatdku.core.tools.llama_index_tools import KeywordRetrieverOuter, VectorRetrieverOuter from chatdku.core.tools.syllabi_tool.query_curriculum_db import QueryCurriculumOuter from chat.tools import get_tools diff --git a/devsync.sh b/devsync.sh index b92cde66..e2b4c083 100644 --- a/devsync.sh +++ b/devsync.sh @@ -1,5 +1,12 @@ #!/usr/bin/env bash -# devsync.sh — rsync local sources to the dev server, then drop into an interactive agent session. +# devsync.sh — rsync local sources to the dev server, then run Python remotely. +# +# Usage: +# ./devsync.sh # runs: python -m chatdku.core.agent +# ./devsync.sh path/to/file.py # runs: python path/to/file.py +# ./devsync.sh chatdku.core.agent # runs: python -m chatdku.core.agent +# Arguments with `/` or a `.py` suffix are treated as file paths; everything +# else is treated as a module name and run with `python -m`. set -euo pipefail BOLD="\033[1m" @@ -21,6 +28,34 @@ SERVER="${CHATDKU_SERVER:-${_SSH_USER}@10.200.14.82}" REMOTE_DIR="${CHATDKU_REMOTE_DIR:-~/ChatDKU-DevSync}" LOCAL_DIR="$(git rev-parse --show-toplevel)" +# Accept a leading `-m` / `--module` flag for familiarity; we always decide +# file-vs-module from the argument shape below. +if [[ "${1:-}" == "-m" || "${1:-}" == "--module" ]]; then + shift +fi + +TARGET="${1:-}" +if [[ -n "$TARGET" ]]; then + if [[ "$TARGET" != *"/"* && "$TARGET" != *.py ]]; then + # Looks like a module (e.g. 
@dataclass
class QueryResult:
    """Outcome of one simulated user query issued during a retriever load test.

    ``error`` defaults to ``None``, so it is annotated as optional instead of
    as a plain ``str``.
    """

    # Identifier of the simulated user (worker) that issued the query.
    thread_id: int
    # Size bucket of the query template, e.g. "small" or "extra_large".
    query_size: str
    # Semantic query text (callers may store a shortened preview here).
    semantic_query: str
    # Whether the query met the load test's success criterion.
    success: bool
    # Time the query took, in seconds.
    elapsed_time: float
    # Number of results the query returned.
    num_results: int
    # Failure description, or None when no error was recorded. The annotation
    # is quoted so the union syntax is never evaluated at runtime, which keeps
    # it valid on interpreters older than 3.10.
    error: "str | None" = None
# A query counts as successful only when the retriever returns exactly this
# many documents (presumably its configured top-k -- TODO confirm against
# DocRetrieverOuter).
_EXPECTED_NUM_RESULTS = 10

# Query sizes, in the order they are rotated across users and rounds.
_QUERY_SIZES = ("small", "medium", "large", "extra_large")

# (semantic query, keyword query) pairs grouped by query size.
_QUERY_TEMPLATES = {
    "small": [
        ("hello", "COMPSCI"),
        ("advisor", "courses"),
        ("requirements", "prerequisites"),
    ],
    "medium": [
        ("How often should I visit my advisor?", "machine learning courses"),
        ("What are the graduation requirements?", "software engineering electives"),
        ("When should I register for classes?", "course registration deadlines"),
    ],
    "large": [
        (
            "What are the courses of Applied Mathematics and what prerequisites do I need?",
            "applied mathematics prerequisites requirements",
        ),
        (
            "I'm interested in machine learning and artificial intelligence. What courses should I take?",
            "machine learning AI courses curriculum path",
        ),
        (
            "Can you explain the difference between software engineering and computer science programs?",
            "software engineering vs computer science degree requirements",
        ),
    ],
    "extra_large": [
        (
            """The professor sent me this as requirement to be my SW mentor:
                Please send me your CV and transcript.
                In particular, please send me your planned proposal draft and try your best to answer the following:
                Research topic and key question,
                Existing works and their limitation,
                Your Idea and workplan
                Once I received the above, I will schedule an in-person meeting with you.
                What do I need to do?""",
            "senior project advisor requirements CV transcript proposal research",
        ),
    ],
}


def _timed_retrieve(semantic_q: str, keyword_q: str):
    """Run one retrieval and return ``(num_docs, elapsed_seconds, error)``.

    ``error`` is ``None`` when the retriever returned the expected number of
    documents. An exception raised by the retriever is captured and reported
    as an error string, so a single failing query is counted as a failure
    instead of aborting the whole load test.
    """
    # perf_counter is monotonic, so the interval cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()
    try:
        docs, _internal = DocumentRetriever(semantic_q, keyword_q)
    except Exception as exc:  # worker boundary: record the failure, keep going
        return 0, time.perf_counter() - start, f"{type(exc).__name__}: {exc}"
    elapsed = time.perf_counter() - start

    num_docs = len(docs)
    if num_docs != _EXPECTED_NUM_RESULTS:
        return (
            num_docs,
            elapsed,
            f"expected {_EXPECTED_NUM_RESULTS} results, got {num_docs}",
        )
    return num_docs, elapsed, None


def _user_query(user_id: int, round_num: int) -> QueryResult:
    """Simulate a single user making one query and describe its outcome."""
    # Rotate the query size across users and rounds so every size gets used.
    query_size = _QUERY_SIZES[(user_id + round_num) % len(_QUERY_SIZES)]
    candidates = _QUERY_TEMPLATES[query_size]
    semantic_q, keyword_q = candidates[user_id % len(candidates)]

    num_docs, elapsed, error = _timed_retrieve(semantic_q, keyword_q)

    # Keep only a short preview of long queries for the report.
    preview = semantic_q[:50] + "..." if len(semantic_q) > 50 else semantic_q
    return QueryResult(
        thread_id=user_id,
        query_size=query_size,
        semantic_query=preview,
        success=error is None,
        elapsed_time=elapsed,
        num_results=num_docs,
        error=error,
    )


def _run_user_queries(num_users: int, rounds: int) -> List[QueryResult]:
    """Run ``num_users * rounds`` queries on a thread pool, printing each outcome."""
    all_results: List[QueryResult] = []
    with ThreadPoolExecutor(max_workers=num_users) as executor:
        futures = [
            executor.submit(_user_query, user_id, round_num)
            for round_num in range(rounds)
            for user_id in range(num_users)
        ]

        # Report results in completion order, not submission order.
        for future in as_completed(futures):
            result = future.result()
            all_results.append(result)

            status = "✓" if result.success else "✗"
            print(
                f"{status} User {result.thread_id:2d} | {result.query_size:12s} | "
                f"{result.elapsed_time:5.2f}s | {result.num_results:3d} results"
            )
    return all_results


def _print_load_report(all_results, successful, failed, overall_elapsed) -> None:
    """Print the summary of a concurrent load test run.

    ``all_results`` must be non-empty; ``successful`` and ``failed`` are its
    partition by ``QueryResult.success``.
    """
    # Imported locally: the file's import block is outside this block.
    import statistics

    total = len(all_results)

    print("\n" + "=" * 70)
    print("📈 LOAD TEST RESULTS")
    print("=" * 70)

    print(
        f"\n✓ Successful queries: {len(successful)}/{total} "
        f"({len(successful) / total * 100:.1f}%)"
    )
    print(
        f"✗ Failed queries: {len(failed)}/{total} "
        f"({len(failed) / total * 100:.1f}%)"
    )

    if successful:
        times = [r.elapsed_time for r in successful]
        print("\n⏱️ Response Times:")
        print(f" Average: {sum(times) / len(times):.3f}s")
        print(f" Min: {min(times):.3f}s")
        print(f" Max: {max(times):.3f}s")
        # statistics.median averages the two middle values for even counts.
        print(f" Median: {statistics.median(times):.3f}s")

        # Break down by query size
        print("\n📏 Performance by Query Size:")
        by_size = defaultdict(list)
        for r in successful:
            by_size[r.query_size].append(r.elapsed_time)

        for size in _QUERY_SIZES:
            if size in by_size:
                size_times = by_size[size]
                avg = sum(size_times) / len(size_times)
                print(f" {size:12s}: {avg:.3f}s avg ({len(size_times)} queries)")

    print(f"\n⚡ Throughput: {total / overall_elapsed:.2f} queries/second")
    print(f"🕐 Total time: {overall_elapsed:.2f}s")

    if failed:
        print("\n❌ Failed Query Details:")
        for r in failed[:5]:  # Show first 5 failures
            print(f" User {r.thread_id} ({r.query_size}): {r.error}")


def test_concurrent_queries(num_users: int = 3, rounds: int = 3):
    """
    Load test with multiple concurrent users making queries of different sizes.

    The test passes when more than 80% of the queries succeed and the average
    response time of the successful ones is below 10 seconds. It returns
    nothing: pytest requires test functions to return ``None``. Use
    ``_run_user_queries`` directly to get the raw results.

    Args:
        num_users: Number of concurrent users to simulate
        rounds: Number of rounds each user will query

    Raises:
        ValueError: If ``num_users`` or ``rounds`` is smaller than 1.
    """
    if num_users < 1 or rounds < 1:
        raise ValueError("num_users and rounds must both be at least 1")

    print(f"🚀 Starting load test: {num_users} concurrent users, {rounds} rounds each")
    print(f"📊 Total queries: {num_users * rounds}\n")

    overall_start = time.perf_counter()
    all_results = _run_user_queries(num_users, rounds)
    overall_elapsed = time.perf_counter() - overall_start

    successful = [r for r in all_results if r.success]
    failed = [r for r in all_results if not r.success]
    _print_load_report(all_results, successful, failed, overall_elapsed)

    # Assertions for test validation
    assert successful, "All queries failed"
    assert len(successful) / len(all_results) > 0.8, "Success rate below 80%"

    avg_time = sum(r.elapsed_time for r in successful) / len(successful)
    assert avg_time < 10, f"Average response time too high: {avg_time:.2f}s"

    print("\n✅ Load test completed successfully!")


def test_same_query_concurrent(num_users: int = 3):
    """
    Load test where all users make the SAME query simultaneously.

    Prints per-user outcomes and aggregate timings, then requires that at
    least one query succeeded. It returns nothing: pytest requires test
    functions to return ``None``.

    Args:
        num_users: Number of concurrent users issuing the query

    Raises:
        ValueError: If ``num_users`` is smaller than 1.
    """
    # Imported locally: the file's import block is outside this block.
    import statistics

    if num_users < 1:
        raise ValueError("num_users must be at least 1")

    SAME_QUERY = (
        "What are the machine learning courses available?",
        "machine learning courses prerequisites",
    )

    print(f"🚀 Starting same-query load test: {num_users} users, same query")
    print(f"📝 Query: '{SAME_QUERY[0]}'\n")

    def query_task(user_id: int):
        num_docs, elapsed, error = _timed_retrieve(SAME_QUERY[0], SAME_QUERY[1])
        # Report the real document count, also for failed queries.
        return (user_id, error is None, elapsed, num_docs, error)

    results = []
    overall_start = time.perf_counter()

    with ThreadPoolExecutor(max_workers=num_users) as executor:
        futures = [executor.submit(query_task, i) for i in range(num_users)]

        for future in as_completed(futures):
            user_id, success, elapsed, num_results, error = future.result()
            results.append((success, elapsed, num_results))

            status = "✓" if success else "✗"
            print(
                f"{status} User {user_id:2d} | {elapsed:5.2f}s | {num_results:3d} results"
            )
            if error is not None:
                print(f" User {user_id}: {error}")

    overall_elapsed = time.perf_counter() - overall_start

    # Analysis
    print("\n" + "=" * 70)
    print("📈 SAME-QUERY LOAD TEST RESULTS")
    print("=" * 70)

    successful = [r for r in results if r[0]]
    times = [r[1] for r in successful]

    print(
        f"\n✓ Success rate: {len(successful)}/{len(results)} "
        f"({len(successful) / len(results) * 100:.1f}%)"
    )

    if times:
        print("\n⏱️ Response Times:")
        print(f" Average: {sum(times) / len(times):.3f}s")
        print(f" Min: {min(times):.3f}s")
        print(f" Max: {max(times):.3f}s")
        # Population standard deviation, same quantity as the manual formula.
        print(f" Std Dev: {statistics.pstdev(times):.3f}s")

    print(f"\n⚡ Throughput: {len(results) / overall_elapsed:.2f} queries/second")
    print(f"🕐 Wall clock time: {overall_elapsed:.2f}s")

    # Without an assertion this test could never fail on bad results.
    assert successful, "All queries failed"

    print("\n✅ Same-query load test completed!")
b/tests/test_retriever.py @@ -2,7 +2,7 @@ import pytest -from chatdku.core.tools.llama_index import ( +from chatdku.core.tools.llama_index_tools import ( KeywordRetrieverOuter, QueryTimeoutError, VectorRetrieverOuter,