From 658ba205c5386d9269c2623bb6e04493b66af802 Mon Sep 17 00:00:00 2001 From: pomegranar Date: Mon, 13 Apr 2026 22:58:06 +0800 Subject: [PATCH 1/6] added an awesome devsync command for quick agent iteration and testing. it uses UV and Rsync, and creates a new directory called ChatDKU-DevSync. I recommend everyone to move env variables to ~/.profile --- Documentations/Env-variables.md | 12 ++++++- Documentations/Phoenix.md | 2 +- devsync.sh | 56 +++++++++++++++++++++++++++++++++ 3 files changed, 68 insertions(+), 2 deletions(-) create mode 100755 devsync.sh diff --git a/Documentations/Env-variables.md b/Documentations/Env-variables.md index acbef929..5307d2f1 100644 --- a/Documentations/Env-variables.md +++ b/Documentations/Env-variables.md @@ -1,6 +1,16 @@ # Delete this for public version -Put these environment variables in your `.bashrc` or `.zshrc`. +Put these environment variables in `~/.profile`. + +Prefer `~/.profile` over `.bashrc` or `.zshrc` because: +- It is shell-agnostic (works for both bash and zsh users). +- It is sourced by login shells, so variables are available to all programs started from that session — including scripts that SSH in non-interactively. +- Unlike `.bashrc`/`.zshrc`, it is not loaded in interactive-only contexts that can cause errors in scripts (prompts, completions, plugins, etc.). 
+ +If you use zsh and `~/.profile` is not being sourced automatically, add this to your `~/.zprofile`: +```bash +source ~/.profile +``` ``` export OPENAI_API_KEY='dummy' diff --git a/Documentations/Phoenix.md b/Documentations/Phoenix.md index 07d91e59..ac1b701d 100644 --- a/Documentations/Phoenix.md +++ b/Documentations/Phoenix.md @@ -21,7 +21,7 @@ pip install arize-phoenix ## Setup -Run the following command, or set it in your .bashrc: +Run the following command, or set it in your `~/.profile` (see `Env-variables.md`): ```bash export OTEL_EXPORTER_OTLP_HEADERS='Authorization=Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJqdGkiOiJBcGlLZXk6MyJ9.TTBhMzMyyevVPEQIGqVPbdzSW6V9QhnYQtErH-KCeqM' diff --git a/devsync.sh b/devsync.sh new file mode 100755 index 00000000..b92cde66 --- /dev/null +++ b/devsync.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# devsync.sh — rsync local sources to the dev server, then drop into an interactive agent session. +set -euo pipefail + +BOLD="\033[1m" +DIM="\033[2m" +CYAN="\033[36m" +GREEN="\033[32m" +YELLOW="\033[33m" +RED="\033[31m" +RESET="\033[0m" + +info() { echo -e "${CYAN}${BOLD}→${RESET} $*"; } +success() { echo -e "${GREEN}${BOLD}✓${RESET} $*"; } +step() { echo -e "${YELLOW}${BOLD}»${RESET} ${DIM}$*${RESET}"; } +warn() { echo -e "${RED}${BOLD}!${RESET} $*"; } + +_GH_USER="$(gh api user -q .login 2>/dev/null || true)" +_SSH_USER="${_GH_USER:-$(whoami)}" +SERVER="${CHATDKU_SERVER:-${_SSH_USER}@10.200.14.82}" +REMOTE_DIR="${CHATDKU_REMOTE_DIR:-~/ChatDKU-DevSync}" +LOCAL_DIR="$(git rev-parse --show-toplevel)" + +step "preparing remote directory $REMOTE_DIR on $SERVER" +ssh "${SERVER}" "mkdir -p ${REMOTE_DIR}" + +step "linking ~/.env → ${REMOTE_DIR}/.env" +ssh "${SERVER}" ' + if [ -f ~/.env ]; then + ln -sf ~/.env '"${REMOTE_DIR}"'/.env + else + echo "WARN: ~/.env not found on server — skipping link" + fi +' +if ssh "${SERVER}" '[ ! 
-f '"${REMOTE_DIR}"'/.env ]'; then + warn "no .env in ${REMOTE_DIR} — the agent may fail to start" +fi + +info "syncing ${BOLD}$LOCAL_DIR${RESET}${CYAN} → ${BOLD}$SERVER:$REMOTE_DIR" + +rsync -avz --delete \ + --exclude='.git/' \ + --exclude='__pycache__/' \ + --exclude='*.pyc' \ + --exclude='*.egg-info/' \ + --exclude='.env' \ + --exclude='node_modules/' \ + --exclude='frontend/build/' \ + --filter=':- .gitignore' \ + "$LOCAL_DIR/" \ + "$SERVER:$REMOTE_DIR/" + +success "synced" + +info "connecting to ${BOLD}$SERVER${RESET}${CYAN} — running agent" +ssh -t "${SERVER}" "bash -l -c 'cd ${REMOTE_DIR} && uv sync && uv run python -m chatdku.core.agent'" From 99280c7b85be5bd645b1f7f61db4bbb8615c2959 Mon Sep 17 00:00:00 2001 From: "coderabbitai[bot]" <136622811+coderabbitai[bot]@users.noreply.github.com> Date: Mon, 13 Apr 2026 15:38:38 +0000 Subject: [PATCH 2/6] fix: apply CodeRabbit auto-fixes Fixed 2 file(s) based on 2 unresolved review comments. Co-authored-by: CodeRabbit --- Documentations/Env-variables.md | 10 ++++++++-- devsync.sh | 14 +++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) mode change 100755 => 100644 devsync.sh diff --git a/Documentations/Env-variables.md b/Documentations/Env-variables.md index 5307d2f1..2a5181c3 100644 --- a/Documentations/Env-variables.md +++ b/Documentations/Env-variables.md @@ -4,9 +4,15 @@ Put these environment variables in `~/.profile`. Prefer `~/.profile` over `.bashrc` or `.zshrc` because: - It is shell-agnostic (works for both bash and zsh users). -- It is sourced by login shells, so variables are available to all programs started from that session — including scripts that SSH in non-interactively. +- It is sourced by login shells, so variables are available to all programs started from interactive login sessions. - Unlike `.bashrc`/`.zshrc`, it is not loaded in interactive-only contexts that can cause errors in scripts (prompts, completions, plugins, etc.). 
+**Important:** `~/.profile` is NOT sourced by non-interactive, non-login shells created by default for OpenSSH remote commands. For environment variables to be available in non-interactive SSH sessions, consider: +- Using `ssh -t` to force a pseudo-terminal and login shell (as done in `devsync.sh`) +- Setting `AcceptEnv` in SSH client config and `AcceptEnv`/`SetEnv` in sshd_config +- Using `~/.ssh/environment` (if `PermitUserEnvironment` is enabled on the server) +- Exporting variables directly in the remote command string + If you use zsh and `~/.profile` is not being sourced automatically, add this to your `~/.zprofile`: ```bash source ~/.profile @@ -30,4 +36,4 @@ export DB_PASSWORD="securepassword123" export DB_HOST="localhost" export DB_PORT="5432" export DB_NAME="chatdku_db" -``` +``` \ No newline at end of file diff --git a/devsync.sh b/devsync.sh old mode 100755 new mode 100644 index b92cde66..4b6b125e --- a/devsync.sh +++ b/devsync.sh @@ -22,17 +22,17 @@ REMOTE_DIR="${CHATDKU_REMOTE_DIR:-~/ChatDKU-DevSync}" LOCAL_DIR="$(git rev-parse --show-toplevel)" step "preparing remote directory $REMOTE_DIR on $SERVER" -ssh "${SERVER}" "mkdir -p ${REMOTE_DIR}" +ssh "${SERVER}" "mkdir -p \"${REMOTE_DIR}\"" step "linking ~/.env → ${REMOTE_DIR}/.env" -ssh "${SERVER}" ' +ssh "${SERVER}" " if [ -f ~/.env ]; then - ln -sf ~/.env '"${REMOTE_DIR}"'/.env + ln -sf ~/.env \"${REMOTE_DIR}\"/.env else - echo "WARN: ~/.env not found on server — skipping link" + echo \"WARN: ~/.env not found on server — skipping link\" fi -' -if ssh "${SERVER}" '[ ! -f '"${REMOTE_DIR}"'/.env ]'; then +" +if ssh "${SERVER}" "[ ! 
-f \"${REMOTE_DIR}\"/.env ]"; then warn "no .env in ${REMOTE_DIR} — the agent may fail to start" fi @@ -53,4 +53,4 @@ rsync -avz --delete \ success "synced" info "connecting to ${BOLD}$SERVER${RESET}${CYAN} — running agent" -ssh -t "${SERVER}" "bash -l -c 'cd ${REMOTE_DIR} && uv sync && uv run python -m chatdku.core.agent'" +ssh -t "${SERVER}" "bash -l -c 'cd \"${REMOTE_DIR}\" && uv sync && uv run python -m chatdku.core.agent'" \ No newline at end of file From ec160d6dd1843414f24b951c6757fa3cb2dd6f3f Mon Sep 17 00:00:00 2001 From: "Anar.N" Date: Tue, 14 Apr 2026 10:50:04 +0800 Subject: [PATCH 3/6] undid Coderabbit's junk commit, better documentation for env var handling. --- Documentations/Env-variables.md | 15 +++++++++------ devsync.sh | 14 +++++++------- 2 files changed, 16 insertions(+), 13 deletions(-) diff --git a/Documentations/Env-variables.md b/Documentations/Env-variables.md index 2a5181c3..62bc70a1 100644 --- a/Documentations/Env-variables.md +++ b/Documentations/Env-variables.md @@ -3,21 +3,24 @@ Put these environment variables in `~/.profile`. Prefer `~/.profile` over `.bashrc` or `.zshrc` because: + - It is shell-agnostic (works for both bash and zsh users). - It is sourced by login shells, so variables are available to all programs started from interactive login sessions. - Unlike `.bashrc`/`.zshrc`, it is not loaded in interactive-only contexts that can cause errors in scripts (prompts, completions, plugins, etc.). -**Important:** `~/.profile` is NOT sourced by non-interactive, non-login shells created by default for OpenSSH remote commands. For environment variables to be available in non-interactive SSH sessions, consider: -- Using `ssh -t` to force a pseudo-terminal and login shell (as done in `devsync.sh`) -- Setting `AcceptEnv` in SSH client config and `AcceptEnv`/`SetEnv` in sshd_config +**Important:** `~/.profile` is NOT sourced by non-interactive or non-login shells by default for SSH. 
For environment variables to be available in non-interactive SSH sessions, consider: + +- Adding `source $HOME/.profile` to your `~/.bashrc` and/or `~/.zshrc` file (recommended). - Using `~/.ssh/environment` (if `PermitUserEnvironment` is enabled on the server) -- Exporting variables directly in the remote command string -If you use zsh and `~/.profile` is not being sourced automatically, add this to your `~/.zprofile`: +You can also manually source environment variables like so: + ```bash source ~/.profile ``` +## Important variables: + ``` export OPENAI_API_KEY='dummy' export PHOENIX_ENABLE_AUTH='True' @@ -36,4 +39,4 @@ export DB_PASSWORD="securepassword123" export DB_HOST="localhost" export DB_PORT="5432" export DB_NAME="chatdku_db" -``` \ No newline at end of file +``` diff --git a/devsync.sh b/devsync.sh index 4b6b125e..b92cde66 100644 --- a/devsync.sh +++ b/devsync.sh @@ -22,17 +22,17 @@ REMOTE_DIR="${CHATDKU_REMOTE_DIR:-~/ChatDKU-DevSync}" LOCAL_DIR="$(git rev-parse --show-toplevel)" step "preparing remote directory $REMOTE_DIR on $SERVER" -ssh "${SERVER}" "mkdir -p \"${REMOTE_DIR}\"" +ssh "${SERVER}" "mkdir -p ${REMOTE_DIR}" step "linking ~/.env → ${REMOTE_DIR}/.env" -ssh "${SERVER}" " +ssh "${SERVER}" ' if [ -f ~/.env ]; then - ln -sf ~/.env \"${REMOTE_DIR}\"/.env + ln -sf ~/.env '"${REMOTE_DIR}"'/.env else - echo \"WARN: ~/.env not found on server — skipping link\" + echo "WARN: ~/.env not found on server — skipping link" fi -" -if ssh "${SERVER}" "[ ! -f \"${REMOTE_DIR}\"/.env ]"; then +' +if ssh "${SERVER}" '[ ! 
-f '"${REMOTE_DIR}"'/.env ]'; then warn "no .env in ${REMOTE_DIR} — the agent may fail to start" fi @@ -53,4 +53,4 @@ rsync -avz --delete \ success "synced" info "connecting to ${BOLD}$SERVER${RESET}${CYAN} — running agent" -ssh -t "${SERVER}" "bash -l -c 'cd \"${REMOTE_DIR}\" && uv sync && uv run python -m chatdku.core.agent'" \ No newline at end of file +ssh -t "${SERVER}" "bash -l -c 'cd ${REMOTE_DIR} && uv sync && uv run python -m chatdku.core.agent'" From 721f2c28512bd2d3ab72b184ae4881f8fe3a6731 Mon Sep 17 00:00:00 2001 From: "Anar.N" Date: Tue, 14 Apr 2026 13:35:33 +0800 Subject: [PATCH 4/6] added shared secrets scripts. Uses a linux user group called chatdku_devs and hooks to bash and zsh --- Documentations/DevSync.md | 75 ++++++++++++++++ Documentations/Shared-Secrets.md | 100 ++++++++++++++++++++++ scripts/secrets/add_user.sh | 33 ++++++++ scripts/secrets/admin_setup.sh | 113 +++++++++++++++++++++++++ scripts/secrets/chatdku_env.sh.example | 25 ++++++ scripts/secrets/remove_user.sh | 31 +++++++ 6 files changed, 377 insertions(+) create mode 100644 Documentations/DevSync.md create mode 100644 Documentations/Shared-Secrets.md create mode 100755 scripts/secrets/add_user.sh create mode 100755 scripts/secrets/admin_setup.sh create mode 100644 scripts/secrets/chatdku_env.sh.example create mode 100755 scripts/secrets/remove_user.sh diff --git a/Documentations/DevSync.md b/Documentations/DevSync.md new file mode 100644 index 00000000..09bb8823 --- /dev/null +++ b/Documentations/DevSync.md @@ -0,0 +1,75 @@ +# DevSync + +`devsync.sh` is a local helper for fast agent iteration on a remote dev box. It +rsyncs your working tree up, links `~/.env` into the project, then drops you +into an interactive SSH session running the agent. + +Run from anywhere inside the repo: + +```bash +./devsync.sh +``` + +## What it does + +1. Resolves the remote user (prefers `gh api user`, falls back to `whoami`) and + the target host. +2. 
`ssh`es in, ensures `~/ChatDKU-DevSync` exists, and symlinks `~/.env` into + it so the agent can read secrets. +3. `rsync -avz --delete` from the repo root, respecting `.gitignore` and + skipping `.git/`, `__pycache__/`, `*.pyc`, `*.egg-info/`, `node_modules/`, + `frontend/build/`, and `.env`. +4. Opens an interactive shell that runs `uv sync && uv run python -m chatdku.core.agent`. + +## Configuration + +Everything is overridable via environment variables: + +| Variable | Default | Purpose | +| --------------------- | ------------------------------ | ---------------------------------------- | +| `CHATDKU_SERVER` | `@10.200.14.82` | Remote `user@host` for ssh and rsync. | +| `CHATDKU_REMOTE_DIR` | `~/ChatDKU-DevSync` | Path on the remote to sync into. | + +Set them in `~/.profile` (or your shell rc) if you want a non-default target: + +```bash +export CHATDKU_SERVER=myuser@some.host +export CHATDKU_REMOTE_DIR='~/ChatDKU-DevSync-experiment' +``` + +## Secrets on the remote + +The script expects a `~/.env` on the remote host and symlinks it into the +synced directory. Local `.env` files are intentionally **not** pushed +(`--exclude='.env'`). + +If the remote host is the shared dev box and you're a member of +`chatdku_devs`, you do not need a `~/.env` at all — secrets are loaded into +your shell automatically by the system-wide hook. See +[Shared-Secrets](Shared-Secrets.md) for setup and onboarding. In that case the +`WARN: ~/.env not found` message is harmless. + +## Platform support + +| Client OS | Works | Notes | +| --------------- | ----- | ------------------------------------------------------------------ | +| Linux | yes | Native. | +| macOS | yes | Native. `rsync` is preinstalled. | +| Windows (WSL) | yes | Use this — bash, rsync, ssh all Just Work. | +| Windows (Git Bash) | partial | Needs `rsync` installed separately (not bundled). Paths may need conversion. | +| Windows (native cmd/PowerShell) | no | No bash, no rsync. Use WSL. 
| + +Requirements on the client: `bash`, `ssh`, `rsync`, optionally `gh` (for user +resolution). + +## Troubleshooting + +- **`WARN: ~/.env not found on server`** — expected on the shared dev host for + `chatdku_devs` members. On any other host, create `~/.env` on the remote + before running again. +- **`rsync: command not found`** — not installed on the client. On Windows use + WSL; on minimal Linux images install via the package manager. +- **Wrong remote user** — the `gh` fallback picks up your GitHub login, which + may not match your SSH user. Set `CHATDKU_SERVER` explicitly. +- **Syncs too much / too little** — `--filter=':- .gitignore'` means rsync + honors your `.gitignore`. Add patterns there rather than editing the script. diff --git a/Documentations/Shared-Secrets.md b/Documentations/Shared-Secrets.md new file mode 100644 index 00000000..72f1cf2a --- /dev/null +++ b/Documentations/Shared-Secrets.md @@ -0,0 +1,100 @@ +# Shared Secrets + +> This document is only relevant for people developing ChatDKU. + +Shared project credentials (API keys, DB passwords) live in a single root-owned +file on the dev host and are auto-loaded into the shell of every `chatdku_devs` +group member. No per-user setup, no `.env` files to copy around. + +- **Master file:** `/datapool/secrets/chatdku_env.sh` (mode `640`, `root:chatdku_devs`) +- **Group:** `chatdku_devs` +- **Shell hooks:** `/etc/profile.d/chatdku.sh` (bash) and a guarded block in `/etc/zsh/zshrc` +- **Threat model:** prevent git leaks, laptop copies, and Slack pastes. Not + encrypted at rest — relies on filesystem perms and trust of group members. + +--- + +# For members + +**Nothing to do.** Once an admin adds you to `chatdku_devs`, log out and back in. +Every new shell will have the project env vars (`OPENAI_API_KEY`, +`REDIS_PASSWORD`, …) loaded automatically. 
+ +Check it worked: + +```bash +groups | tr ' ' '\n' | grep chatdku_devs # should print the group name +echo "${REDIS_HOST:-unset}" # should print a hostname, not "unset" +``` + +If you see nothing, start a fresh login shell (`exit` + ssh back in). Group +membership is only refreshed on new login. + +**Do not copy the values into your own `.bashrc`, `.zshrc`, or project `.env` +files.** That defeats the rotation story. If you need a secret in a script, +read it from the env at runtime. + +--- + +# For admins + +Instructions below require sudo privileges. + +### One-time setup + +Run as root on the shared dev host: + +```bash +sudo ./scripts/secrets/admin_setup.sh +sudo $EDITOR /datapool/secrets/chatdku_env.sh # replace REPLACE_ME values +``` + +The script is idempotent — re-running it will not overwrite the master file or +duplicate the shell hooks. + +### Onboard a member + +```bash +sudo ./scripts/secrets/add_user.sh +``` + +Tell them to log out and back in. + +### Revoke a member + +```bash +sudo ./scripts/secrets/remove_user.sh +``` + +Their existing shells keep their env until logout, so **rotate any secret they +could have read** (edit the master file). Active processes holding a secret in +memory are out of scope — treat rotation as the real revocation. + +### Rotate a secret + +```bash +sudo $EDITOR /datapool/secrets/chatdku_env.sh +``` + +Members pick up the new value on their next login shell. For long-running +processes, restart them. + +### Add a new env var + +Edit the master file and add an `export FOO="..."` line. Also update +`scripts/secrets/chatdku_env.sh.example` (which is the git-tracked reference) so +new admins know the var exists. + +--- + +## Known limits/risks + +- **No encryption at rest.** Anyone with root on the dev host, or access to + `/datapool` backups, can read the file. If that's in-scope for your threat + model, switch to `sops` + `age`. +- **No access log.** You know who is in the group; you do not know who read + what or when. 
+- **Full env inheritance.** Every process a member runs inherits every secret. + A compromised user account leaks the full set. +- **New shell required.** Group membership and secret changes only apply to + new login shells, not to sessions already open. diff --git a/scripts/secrets/add_user.sh b/scripts/secrets/add_user.sh new file mode 100755 index 00000000..618b6db6 --- /dev/null +++ b/scripts/secrets/add_user.sh @@ -0,0 +1,33 @@ +#!/usr/bin/env bash +# Add a user to the chatdku_devs group. +# Usage: sudo ./add_user.sh + +set -euo pipefail + +GROUP="chatdku_devs" + +if [[ $EUID -ne 0 ]]; then + echo "error: must be run as root (try: sudo $0 )" >&2 + exit 1 +fi + +if [[ $# -ne 1 ]]; then + echo "usage: sudo $0 " >&2 + exit 1 +fi + +USER_TO_ADD="$1" + +if ! id -u "$USER_TO_ADD" > /dev/null 2>&1; then + echo "error: user '$USER_TO_ADD' does not exist" >&2 + exit 1 +fi + +if ! getent group "$GROUP" > /dev/null; then + echo "error: group '$GROUP' missing — run admin_setup.sh first" >&2 + exit 1 +fi + +usermod -aG "$GROUP" "$USER_TO_ADD" +echo "added '$USER_TO_ADD' to '$GROUP'" +echo "they must start a new login shell (log out / ssh back in) to pick up env" diff --git a/scripts/secrets/admin_setup.sh b/scripts/secrets/admin_setup.sh new file mode 100755 index 00000000..8efa934b --- /dev/null +++ b/scripts/secrets/admin_setup.sh @@ -0,0 +1,113 @@ +#!/usr/bin/env bash +# One-time admin setup for ChatDKU shared secrets. +# Run on the shared dev host as root: sudo ./admin_setup.sh +# +# Creates the chatdku_devs group, the master env file at +# /datapool/secrets/chatdku_env.sh, and system-wide shell hooks that +# auto-source it for group members. Idempotent — safe to re-run. 
+ +set -euo pipefail + +GROUP="chatdku_devs" +SECRETS_DIR="/datapool/secrets" +SECRETS_FILE="${SECRETS_DIR}/chatdku_env.sh" +BASH_HOOK="/etc/profile.d/chatdku.sh" +ZSH_HOOK="/etc/zsh/zshrc" +ZSH_MARKER="# >>> chatdku shared env >>>" +ZSH_END_MARKER="# <<< chatdku shared env <<<" + +if [[ $EUID -ne 0 ]]; then + echo "error: must be run as root (try: sudo $0)" >&2 + exit 1 +fi + +# 1. Group +if ! getent group "$GROUP" > /dev/null; then + groupadd "$GROUP" + echo "created group: $GROUP" +else + echo "group $GROUP already exists" +fi + +# 2. Secrets dir — 750 so only group members can enter it +install -d -o root -g "$GROUP" -m 750 "$SECRETS_DIR" + +# 3. Seed master env file if missing +if [[ ! -f "$SECRETS_FILE" ]]; then + cat > "$SECRETS_FILE" <<'EOF' +# ChatDKU shared environment variables. +# Edited by project admins. Auto-sourced for members of chatdku_devs. +# Replace REPLACE_ME placeholders before handing out group access. + +# --- LLM / API credentials --- +export OPENAI_API_KEY="sk-REPLACE_ME" +export ANTHROPIC_API_KEY="sk-ant-REPLACE_ME" +export HF_TOKEN="hf_REPLACE_ME" + +# --- Redis (vector + keyword store) --- +export REDIS_HOST="redis.internal" +export REDIS_PORT="6379" +export REDIS_PASSWORD="REPLACE_ME" + +# --- Phoenix observability --- +export PHOENIX_API_KEY="REPLACE_ME" +export PHOENIX_COLLECTOR_ENDPOINT="http://phoenix.internal:6006" + +# --- Postgres (syllabi / course metadata) --- +export POSTGRES_HOST="pg.internal" +export POSTGRES_PORT="5432" +export POSTGRES_USER="chatdku" +export POSTGRES_PASSWORD="REPLACE_ME" +export POSTGRES_DB="chatdku" +EOF + echo "seeded $SECRETS_FILE — edit it to replace REPLACE_ME values" +else + echo "$SECRETS_FILE already exists, not overwriting" +fi +chown root:"$GROUP" "$SECRETS_FILE" +chmod 640 "$SECRETS_FILE" + +# 4. Bash hook (runs for login shells) +cat > "$BASH_HOOK" </dev/null | tr ' ' '\n' | grep -qx "$GROUP"; then + if [ -r "$SECRETS_FILE" ]; then + set -a + . 
"$SECRETS_FILE" + set +a + fi +fi +EOF +chmod 644 "$BASH_HOOK" +echo "installed $BASH_HOOK" + +# 5. Zsh hook (covers interactive non-login shells too) +if [[ -d /etc/zsh ]]; then + touch "$ZSH_HOOK" + if ! grep -qF "$ZSH_MARKER" "$ZSH_HOOK"; then + cat >> "$ZSH_HOOK" </dev/null | tr ' ' '\n' | grep -qx "$GROUP"; then + if [ -r "$SECRETS_FILE" ]; then + set -a + . "$SECRETS_FILE" + set +a + fi +fi +$ZSH_END_MARKER +EOF + echo "appended zsh hook to $ZSH_HOOK" + else + echo "zsh hook already present in $ZSH_HOOK" + fi +fi + +cat < + 3. They log out and back in — env is live. +EOF diff --git a/scripts/secrets/chatdku_env.sh.example b/scripts/secrets/chatdku_env.sh.example new file mode 100644 index 00000000..b5eee101 --- /dev/null +++ b/scripts/secrets/chatdku_env.sh.example @@ -0,0 +1,25 @@ +# ChatDKU shared environment variables — EXAMPLE / template. +# The real file lives at /datapool/secrets/chatdku_env.sh on the dev host +# and is auto-sourced for members of the chatdku_devs group. +# Never commit real values to git. + +# --- LLM / API credentials --- +export OPENAI_API_KEY="sk-REPLACE_ME" +export ANTHROPIC_API_KEY="sk-ant-REPLACE_ME" +export HF_TOKEN="hf_REPLACE_ME" + +# --- Redis (vector + keyword store) --- +export REDIS_HOST="redis.internal" +export REDIS_PORT="6379" +export REDIS_PASSWORD="REPLACE_ME" + +# --- Phoenix observability --- +export PHOENIX_API_KEY="REPLACE_ME" +export PHOENIX_COLLECTOR_ENDPOINT="http://phoenix.internal:6006" + +# --- Postgres (syllabi / course metadata) --- +export POSTGRES_HOST="pg.internal" +export POSTGRES_PORT="5432" +export POSTGRES_USER="chatdku" +export POSTGRES_PASSWORD="REPLACE_ME" +export POSTGRES_DB="chatdku" diff --git a/scripts/secrets/remove_user.sh b/scripts/secrets/remove_user.sh new file mode 100755 index 00000000..9c86ffc1 --- /dev/null +++ b/scripts/secrets/remove_user.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash +# Remove a user from the chatdku_devs group. 
+# Usage: sudo ./remove_user.sh +# +# Reminder: after revoking access, rotate any secret the user could read. +# Their existing shells keep their env until they log out. + +set -euo pipefail + +GROUP="chatdku_devs" + +if [[ $EUID -ne 0 ]]; then + echo "error: must be run as root (try: sudo $0 )" >&2 + exit 1 +fi + +if [[ $# -ne 1 ]]; then + echo "usage: sudo $0 " >&2 + exit 1 +fi + +USER_TO_REMOVE="$1" + +if ! id -u "$USER_TO_REMOVE" > /dev/null 2>&1; then + echo "error: user '$USER_TO_REMOVE' does not exist" >&2 + exit 1 +fi + +gpasswd -d "$USER_TO_REMOVE" "$GROUP" +echo "removed '$USER_TO_REMOVE' from '$GROUP'" +echo "next: rotate any secrets they may have seen — edit /datapool/secrets/chatdku_env.sh" From e785336fa41e670a2d63891e28ecb190a6ff7f73 Mon Sep 17 00:00:00 2001 From: "Anar.N" Date: Tue, 14 Apr 2026 14:56:55 +0800 Subject: [PATCH 5/6] Rename shadowing llama_index module and extend devsync.sh for use with file or module arguments. --- Documentations/DevSync.md | 8 ++- GUIDE.md | 63 +++++++++++++------ chatdku/core/agent.py | 2 +- .../{llama_index.py => llama_index_tools.py} | 0 chatdku/django/chatdku_django/chat/tools.py | 2 +- chatdku/django/chatdku_django/chat/views.py | 2 +- devsync.sh | 43 ++++++++++++- tests/test_load_retriever.py | 2 +- tests/test_retriever.py | 2 +- 9 files changed, 96 insertions(+), 28 deletions(-) rename chatdku/core/tools/{llama_index.py => llama_index_tools.py} (100%) diff --git a/Documentations/DevSync.md b/Documentations/DevSync.md index 09bb8823..5bb481a3 100644 --- a/Documentations/DevSync.md +++ b/Documentations/DevSync.md @@ -7,9 +7,15 @@ into an interactive SSH session running the agent. Run from anywhere inside the repo: ```bash -./devsync.sh +./devsync.sh # default: python -m chatdku.core.agent +./devsync.sh chatdku/core/agent.py # as a file path +./devsync.sh chatdku.core.agent # as a module (runs with python -m) ``` +Arguments containing `/` or ending in `.py` are run as file paths. 
Everything +else is treated as a module name and run with `python -m`. Absolute local +paths are accepted and automatically stripped to repo-relative before sync. + ## What it does 1. Resolves the remote user (prefers `gh api user`, falls back to `whoami`) and diff --git a/GUIDE.md b/GUIDE.md index 016601a1..c9339918 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -2,19 +2,20 @@ This is a set of guides intended for you to get ready to contribute to our project. This guide is intended for **newcomers**, as well as, our **members**. -I (Temuulen) will be explaining our core dependencies as well as any other useful stuff you should learn about before getting into coding. +I (Temuulen) will be explaining our core dependencies as well as any other useful stuff you should learn about before getting into coding. > [!IMPORTANT] -> This is a work in progess. Please tell me what you don't understand about this guide and our project and I will add it to this document for future use. +> This is a work in progress. Please tell me what you don't understand about this guide and our project and I will add it to this document for future use. When I was coming into this project, even though it was structured very clearly, it was hard to get my head around everything. I felt like the code was just very messy and there were just a lot of things that did not have clear explanations. -And most of our code is like that even today. However, with this guide I hope you will at least have some support and start contributing faster. +And most of our code is like that even today. However, with this guide I hope you will at least have some support and start contributing faster. -> Please remember that at first you will be learning *slow* to **develop** faster in the future by following this guide. +> Please remember that at first you will be learning _slow_ to **develop** faster in the future by following this guide. 
Here are some list of members and their respective roles they **self-assigned** themselves into: + - Anar: Frontend (React.js), Syllabi SQL agent tool - Munish: Backend (Flask, Django), System health monitoring - Temuulen: Agent logic (DSPy), Document ingestion Logic (Transferring to ZhiWei) @@ -23,10 +24,11 @@ Here are some list of members and their respective roles they **self-assigned** ### 1. Python -First, obviously you need to know python. While we don't require you to be a pythonic expert, a quality code is generally preferred. So, what makes a code ***good code***? +First, obviously you need to know python. While we don't require you to be a pythonic expert, a quality code is generally preferred. So, what makes a code **_good code_**? This is completely subjective, but there are some qualities that you can start from: -- Functions have [docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) + +- Functions have [docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) - Account for future contributers to understand the code - Obvious naming practices and using python naming practices. @@ -39,13 +41,13 @@ I mean I can go on and on about coding practices. What you need to understand is > While these things seem very annoying at first, believe me that they will help. > When I come back to DKU next Spring, I plan to give every member a crash course on a new GIT workflow. Please read all the articles I will be linking to. -Git is a version control system that intelligently tracks changes in files. +Git is a version control system that intelligently tracks changes in files. Git is particularly useful when you and a group of people are all making changes to the same files at the same time. Typically, to do this in a Git-based workflow, you would: -- Create a branch to ***show the intent of your work***. -- Create issues ***before*** you do the work/code. +- Create a branch to **_show the intent of your work_**. 
+- Create issues **_before_** you do the work/code. - Make edits to the files independently and safely on your own personal branch. - Close or update issues [with your commits or Merge Requests](https://docs.gitlab.com/user/project/issues/managing_issues/#closing-issues-automatically) - Let Git intelligently merge your specific changes back into the main copy of files, so that your changes don't impact other people's updates. @@ -55,27 +57,29 @@ Typically, to do this in a Git-based workflow, you would: > Our `Main` branch is a **SACRED** branch. DO NOT PUSH CODE WITHOUT PROPER REVIEW FROM OTHER MEMBERS. Please read these articles: + - [Github Flow](https://docs.github.com/en/get-started/using-github/github-flow) -- [Always start with an issue](https://web.archive.org/web/20230214040753/https://about.gitlab.com/blog/2016/03/03/start-with-an-issue/) - - Try creating an issue now on what you want to do next. - - Also if you don't see our issue board under the projects tab in our repo. Please contact Mingxi and ask to be added to the Project issue board. +- [Always start with an issue](https://web.archive.org/web/20230214040753/https://about.gitlab.com/blog/2016/03/03/start-with-an-issue/) + - Try creating an issue now on what you want to do next. + - Also if you don't see our issue board under the projects tab in our repo. Please contact Mingxi and ask to be added to the Project issue board. - [Write good commit messages!](https://cbea.ms/git-commit/) - [Issue board](https://about.gitlab.com/blog/announcing-the-gitlab-issue-board/) - - While we are not using Gitlab, Github has the same feature called "Project". 
+- [It's all connected in Gitlab](https://about.gitlab.com/2016/03/08/gitlab-tutorial-its-all-connected/) + - Again, Github has the equivalent features at [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls) As you incorperate these steps into your developer journey, you will be better equipped for real world team-coding. -All the industry experts follow some form of stardards using GIT. You should learn to use it properly while you are here with us. +All the industry experts follow some form of standards using GIT. You should learn to use it properly while you are here with us. -And [here is a longer video](https://www.youtube.com/watch?v=1ffBJ4sVUb4) that gives you more in-depth details on how GIT works. +And [here is a longer video](https://www.youtube.com/watch?v=1ffBJ4sVUb4) that gives you more in-depth details on how GIT works. -Here is an [interactive](https://learngitbranching.js.org/?locale=en_US) Git simulator for you to practice. +Here is an [interactive](https://learngitbranching.js.org/?locale=en_US) Git simulator for you to practice. ### 3. Using the Terminal Using the terminal, you can do a lot of stuff with it. I assure you that to get better at it you just have to use it daily. At first you might google a lot of stuff, and that is **okay!**. All of us started out like that. Here are some of the common commands I use when working with CHATDKU: + - `ssh`: Used to connect to our server - `git`: Working with github - `sftp`: ssh like file transferring @@ -93,9 +97,30 @@ Please be careful when interacting with Docker. 
It hosts our Embedding Model, Ve - For creating tools: https://github.com/Glitterccc/ChatDKU/issues/122 - Arize Phoenix for observability: https://arize.com/docs/phoenix +### Iterating on the agent with `devsync.sh` + +Edit code on your laptop, then push and run it on the shared dev box in one +command. From the repo root: + +```bash +./devsync.sh # runs the agent +``` + +```bash +./devsync.sh chatdku/core/tools/your_file.py # runs any file you're hacking on +``` + +The script rsyncs your working tree, runs `uv sync`, and drops you into a live +session on the remote. Your `.venv/`, `.env`, and `.git/` are left alone. + +See [Documentations/DevSync.md](Documentations/DevSync.md) for configuration, +Windows-specific notes, and troubleshooting. If you're new, also skim +[Documentations/Shared-Secrets.md](Documentations/Shared-Secrets.md) — once an +admin adds you to `chatdku_devs`, all project secrets load into your remote +shell automatically, no `.env` copying needed. + ### Document ingestion - Llamaindex for document ingestion: https://developers.llamaindex.ai/python/framework/getting_started/concepts - ChromaDB for vector store: https://docs.trychroma.com/docs/overview/introduction - Redis for keyword search: https://redis.io/docs/latest/develop/ - diff --git a/chatdku/core/agent.py b/chatdku/core/agent.py index 1eff9bb7..251f0c80 100755 --- a/chatdku/core/agent.py +++ b/chatdku/core/agent.py @@ -10,7 +10,7 @@ from chatdku.core.dspy_classes.plan import Planner, format_trajectory from chatdku.core.dspy_classes.synthesizer import Synthesizer from chatdku.core.tools.get_prerequisites import PrerequisiteLookupOuter -from chatdku.core.tools.llama_index import KeywordRetrieverOuter, VectorRetrieverOuter +from chatdku.core.tools.llama_index_tools import KeywordRetrieverOuter, VectorRetrieverOuter from chatdku.core.tools.syllabi_tool.query_curriculum_db import QueryCurriculumOuter from chatdku.core.utils import load_conversation, span_start from chatdku.setup import 
setup, use_phoenix diff --git a/chatdku/core/tools/llama_index.py b/chatdku/core/tools/llama_index_tools.py similarity index 100% rename from chatdku/core/tools/llama_index.py rename to chatdku/core/tools/llama_index_tools.py diff --git a/chatdku/django/chatdku_django/chat/tools.py b/chatdku/django/chatdku_django/chat/tools.py index 738d5387..98a5bd0a 100644 --- a/chatdku/django/chatdku_django/chat/tools.py +++ b/chatdku/django/chatdku_django/chat/tools.py @@ -1,4 +1,4 @@ -from chatdku.core.tools.llama_index import KeywordRetrieverOuter, VectorRetrieverOuter +from chatdku.core.tools.llama_index_tools import KeywordRetrieverOuter, VectorRetrieverOuter from chatdku.core.tools.syllabi_tool.query_curriculum_db import QueryCurriculumOuter diff --git a/chatdku/django/chatdku_django/chat/views.py b/chatdku/django/chatdku_django/chat/views.py index 65af7a08..5b39d547 100644 --- a/chatdku/django/chatdku_django/chat/views.py +++ b/chatdku/django/chatdku_django/chat/views.py @@ -30,7 +30,7 @@ from rest_framework.views import APIView from chatdku.core.agent import Agent -from chatdku.core.tools.llama_index import KeywordRetrieverOuter, VectorRetrieverOuter +from chatdku.core.tools.llama_index_tools import KeywordRetrieverOuter, VectorRetrieverOuter from chatdku.core.tools.syllabi_tool.query_curriculum_db import QueryCurriculumOuter from chat.tools import get_tools diff --git a/devsync.sh b/devsync.sh index b92cde66..e2b4c083 100644 --- a/devsync.sh +++ b/devsync.sh @@ -1,5 +1,12 @@ #!/usr/bin/env bash -# devsync.sh — rsync local sources to the dev server, then drop into an interactive agent session. +# devsync.sh — rsync local sources to the dev server, then run Python remotely. 
+# +# Usage: +# ./devsync.sh # runs: python -m chatdku.core.agent +# ./devsync.sh path/to/file.py # runs: python path/to/file.py +# ./devsync.sh chatdku.core.agent # runs: python -m chatdku.core.agent +# Arguments with `/` or a `.py` suffix are treated as file paths; everything +# else is treated as a module name and run with `python -m`. set -euo pipefail BOLD="\033[1m" @@ -21,6 +28,34 @@ SERVER="${CHATDKU_SERVER:-${_SSH_USER}@10.200.14.82}" REMOTE_DIR="${CHATDKU_REMOTE_DIR:-~/ChatDKU-DevSync}" LOCAL_DIR="$(git rev-parse --show-toplevel)" +# Accept a leading `-m` / `--module` flag for familiarity; we always decide +# file-vs-module from the argument shape below. +if [[ "${1:-}" == "-m" || "${1:-}" == "--module" ]]; then + shift +fi + +TARGET="${1:-}" +if [[ -n "$TARGET" ]]; then + if [[ "$TARGET" != *"/"* && "$TARGET" != *.py ]]; then + # Looks like a module (e.g. chatdku.core.agent) — run with -m + REMOTE_RUN_CMD="uv run python -m $(printf %q "$TARGET")" + RUN_DESC="python -m $TARGET" + else + # Treat as a file path + if [[ "$TARGET" = /* ]]; then + TARGET="${TARGET#"$LOCAL_DIR"/}" + fi + if [[ ! 
-f "$LOCAL_DIR/$TARGET" ]]; then + warn "target '$TARGET' not found under $LOCAL_DIR — syncing anyway" + fi + REMOTE_RUN_CMD="uv run python $(printf %q "$TARGET")" + RUN_DESC="python $TARGET" + fi +else + REMOTE_RUN_CMD="uv run python -m chatdku.core.agent" + RUN_DESC="agent" +fi + step "preparing remote directory $REMOTE_DIR on $SERVER" ssh "${SERVER}" "mkdir -p ${REMOTE_DIR}" @@ -40,6 +75,8 @@ info "syncing ${BOLD}$LOCAL_DIR${RESET}${CYAN} → ${BOLD}$SERVER:$REMOTE_DIR" rsync -avz --delete \ --exclude='.git/' \ + --exclude='.venv/' \ + --exclude='venv/' \ --exclude='__pycache__/' \ --exclude='*.pyc' \ --exclude='*.egg-info/' \ @@ -52,5 +89,5 @@ rsync -avz --delete \ success "synced" -info "connecting to ${BOLD}$SERVER${RESET}${CYAN} — running agent" -ssh -t "${SERVER}" "bash -l -c 'cd ${REMOTE_DIR} && uv sync && uv run python -m chatdku.core.agent'" +info "connecting to ${BOLD}$SERVER${RESET}${CYAN} — running ${BOLD}${RUN_DESC}${RESET}" +ssh -t "${SERVER}" "bash -l -c 'cd ${REMOTE_DIR} && uv sync && ${REMOTE_RUN_CMD}'" diff --git a/tests/test_load_retriever.py b/tests/test_load_retriever.py index a1dec823..c1aedf14 100644 --- a/tests/test_load_retriever.py +++ b/tests/test_load_retriever.py @@ -9,7 +9,7 @@ from dataclasses import dataclass from typing import List -from chatdku.core.tools.llama_index import DocRetrieverOuter +from chatdku.core.tools.llama_index_tools import DocRetrieverOuter DocumentRetriever = DocRetrieverOuter({}) diff --git a/tests/test_retriever.py b/tests/test_retriever.py index 3ba3536d..a2f09ffc 100644 --- a/tests/test_retriever.py +++ b/tests/test_retriever.py @@ -2,7 +2,7 @@ import pytest -from chatdku.core.tools.llama_index import ( +from chatdku.core.tools.llama_index_tools import ( KeywordRetrieverOuter, QueryTimeoutError, VectorRetrieverOuter, From 81515b68c8fad7542fe767d40d0e2bc403dffb76 Mon Sep 17 00:00:00 2001 From: Ar-temis Date: Tue, 14 Apr 2026 15:11:47 +0800 Subject: [PATCH 6/6] Fixed some spelling errors --- GUIDE.md | 22 
+++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/GUIDE.md b/GUIDE.md index c9339918..f89794c3 100644 --- a/GUIDE.md +++ b/GUIDE.md @@ -5,7 +5,7 @@ This guide is intended for **newcomers**, as well as, our **members**. I (Temuulen) will be explaining our core dependencies as well as any other useful stuff you should learn about before getting into coding. > [!IMPORTANT] -> This is a work in progess. Please tell me what you don't understand about this guide and our project and I will add it to this document for future use. +> This is a work in progress. Please tell me what you don't understand about this guide and our project and I will add it to this document for future use. When I was coming into this project, even though it was structured very clearly, it was hard to get my head around everything. I felt like the code was just very messy and there were just a lot of things that did not have clear explanations. @@ -29,7 +29,7 @@ First, obviously you need to know python. While we don't require you to be a pyt This is completely subjective, but there are some qualities that you can start from: - Functions have [docstrings](https://numpydoc.readthedocs.io/en/latest/format.html) -- Account for future contributers to understand the code +- Account for future contributors to understand the code - Obvious naming practices and using python naming practices. I mean I can go on and on about coding practices. What you need to understand is that you need to build scalable code, accounting for any other person to review your code and understand it. 
@@ -58,18 +58,18 @@ Typically, to do this in a Git-based workflow, you would: Please read these articles: -- [Github Flow](https://docs.github.com/en/get-started/using-github/github-flow) +- [GitHub Flow](https://docs.github.com/en/get-started/using-github/github-flow) - [Always start with an issue](https://web.archive.org/web/20230214040753/https://about.gitlab.com/blog/2016/03/03/start-with-an-issue/) - Try creating an issue now on what you want to do next. - Also if you don't see our issue board under the projects tab in our repo. Please contact Mingxi and ask to be added to the Project issue board. - [Write good commit messages!](https://cbea.ms/git-commit/) - [Issue board](https://about.gitlab.com/blog/announcing-the-gitlab-issue-board/) - - While we are not using Gitlab, Github has the same feature called "Project". + - While we are not using GitLab, GitHub has the same feature called "Project". - [It's all connected in Gitlab](https://about.gitlab.com/2016/03/08/gitlab-tutorial-its-all-connected/) - - Again, Github has the equilavent features at [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls) + - Again, GitHub has the equivalent features [here](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/autolinked-references-and-urls) -As you incorperate these steps into your developer journey, you will be better equipped for real world team-coding. -All the industry experts follow some form of stardards using GIT. You should learn to use it properly while you are here with us. +As you incorporate these steps into your developer journey, you will be better equipped for real-world team-coding. +All the industry experts follow some form of standards using GIT. You should learn to use it properly while you are here with us.
And [here is a longer video](https://www.youtube.com/watch?v=1ffBJ4sVUb4) that gives you more in-depth details on how GIT works. @@ -81,13 +81,13 @@ Using the terminal, you can do a lot of stuff with it. I assure you that to get All of us started out like that. Here are some of the common commands I use when working with CHATDKU: - `ssh`: Used to connect to our server -- `git`: Working with github +- `git`: Working with GitHub - `sftp`: ssh like file transferring - `nvidia-smi`: Used to inspect GPUs Again, just google these stuff and learn. Good luck! It will be worth it. -## Role-specific guides +## Role-specific Guides Please be careful when interacting with Docker. It hosts our Embedding Model, Vector Database, and Redis Database. @@ -99,7 +99,7 @@ Please be careful when interacting with Docker. It hosts our Embedding Model, Ve ### Iterating on the agent with `devsync.sh` -Edit code on your laptop, then push and run it on the shared dev box in one +Edit code on your laptop, then push and run it on the shared dev server in one command. From the repo root: ```bash @@ -119,7 +119,7 @@ Windows-specific notes, and troubleshooting. If you're new, also skim admin adds you to `chatdku_devs`, all project secrets load into your remote shell automatically, no `.env` copying needed. -### Document ingestion +### Document Ingestion - Llamaindex for document ingestion: https://developers.llamaindex.ai/python/framework/getting_started/concepts - ChromaDB for vector store: https://docs.trychroma.com/docs/overview/introduction