From f066fd0f66129a622fe5371bc270d6cc66982403 Mon Sep 17 00:00:00 2001
From: MinaNourhashemi
Date: Mon, 4 May 2026 16:34:24 -0700
Subject: [PATCH 1/3] docs: add ci_cd and python rules, reference publishing.md

---
 .rules/ci_cd.md  | 42 ++++++++++++++++++++++++++++++++++++++++++
 .rules/python.md | 27 +++++++++++++++++++++++++++
 CLAUDE.md        |  3 +++
 3 files changed, 72 insertions(+)
 create mode 100644 .rules/ci_cd.md
 create mode 100644 .rules/python.md

diff --git a/.rules/ci_cd.md b/.rules/ci_cd.md
new file mode 100644
index 0000000..3c2674e
--- /dev/null
+++ b/.rules/ci_cd.md
@@ -0,0 +1,42 @@
+# CI/CD Workflow Standards
+
+## Current Pipelines
+
+### Typo Check (`typos.yml`)
+Runs on every push/PR. Catches spelling errors in all Markdown and text files.
+Config: `.typos.toml` (custom overrides for technical terms).
+
+## Adding New Workflows
+
+### Triggers
+- `on: [push, pull_request]` for quality gates
+- `on: push: branches: [main]` for deploy/publish steps
+- Always pin action versions: `actions/checkout@v4` (not `@master`)
+
+### Pipeline Order (fail fast, cheap first)
+1. Lint/typo check
+2. Link validation (broken URLs)
+3. Build (if applicable)
+4. Deploy (main branch only)
+
+## Markdown/Content Checks to Add
+
+```yaml
+# Example: broken link check
+- name: Check links
+  uses: lycheeverse/lychee-action@v1
+  with:
+    args: --verbose --no-progress '**/*.md'
+```
+
+## Key Practices
+- Never commit secrets; use GitHub Secrets
+- Deploy (osc-docs publish) only from protected main branch
+- Document required environment setup in session READMEs
+
+## Week 4 Reference
+Week 4 of the course covers CI/CD in depth. Use this repo's workflows
+as live examples during that session.
+
+---
+*Every workflow failure is a production bug prevented.*
diff --git a/.rules/python.md b/.rules/python.md
new file mode 100644
index 0000000..5ec1435
--- /dev/null
+++ b/.rules/python.md
@@ -0,0 +1,27 @@
+# Python Standards (Practicum Code)
+
+## Environment
+- **Package Manager:** UV only (not pip, conda, or virtualenv)
+- **Config:** `pyproject.toml`
+
+## Quick Reference
+```bash
+uv init my-analysis && cd my-analysis
+uv add numpy pandas mne
+uv run python analysis.py
+uv run pytest
+```
+
+## Style
+- Formatter: `ruff format`
+- Linter: `ruff check --fix`
+- Type hints on all public functions
+
+## Never Do This
+- Never `pip install`; use `uv add`
+- Never use `os.path`; use `pathlib.Path`
+- Never bare `except:` or silent `pass`
+- Never commit `.env` or hardcoded credentials
+
+---
+*UV for everything. Ruff for style. Real data for tests.*
diff --git a/CLAUDE.md b/CLAUDE.md
index 3943e60..b1c8b79 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -70,12 +70,15 @@ The live demos use HBN EEG data to analyze neural responses to movie shot change
 - `.context/ideas.md` -- Pedagogical decisions, content design
 - `.context/research.md` -- Technical investigations, tool evaluations
 - `.context/scratch_history.md` -- Failed attempts, lessons learned
+- `.context/publishing.md` -- Step-by-step workflow for publishing sessions to courses.osc.earth
 
 ## Rules
 - `.rules/git.md` -- Version control standards
 - `.rules/documentation.md` -- Content and documentation standards
 - `.rules/code_review.md` -- PR review process
 - `.rules/self_improve.md` -- Evolving course standards from experience
+- `.rules/ci_cd.md` -- GitHub Actions workflow standards (see week 4)
+- `.rules/python.md` -- Python/UV standards for practicum code
 
 ---
 Check .context/plan.md for what to work on next.
From 8fd9a9a2e662b3a1466cd669a2ac550b461cf579 Mon Sep 17 00:00:00 2001
From: MinaNourhashemi
Date: Mon, 4 May 2026 17:07:26 -0700
Subject: [PATCH 2/3] feat: Phase 1 preprocessing pipeline (R3-mini, closes #11)

---
Review note (this area is ignored by `git am`): file contents below were
edited in review, so the `index` blob ids are stale; regenerate the series
with `git format-patch` after applying.

 .gitignore                                    |  7 ++
 .../week-03/practicum/config/cfg_r3mini.m     | 33 ++++++++
 sessions/week-03/practicum/phase1/README.md   | 63 +++++++++++++++
 .../practicum/phase1/p1_channel_reject.m      | 49 +++++++++++
 .../week-03/practicum/phase1/p1_cleanline.m   | 51 ++++++++++++
 .../week-03/practicum/phase1/p1_highpass.m    | 16 ++++
 .../week-03/practicum/phase1/p1_import_bids.m | 26 ++++++
 .../week-03/practicum/phase1/run_phase1.m     | 81 +++++++++++++++++++
 8 files changed, 326 insertions(+)
 create mode 100644 sessions/week-03/practicum/config/cfg_r3mini.m
 create mode 100644 sessions/week-03/practicum/phase1/README.md
 create mode 100644 sessions/week-03/practicum/phase1/p1_channel_reject.m
 create mode 100644 sessions/week-03/practicum/phase1/p1_cleanline.m
 create mode 100644 sessions/week-03/practicum/phase1/p1_highpass.m
 create mode 100644 sessions/week-03/practicum/phase1/p1_import_bids.m
 create mode 100644 sessions/week-03/practicum/phase1/run_phase1.m

diff --git a/.gitignore b/.gitignore
index 1a3e995..32c80c6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,3 +5,10 @@ node_modules/
 
 .env
 .env.local
+
+# EEG data -- large binary files, never committed
+derivatives/
+*.set
+*.fdt
+*.bdf
+*.edf
diff --git a/sessions/week-03/practicum/config/cfg_r3mini.m b/sessions/week-03/practicum/config/cfg_r3mini.m
new file mode 100644
index 0000000..1858ef7
--- /dev/null
+++ b/sessions/week-03/practicum/config/cfg_r3mini.m
@@ -0,0 +1,33 @@
+function cfg = cfg_r3mini()
+% Configuration for HBN R3-mini (100 Hz, 20 subjects, development run).
+% Switch to cfg_r3full.m once the pipeline validates end-to-end on mini.
+
+% Data paths -- update bids_root to your local R3-mini copy.
+% Download: aws s3 sync s3://fcp-indi/data/Projects/HBN/BIDS_EEG/cmi_bids_R3 ~/data/HBN/R3-mini \
+%   --exclude "*" --include "sub-NDAR*/ses-HBNsite*/eeg/*ThePresent*"
+% (Select 20 subjects from participants.tsv with EEG availability flag = 1.)
+cfg.bids_root  = fullfile(getenv('HOME'), 'data', 'HBN', 'R3-mini');
+cfg.deriv_root = fullfile(fileparts(mfilename('fullpath')), '..', '..', '..', 'derivatives');
+
+% Task
+cfg.task = 'ThePresent';
+
+% Sampling rate -- R3-mini is already downsampled to 100 Hz.
+% Full R3 is 500 Hz; resample before this pipeline if using full data.
+cfg.srate = 100;
+
+% Preprocessing parameters
+cfg.highpass_hz  = 1;            % Hz; FIR highpass via pop_eegfiltnew
+cfg.cleanline_hz = [60 120 180]; % US harmonics (50 Hz regions: adjust); targets >= srate/2 are skipped
+
+% clean_rawdata channel rejection thresholds (ASR and window rejection off).
+% ChannelCriterion: correlation with neighboring channels (0.85 is conservative).
+% FlatlineCriterion: max flat-line duration in seconds before channel is dropped.
+% LineNoiseCriterion: max line-noise Z-score (applied after CleanLine; catches residual).
+cfg.chan_criterion      = 0.85;
+cfg.flatline_criterion  = 5;
+cfg.linenoise_criterion = 4;
+
+% subjects: cell array of subject IDs to process, or {} to use all found in bids_root.
+% Explicit list is preferred for reproducibility on R3-mini.
+cfg.subjects = {};
diff --git a/sessions/week-03/practicum/phase1/README.md b/sessions/week-03/practicum/phase1/README.md
new file mode 100644
index 0000000..bcd02a0
--- /dev/null
+++ b/sessions/week-03/practicum/phase1/README.md
@@ -0,0 +1,63 @@
+# Phase 1: BIDS Import and Preprocessing
+
+Part of the HBN ERSP practicum (see `../project_brief.md`).
+
+## What this phase does
+
+Imports the HBN R3-mini BIDS dataset and runs four preprocessing steps, producing one cleaned EEG set per subject in `derivatives/preproc/`.
+
+| Step | Function | Operation |
+|------|----------|-----------|
+| 1 | `p1_import_bids` | `pop_importbids` -- loads BIDS, attaches channel locations and events |
+| 2 | `p1_highpass` | `pop_eegfiltnew` -- 1 Hz FIR highpass to remove DC drift |
+| 3 | `p1_cleanline` | `pop_cleanline` -- removes 60/120/180 Hz line noise (targets at or above Nyquist are skipped) |
+| 4 | `p1_channel_reject` | `clean_rawdata` -- drops flat/noisy channels (ASR off) |
+
+## Prerequisites
+
+- EEGLAB 2024+ on MATLAB path
+- Plugins: `Biosig`, `CleanLine`, `clean_rawdata` (ships with EEGLAB 2024)
+- matlab-mcp-tools configured if driving from Claude Code
+- R3-mini data downloaded to `~/data/HBN/R3-mini` (or update `config/cfg_r3mini.m`)
+
+## Running
+
+**Interactive (MATLAB command window):**
+```matlab
+addpath(genpath('sessions/week-03/practicum'));
+run_phase1('r3mini');
+```
+
+**Command line (from repo root):**
+```bash
+matlab -nodisplay -nosplash \
+  -r "addpath(genpath('sessions/week-03/practicum')); run_phase1('r3mini'); exit"
+```
+
+## Configuration
+
+Edit `config/cfg_r3mini.m` to set:
+- `cfg.bids_root` -- path to your local R3-mini copy
+- `cfg.subjects` -- list specific subject IDs or leave `{}` for all found
+
+## Outputs
+
+```
+derivatives/
+└── preproc/
+    ├── <subject>_preproc.set   (one per subject, not committed to git)
+    └── phase1_report.mat       (channel retention counts; git-ignored with the rest of derivatives/)
+```
+
+`derivatives/` is listed in `.gitignore`. Data files are never committed.
+
+## Acceptance criteria (closes #11)
+
+- [ ] `run_phase1('r3mini')` completes without error on R3-mini
+- [ ] All subjects retain >80% of channels (warning printed otherwise)
+- [ ] `phase1_report.mat` saved with per-subject channel counts
+- [ ] No data files committed to git
+
+## Deviations from reference pipeline
+
+The reference (`study_handy_scripts.m`) runs highpass before CleanLine. This order is preserved here. The reference uses `clean_rawdata` with default ASR settings; this pipeline disables ASR (`BurstCriterion = 'off'`) because ASR modifies the continuous signal in ways that can bias ICA decomposition (Phase 2). Channel-level rejection only.
diff --git a/sessions/week-03/practicum/phase1/p1_channel_reject.m b/sessions/week-03/practicum/phase1/p1_channel_reject.m
new file mode 100644
index 0000000..94ecfb5
--- /dev/null
+++ b/sessions/week-03/practicum/phase1/p1_channel_reject.m
@@ -0,0 +1,49 @@
+function [ALLEEG, report] = p1_channel_reject(ALLEEG, cfg)
+% Reject bad channels using clean_rawdata (channel-level criteria only).
+%
+% ASR (artifact subspace reconstruction) and window rejection are explicitly
+% disabled -- those steps would alter the continuous signal in ways that
+% interact with ICA (Phase 2). Channel rejection here only removes electrodes
+% that are flat, noisy, or poorly correlated with neighbors.
+%
+% Thresholds from cfg (see cfg_r3mini.m for values and justification):
+%   ChannelCriterion:   minimum correlation with neighbor channels
+%   FlatlineCriterion:  max seconds of flat signal before rejection
+%   LineNoiseCriterion: residual line-noise Z-score after CleanLine
+%
+% Returns report: struct array with subject ID and channel counts.
+%
+% Requires: clean_rawdata plugin (EEGLAB 2024+ ships it by default).
+
+report = struct('subject', {}, 'n_orig', {}, 'n_kept', {}, 'n_rejected', {});
+
+for i = 1:length(ALLEEG)
+    n_orig = ALLEEG(i).nbchan;
+
+    ALLEEG(i) = clean_rawdata(ALLEEG(i), ...
+        'FlatlineCriterion', cfg.flatline_criterion, ...
+        'ChannelCriterion', cfg.chan_criterion, ...
+        'LineNoiseCriterion', cfg.linenoise_criterion, ...
+        'Highpass', 'off', ... % already done in p1_highpass
+        'BurstCriterion', 'off', ... % ASR off
+        'WindowCriterion', 'off', ... % window rejection off
+        'BurstRejection', 'off', ...
+        'Distance', 'Euclidian');
+
+    n_kept = ALLEEG(i).nbchan;
+    n_rej = n_orig - n_kept;
+
+    report(i).subject = ALLEEG(i).subject;
+    report(i).n_orig = n_orig;
+    report(i).n_kept = n_kept;
+    report(i).n_rejected = n_rej;
+
+    fprintf('[p1_channel_reject] Subject %s: kept %d/%d channels (%d rejected).\n', ...
+        ALLEEG(i).subject, n_kept, n_orig, n_rej);
+
+    if n_kept / n_orig < 0.80
+        warning('[p1_channel_reject] Subject %s: less than 80%% channels retained -- inspect manually.', ...
+            ALLEEG(i).subject);
+    end
+end
+end
diff --git a/sessions/week-03/practicum/phase1/p1_cleanline.m b/sessions/week-03/practicum/phase1/p1_cleanline.m
new file mode 100644
index 0000000..bc3ce9f
--- /dev/null
+++ b/sessions/week-03/practicum/phase1/p1_cleanline.m
@@ -0,0 +1,51 @@
+function ALLEEG = p1_cleanline(ALLEEG, cfg)
+% Remove line noise at power-line harmonics using the CleanLine plugin.
+%
+% Targets cfg.cleanline_hz (default [60 120 180] Hz for 60 Hz countries).
+% Only targets strictly below a dataset's Nyquist limit (srate / 2) are passed
+% to CleanLine; a sinusoid at or above Nyquist cannot be represented in the
+% sampled signal. At 100 Hz (R3-mini) none of the default targets qualify, so
+% those datasets are returned unchanged and the skip is logged.
+% CleanLine fits and subtracts sinusoidal components without affecting
+% broadband signal, which is preferable to a notch filter for ERSP analysis.
+%
+% Key parameters justified:
+%   winsize/winstep = 4 s: balances frequency resolution and stationarity.
+%   bandwidth = 2 Hz: narrow enough not to smear adjacent bands.
+%   p = 0.01: conservative detection threshold; reduces false removals.
+%   scanforlines = 1: lets CleanLine search nearby frequencies in case
+%     the actual line drifts slightly from the nominal value.
+%
+% Requires: CleanLine plugin (EEGLAB plugin manager or manual install).
+
+% Options shared by every dataset; 'linefreqs' is added per dataset below.
+cleanline_opts = { ...
+    'bandwidth', 2, ...
+    'chanlist', [], ... % [] = all channels
+    'computepower', 0, ...
+    'normSpectrum', 0, ...
+    'p', 0.01, ...
+    'pad', 2, ...
+    'plotfigures', 0, ...
+    'scanforlines', 1, ...
+    'sigtype', 'Channels', ...
+    'tau', 100, ...
+    'verb', 0, ...
+    'winsize', 4, ...
+    'winstep', 4 };
+
+for i = 1:length(ALLEEG)
+    nyquist_hz = ALLEEG(i).srate / 2;
+    line_hz = cfg.cleanline_hz(cfg.cleanline_hz < nyquist_hz);
+
+    if isempty(line_hz)
+        fprintf('[p1_cleanline] Subject %s: no target below Nyquist (%.1f Hz); CleanLine skipped.\n', ...
+            ALLEEG(i).subject, nyquist_hz);
+        continue;
+    end
+
+    ALLEEG(i) = pop_cleanline(ALLEEG(i), 'linefreqs', line_hz, cleanline_opts{:});
+    fprintf('[p1_cleanline] Subject %s: line noise removed at %s Hz.\n', ...
+        ALLEEG(i).subject, num2str(line_hz));
+end
+end
diff --git a/sessions/week-03/practicum/phase1/p1_highpass.m b/sessions/week-03/practicum/phase1/p1_highpass.m
new file mode 100644
index 0000000..0430a4a
--- /dev/null
+++ b/sessions/week-03/practicum/phase1/p1_highpass.m
@@ -0,0 +1,16 @@
+function ALLEEG = p1_highpass(ALLEEG, cfg)
+% Apply a zero-phase FIR highpass filter to each dataset in ALLEEG.
+%
+% Uses pop_eegfiltnew (EEGLAB's built-in wrapper around firfilt).
+% Cutoff: cfg.highpass_hz (default 1 Hz).
+% 1 Hz removes slow DC drift without distorting the 0-500 ms epoch window.
+% Filter order is set automatically by pop_eegfiltnew based on cutoff and srate.
+%
+% Requires: EEGLAB 2024+.
+
+for i = 1:length(ALLEEG)
+    ALLEEG(i) = pop_eegfiltnew(ALLEEG(i), cfg.highpass_hz, []);
+    fprintf('[p1_highpass] Subject %s: highpass %.1f Hz applied.\n', ...
+        ALLEEG(i).subject, cfg.highpass_hz);
+end
+end
diff --git a/sessions/week-03/practicum/phase1/p1_import_bids.m b/sessions/week-03/practicum/phase1/p1_import_bids.m
new file mode 100644
index 0000000..cb64c05
--- /dev/null
+++ b/sessions/week-03/practicum/phase1/p1_import_bids.m
@@ -0,0 +1,26 @@
+function ALLEEG = p1_import_bids(cfg)
+% Import HBN BIDS dataset for one task using pop_importbids.
+%
+% Returns ALLEEG (array of EEG structs), one per subject/session.
+% Raw sets are written to <deriv_root>/raw/ for checkpoint recovery.
+%
+% Requires: EEGLAB 2024+ with Biosig plugin.
+
+out_dir = fullfile(cfg.deriv_root, 'raw');
+if ~exist(out_dir, 'dir'), mkdir(out_dir); end
+
+import_opts = { ...
+    'outputdir', out_dir, ...
+    'bidstask', cfg.task, ... % pop_importbids names its task filter 'bidstask'
+    'bidsevent', 'on', ...
+    'bidschanloc', 'on' };
+
+if ~isempty(cfg.subjects)
+    import_opts = [import_opts, {'subjects', cfg.subjects}];
+end
+
+[~, ALLEEG] = pop_importbids(cfg.bids_root, import_opts{:});
+
+fprintf('[p1_import_bids] Imported %d dataset(s) for task %s.\n', ...
+    length(ALLEEG), cfg.task);
+end
diff --git a/sessions/week-03/practicum/phase1/run_phase1.m b/sessions/week-03/practicum/phase1/run_phase1.m
new file mode 100644
index 0000000..81cfeaf
--- /dev/null
+++ b/sessions/week-03/practicum/phase1/run_phase1.m
@@ -0,0 +1,81 @@
+function run_phase1(cfg_name)
+% Phase 1 master script: BIDS import -> highpass -> cleanline -> channel rejection.
+%
+% Usage (interactive):
+%   run_phase1('r3mini')
+%
+% Usage (command line, from repo root):
+%   matlab -nodisplay -nosplash \
+%     -r "addpath(genpath('sessions/week-03/practicum')); run_phase1('r3mini'); exit"
+%
+% cfg_name: name suffix for a config function in practicum/config/.
+%   'r3mini' loads config/cfg_r3mini.m (default).
+%   Add cfg_r3full.m for the full 183-subject run once R3-mini validates.
+%
+% Outputs:
+%   derivatives/preproc/<subject>_preproc.set -- cleaned continuous EEG
+%   derivatives/preproc/phase1_report.mat -- channel rejection summary
+
+if nargin < 1, cfg_name = 'r3mini'; end
+
+% Resolve paths relative to this file so the script is location-independent.
+script_dir = fileparts(mfilename('fullpath'));
+practicum_dir = fullfile(script_dir, '..');
+addpath(genpath(practicum_dir));
+
+% Load configuration.
+cfg_fn = str2func(['cfg_' cfg_name]);
+cfg = cfg_fn();
+
+% Ensure output directories exist.
+preproc_dir = fullfile(cfg.deriv_root, 'preproc');
+if ~exist(preproc_dir, 'dir'), mkdir(preproc_dir); end
+
+fprintf('\n=== Phase 1: Preprocessing (%s) ===\n\n', cfg_name);
+
+eeglab nogui;
+
+% Step 1: Import BIDS.
+fprintf('Step 1/4: Import BIDS...\n');
+ALLEEG = p1_import_bids(cfg);
+
+% Step 2: Highpass filter.
+fprintf('\nStep 2/4: Highpass filter (%.1f Hz)...\n', cfg.highpass_hz);
+ALLEEG = p1_highpass(ALLEEG, cfg);
+
+% Step 3: CleanLine.
+fprintf('\nStep 3/4: CleanLine (%s Hz)...\n', num2str(cfg.cleanline_hz));
+ALLEEG = p1_cleanline(ALLEEG, cfg);
+
+% Step 4: Channel rejection.
+fprintf('\nStep 4/4: Channel rejection...\n');
+[ALLEEG, report] = p1_channel_reject(ALLEEG, cfg);
+
+% Save preprocessed sets.
+fprintf('\nSaving to %s...\n', preproc_dir);
+for i = 1:length(ALLEEG)
+    out_name = sprintf('%s_preproc.set', ALLEEG(i).subject);
+    pop_saveset(ALLEEG(i), 'filename', out_name, 'filepath', preproc_dir);
+    fprintf(' Saved: %s\n', out_name);
+end
+
+% Save channel rejection report.
+report_path = fullfile(preproc_dir, 'phase1_report.mat');
+save(report_path, 'report', 'cfg');
+fprintf('\nChannel rejection report saved: %s\n', report_path);
+
+% Print summary table.
+fprintf('\n--- Channel retention summary ---\n');
+fprintf('%-30s %5s %5s %5s\n', 'Subject', 'Orig', 'Kept', 'Rej');
+for i = 1:length(report)
+    fprintf('%-30s %5d %5d %5d\n', ...
+        report(i).subject, report(i).n_orig, report(i).n_kept, report(i).n_rejected);
+end
+
+n_warn = sum([report.n_kept] ./ [report.n_orig] < 0.80);
+if n_warn > 0
+    fprintf('\nWARNING: %d subject(s) retained <80%% channels. Inspect before Phase 2.\n', n_warn);
+end
+
+fprintf('\n=== Phase 1 complete ===\n');
+end
From c4091433ca9b11fc04531d774db9b638118f6f3a Mon Sep 17 00:00:00 2001
From: MinaNourhashemi
Date: Wed, 6 May 2026 11:04:21 -0700
Subject: [PATCH 3/3] fix: align channel retention warning to 90% threshold

---
Review note (this area is ignored by `git am`): the p1_channel_reject.m hunk
was added in review so the per-subject warning uses the same 90% threshold as
the summary; its `index` line is omitted and the others are stale.

 sessions/week-03/practicum/phase1/README.md           | 2 +-
 sessions/week-03/practicum/phase1/p1_channel_reject.m | 4 ++--
 sessions/week-03/practicum/phase1/run_phase1.m        | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/sessions/week-03/practicum/phase1/README.md b/sessions/week-03/practicum/phase1/README.md
index bcd02a0..1e1dcfb 100644
--- a/sessions/week-03/practicum/phase1/README.md
+++ b/sessions/week-03/practicum/phase1/README.md
@@ -54,7 +54,7 @@ derivatives/
 ## Acceptance criteria (closes #11)
 
 - [ ] `run_phase1('r3mini')` completes without error on R3-mini
-- [ ] All subjects retain >80% of channels (warning printed otherwise)
+- [ ] All subjects retain >90% of channels (warning printed otherwise)
 - [ ] `phase1_report.mat` saved with per-subject channel counts
 - [ ] No data files committed to git
 
diff --git a/sessions/week-03/practicum/phase1/p1_channel_reject.m b/sessions/week-03/practicum/phase1/p1_channel_reject.m
--- a/sessions/week-03/practicum/phase1/p1_channel_reject.m
+++ b/sessions/week-03/practicum/phase1/p1_channel_reject.m
@@ -41,9 +41,9 @@ for i = 1:length(ALLEEG)
     fprintf('[p1_channel_reject] Subject %s: kept %d/%d channels (%d rejected).\n', ...
         ALLEEG(i).subject, n_kept, n_orig, n_rej);
 
-    if n_kept / n_orig < 0.80
-        warning('[p1_channel_reject] Subject %s: less than 80%% channels retained -- inspect manually.', ...
+    if n_kept / n_orig < 0.90
+        warning('[p1_channel_reject] Subject %s: less than 90%% channels retained -- inspect manually.', ...
             ALLEEG(i).subject);
     end
 end
 end
diff --git a/sessions/week-03/practicum/phase1/run_phase1.m b/sessions/week-03/practicum/phase1/run_phase1.m
index 81cfeaf..c201e7d 100644
--- a/sessions/week-03/practicum/phase1/run_phase1.m
+++ b/sessions/week-03/practicum/phase1/run_phase1.m
@@ -72,9 +72,9 @@ function run_phase1(cfg_name)
         report(i).subject, report(i).n_orig, report(i).n_kept, report(i).n_rejected);
 end
 
-n_warn = sum([report.n_kept] ./ [report.n_orig] < 0.80);
+n_warn = sum([report.n_kept] ./ [report.n_orig] < 0.90);
 if n_warn > 0
-    fprintf('\nWARNING: %d subject(s) retained <80%% channels. Inspect before Phase 2.\n', n_warn);
+    fprintf('\nWARNING: %d subject(s) retained <90%% channels. Inspect before Phase 2.\n', n_warn);
 end
 
 fprintf('\n=== Phase 1 complete ===\n');