From 65d7d19d929a82179cd519dcf66b8ddea5c2d761 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Jun 2026 08:17:57 +0000 Subject: [PATCH] Add interactive GUI for running the hub prioritization model Refactor the complete pipeline to be configuration-driven and add a native desktop GUI (pywebview) for running it without code. Pipeline (scripts/run_complete_pipeline.py): - New RunConfig dataclass describing one run: input files, output dir, optional-step toggles, and run metadata (run_by, remarks, run_id) - CompleteHubPipeline now takes a RunConfig; all hardcoded input paths, skip-flags and output locations are sourced from it (CLI still works via default_run_config()) - resolve_inputs_from_directory(): auto-detect input files in a folder - run_pipeline() shared entry point for CLI and GUI - Per-run output directory plus a run log (run_log.json / run_log.txt) capturing input files used (with size/mtime/sha256), timestamps, who ran it, remarks, options, outputs produced and a results summary - Dependency self-check now only prompts when run directly (import-safe) GUI (app/run_model_gui.py + app/gui/*): - Native folder/file pickers; pick a directory to auto-detect inputs or choose each file individually - Name + remarks fields, option toggles, output-dir selection - Live log streaming and a results panel with output file shortcuts Docs: app/RUN_MODEL_GUI.md, README pointer, pywebview in requirements.txt --- README.md | 12 + app/RUN_MODEL_GUI.md | 77 +++++ app/gui/app.js | 192 ++++++++++++ app/gui/index.html | 94 ++++++ app/gui/style.css | 122 ++++++++ app/run_model_gui.py | 274 ++++++++++++++++++ requirements.txt | 3 + scripts/run_complete_pipeline.py | 482 +++++++++++++++++++++++++++---- 8 files changed, 1199 insertions(+), 57 deletions(-) create mode 100644 app/RUN_MODEL_GUI.md create mode 100644 app/gui/app.js create mode 100644 app/gui/index.html create mode 100644 app/gui/style.css create mode 100644 app/run_model_gui.py diff --git a/README.md b/README.md index 
8f019cb..5c6b987 100644 --- a/README.md +++ b/README.md @@ -66,6 +66,18 @@ print(results.nlargest(10, 'final_score')) python scripts/run_pipeline.py ``` +### Interactive GUI (no code) + +Run the model from a desktop app with native file pickers — select inputs, +enter who is running it and any remarks, and get outputs plus a full run log: + +```bash +pip install -r requirements.txt # includes pywebview +python app/run_model_gui.py +``` + +See **[app/RUN_MODEL_GUI.md](app/RUN_MODEL_GUI.md)** for details. + --- ## Project Structure diff --git a/app/RUN_MODEL_GUI.md b/app/RUN_MODEL_GUI.md new file mode 100644 index 0000000..c85819a --- /dev/null +++ b/app/RUN_MODEL_GUI.md @@ -0,0 +1,77 @@ +# Run Model GUI + +An interactive desktop application for running the full hub prioritization +pipeline without touching code. The interface is built with HTML/JS and rendered +in a **native window** via [pywebview](https://pywebview.flowrl.com/), so it can +open real OS folder/file pickers. + +## What it does + +1. **Select inputs** — either pick a single folder and let the app auto-detect + every input file by name, or pick each file individually with a native + "Browse…" dialog. +2. **Record who & why** — enter your name and free-text remarks describing the + run. +3. **Run the pipeline** — outputs (CSV, GeoJSON, interactive map) are written to + an output directory of your choice. +4. **Run log** — every run writes `run_log.json` and `run_log.txt` into the + output folder, recording: + - run id + start/finish timestamps + duration + - who ran it and their remarks + - every input file used, with size, last-modified time and SHA-256 checksum + - the options used and the output files produced + - a results summary (hub counts by tier) and success/error status + +## Install + +```bash +pip install -r requirements.txt +``` + +`pywebview` needs a rendering backend: + +- **Windows** — uses the built-in Edge WebView2 (usually already present). 
+- **macOS** — uses the built-in WebKit (no extra install). +- **Linux** — install one backend, e.g. + `pip install pyqt5 pyqtwebengine` **or** the system + `python3-gi gir1.2-webkit2-4.1` packages. + +## Run + +```bash +python app/run_model_gui.py +``` + +## Input files + +| Field | Required | Typical filename | +|-------|----------|------------------| +| Transit nodes (CSV) | ✅ | `All_nodes+lines.csv` | +| Lines & planned modes (CSV) | ✅ | `Lines_and_Planned_Mode.csv` | +| Demand forecast (Excel/CSV) | optional | `Demand_2050_all.xlsx` | +| Metro areas (SHP) | optional | `metro.shp` | +| Districts (SHP) | optional | `districts.shp` | +| TAZ zones / demographics (SHP) | optional | `TAZ_2050.shp` | +| Bus terminals (SHP) | optional | `bus_terminals.shp` | + +Directory auto-detection matches files by extension and name keywords, so naming +your files close to the conventions above lets the app find them automatically. + +## Output + +Default output directory is `data/results/run_<timestamp>/`. Each run is +self-contained in its own folder, including the intermediate artefacts +(`processed/`), the final outputs, the streaming `run_<timestamp>.log`, and the +`run_log.json` / `run_log.txt` manifest. + +## How it fits the code + +The GUI is a thin front-end over `scripts/run_complete_pipeline.py`: + +- `RunConfig` — describes one run (inputs, output dir, options, metadata). +- `run_pipeline(config)` — the shared entry point used by both the GUI and the + command line (`python scripts/run_complete_pipeline.py`). +- `resolve_inputs_from_directory(dir)` — powers the folder auto-detect. + +Running the script directly still works exactly as before, using the default +paths under `data/raw/`. diff --git a/app/gui/app.js b/app/gui/app.js new file mode 100644 index 0000000..0acf0a6 --- /dev/null +++ b/app/gui/app.js @@ -0,0 +1,192 @@ +// Front-end logic for the Run Model GUI. +// Communicates with the Python backend via window.pywebview.api.
+ +let FIELDS = []; // [{field,label,required}] +const selected = {}; // field -> path +let pollTimer = null; + +function $(id) { return document.getElementById(id); } + +// Wait until pywebview has injected the API +window.addEventListener('pywebviewready', init); + +async function init() { + FIELDS = await window.pywebview.api.get_input_fields(); + buildFileRows(); + $('outputDir').value = await window.pywebview.api.default_output_dir(); + + document.querySelectorAll('input[name="mode"]').forEach(r => + r.addEventListener('change', applyMode)); + applyMode(); +} + +function applyMode() { + const mode = document.querySelector('input[name="mode"]:checked').value; + $('dirMode').style.display = (mode === 'dir') ? 'block' : 'none'; + // In directory mode the per-file Browse buttons are hidden (auto-detected); + // in files mode they are shown. + document.querySelectorAll('.fileBrowse').forEach(b => { + b.style.display = (mode === 'files') ? 'inline-block' : 'none'; + }); +} + +function buildFileRows() { + const tbody = $('fileRows'); + tbody.innerHTML = ''; + FIELDS.forEach(f => { + const tr = document.createElement('tr'); + const tag = f.required + ? 
'required' + : 'optional'; + tr.innerHTML = ` + ${f.label}${tag} + — not selected — + + `; + tbody.appendChild(tr); + }); +} + +function setFile(field, path) { + selected[field] = path || null; + const el = $('fn_' + field); + if (path) { + el.textContent = path; + el.classList.remove('missing'); + } else { + el.textContent = '— not selected —'; + el.classList.add('missing'); + } +} + +async function browseFile(field) { + const path = await window.pywebview.api.pick_file(field); + if (path) setFile(field, path); +} + +async function browseDir() { + const dir = await window.pywebview.api.pick_folder(); + if (!dir) return; + $('dirPath').value = dir; + await rescan(); +} + +async function rescan() { + const dir = $('dirPath').value; + if (!dir) { alert('Choose a directory first.'); return; } + const res = await window.pywebview.api.scan_directory(dir); + if (!res.ok) { alert('Scan failed: ' + res.error); return; } + FIELDS.forEach(f => setFile(f.field, res.matches[f.field])); +} + +async function browseOutput() { + const dir = await window.pywebview.api.pick_folder(); + if (dir) $('outputDir').value = dir; +} + +async function runModel() { + const runBy = $('runBy').value.trim(); + if (!runBy) { alert('Please enter your name before running.'); return; } + + // Validate required inputs are present + const missing = FIELDS.filter(f => f.required && !selected[f.field]).map(f => f.label); + if (missing.length) { alert('Missing required inputs:\n- ' + missing.join('\n- ')); return; } + + const payload = { + inputs: { ...selected }, + output_dir: $('outputDir').value.trim(), + run_by: runBy, + remarks: $('remarks').value, + skip_demand_data: $('skip_demand_data').checked, + skip_spatial_layers: $('skip_spatial_layers').checked, + skip_demographics: $('skip_demographics').checked, + run_mc_distribution: $('run_mc_distribution').checked, + }; + + const res = await window.pywebview.api.start_run(payload); + if (!res.ok) { setStatus(res.error, 'err'); return; } + + 
$('runBtn').disabled = true; + $('progressCard').style.display = 'block'; + $('resultPanel').style.display = 'none'; + $('logBox').textContent = ''; + setStatus('Running…'); + pollTimer = setInterval(poll, 1000); +} + +function setStatus(text, kind) { + const el = $('runStatus'); + el.textContent = text || ''; + el.className = 'status' + (kind ? ' ' + kind : ''); +} + +async function poll() { + const s = await window.pywebview.api.poll(); + const box = $('logBox'); + box.textContent = s.log || ''; + box.scrollTop = box.scrollHeight; + + if (s.done) { + clearInterval(pollTimer); + $('runBtn').disabled = false; + if (s.error) { + setStatus('Run failed', 'err'); + } else { + setStatus('Run complete', 'ok'); + } + renderResult(s.manifest, s.error); + } +} + +function renderResult(manifest, error) { + const panel = $('resultPanel'); + panel.style.display = 'block'; + if (!manifest) { + panel.innerHTML = ``; + return; + } + + const ok = manifest.status === 'success'; + let html = ``; + + const sum = manifest.results_summary || {}; + if (sum.total_hubs != null) { + html += '
'; + html += `
${sum.total_hubs}total hubs
`; + const byTier = sum.hubs_by_tier || {}; + Object.keys(byTier).forEach(t => { + html += `
${byTier[t]}${escapeHtml(t)}
`; + }); + html += '
'; + } + + html += `

Output directory:

+
    +
  • ${escapeHtml(manifest.output_dir)} +
  • `; + (manifest.outputs || []).forEach(p => { + html += `
  • ${escapeHtml(p)} +
  • `; + }); + html += '
'; + + panel.innerHTML = html; +} + +async function openPath(path) { + await window.pywebview.api.open_path(path); +} + +// Escape the characters that are significant in HTML text content so that +// file paths and manifest values can be interpolated into innerHTML safely. +// '&' must be replaced first so the entities produced below are not re-escaped. +function escapeHtml(s) { + return String(s == null ? '' : s) + .replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;'); +} +function jsstr(s) { + return String(s == null ? '' : s).replace(/\\/g, '\\\\').replace(/'/g, "\\'"); +} diff --git a/app/gui/index.html b/app/gui/index.html new file mode 100644 index 0000000..dd8e51e --- /dev/null +++ b/app/gui/index.html @@ -0,0 +1,94 @@ + + + + + + Hub Prioritization - Run Model + + + +
+
+

🚆 Hub Prioritization — Run Model

+

Select inputs, run the pipeline, and produce outputs with a full run log.

+
+ + +
+

1. Run details

+
+ + +
+
+ + +
+
+ + +
+

2. Input data

+
+ + +
+ + +
+
+ + + +
+

The folder is scanned for the expected files by name and extension.

+
+ + + + + + + +
InputFile
+
+ + +
+

3. Options

+
+ + + + +
+
+ + +
+

4. Output location

+
+ + +
+

Outputs (CSV, GeoJSON, map) and a run log (run_log.json / run_log.txt) are written here.

+
+ + +
+ + +
+ + + +
+ + + + diff --git a/app/gui/style.css b/app/gui/style.css new file mode 100644 index 0000000..cf07981 --- /dev/null +++ b/app/gui/style.css @@ -0,0 +1,122 @@ +:root { + --bg: #f4f6f9; + --card: #ffffff; + --ink: #1f2933; + --muted: #6b7280; + --accent: #2563eb; + --accent-d: #1d4ed8; + --green: #16a34a; + --red: #dc2626; + --amber: #d97706; + --border: #e5e7eb; +} + +* { box-sizing: border-box; } + +body { + margin: 0; + font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, sans-serif; + background: var(--bg); + color: var(--ink); + font-size: 14px; +} + +.app { max-width: 980px; margin: 0 auto; padding: 24px 20px 60px; } + +header h1 { margin: 0 0 4px; font-size: 24px; } +.subtitle { margin: 0 0 18px; color: var(--muted); } + +.card { + background: var(--card); + border: 1px solid var(--border); + border-radius: 12px; + padding: 18px 20px; + margin-bottom: 16px; + box-shadow: 0 1px 2px rgba(0,0,0,0.03); +} + +.card h2 { margin: 0 0 14px; font-size: 16px; } + +.row { margin-bottom: 12px; } +.row label { display: block; font-weight: 600; margin-bottom: 5px; } +.row.inline { display: flex; gap: 8px; align-items: center; } + +input[type=text], textarea { + width: 100%; + padding: 9px 11px; + border: 1px solid var(--border); + border-radius: 8px; + font-size: 14px; + font-family: inherit; + background: #fff; +} +input[readonly] { background: #f9fafb; } +textarea { resize: vertical; } + +.req { color: var(--red); } +.hint { color: var(--muted); font-size: 12px; margin: 6px 0 0; } + +.btn { + background: var(--accent); + color: #fff; + border: none; + border-radius: 8px; + padding: 9px 14px; + font-size: 14px; + cursor: pointer; + white-space: nowrap; +} +.btn:hover { background: var(--accent-d); } +.btn.ghost { background: #eef2ff; color: var(--accent-d); } +.btn.ghost:hover { background: #e0e7ff; } +.btn.run { font-size: 16px; padding: 12px 28px; background: var(--green); } +.btn.run:hover { background: #15803d; } +.btn:disabled { opacity: 0.5; 
cursor: not-allowed; } + +.modeToggle { display: flex; gap: 22px; margin-bottom: 12px; } +.modeToggle label { font-weight: 600; cursor: pointer; } +.modePanel { margin-bottom: 14px; } + +table.files { width: 100%; border-collapse: collapse; margin-top: 6px; } +table.files th, table.files td { + text-align: left; padding: 8px 6px; border-bottom: 1px solid var(--border); + vertical-align: middle; +} +table.files th { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: .03em; } +.filename { font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace; font-size: 12px; } +.filename.missing { color: var(--muted); font-style: italic; } +.tag { font-size: 10px; padding: 1px 6px; border-radius: 99px; margin-left: 6px; } +.tag.req { background: #fee2e2; color: var(--red); } +.tag.ok { background: #dcfce7; color: var(--green); } +.tag.opt { background: #f3f4f6; color: var(--muted); } + +.opts { display: grid; grid-template-columns: 1fr 1fr; gap: 10px; } +.opts label { font-weight: 500; cursor: pointer; } + +.runbar { display: flex; align-items: center; gap: 16px; margin: 4px 0 16px; } +.status { color: var(--muted); } +.status.err { color: var(--red); font-weight: 600; } +.status.ok { color: var(--green); font-weight: 600; } + +.logbox { + background: #0f172a; color: #d1d5db; + padding: 14px; border-radius: 8px; + height: 260px; overflow-y: auto; + font-family: ui-monospace, Menlo, Consolas, monospace; font-size: 12px; + white-space: pre-wrap; line-height: 1.45; +} + +#resultPanel { margin-top: 16px; } +.banner { padding: 12px 14px; border-radius: 8px; font-weight: 600; margin-bottom: 12px; } +.banner.ok { background: #dcfce7; color: #166534; } +.banner.err { background: #fee2e2; color: #991b1b; } +.outlist { list-style: none; padding: 0; margin: 0; } +.outlist li { + display: flex; justify-content: space-between; align-items: center; + padding: 7px 0; border-bottom: 1px solid var(--border); +} +.outlist .filename { word-break: break-all; } 
+.summary { display: flex; gap: 22px; margin: 8px 0 14px; } +.summary .stat { background: #f9fafb; border: 1px solid var(--border); border-radius: 8px; padding: 10px 16px; } +.summary .stat b { display: block; font-size: 20px; } +.summary .stat span { color: var(--muted); font-size: 12px; } diff --git a/app/run_model_gui.py b/app/run_model_gui.py new file mode 100644 index 0000000..8521f60 --- /dev/null +++ b/app/run_model_gui.py @@ -0,0 +1,274 @@ +""" +Hub Prioritization - Run Model GUI +================================== + +An interactive desktop GUI (HTML/JS rendered in a native window via pywebview) +for running the full hub prioritization pipeline. + +Features +-------- +* Native OS folder / file pickers for selecting inputs. +* Select a single directory and have the app auto-detect all input files, + or pick each file individually. +* Enter who is running the model and free-text remarks. +* Choose an output directory; every run writes its outputs there together + with a run log (run_log.json / run_log.txt) recording which files were used, + timestamps, who ran it, the remarks, and the outputs produced. +* Live log streaming while the pipeline runs. + +Run with: + python app/run_model_gui.py +""" + +import sys +import json +import logging +import threading +from pathlib import Path +from datetime import datetime + +# --- Make the project importable --------------------------------------------- +PROJECT_ROOT = Path(__file__).resolve().parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +try: + import webview # pywebview +except ImportError: + print("ERROR: pywebview is not installed.\n" + "Install it with: pip install pywebview\n" + "(On Linux you may also need a backend, e.g. PyGObject + WebKit2GTK,\n" + " or pyqt5/pyqtwebengine.)") + sys.exit(1) + +# Import the pipeline. 
NOTE: the pipeline's interactive dependency check only runs when +# that script is executed directly, so importing it here never prompts; +# install requirements.txt first so that its own imports succeed. +from scripts.run_complete_pipeline import ( + RunConfig, + run_pipeline, + resolve_inputs_from_directory, + INPUT_FILE_HINTS, + REQUIRED_INPUTS, + logger as pipeline_logger, +) +from src.config import RESULTS_DIR + +GUI_DIR = Path(__file__).resolve().parent / "gui" + +# Ordered list of input fields shown in the GUI (field, label, required) +INPUT_FIELDS = [ + ("transit_nodes", "Transit nodes (CSV)", True), + ("lines_modes", "Lines & planned modes (CSV)", True), + ("demand", "Demand forecast (Excel/CSV)", False), + ("metro_areas", "Metro areas (SHP)", False), + ("districts", "Districts (SHP)", False), + ("taz_zones", "TAZ zones / demographics (SHP)", False), + ("bus_terminals", "Bus terminals (SHP)", False), +] + +# File-type filters for the native open dialog, per field +FILE_TYPES = { + "transit_nodes": ("CSV files (*.csv)", "All files (*.*)"), + "lines_modes": ("CSV files (*.csv)", "All files (*.*)"), + "demand": ("Spreadsheet (*.xlsx;*.xls;*.csv)", "All files (*.*)"), + "metro_areas": ("Shapefile (*.shp)", "All files (*.*)"), + "districts": ("Shapefile (*.shp)", "All files (*.*)"), + "taz_zones": ("Shapefile (*.shp)", "All files (*.*)"), + "bus_terminals": ("Shapefile (*.shp)", "All files (*.*)"), +} + + +class _LogCapture(logging.Handler): + """Logging handler that buffers formatted records for the GUI to poll.""" + + def __init__(self, sink): + super().__init__(level=logging.INFO) + self._sink = sink + self.setFormatter(logging.Formatter("%(message)s")) + + def emit(self, record): + try: + self._sink(self.format(record)) + except Exception: + pass + + +class Api: + """Methods exposed to the JavaScript front-end via pywebview.""" + + def __init__(self): + self.window = None + self._lock = threading.Lock() + self._log_lines = [] + self._running = False + self._done = False + self._error = None + self._manifest = 
None + self._handler = None + + # -- metadata for the UI ------------------------------------------------ + def get_input_fields(self): + return [{"field": f, "label": l, "required": r} for f, l, r in INPUT_FIELDS] + + def default_output_dir(self): + ts = datetime.now().strftime("%Y%m%d_%H%M%S") + return str(RESULTS_DIR / f"run_{ts}") + + # -- native dialogs ----------------------------------------------------- + def pick_folder(self): + result = self.window.create_file_dialog(webview.FOLDER_DIALOG) + if not result: + return None + return result[0] if isinstance(result, (list, tuple)) else result + + def pick_file(self, field=None): + file_types = FILE_TYPES.get(field, ("All files (*.*)",)) + result = self.window.create_file_dialog( + webview.OPEN_DIALOG, allow_multiple=False, file_types=file_types + ) + if not result: + return None + return result[0] if isinstance(result, (list, tuple)) else result + + # -- scan a directory for inputs --------------------------------------- + def scan_directory(self, directory): + """Auto-detect input files inside a directory.""" + try: + return {"ok": True, "matches": resolve_inputs_from_directory(directory)} + except Exception as e: + return {"ok": False, "error": str(e)} + + # -- run ---------------------------------------------------------------- + def _append_log(self, line): + with self._lock: + self._log_lines.append(line) + + def start_run(self, payload): + """Validate the payload and start the pipeline in a background thread. + + payload keys: inputs (dict field->path), output_dir, run_by, remarks, + skip_demand_data, skip_spatial_layers, skip_demographics, + run_mc_distribution. 
+ """ + with self._lock: + if self._running: + return {"ok": False, "error": "A run is already in progress."} + + inputs = payload.get("inputs", {}) or {} + + # Build the config + try: + cfg = RunConfig( + transit_nodes=inputs.get("transit_nodes") or None, + lines_modes=inputs.get("lines_modes") or None, + demand=inputs.get("demand") or None, + metro_areas=inputs.get("metro_areas") or None, + districts=inputs.get("districts") or None, + taz_zones=inputs.get("taz_zones") or None, + bus_terminals=inputs.get("bus_terminals") or None, + output_dir=payload.get("output_dir") or None, + skip_demand_data=bool(payload.get("skip_demand_data")), + skip_spatial_layers=bool(payload.get("skip_spatial_layers")), + skip_demographics=bool(payload.get("skip_demographics")), + run_mc_distribution=bool(payload.get("run_mc_distribution")), + # `or ""` tolerates an explicit null from the front-end + run_by=(payload.get("run_by") or "").strip(), + remarks=(payload.get("remarks") or "").strip(), + ) + except Exception as e: + return {"ok": False, "error": f"Invalid configuration: {e}"} + + problems = cfg.validate() + if problems: + return {"ok": False, "error": " ; ".join(problems)} + + # Claim the run slot and reset state in ONE critical section. The check + # at the top is only a fast path: JS API calls may be serviced on + # separate threads, so a double click could otherwise start two runs. + with self._lock: + if self._running: + return {"ok": False, "error": "A run is already in progress."} + self._log_lines = [] + self._running = True + self._done = False + self._error = None + self._manifest = None + + # Attach log capture + self._handler = _LogCapture(self._append_log) + pipeline_logger.addHandler(self._handler) + + thread = threading.Thread(target=self._worker, args=(cfg,), daemon=True) + thread.start() + return {"ok": True, "output_dir": str(cfg.output_dir)} + + def _worker(self, cfg): + try: + self._append_log(f"Starting run {cfg.run_id} (by {cfg.run_by or 'unknown'})...") + run_pipeline(cfg) + # Read the manifest the pipeline wrote + manifest_path = Path(cfg.output_dir) / "run_log.json" + if manifest_path.exists(): + with open(manifest_path, encoding="utf-8") as f: + self._manifest = json.load(f) + except Exception as e: + with self._lock: + self._error = str(e) + self._append_log(f"ERROR: {e}") + # Try to still load
a manifest (the pipeline writes one on failure) + try: + manifest_path = Path(cfg.output_dir) / "run_log.json" + if manifest_path.exists(): + with open(manifest_path, encoding="utf-8") as f: + self._manifest = json.load(f) + except Exception: + pass + finally: + with self._lock: + self._running = False + self._done = True + if self._handler is not None: + pipeline_logger.removeHandler(self._handler) + self._handler = None + + def poll(self): + """Return current run state for the UI to render.""" + with self._lock: + return { + "running": self._running, + "done": self._done, + "error": self._error, + "log": "\n".join(self._log_lines), + "manifest": self._manifest, + } + + def open_path(self, path): + """Open a file/folder in the OS default application.""" + import os + import subprocess + try: + p = Path(path) + target = str(p if p.exists() else p.parent) + if sys.platform.startswith("darwin"): + subprocess.Popen(["open", target]) + elif sys.platform.startswith("win"): + os.startfile(target) # type: ignore[attr-defined] + else: + subprocess.Popen(["xdg-open", target]) + return {"ok": True} + except Exception as e: + return {"ok": False, "error": str(e)} + + +def main(): + api = Api() + window = webview.create_window( + "Hub Prioritization - Run Model", + url=str(GUI_DIR / "index.html"), + js_api=api, + width=1100, + height=860, + min_size=(900, 700), + ) + api.window = window + webview.start() + + +if __name__ == "__main__": + main() diff --git a/requirements.txt b/requirements.txt index dcbd7ba..5fe67cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,3 +38,6 @@ sphinx>=6.0.0 # Streamlit App streamlit>=1.28.0 plotly>=5.18.0 + +# Desktop GUI for running the model (native window + file dialogs) +pywebview>=4.4.1 diff --git a/scripts/run_complete_pipeline.py b/scripts/run_complete_pipeline.py index 5df92d7..29a17fe 100644 --- a/scripts/run_complete_pipeline.py +++ b/scripts/run_complete_pipeline.py @@ -90,8 +90,11 @@ def check_and_install_dependencies(): 
print("Please install packages manually and run again.") sys.exit(1) -# Run dependency check -check_and_install_dependencies() +# Run the interactive dependency check only when this script is executed +# directly. When the module is imported (e.g. by the GUI), we must not block on +# input(); missing imports will surface naturally as ImportError instead. +if __name__ == "__main__": + check_and_install_dependencies() # ============================================================================ # Now import everything else @@ -114,6 +117,12 @@ def check_and_install_dependencies(): print("\nPlease ensure the project structure is intact.") sys.exit(1) +import json +import logging +import hashlib +from dataclasses import dataclass, field, asdict +from typing import Optional, Dict, List + import geopandas as gpd import pandas as pd from datetime import datetime @@ -159,7 +168,7 @@ def check_and_install_dependencies(): # ============================================================================ -# CONFIGURE YOUR INPUT FILE PATHS HERE +# DEFAULT INPUT FILE PATHS (used by the CLI / as fallbacks) # ============================================================================ # Transit network (REQUIRED) @@ -175,10 +184,161 @@ def check_and_install_dependencies(): INPUT_TAZ_ZONES = RAW_DATA_DIR / "TAZ_2050.shp" # With POP_2050 and EMPL_2050 INPUT_BUS_TERMINALS = RAW_DATA_DIR / "bus_terminals.shp" # Optional -# Processing options -SKIP_DEMAND_DATA = False # Set True if you don't have demand data yet -SKIP_SPATIAL_LAYERS = False # Set True if you don't have spatial layers yet -SKIP_DEMOGRAPHICS = False # Set True if you don't have TAZ data yet + +# ============================================================================ +# RUN CONFIGURATION +# ============================================================================ +# A RunConfig fully describes one execution of the pipeline: which input files +# to use, where to write outputs, which optional steps to run, and run 
metadata +# (who ran it and why). The GUI builds a RunConfig from user selections; the +# CLI builds a default one from the paths above. + +# Field name -> list of filename match hints (lowercase substrings / patterns) +# used by resolve_inputs_from_directory() to auto-detect files in a folder. +INPUT_FILE_HINTS = { + 'transit_nodes': {'ext': ['.csv'], 'contains': ['nodes']}, + 'lines_modes': {'ext': ['.csv'], 'contains': ['lines', 'mode']}, + 'demand': {'ext': ['.xlsx', '.xls', '.csv'], 'contains': ['demand', 'nodes_w_results']}, + 'metro_areas': {'ext': ['.shp'], 'contains': ['metro']}, + 'districts': {'ext': ['.shp'], 'contains': ['district', 'machoz']}, + 'taz_zones': {'ext': ['.shp'], 'contains': ['taz']}, + 'bus_terminals': {'ext': ['.shp'], 'contains': ['terminal', 'bus_term']}, +} + +# Which inputs are mandatory for a run to proceed +REQUIRED_INPUTS = ['transit_nodes', 'lines_modes'] + + +@dataclass +class RunConfig: + """Full configuration for a single pipeline run.""" + + # --- Input files (None = not provided) --- + transit_nodes: Optional[Path] = None + lines_modes: Optional[Path] = None + demand: Optional[Path] = None + metro_areas: Optional[Path] = None + districts: Optional[Path] = None + taz_zones: Optional[Path] = None + bus_terminals: Optional[Path] = None + + # --- Output --- + output_dir: Optional[Path] = None + + # --- Optional-step toggles --- + skip_demand_data: bool = False + skip_spatial_layers: bool = False + skip_demographics: bool = False + run_mc_distribution: bool = False + + # --- Run metadata (for the run log) --- + run_by: str = "" + remarks: str = "" + run_id: str = field(default_factory=lambda: datetime.now().strftime('%Y%m%d_%H%M%S')) + + def __post_init__(self): + # Normalise provided paths to Path objects + for f in ['transit_nodes', 'lines_modes', 'demand', 'metro_areas', + 'districts', 'taz_zones', 'bus_terminals', 'output_dir']: + val = getattr(self, f) + if val is not None and not isinstance(val, Path): + setattr(self, f, 
Path(val)) + + # Default output dir: data/results/run_<run_id>/ + if self.output_dir is None: + self.output_dir = RESULTS_DIR / f"run_{self.run_id}" + + @property + def results_dir(self) -> Path: + """Directory for final outputs (CSV / GeoJSON / map / logs).""" + self.output_dir.mkdir(parents=True, exist_ok=True) + return self.output_dir + + @property + def processed_dir(self) -> Path: + """Directory for intermediate artefacts.""" + d = self.output_dir / "processed" + d.mkdir(parents=True, exist_ok=True) + return d + + def validate(self) -> List[str]: + """Return a list of human-readable problems (empty = OK).""" + problems = [] + for key in REQUIRED_INPUTS: + val = getattr(self, key) + if val is None: + problems.append(f"Missing required input: {key}") + elif not Path(val).exists(): + problems.append(f"Required input not found on disk: {val}") + return problems + + +def default_run_config(**overrides) -> RunConfig: + """Build a RunConfig from the default RAW_DATA_DIR paths (CLI behaviour). + + Keyword overrides are merged into the defaults BEFORE the RunConfig is + constructed, so values derived in __post_init__ (e.g. output_dir computed + from run_id) stay consistent with the overrides. Unknown field names + raise TypeError. + """ + defaults = dict( + transit_nodes=INPUT_TRANSIT_NODES, + lines_modes=INPUT_LINES_MODES, + demand=INPUT_DEMAND_CSV if INPUT_DEMAND_CSV.exists() else None, + metro_areas=INPUT_METRO_AREAS if INPUT_METRO_AREAS.exists() else None, + districts=INPUT_DISTRICTS if INPUT_DISTRICTS.exists() else None, + taz_zones=INPUT_TAZ_ZONES if INPUT_TAZ_ZONES.exists() else None, + bus_terminals=INPUT_BUS_TERMINALS if INPUT_BUS_TERMINALS.exists() else None, + ) + defaults.update(overrides) + return RunConfig(**defaults) + + +def resolve_inputs_from_directory(directory) -> Dict[str, Optional[str]]: + """Scan a directory and auto-match expected input files by name/extension. + + Args: + directory: Folder to scan (str or Path). + + Returns: + Dict mapping each input field name to a matched file path (or None). 
+ """ + directory = Path(directory) + found: Dict[str, Optional[str]] = {k: None for k in INPUT_FILE_HINTS} + + if not directory.is_dir(): + return found + + files = [p for p in directory.rglob('*') if p.is_file()] + used: set = set() # paths already claimed by an earlier field + + def score(path: Path, hints) -> int: + """Number of hint words found in the filename stem (0 = no name match).""" + stem = path.stem.lower() + return sum(1 for c in hints['contains'] if c in stem) + + # Fields are processed in INPUT_FILE_HINTS order, which is arranged so that + # the more specific names (e.g. 'nodes') claim their file before a more + # generic hint (e.g. 'lines', which also appears in 'All_nodes+lines.csv'). + for field_name, hints in INPUT_FILE_HINTS.items(): + best = None + best_score = 0 + for ext in hints['ext']: + candidates = [p for p in files + if p.suffix.lower() == ext and p not in used] + # Prefer the candidate matching the most hint words + for p in candidates: + s = score(p, hints) + if s > best_score: + best, best_score = p, s + if best is not None: + break + # If nothing name-matched but exactly one file of a single allowed + # extension exists, accept it (unambiguous type, e.g. a lone .shp). 
+ if len(hints['ext']) == 1 and len(candidates) == 1: + best = candidates[0] + break + if best is not None: + used.add(best) + found[field_name] = str(best) + + return found # ============================================================================ @@ -186,9 +346,23 @@ def check_and_install_dependencies(): class CompleteHubPipeline: """Complete pipeline with all data sources.""" - def __init__(self): + def __init__(self, config: Optional[RunConfig] = None): + self.config = config if config is not None else default_run_config() self.logger = logger - self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') + self.timestamp = self.config.run_id + self.started_at = datetime.now() + self.output_files: List[str] = [] + self.status = "running" + self.error_message = None + + # Attach a per-run log file inside the output directory + self._run_log_path = self.config.results_dir / f"run_{self.timestamp}.log" + self._run_log_handler = logging.FileHandler(self._run_log_path, encoding='utf-8') + self._run_log_handler.setLevel(logging.DEBUG) + self._run_log_handler.setFormatter( + logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') + ) + logger.addHandler(self._run_log_handler) # Data holders self.transit_nodes = None @@ -219,43 +393,45 @@ def step_1_load_all_data(self): logger.info("STEP 1: LOAD ALL INPUT DATA") logger.info("="*80) + cfg = self.config + # Transit network (required) logger.info("\n1.1: Loading transit nodes...") - self.transit_nodes = loaders.load_transit_nodes(INPUT_TRANSIT_NODES) + self.transit_nodes = loaders.load_transit_nodes(cfg.transit_nodes) logger.info("\n1.2: Loading lines and modes...") - self.lines_modes = loaders.load_lines_and_modes(INPUT_LINES_MODES) + self.lines_modes = loaders.load_lines_and_modes(cfg.lines_modes) # Demand data (optional) - if not SKIP_DEMAND_DATA and INPUT_DEMAND_CSV.exists(): + if not cfg.skip_demand_data and cfg.demand and Path(cfg.demand).exists(): logger.info("\n1.3: Loading demand data...") - 
self.demand_data = loaders.load_demand_data(INPUT_DEMAND_CSV) + self.demand_data = loaders.load_demand_data(cfg.demand) else: logger.warning("⚠ Skipping demand data (file not found or disabled)") # Spatial layers (optional) - if not SKIP_SPATIAL_LAYERS: - if INPUT_METRO_AREAS.exists(): + if not cfg.skip_spatial_layers: + if cfg.metro_areas and Path(cfg.metro_areas).exists(): logger.info("\n1.4: Loading metro areas...") - self.metro_areas = loaders.load_metro_areas(INPUT_METRO_AREAS) + self.metro_areas = loaders.load_metro_areas(cfg.metro_areas) else: logger.warning("⚠ Metro areas not found") - if INPUT_DISTRICTS.exists(): + if cfg.districts and Path(cfg.districts).exists(): logger.info("\n1.5: Loading districts...") - self.districts = loaders.load_districts(INPUT_DISTRICTS) + self.districts = loaders.load_districts(cfg.districts) else: logger.warning("⚠ Districts not found") - if INPUT_TAZ_ZONES.exists() and not SKIP_DEMOGRAPHICS: + if cfg.taz_zones and Path(cfg.taz_zones).exists() and not cfg.skip_demographics: logger.info("\n1.6: Loading TAZ zones...") - self.taz_zones = loaders.load_taz_zones(INPUT_TAZ_ZONES) + self.taz_zones = loaders.load_taz_zones(cfg.taz_zones) else: logger.warning("⚠ TAZ zones not found or skipped") - if INPUT_BUS_TERMINALS.exists(): + if cfg.bus_terminals and Path(cfg.bus_terminals).exists(): logger.info("\n1.7: Loading bus terminals...") - self.bus_terminals = loaders.load_bus_terminals(INPUT_BUS_TERMINALS) + self.bus_terminals = loaders.load_bus_terminals(cfg.bus_terminals) else: logger.warning("⚠ Bus terminals not found (optional)") @@ -284,7 +460,7 @@ def step_2_create_h3_hexagons(self): ) # Save intermediate - output_path = PROCESSED_DATA_DIR / f"h3_hexagons_{self.timestamp}.csv" + output_path = self.config.processed_dir / f"h3_hexagons_{self.timestamp}.csv" export_df = self.h3_hexagons.copy() export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt) export_df.to_csv(output_path, index=False, encoding='utf-8-sig') @@ -305,7 
+481,7 @@ def step_3_group_hexagons(self): self.grouped_hubs = merging.aggregate_groups(hexagons_grouped) # Save - output_path = PROCESSED_DATA_DIR / f"grouped_hubs_{self.timestamp}.csv" + output_path = self.config.processed_dir / f"grouped_hubs_{self.timestamp}.csv" export_df = self.grouped_hubs.copy() export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt) export_df.to_csv(output_path, index=False, encoding='utf-8-sig') @@ -324,7 +500,7 @@ def step_4_add_demand_data(self): logger.info("STEP 4: ADD DEMAND DATA") logger.info("="*80) - if SKIP_DEMAND_DATA: + if self.config.skip_demand_data: logger.warning("⚠ Skipping demand data - disabled") self.hubs_with_demand = self.grouped_hubs.copy() self.hubs_with_demand['TotalDemand'] = 5000 # Placeholder @@ -595,23 +771,32 @@ def process_sheet(df, sheet_name, region_name): logger.info(f" Processed {rows_processed} rows, {len(result)} unique nodes") return result - # Try to find demand file (Excel or CSV) - demand_excel = RAW_DATA_DIR / "Demand_2050_all.xlsx" - demand_xls = RAW_DATA_DIR / "Demand_2050_all.xls" - # Also check for the original filename pattern - demand_excel_alt = RAW_DATA_DIR / "Nodes_w_results_21082025.xlsx" - demand_csv = INPUT_DEMAND_CSV - - node_demand = {} + # Determine demand file from the run configuration. Excel (multi-sheet) + # is preferred; a single CSV is also supported. Fall back to the legacy + # default locations only when no demand file was configured. 
+ configured_demand = Path(self.config.demand) if self.config.demand else None - try: - # Try Excel file first (with multiple sheets) - excel_file = None - for ef in [demand_excel, demand_xls, demand_excel_alt]: + excel_file = None + demand_csv = None + if configured_demand and configured_demand.exists(): + if configured_demand.suffix.lower() in ('.xlsx', '.xls'): + excel_file = configured_demand + else: + demand_csv = configured_demand + else: + # Legacy fallbacks (CLI without explicit config) + for ef in [RAW_DATA_DIR / "Demand_2050_all.xlsx", + RAW_DATA_DIR / "Demand_2050_all.xls", + RAW_DATA_DIR / "Nodes_w_results_21082025.xlsx"]: if ef.exists(): excel_file = ef break + if excel_file is None and INPUT_DEMAND_CSV.exists(): + demand_csv = INPUT_DEMAND_CSV + + node_demand = {} + try: if excel_file: logger.info(f"Loading demand from Excel: {excel_file}") @@ -654,7 +839,7 @@ def process_sheet(df, sheet_name, region_name): logger.warning(f" Error loading sheet '{sheet}': {e}") # If no Excel or no data found, try CSV - elif demand_csv.exists(): + elif demand_csv and demand_csv.exists(): logger.info(f"Loading demand from CSV: {demand_csv}") df_demand = pd.read_csv(demand_csv, encoding='utf-8-sig') logger.info(f"Loaded {len(df_demand)} rows") @@ -666,7 +851,7 @@ def process_sheet(df, sheet_name, region_name): else: logger.warning("⚠ No demand file found!") - logger.info(f" Looked for: {demand_excel}, {demand_xls}, {demand_excel_alt}, {demand_csv}") + logger.info(f" Configured demand file: {configured_demand}") self.hubs_with_demand = self.grouped_hubs.copy() self.hubs_with_demand['TotalDemand'] = 5000 # Placeholder self.hubs_with_demand['TotalTransfers'] = 0 @@ -898,7 +1083,7 @@ def step_5_add_spatial_tags(self): hubs_proj = hubs_tagged.to_crs('EPSG:2039') # Tag with district (for region determination) - if self.districts is not None and not SKIP_SPATIAL_LAYERS: + if self.districts is not None and not self.config.skip_spatial_layers: logger.info("Spatial join with 
districts...") try: # Find district column @@ -963,7 +1148,7 @@ def step_6_add_demographics(self): logger.info("STEP 6: ADD DEMOGRAPHIC DATA") logger.info("="*80) - if SKIP_DEMOGRAPHICS or self.taz_zones is None: + if self.config.skip_demographics or self.taz_zones is None: logger.warning("⚠ Skipping demographics - no TAZ data") # Add placeholder columns for zone in ['zone1', 'zone2', 'zone3']: @@ -987,17 +1172,18 @@ def step_6_add_demographics(self): processor = InfluenceAreaProcessor() # Save hubs temporarily - temp_csv = PROCESSED_DATA_DIR / f"temp_hubs_{self.timestamp}.csv" + temp_csv = self.config.processed_dir / f"temp_hubs_{self.timestamp}.csv" export_df = self.hubs_with_demand.copy() export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt if x else None) export_df.to_csv(temp_csv, index=False, encoding='utf-8-sig') # Process influence areas try: + _terminals = self.config.bus_terminals result_gdf = processor.process_full_pipeline( hubs_csv=str(temp_csv), - taz_shp=str(INPUT_TAZ_ZONES), - terminals_shp=str(INPUT_BUS_TERMINALS) if INPUT_BUS_TERMINALS.exists() else None, + taz_shp=str(self.config.taz_zones), + terminals_shp=str(_terminals) if _terminals and Path(_terminals).exists() else None, output_csv=None # Don't save intermediate file ) self.hubs_with_demographics = result_gdf @@ -1125,13 +1311,14 @@ def step_11_run_mc_distribution(self): logger.info("STEP 11: MONTE CARLO DISTRIBUTION ANALYSIS (OPTIONAL)") logger.info("="*80) - # Check if user wants to run MC distribution - run_mc_dist = os.environ.get('RUN_MC_DISTRIBUTION', 'false').lower() == 'true' + # Check if user wants to run MC distribution (config flag or env var) + run_mc_dist = self.config.run_mc_distribution or \ + os.environ.get('RUN_MC_DISTRIBUTION', 'false').lower() == 'true' if not run_mc_dist: logger.info("⊘ Skipping MC distribution analysis (not requested)") - logger.info(" To enable: Set environment variable RUN_MC_DISTRIBUTION=true") - logger.info(" or modify this script to 
set run_mc_dist = True") + logger.info(" To enable: set run_mc_distribution=True in the RunConfig") + logger.info(" or set environment variable RUN_MC_DISTRIBUTION=true") logger.info("✓ Step 11 complete (skipped)") return @@ -1163,7 +1350,7 @@ def step_11_run_mc_distribution(self): score_matrix.index = self.scored_hubs.index # Run distribution analysis - mc_dist_dir = RESULTS_DIR / f'mc_distribution_{self.timestamp}' + mc_dist_dir = self.config.results_dir / f'mc_distribution_{self.timestamp}' mc_results = run_mc_distribution_analysis( score_matrix=score_matrix, output_dir=str(mc_dist_dir), @@ -1197,28 +1384,175 @@ def step_12_export_results(self): logger.info("STEP 12: EXPORT RESULTS") logger.info("="*80) + results_dir = self.config.results_dir + # CSV - csv_path = RESULTS_DIR / f"hub_prioritization_results_{self.timestamp}.csv" + csv_path = results_dir / f"hub_prioritization_results_{self.timestamp}.csv" export_df = self.scored_hubs.copy() export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt if x else None) export_df.to_csv(csv_path, index=False, encoding='utf-8-sig') + self.output_files.append(str(csv_path)) logger.info(f"✓ CSV: {csv_path}") # GeoJSON - geojson_path = RESULTS_DIR / f"hub_results_{self.timestamp}.geojson" + geojson_path = results_dir / f"hub_results_{self.timestamp}.geojson" self.scored_hubs.to_file(geojson_path, driver='GeoJSON') + self.output_files.append(str(geojson_path)) logger.info(f"✓ GeoJSON: {geojson_path}") # Map try: - map_path = RESULTS_DIR / f"hub_map_{self.timestamp}.html" + map_path = results_dir / f"hub_map_{self.timestamp}.html" maps.create_hub_map(self.scored_hubs, color_by='final_score', output_file=str(map_path)) + self.output_files.append(str(map_path)) logger.info(f"✓ Map: {map_path}") except Exception as e: logger.warning(f"Could not create map: {e}") logger.info("✓ Step 12 complete") + # ------------------------------------------------------------------ + # Run logging / manifest + # 
------------------------------------------------------------------ + + @staticmethod + def _file_info(path) -> Dict: + """Return name/size/mtime/sha256 metadata for an input file.""" + info = {'path': str(path), 'exists': False} + try: + p = Path(path) + if p.exists(): + stat = p.stat() + info.update({ + 'exists': True, + 'name': p.name, + 'size_bytes': stat.st_size, + 'modified': datetime.fromtimestamp(stat.st_mtime).isoformat(timespec='seconds'), + }) + # SHA-256 (skip very large files for speed) + if stat.st_size <= 500 * 1024 * 1024: + h = hashlib.sha256() + with open(p, 'rb') as fh: + for chunk in iter(lambda: fh.read(1024 * 1024), b''): + h.update(chunk) + info['sha256'] = h.hexdigest() + except Exception as e: + info['error'] = str(e) + return info + + def write_run_manifest(self): + """Write run_log.json + run_log.txt describing this run.""" + cfg = self.config + finished_at = datetime.now() + + inputs = {} + for key in ['transit_nodes', 'lines_modes', 'demand', 'metro_areas', + 'districts', 'taz_zones', 'bus_terminals']: + val = getattr(cfg, key) + inputs[key] = self._file_info(val) if val else None + + summary = {} + if self.scored_hubs is not None: + summary['total_hubs'] = int(len(self.scored_hubs)) + if 'tier' in self.scored_hubs.columns: + summary['hubs_by_tier'] = { + str(k): int(v) + for k, v in self.scored_hubs['tier'].value_counts().items() + } + + manifest = { + 'run_id': cfg.run_id, + 'run_by': cfg.run_by or 'unknown', + 'remarks': cfg.remarks, + 'status': self.status, + 'error_message': self.error_message, + 'started_at': self.started_at.isoformat(timespec='seconds'), + 'finished_at': finished_at.isoformat(timespec='seconds'), + 'duration_seconds': round((finished_at - self.started_at).total_seconds(), 1), + 'output_dir': str(cfg.output_dir), + 'options': { + 'skip_demand_data': cfg.skip_demand_data, + 'skip_spatial_layers': cfg.skip_spatial_layers, + 'skip_demographics': cfg.skip_demographics, + 'run_mc_distribution': cfg.run_mc_distribution, 
+ }, + 'inputs': inputs, + 'outputs': self.output_files, + 'log_file': str(self._run_log_path), + 'results_summary': summary, + } + + # JSON manifest + json_path = cfg.results_dir / "run_log.json" + with open(json_path, 'w', encoding='utf-8') as f: + json.dump(manifest, f, indent=2, ensure_ascii=False) + + # Human-readable text log + txt_path = cfg.results_dir / "run_log.txt" + with open(txt_path, 'w', encoding='utf-8') as f: + f.write("=" * 70 + "\n") + f.write("HUB PRIORITIZATION - RUN LOG\n") + f.write("=" * 70 + "\n\n") + f.write(f"Run ID : {manifest['run_id']}\n") + f.write(f"Run by : {manifest['run_by']}\n") + f.write(f"Status : {manifest['status']}\n") + if self.error_message: + f.write(f"Error : {self.error_message}\n") + f.write(f"Started : {manifest['started_at']}\n") + f.write(f"Finished : {manifest['finished_at']}\n") + f.write(f"Duration : {manifest['duration_seconds']} s\n") + f.write(f"Output dir : {manifest['output_dir']}\n\n") + f.write(f"Remarks:\n{cfg.remarks or '(none)'}\n\n") + f.write("-" * 70 + "\n") + f.write("INPUT FILES USED\n") + f.write("-" * 70 + "\n") + for key, info in inputs.items(): + if not info: + f.write(f" {key:<16}: (not provided)\n") + elif info.get('exists'): + f.write(f" {key:<16}: {info['name']} " + f"({info.get('size_bytes', '?')} bytes, " + f"modified {info.get('modified', '?')})\n") + f.write(f" {'':<16} path: {info['path']}\n") + if 'sha256' in info: + f.write(f" {'':<16} sha256: {info['sha256']}\n") + else: + f.write(f" {key:<16}: MISSING ({info['path']})\n") + f.write("\n") + f.write("-" * 70 + "\n") + f.write("OPTIONS\n") + f.write("-" * 70 + "\n") + for k, v in manifest['options'].items(): + f.write(f" {k:<22}: {v}\n") + f.write("\n") + f.write("-" * 70 + "\n") + f.write("OUTPUT FILES PRODUCED\n") + f.write("-" * 70 + "\n") + for out in self.output_files: + f.write(f" {out}\n") + f.write("\n") + if summary: + f.write("-" * 70 + "\n") + f.write("RESULTS SUMMARY\n") + f.write("-" * 70 + "\n") + f.write(f" Total hubs: 
{summary.get('total_hubs', '?')}\n") + for tier, n in summary.get('hubs_by_tier', {}).items(): + f.write(f" {tier}: {n}\n") + + self.output_files.append(str(json_path)) + self.output_files.append(str(txt_path)) + logger.info(f"✓ Run log: {json_path}") + logger.info(f"✓ Run log: {txt_path}") + return manifest + + def _detach_log_handler(self): + """Remove the per-run file handler from the shared logger.""" + try: + logger.removeHandler(self._run_log_handler) + self._run_log_handler.close() + except Exception: + pass + def run(self): """Run complete pipeline.""" try: @@ -1235,22 +1569,57 @@ def run(self): self.step_11_run_mc_distribution() self.step_12_export_results() + self.status = "success" + logger.info("\n" + "="*80) logger.info("✅ PIPELINE COMPLETE!") logger.info("="*80) logger.info(f"\nFinal Results:") logger.info(f" Total hubs: {len(self.scored_hubs)}") - logger.info(f" Results: {RESULTS_DIR}") + logger.info(f" Results: {self.config.results_dir}") return self.scored_hubs except Exception as e: + self.status = "error" + self.error_message = str(e) logger.error(f"Pipeline failed: {e}", exc_info=True) raise + finally: + # Always write a manifest (even on failure) and detach the log file + try: + self.write_run_manifest() + except Exception as e: + logger.error(f"Could not write run manifest: {e}") + self._detach_log_handler() + + +def run_pipeline(config: Optional[RunConfig] = None): + """Run the full pipeline for a given configuration. + + This is the single entry point shared by the CLI and the GUI. + + Args: + config: A RunConfig. If None, a default config is built from the + RAW_DATA_DIR paths defined at the top of this module. + + Returns: + The scored hubs GeoDataFrame. 
+ """ + cfg = config if config is not None else default_run_config() + + problems = cfg.validate() + if problems: + for p in problems: + logger.error(f"❌ {p}") + raise FileNotFoundError("; ".join(problems)) + + pipeline = CompleteHubPipeline(cfg) + return pipeline.run() def main(): - """Main entry point.""" + """CLI entry point - runs with default RAW_DATA_DIR paths.""" # Check required files if not INPUT_TRANSIT_NODES.exists(): @@ -1263,9 +1632,8 @@ def main(): logger.info("Please update INPUT_LINES_MODES path in this script") sys.exit(1) - # Run pipeline - pipeline = CompleteHubPipeline() - results = pipeline.run() + # Run pipeline with default configuration + results = run_pipeline(default_run_config()) logger.info("\n🎉 Done! Check results in data/results/")