From 65d7d19d929a82179cd519dcf66b8ddea5c2d761 Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Sun, 14 Jun 2026 08:17:57 +0000
Subject: [PATCH] Add interactive GUI for running the hub prioritization model

Refactor the complete pipeline to be configuration-driven and add a native
desktop GUI (pywebview) for running it without code.

Pipeline (scripts/run_complete_pipeline.py):
- New RunConfig dataclass describing one run: input files, output dir,
  optional-step toggles, and run metadata (run_by, remarks, run_id)
- CompleteHubPipeline now takes a RunConfig; all hardcoded input paths,
  skip-flags and output locations are sourced from it (CLI still works
  via default_run_config())
- resolve_inputs_from_directory(): auto-detect input files in a folder
- run_pipeline() shared entry point for CLI and GUI
- Per-run output directory plus a run log (run_log.json / run_log.txt)
  capturing input files used (with size/mtime/sha256), timestamps, who
  ran it, remarks, options, outputs produced and a results summary
- Dependency self-check now only prompts when run directly (import-safe)

GUI (app/run_model_gui.py + app/gui/*):
- Native folder/file pickers; pick a directory to auto-detect inputs or
  choose each file individually
- Name + remarks fields, option toggles, output-dir selection
- Live log streaming and a results panel with output file shortcuts

Docs: app/RUN_MODEL_GUI.md, README pointer, pywebview in requirements.txt
---
 README.md                        |  12 +
 app/RUN_MODEL_GUI.md             |  77 +++++
 app/gui/app.js                   | 192 ++++++++++++
 app/gui/index.html               |  94 ++++++
 app/gui/style.css                | 122 ++++++++
 app/run_model_gui.py             | 274 ++++++++++++++++++
 requirements.txt                 |   3 +
 scripts/run_complete_pipeline.py | 482 +++++++++++++++++++++++++++----
 8 files changed, 1199 insertions(+), 57 deletions(-)
 create mode 100644 app/RUN_MODEL_GUI.md
 create mode 100644 app/gui/app.js
 create mode 100644 app/gui/index.html
 create mode 100644 app/gui/style.css
 create mode 100644 app/run_model_gui.py

diff --git a/README.md b/README.md
index 8f019cb..5c6b987 100644
--- a/README.md
+++ b/README.md
@@ -66,6 +66,18 @@ print(results.nlargest(10, 'final_score'))
 python scripts/run_pipeline.py
 ```
 
+### Interactive GUI (no code)
+
+Run the model from a desktop app with native file pickers — select inputs,
+enter who is running it and any remarks, and get outputs plus a full run log:
+
+```bash
+pip install -r requirements.txt   # includes pywebview
+python app/run_model_gui.py
+```
+
+See **[app/RUN_MODEL_GUI.md](app/RUN_MODEL_GUI.md)** for details.
+
 ---
 
 ## Project Structure
diff --git a/app/RUN_MODEL_GUI.md b/app/RUN_MODEL_GUI.md
new file mode 100644
index 0000000..c85819a
--- /dev/null
+++ b/app/RUN_MODEL_GUI.md
@@ -0,0 +1,77 @@
+# Run Model GUI
+
+An interactive desktop application for running the full hub prioritization
+pipeline without touching code. The interface is built with HTML/JS and rendered
+in a **native window** via [pywebview](https://pywebview.flowrl.com/), so it can
+open real OS folder/file pickers.
+
+## What it does
+
+1. **Select inputs** — either pick a single folder and let the app auto-detect
+   every input file by name, or pick each file individually with a native
+   "Browse…" dialog.
+2. **Record who & why** — enter your name and free-text remarks describing the
+   run.
+3. **Run the pipeline** — outputs (CSV, GeoJSON, interactive map) are written to
+   an output directory of your choice.
+4. **Run log** — every run writes `run_log.json` and `run_log.txt` into the
+   output folder, recording:
+   - run id + start/finish timestamps + duration
+   - who ran it and their remarks
+   - every input file used, with size, last-modified time and SHA-256 checksum
+   - the options used and the output files produced
+   - a results summary (hub counts by tier) and success/error status
+
+## Install
+
+```bash
+pip install -r requirements.txt
+```
+
+`pywebview` needs a rendering backend:
+
+- **Windows** — uses the built-in Edge WebView2 (usually already present).
+- **macOS** — uses the built-in WebKit (no extra install).
+- **Linux** — install one backend, e.g.
+  `pip install pyqt5 pyqtwebengine` **or** the system
+  `python3-gi gir1.2-webkit2-4.1` packages.
+
+## Run
+
+```bash
+python app/run_model_gui.py
+```
+
+## Input files
+
+| Field | Required | Typical filename |
+|-------|----------|------------------|
+| Transit nodes (CSV) | ✅ | `All_nodes+lines.csv` |
+| Lines & planned modes (CSV) | ✅ | `Lines_and_Planned_Mode.csv` |
+| Demand forecast (Excel/CSV) | optional | `Demand_2050_all.xlsx` |
+| Metro areas (SHP) | optional | `metro.shp` |
+| Districts (SHP) | optional | `districts.shp` |
+| TAZ zones / demographics (SHP) | optional | `TAZ_2050.shp` |
+| Bus terminals (SHP) | optional | `bus_terminals.shp` |
+
+Directory auto-detection matches files by extension and name keywords, so naming
+your files close to the conventions above lets the app find them automatically.
+
+## Output
+
+Default output directory is `data/results/run_<timestamp>/`. Each run is
+self-contained in its own folder, including the intermediate artefacts
+(`processed/`), the final outputs, the streaming `run_<timestamp>.log`, and the
+`run_log.json` / `run_log.txt` manifest.
+
+## How it fits the code
+
+The GUI is a thin front-end over `scripts/run_complete_pipeline.py`:
+
+- `RunConfig` — describes one run (inputs, output dir, options, metadata).
+- `run_pipeline(config)` — the shared entry point used by both the GUI and the
+  command line (`python scripts/run_complete_pipeline.py`).
+- `resolve_inputs_from_directory(dir)` — powers the folder auto-detect.
+
+Running the script directly still works exactly as before, using the default
+paths under `data/raw/`.
diff --git a/app/gui/app.js b/app/gui/app.js
new file mode 100644
index 0000000..0acf0a6
--- /dev/null
+++ b/app/gui/app.js
@@ -0,0 +1,192 @@
+// Front-end logic for the Run Model GUI.
+// Communicates with the Python backend via window.pywebview.api.
+
+let FIELDS = [];          // [{field,label,required}]
+const selected = {};      // field -> path
+let pollTimer = null;
+
+function $(id) { return document.getElementById(id); }
+
+// Wait until pywebview has injected the API
+window.addEventListener('pywebviewready', init);
+
+async function init() {
+    FIELDS = await window.pywebview.api.get_input_fields();
+    buildFileRows();
+    $('outputDir').value = await window.pywebview.api.default_output_dir();
+
+    document.querySelectorAll('input[name="mode"]').forEach(r =>
+        r.addEventListener('change', applyMode));
+    applyMode();
+}
+
+function applyMode() {
+    const mode = document.querySelector('input[name="mode"]:checked').value;
+    $('dirMode').style.display = (mode === 'dir') ? 'block' : 'none';
+    // In directory mode the per-file Browse buttons are hidden (auto-detected);
+    // in files mode they are shown.
+    document.querySelectorAll('.fileBrowse').forEach(b => {
+        b.style.display = (mode === 'files') ? 'inline-block' : 'none';
+    });
+}
+
+function buildFileRows() {
+    const tbody = $('fileRows');
+    tbody.innerHTML = '';
+    FIELDS.forEach(f => {
+        const tr = document.createElement('tr');
+        const tag = f.required
+            ? '<span class="tag req">required</span>'
+            : '<span class="tag opt">optional</span>';
+        tr.innerHTML = `
+            <td>${f.label}${tag}</td>
+            <td><span class="filename missing" id="fn_${f.field}">— not selected —</span></td>
+            <td><button class="btn ghost fileBrowse" onclick="browseFile('${f.field}')">Browse…</button></td>
+        `;
+        tbody.appendChild(tr);
+    });
+}
+
+function setFile(field, path) {
+    selected[field] = path || null;
+    const el = $('fn_' + field);
+    if (path) {
+        el.textContent = path;
+        el.classList.remove('missing');
+    } else {
+        el.textContent = '— not selected —';
+        el.classList.add('missing');
+    }
+}
+
+async function browseFile(field) {
+    const path = await window.pywebview.api.pick_file(field);
+    if (path) setFile(field, path);
+}
+
+async function browseDir() {
+    const dir = await window.pywebview.api.pick_folder();
+    if (!dir) return;
+    $('dirPath').value = dir;
+    await rescan();
+}
+
+async function rescan() {
+    const dir = $('dirPath').value;
+    if (!dir) { alert('Choose a directory first.'); return; }
+    const res = await window.pywebview.api.scan_directory(dir);
+    if (!res.ok) { alert('Scan failed: ' + res.error); return; }
+    FIELDS.forEach(f => setFile(f.field, res.matches[f.field]));
+}
+
+async function browseOutput() {
+    const dir = await window.pywebview.api.pick_folder();
+    if (dir) $('outputDir').value = dir;
+}
+
+async function runModel() {
+    const runBy = $('runBy').value.trim();
+    if (!runBy) { alert('Please enter your name before running.'); return; }
+
+    // Validate required inputs are present
+    const missing = FIELDS.filter(f => f.required && !selected[f.field]).map(f => f.label);
+    if (missing.length) { alert('Missing required inputs:\n- ' + missing.join('\n- ')); return; }
+
+    const payload = {
+        inputs: { ...selected },
+        output_dir: $('outputDir').value.trim(),
+        run_by: runBy,
+        remarks: $('remarks').value,
+        skip_demand_data: $('skip_demand_data').checked,
+        skip_spatial_layers: $('skip_spatial_layers').checked,
+        skip_demographics: $('skip_demographics').checked,
+        run_mc_distribution: $('run_mc_distribution').checked,
+    };
+
+    const res = await window.pywebview.api.start_run(payload);
+    if (!res.ok) { setStatus(res.error, 'err'); return; }
+
+    $('runBtn').disabled = true;
+    $('progressCard').style.display = 'block';
+    $('resultPanel').style.display = 'none';
+    $('logBox').textContent = '';
+    setStatus('Running…');
+    pollTimer = setInterval(poll, 1000);
+}
+
+function setStatus(text, kind) {
+    const el = $('runStatus');
+    el.textContent = text || '';
+    el.className = 'status' + (kind ? ' ' + kind : '');
+}
+
+async function poll() {
+    const s = await window.pywebview.api.poll();
+    const box = $('logBox');
+    box.textContent = s.log || '';
+    box.scrollTop = box.scrollHeight;
+
+    if (s.done) {
+        clearInterval(pollTimer);
+        $('runBtn').disabled = false;
+        if (s.error) {
+            setStatus('Run failed', 'err');
+        } else {
+            setStatus('Run complete', 'ok');
+        }
+        renderResult(s.manifest, s.error);
+    }
+}
+
+function renderResult(manifest, error) {
+    const panel = $('resultPanel');
+    panel.style.display = 'block';
+    if (!manifest) {
+        panel.innerHTML = `<div class="banner err">Run finished but no run log was found.${
+            error ? ' Error: ' + escapeHtml(error) : ''}</div>`;
+        return;
+    }
+
+    const ok = manifest.status === 'success';
+    let html = `<div class="banner ${ok ? 'ok' : 'err'}">
+        ${ok ? '✅ Pipeline complete' : '❌ Pipeline failed'} —
+        run ${escapeHtml(manifest.run_id)} by ${escapeHtml(manifest.run_by)}
+        (${manifest.duration_seconds}s)
+        ${manifest.error_message ? '<br>' + escapeHtml(manifest.error_message) : ''}
+    </div>`;
+
+    const sum = manifest.results_summary || {};
+    if (sum.total_hubs != null) {
+        html += '<div class="summary">';
+        html += `<div class="stat"><b>${sum.total_hubs}</b><span>total hubs</span></div>`;
+        const byTier = sum.hubs_by_tier || {};
+        Object.keys(byTier).forEach(t => {
+            html += `<div class="stat"><b>${byTier[t]}</b><span>${escapeHtml(t)}</span></div>`;
+        });
+        html += '</div>';
+    }
+
+    html += `<p class="hint">Output directory:</p>
+        <ul class="outlist">
+        <li><span class="filename">${escapeHtml(manifest.output_dir)}</span>
+            <button class="btn ghost" onclick="openPath('${jsstr(manifest.output_dir)}')">Open folder</button></li>`;
+    (manifest.outputs || []).forEach(p => {
+        html += `<li><span class="filename">${escapeHtml(p)}</span>
+            <button class="btn ghost" onclick="openPath('${jsstr(p)}')">Open</button></li>`;
+    });
+    html += '</ul>';
+
+    panel.innerHTML = html;
+}
+
+async function openPath(path) {
+    await window.pywebview.api.open_path(path);
+}
+
+function escapeHtml(s) {
+    return String(s == null ? '' : s)
+        .replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
+}
+function jsstr(s) {
+    return String(s == null ? '' : s).replace(/\\/g, '\\\\').replace(/'/g, "\\'");
+}
diff --git a/app/gui/index.html b/app/gui/index.html
new file mode 100644
index 0000000..dd8e51e
--- /dev/null
+++ b/app/gui/index.html
@@ -0,0 +1,94 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Hub Prioritization - Run Model</title>
+    <link rel="stylesheet" href="style.css">
+</head>
+<body>
+<div class="app">
+    <header>
+        <h1>🚆 Hub Prioritization — Run Model</h1>
+        <p class="subtitle">Select inputs, run the pipeline, and produce outputs with a full run log.</p>
+    </header>
+
+    <!-- 1. Run metadata -->
+    <section class="card">
+        <h2>1. Run details</h2>
+        <div class="row">
+            <label for="runBy">Your name <span class="req">*</span></label>
+            <input type="text" id="runBy" placeholder="e.g. Ohad Cohen">
+        </div>
+        <div class="row">
+            <label for="remarks">Remarks / description</label>
+            <textarea id="remarks" rows="3"
+                placeholder="Describe this run: scenario, data version, what you're testing..."></textarea>
+        </div>
+    </section>
+
+    <!-- 2. Inputs -->
+    <section class="card">
+        <h2>2. Input data</h2>
+        <div class="modeToggle">
+            <label><input type="radio" name="mode" value="dir" checked> Select a directory (auto-detect files)</label>
+            <label><input type="radio" name="mode" value="files"> Select files individually</label>
+        </div>
+
+        <!-- Directory mode -->
+        <div id="dirMode" class="modePanel">
+            <div class="row inline">
+                <input type="text" id="dirPath" placeholder="Choose a folder containing the input files..." readonly>
+                <button class="btn" onclick="browseDir()">Browse…</button>
+                <button class="btn ghost" onclick="rescan()">Re-scan</button>
+            </div>
+            <p class="hint">The folder is scanned for the expected files by name and extension.</p>
+        </div>
+
+        <!-- File table (shared by both modes) -->
+        <table class="files">
+            <thead>
+                <tr><th>Input</th><th>File</th><th></th></tr>
+            </thead>
+            <tbody id="fileRows"><!-- filled by JS --></tbody>
+        </table>
+    </section>
+
+    <!-- 3. Options -->
+    <section class="card">
+        <h2>3. Options</h2>
+        <div class="opts">
+            <label><input type="checkbox" id="skip_demand_data"> Skip demand data</label>
+            <label><input type="checkbox" id="skip_spatial_layers"> Skip spatial layers</label>
+            <label><input type="checkbox" id="skip_demographics"> Skip demographics</label>
+            <label><input type="checkbox" id="run_mc_distribution"> Run Monte Carlo distribution analysis</label>
+        </div>
+    </section>
+
+    <!-- 4. Output -->
+    <section class="card">
+        <h2>4. Output location</h2>
+        <div class="row inline">
+            <input type="text" id="outputDir" placeholder="Output directory...">
+            <button class="btn" onclick="browseOutput()">Browse…</button>
+        </div>
+        <p class="hint">Outputs (CSV, GeoJSON, map) and a run log (run_log.json / run_log.txt) are written here.</p>
+    </section>
+
+    <!-- Run -->
+    <section class="runbar">
+        <button class="btn run" id="runBtn" onclick="runModel()">▶ Run model</button>
+        <span id="runStatus" class="status"></span>
+    </section>
+
+    <!-- Progress / results -->
+    <section class="card" id="progressCard" style="display:none;">
+        <h2>Progress</h2>
+        <pre id="logBox" class="logbox"></pre>
+        <div id="resultPanel" style="display:none;"></div>
+    </section>
+</div>
+
+<script src="app.js"></script>
+</body>
+</html>
diff --git a/app/gui/style.css b/app/gui/style.css
new file mode 100644
index 0000000..cf07981
--- /dev/null
+++ b/app/gui/style.css
@@ -0,0 +1,122 @@
+:root {
+    --bg: #f4f6f9;
+    --card: #ffffff;
+    --ink: #1f2933;
+    --muted: #6b7280;
+    --accent: #2563eb;
+    --accent-d: #1d4ed8;
+    --green: #16a34a;
+    --red: #dc2626;
+    --amber: #d97706;
+    --border: #e5e7eb;
+}
+
+* { box-sizing: border-box; }
+
+body {
+    margin: 0;
+    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Arial, sans-serif;
+    background: var(--bg);
+    color: var(--ink);
+    font-size: 14px;
+}
+
+.app { max-width: 980px; margin: 0 auto; padding: 24px 20px 60px; }
+
+header h1 { margin: 0 0 4px; font-size: 24px; }
+.subtitle { margin: 0 0 18px; color: var(--muted); }
+
+.card {
+    background: var(--card);
+    border: 1px solid var(--border);
+    border-radius: 12px;
+    padding: 18px 20px;
+    margin-bottom: 16px;
+    box-shadow: 0 1px 2px rgba(0,0,0,0.03);
+}
+
+.card h2 { margin: 0 0 14px; font-size: 16px; }
+
+.row { margin-bottom: 12px; }
+.row label { display: block; font-weight: 600; margin-bottom: 5px; }
+.row.inline { display: flex; gap: 8px; align-items: center; }
+
+input[type=text], textarea {
+    width: 100%;
+    padding: 9px 11px;
+    border: 1px solid var(--border);
+    border-radius: 8px;
+    font-size: 14px;
+    font-family: inherit;
+    background: #fff;
+}
+input[readonly] { background: #f9fafb; }
+textarea { resize: vertical; }
+
+.req { color: var(--red); }
+.hint { color: var(--muted); font-size: 12px; margin: 6px 0 0; }
+
+.btn {
+    background: var(--accent);
+    color: #fff;
+    border: none;
+    border-radius: 8px;
+    padding: 9px 14px;
+    font-size: 14px;
+    cursor: pointer;
+    white-space: nowrap;
+}
+.btn:hover { background: var(--accent-d); }
+.btn.ghost { background: #eef2ff; color: var(--accent-d); }
+.btn.ghost:hover { background: #e0e7ff; }
+.btn.run { font-size: 16px; padding: 12px 28px; background: var(--green); }
+.btn.run:hover { background: #15803d; }
+.btn:disabled { opacity: 0.5; cursor: not-allowed; }
+
+.modeToggle { display: flex; gap: 22px; margin-bottom: 12px; }
+.modeToggle label { font-weight: 600; cursor: pointer; }
+.modePanel { margin-bottom: 14px; }
+
+table.files { width: 100%; border-collapse: collapse; margin-top: 6px; }
+table.files th, table.files td {
+    text-align: left; padding: 8px 6px; border-bottom: 1px solid var(--border);
+    vertical-align: middle;
+}
+table.files th { color: var(--muted); font-size: 12px; text-transform: uppercase; letter-spacing: .03em; }
+.filename { font-family: ui-monospace, "SF Mono", Menlo, Consolas, monospace; font-size: 12px; }
+.filename.missing { color: var(--muted); font-style: italic; }
+.tag { font-size: 10px; padding: 1px 6px; border-radius: 99px; margin-left: 6px; }
+.tag.req { background: #fee2e2; color: var(--red); }
+.tag.ok  { background: #dcfce7; color: var(--green); }
+.tag.opt { background: #f3f4f6; color: var(--muted); }
+
+.opts { display: grid; grid-template-columns: 1fr 1fr; gap: 10px; }
+.opts label { font-weight: 500; cursor: pointer; }
+
+.runbar { display: flex; align-items: center; gap: 16px; margin: 4px 0 16px; }
+.status { color: var(--muted); }
+.status.err { color: var(--red); font-weight: 600; }
+.status.ok { color: var(--green); font-weight: 600; }
+
+.logbox {
+    background: #0f172a; color: #d1d5db;
+    padding: 14px; border-radius: 8px;
+    height: 260px; overflow-y: auto;
+    font-family: ui-monospace, Menlo, Consolas, monospace; font-size: 12px;
+    white-space: pre-wrap; line-height: 1.45;
+}
+
+#resultPanel { margin-top: 16px; }
+.banner { padding: 12px 14px; border-radius: 8px; font-weight: 600; margin-bottom: 12px; }
+.banner.ok { background: #dcfce7; color: #166534; }
+.banner.err { background: #fee2e2; color: #991b1b; }
+.outlist { list-style: none; padding: 0; margin: 0; }
+.outlist li {
+    display: flex; justify-content: space-between; align-items: center;
+    padding: 7px 0; border-bottom: 1px solid var(--border);
+}
+.outlist .filename { word-break: break-all; }
+.summary { display: flex; gap: 22px; margin: 8px 0 14px; }
+.summary .stat { background: #f9fafb; border: 1px solid var(--border); border-radius: 8px; padding: 10px 16px; }
+.summary .stat b { display: block; font-size: 20px; }
+.summary .stat span { color: var(--muted); font-size: 12px; }
diff --git a/app/run_model_gui.py b/app/run_model_gui.py
new file mode 100644
index 0000000..8521f60
--- /dev/null
+++ b/app/run_model_gui.py
@@ -0,0 +1,274 @@
+"""
+Hub Prioritization - Run Model GUI
+==================================
+
+An interactive desktop GUI (HTML/JS rendered in a native window via pywebview)
+for running the full hub prioritization pipeline.
+
+Features
+--------
+* Native OS folder / file pickers for selecting inputs.
+* Select a single directory and have the app auto-detect all input files,
+  or pick each file individually.
+* Enter who is running the model and free-text remarks.
+* Choose an output directory; every run writes its outputs there together
+  with a run log (run_log.json / run_log.txt) recording which files were used,
+  timestamps, who ran it, the remarks, and the outputs produced.
+* Live log streaming while the pipeline runs.
+
+Run with:
+    python app/run_model_gui.py
+"""
+
+import sys
+import json
+import logging
+import threading
+from pathlib import Path
+from datetime import datetime
+
+# --- Make the project importable ---------------------------------------------
+PROJECT_ROOT = Path(__file__).resolve().parent.parent
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+try:
+    import webview  # pywebview
+except ImportError:
+    print("ERROR: pywebview is not installed.\n"
+          "Install it with:  pip install pywebview\n"
+          "(On Linux you may also need a backend, e.g. PyGObject + WebKit2GTK,\n"
+          " or pyqt5/pyqtwebengine.)")
+    sys.exit(1)
+
+# Import the pipeline. Importing this module never prompts on the console: its
+# interactive dependency check only runs when the script is executed directly,
+# so a missing package fails the import instead - install requirements.txt first.
+from scripts.run_complete_pipeline import (
+    RunConfig,
+    run_pipeline,
+    resolve_inputs_from_directory,
+    INPUT_FILE_HINTS,
+    REQUIRED_INPUTS,
+    logger as pipeline_logger,
+)
+from src.config import RESULTS_DIR
+
+GUI_DIR = Path(__file__).resolve().parent / "gui"
+
+# Ordered list of input fields shown in the GUI (field, label, required)
+INPUT_FIELDS = [
+    ("transit_nodes", "Transit nodes (CSV)", True),
+    ("lines_modes", "Lines & planned modes (CSV)", True),
+    ("demand", "Demand forecast (Excel/CSV)", False),
+    ("metro_areas", "Metro areas (SHP)", False),
+    ("districts", "Districts (SHP)", False),
+    ("taz_zones", "TAZ zones / demographics (SHP)", False),
+    ("bus_terminals", "Bus terminals (SHP)", False),
+]
+
+# File-type filters for the native open dialog, per field
+FILE_TYPES = {
+    "transit_nodes": ("CSV files (*.csv)", "All files (*.*)"),
+    "lines_modes": ("CSV files (*.csv)", "All files (*.*)"),
+    "demand": ("Spreadsheet (*.xlsx;*.xls;*.csv)", "All files (*.*)"),
+    "metro_areas": ("Shapefile (*.shp)", "All files (*.*)"),
+    "districts": ("Shapefile (*.shp)", "All files (*.*)"),
+    "taz_zones": ("Shapefile (*.shp)", "All files (*.*)"),
+    "bus_terminals": ("Shapefile (*.shp)", "All files (*.*)"),
+}
+
+
+class _LogCapture(logging.Handler):
+    """Logging handler that buffers formatted records for the GUI to poll."""
+
+    def __init__(self, sink):
+        super().__init__(level=logging.INFO)
+        self._sink = sink
+        self.setFormatter(logging.Formatter("%(message)s"))
+
+    def emit(self, record):
+        try:
+            self._sink(self.format(record))
+        except Exception:
+            pass
+
+
+class Api:
+    """Methods exposed to the JavaScript front-end via pywebview."""
+
+    def __init__(self):
+        self.window = None
+        self._lock = threading.Lock()
+        self._log_lines = []
+        self._running = False
+        self._done = False
+        self._error = None
+        self._manifest = None
+        self._handler = None
+
+    # -- metadata for the UI ------------------------------------------------
+    def get_input_fields(self):
+        return [{"field": f, "label": l, "required": r} for f, l, r in INPUT_FIELDS]
+
+    def default_output_dir(self):
+        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
+        return str(RESULTS_DIR / f"run_{ts}")
+
+    # -- native dialogs -----------------------------------------------------
+    def pick_folder(self):
+        result = self.window.create_file_dialog(webview.FOLDER_DIALOG)
+        if not result:
+            return None
+        return result[0] if isinstance(result, (list, tuple)) else result
+
+    def pick_file(self, field=None):
+        file_types = FILE_TYPES.get(field, ("All files (*.*)",))
+        result = self.window.create_file_dialog(
+            webview.OPEN_DIALOG, allow_multiple=False, file_types=file_types
+        )
+        if not result:
+            return None
+        return result[0] if isinstance(result, (list, tuple)) else result
+
+    # -- scan a directory for inputs ---------------------------------------
+    def scan_directory(self, directory):
+        """Auto-detect input files inside a directory."""
+        try:
+            return {"ok": True, "matches": resolve_inputs_from_directory(directory)}
+        except Exception as e:
+            return {"ok": False, "error": str(e)}
+
+    # -- run ----------------------------------------------------------------
+    def _append_log(self, line):
+        with self._lock:
+            self._log_lines.append(line)
+
+    def start_run(self, payload):
+        """Validate the payload and start the pipeline in a background thread.
+
+        payload keys: inputs (dict field->path), output_dir, run_by, remarks,
+        skip_demand_data, skip_spatial_layers, skip_demographics,
+        run_mc_distribution.
+        """
+        with self._lock:
+            if self._running:
+                return {"ok": False, "error": "A run is already in progress."}
+
+        inputs = payload.get("inputs", {}) or {}
+
+        # Build the config
+        try:
+            cfg = RunConfig(
+                transit_nodes=inputs.get("transit_nodes") or None,
+                lines_modes=inputs.get("lines_modes") or None,
+                demand=inputs.get("demand") or None,
+                metro_areas=inputs.get("metro_areas") or None,
+                districts=inputs.get("districts") or None,
+                taz_zones=inputs.get("taz_zones") or None,
+                bus_terminals=inputs.get("bus_terminals") or None,
+                output_dir=payload.get("output_dir") or None,
+                skip_demand_data=bool(payload.get("skip_demand_data")),
+                skip_spatial_layers=bool(payload.get("skip_spatial_layers")),
+                skip_demographics=bool(payload.get("skip_demographics")),
+                run_mc_distribution=bool(payload.get("run_mc_distribution")),
+                run_by=payload.get("run_by", "").strip(),
+                remarks=payload.get("remarks", "").strip(),
+            )
+        except Exception as e:
+            return {"ok": False, "error": f"Invalid configuration: {e}"}
+
+        problems = cfg.validate()
+        if problems:
+            return {"ok": False, "error": " ; ".join(problems)}
+
+        # Reset state
+        with self._lock:
+            self._log_lines = []
+            self._running = True
+            self._done = False
+            self._error = None
+            self._manifest = None
+
+        # Attach log capture
+        self._handler = _LogCapture(self._append_log)
+        pipeline_logger.addHandler(self._handler)
+
+        thread = threading.Thread(target=self._worker, args=(cfg,), daemon=True)
+        thread.start()
+        return {"ok": True, "output_dir": str(cfg.output_dir)}
+
+    def _worker(self, cfg):
+        try:
+            self._append_log(f"Starting run {cfg.run_id} (by {cfg.run_by or 'unknown'})...")
+            run_pipeline(cfg)
+            # Read the manifest the pipeline wrote
+            manifest_path = Path(cfg.output_dir) / "run_log.json"
+            if manifest_path.exists():
+                with open(manifest_path, encoding="utf-8") as f:
+                    self._manifest = json.load(f)
+        except Exception as e:
+            with self._lock:
+                self._error = str(e)
+            self._append_log(f"ERROR: {e}")
+            # Try to still load a manifest (the pipeline writes one on failure)
+            try:
+                manifest_path = Path(cfg.output_dir) / "run_log.json"
+                if manifest_path.exists():
+                    with open(manifest_path, encoding="utf-8") as f:
+                        self._manifest = json.load(f)
+            except Exception:
+                pass
+        finally:
+            with self._lock:
+                self._running = False
+                self._done = True
+            if self._handler is not None:
+                pipeline_logger.removeHandler(self._handler)
+                self._handler = None
+
+    def poll(self):
+        """Return current run state for the UI to render."""
+        with self._lock:
+            return {
+                "running": self._running,
+                "done": self._done,
+                "error": self._error,
+                "log": "\n".join(self._log_lines),
+                "manifest": self._manifest,
+            }
+
+    def open_path(self, path):
+        """Open a file/folder in the OS default application."""
+        import os
+        import subprocess
+        try:
+            p = Path(path)
+            target = str(p if p.exists() else p.parent)
+            if sys.platform.startswith("darwin"):
+                subprocess.Popen(["open", target])
+            elif sys.platform.startswith("win"):
+                os.startfile(target)  # type: ignore[attr-defined]
+            else:
+                subprocess.Popen(["xdg-open", target])
+            return {"ok": True}
+        except Exception as e:
+            return {"ok": False, "error": str(e)}
+
+
+def main():
+    api = Api()
+    window = webview.create_window(
+        "Hub Prioritization - Run Model",
+        url=str(GUI_DIR / "index.html"),
+        js_api=api,
+        width=1100,
+        height=860,
+        min_size=(900, 700),
+    )
+    api.window = window
+    webview.start()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index dcbd7ba..5fe67cf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,3 +38,6 @@ sphinx>=6.0.0
 # Streamlit App
 streamlit>=1.28.0
 plotly>=5.18.0
+
+# Desktop GUI for running the model (native window + file dialogs)
+pywebview>=4.4.1
diff --git a/scripts/run_complete_pipeline.py b/scripts/run_complete_pipeline.py
index 5df92d7..29a17fe 100644
--- a/scripts/run_complete_pipeline.py
+++ b/scripts/run_complete_pipeline.py
@@ -90,8 +90,11 @@ def check_and_install_dependencies():
             print("Please install packages manually and run again.")
             sys.exit(1)
 
-# Run dependency check
-check_and_install_dependencies()
+# Run the interactive dependency check only when this script is executed
+# directly. When the module is imported (e.g. by the GUI), we must not block on
+# input(); missing imports will surface naturally as ImportError instead.
+if __name__ == "__main__":
+    check_and_install_dependencies()
 
 # ============================================================================
 # Now import everything else
@@ -114,6 +117,12 @@ def check_and_install_dependencies():
     print("\nPlease ensure the project structure is intact.")
     sys.exit(1)
 
+import json
+import logging
+import hashlib
+from dataclasses import dataclass, field, asdict
+from typing import Optional, Dict, List
+
 import geopandas as gpd
 import pandas as pd
 from datetime import datetime
@@ -159,7 +168,7 @@ def check_and_install_dependencies():
 
 
 # ============================================================================
-# CONFIGURE YOUR INPUT FILE PATHS HERE
+# DEFAULT INPUT FILE PATHS (used by the CLI / as fallbacks)
 # ============================================================================
 
 # Transit network (REQUIRED)
@@ -175,10 +184,161 @@ def check_and_install_dependencies():
 INPUT_TAZ_ZONES = RAW_DATA_DIR / "TAZ_2050.shp"  # With POP_2050 and EMPL_2050
 INPUT_BUS_TERMINALS = RAW_DATA_DIR / "bus_terminals.shp"  # Optional
 
-# Processing options
-SKIP_DEMAND_DATA = False  # Set True if you don't have demand data yet
-SKIP_SPATIAL_LAYERS = False  # Set True if you don't have spatial layers yet
-SKIP_DEMOGRAPHICS = False  # Set True if you don't have TAZ data yet
+
+# ============================================================================
+# RUN CONFIGURATION
+# ============================================================================
+# A RunConfig fully describes one execution of the pipeline: which input files
+# to use, where to write outputs, which optional steps to run, and run metadata
+# (who ran it and why). The GUI builds a RunConfig from user selections; the
+# CLI builds a default one from the paths above.
+
+# Field name -> {'ext': allowed lowercase suffixes, 'contains': lowercase
+# substrings looked for in the filename stem}; read by resolve_inputs_from_directory().
+INPUT_FILE_HINTS = {  # order matters: earlier fields claim their file first
+    'transit_nodes': {'ext': ['.csv'], 'contains': ['nodes']},
+    'lines_modes': {'ext': ['.csv'], 'contains': ['lines', 'mode']},
+    'demand': {'ext': ['.xlsx', '.xls', '.csv'], 'contains': ['demand', 'nodes_w_results']},
+    'metro_areas': {'ext': ['.shp'], 'contains': ['metro']},
+    'districts': {'ext': ['.shp'], 'contains': ['district', 'machoz']},
+    'taz_zones': {'ext': ['.shp'], 'contains': ['taz']},
+    'bus_terminals': {'ext': ['.shp'], 'contains': ['terminal', 'bus_term']},
+}
+
+# Inputs that RunConfig.validate() requires to be set and present on disk
+REQUIRED_INPUTS = ['transit_nodes', 'lines_modes']
+
+
+@dataclass
+class RunConfig:
+    """Full configuration for a single pipeline run (inputs, output, options, metadata)."""
+
+    # --- Input files (None or a blank string = not provided) ---
+    transit_nodes: Optional[Path] = None
+    lines_modes: Optional[Path] = None
+    demand: Optional[Path] = None
+    metro_areas: Optional[Path] = None
+    districts: Optional[Path] = None
+    taz_zones: Optional[Path] = None
+    bus_terminals: Optional[Path] = None
+
+    # --- Output (None = RESULTS_DIR / "run_<run_id>") ---
+    output_dir: Optional[Path] = None
+
+    # --- Optional-step toggles ---
+    skip_demand_data: bool = False
+    skip_spatial_layers: bool = False
+    skip_demographics: bool = False
+    run_mc_distribution: bool = False
+
+    # --- Run metadata (for the run log) ---
+    run_by: str = ""
+    remarks: str = ""
+    run_id: str = field(default_factory=lambda: datetime.now().strftime('%Y%m%d_%H%M%S'))
+
+    def __post_init__(self):
+        # Normalise provided paths to Path objects; blank strings become None
+        for f in ['transit_nodes', 'lines_modes', 'demand', 'metro_areas',
+                  'districts', 'taz_zones', 'bus_terminals', 'output_dir']:
+            val = getattr(self, f)
+            # Path("") is Path("."), which exists and would slip past validate()
+            blank = val is None or (isinstance(val, str) and not val.strip())
+            setattr(self, f, None if blank else Path(val))
+        # Default output dir: data/results/run_<run_id>/
+        if self.output_dir is None:
+            self.output_dir = RESULTS_DIR / f"run_{self.run_id}"
+
+    @property
+    def results_dir(self) -> Path:
+        """Directory for final outputs (CSV / GeoJSON / map / logs); created on access."""
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+        return self.output_dir
+
+    @property
+    def processed_dir(self) -> Path:
+        """Directory for intermediate artefacts (output_dir/processed); created on access."""
+        d = self.output_dir / "processed"
+        d.mkdir(parents=True, exist_ok=True)
+        return d
+
+    def validate(self) -> List[str]:
+        """Return human-readable problems with the REQUIRED_INPUTS (empty list = OK)."""
+        problems = []
+        for key in REQUIRED_INPUTS:
+            val = getattr(self, key)
+            if val is None:
+                problems.append(f"Missing required input: {key}")
+            elif not Path(val).exists():
+                problems.append(f"Required input not found on disk: {val}")
+        return problems
+
+
+def default_run_config(**overrides) -> RunConfig:
+    """Build a RunConfig from the default RAW_DATA_DIR paths (CLI behaviour).
+
+    Overrides are passed to the RunConfig constructor (unknown names raise
+    TypeError), so derived values such as output_dir follow an overridden run_id.
+    """
+    optional = {'demand': INPUT_DEMAND_CSV, 'metro_areas': INPUT_METRO_AREAS,
+                'districts': INPUT_DISTRICTS, 'taz_zones': INPUT_TAZ_ZONES,
+                'bus_terminals': INPUT_BUS_TERMINALS}
+    # Optional defaults are only offered when the file is actually on disk
+    kwargs = {name: (p if p.exists() else None) for name, p in optional.items()}
+    kwargs.update(transit_nodes=INPUT_TRANSIT_NODES, lines_modes=INPUT_LINES_MODES)
+    # Apply overrides *before* construction so __post_init__ runs exactly once
+    kwargs.update(overrides)
+    return RunConfig(**kwargs)
+
+
+def resolve_inputs_from_directory(directory) -> Dict[str, Optional[str]]:
+    """Scan a directory tree and auto-match expected input files by name/extension.
+
+    Args:
+        directory: Folder to scan recursively (str or Path).
+
+    Returns:
+        Dict mapping each input field name to a matched file path (or None).
+    """
+    directory = Path(directory)
+    found: Dict[str, Optional[str]] = {k: None for k in INPUT_FILE_HINTS}
+    if not directory.is_dir():
+        return found
+
+    # Sorted so that ties are broken the same way on every run / filesystem
+    files = sorted(p for p in directory.rglob('*') if p.is_file())
+    used: set = set()  # paths already claimed by an earlier field
+
+    def score(path: Path, hints) -> int:
+        """Number of hint words found in the filename stem (0 = no name match)."""
+        stem = path.stem.lower()
+        return sum(1 for c in hints['contains'] if c in stem)
+
+    def named_for_other_field(path: Path, field_name: str) -> bool:
+        """True if the filename matches the hints of a different field."""
+        return any(score(path, h) > 0 for f, h in INPUT_FILE_HINTS.items()
+                   if f != field_name and path.suffix.lower() in h['ext'])
+
+    # Fields claim files in INPUT_FILE_HINTS order (specific names listed first)
+    for field_name, hints in INPUT_FILE_HINTS.items():
+        best, best_score = None, 0
+        for ext in hints['ext']:
+            candidates = [p for p in files
+                          if p.suffix.lower() == ext and p not in used]
+            for p in candidates:  # prefer the candidate matching the most hints
+                s = score(p, hints)
+                if s > best_score:
+                    best, best_score = p, s
+            # No name match: accept a lone file of a single allowed extension,
+            # unless its name belongs to another field (lone TAZ .shp != metro).
+            if (best is None and len(hints['ext']) == 1 and len(candidates) == 1
+                    and not named_for_other_field(candidates[0], field_name)):
+                best = candidates[0]
+            if best is not None:
+                break
+        if best is not None:
+            used.add(best)
+            found[field_name] = str(best)
+    return found
 
 # ============================================================================
 
@@ -186,9 +346,23 @@ def check_and_install_dependencies():
 class CompleteHubPipeline:
     """Complete pipeline with all data sources."""
 
-    def __init__(self):
+    def __init__(self, config: Optional[RunConfig] = None):
+        self.config = config if config is not None else default_run_config()
         self.logger = logger
-        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        self.timestamp = self.config.run_id
+        self.started_at = datetime.now()
+        self.output_files: List[str] = []
+        self.status = "running"
+        self.error_message = None
+
+        # Attach a per-run log file inside the output directory
+        self._run_log_path = self.config.results_dir / f"run_{self.timestamp}.log"
+        self._run_log_handler = logging.FileHandler(self._run_log_path, encoding='utf-8')
+        self._run_log_handler.setLevel(logging.DEBUG)
+        self._run_log_handler.setFormatter(
+            logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        )
+        logger.addHandler(self._run_log_handler)
 
         # Data holders
         self.transit_nodes = None
@@ -219,43 +393,45 @@ def step_1_load_all_data(self):
         logger.info("STEP 1: LOAD ALL INPUT DATA")
         logger.info("="*80)
 
+        cfg = self.config
+
         # Transit network (required)
         logger.info("\n1.1: Loading transit nodes...")
-        self.transit_nodes = loaders.load_transit_nodes(INPUT_TRANSIT_NODES)
+        self.transit_nodes = loaders.load_transit_nodes(cfg.transit_nodes)
 
         logger.info("\n1.2: Loading lines and modes...")
-        self.lines_modes = loaders.load_lines_and_modes(INPUT_LINES_MODES)
+        self.lines_modes = loaders.load_lines_and_modes(cfg.lines_modes)
 
         # Demand data (optional)
-        if not SKIP_DEMAND_DATA and INPUT_DEMAND_CSV.exists():
+        if not cfg.skip_demand_data and cfg.demand and Path(cfg.demand).exists():
             logger.info("\n1.3: Loading demand data...")
-            self.demand_data = loaders.load_demand_data(INPUT_DEMAND_CSV)
+            self.demand_data = loaders.load_demand_data(cfg.demand)
         else:
             logger.warning("⚠ Skipping demand data (file not found or disabled)")
 
         # Spatial layers (optional)
-        if not SKIP_SPATIAL_LAYERS:
-            if INPUT_METRO_AREAS.exists():
+        if not cfg.skip_spatial_layers:
+            if cfg.metro_areas and Path(cfg.metro_areas).exists():
                 logger.info("\n1.4: Loading metro areas...")
-                self.metro_areas = loaders.load_metro_areas(INPUT_METRO_AREAS)
+                self.metro_areas = loaders.load_metro_areas(cfg.metro_areas)
             else:
                 logger.warning("⚠ Metro areas not found")
 
-            if INPUT_DISTRICTS.exists():
+            if cfg.districts and Path(cfg.districts).exists():
                 logger.info("\n1.5: Loading districts...")
-                self.districts = loaders.load_districts(INPUT_DISTRICTS)
+                self.districts = loaders.load_districts(cfg.districts)
             else:
                 logger.warning("⚠ Districts not found")
 
-            if INPUT_TAZ_ZONES.exists() and not SKIP_DEMOGRAPHICS:
+            if cfg.taz_zones and Path(cfg.taz_zones).exists() and not cfg.skip_demographics:
                 logger.info("\n1.6: Loading TAZ zones...")
-                self.taz_zones = loaders.load_taz_zones(INPUT_TAZ_ZONES)
+                self.taz_zones = loaders.load_taz_zones(cfg.taz_zones)
             else:
                 logger.warning("⚠ TAZ zones not found or skipped")
 
-            if INPUT_BUS_TERMINALS.exists():
+            if cfg.bus_terminals and Path(cfg.bus_terminals).exists():
                 logger.info("\n1.7: Loading bus terminals...")
-                self.bus_terminals = loaders.load_bus_terminals(INPUT_BUS_TERMINALS)
+                self.bus_terminals = loaders.load_bus_terminals(cfg.bus_terminals)
             else:
                 logger.warning("⚠ Bus terminals not found (optional)")
 
@@ -284,7 +460,7 @@ def step_2_create_h3_hexagons(self):
         )
 
         # Save intermediate
-        output_path = PROCESSED_DATA_DIR / f"h3_hexagons_{self.timestamp}.csv"
+        output_path = self.config.processed_dir / f"h3_hexagons_{self.timestamp}.csv"
         export_df = self.h3_hexagons.copy()
         export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt)
         export_df.to_csv(output_path, index=False, encoding='utf-8-sig')
@@ -305,7 +481,7 @@ def step_3_group_hexagons(self):
         self.grouped_hubs = merging.aggregate_groups(hexagons_grouped)
 
         # Save
-        output_path = PROCESSED_DATA_DIR / f"grouped_hubs_{self.timestamp}.csv"
+        output_path = self.config.processed_dir / f"grouped_hubs_{self.timestamp}.csv"
         export_df = self.grouped_hubs.copy()
         export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt)
         export_df.to_csv(output_path, index=False, encoding='utf-8-sig')
@@ -324,7 +500,7 @@ def step_4_add_demand_data(self):
         logger.info("STEP 4: ADD DEMAND DATA")
         logger.info("="*80)
 
-        if SKIP_DEMAND_DATA:
+        if self.config.skip_demand_data:
             logger.warning("⚠ Skipping demand data - disabled")
             self.hubs_with_demand = self.grouped_hubs.copy()
             self.hubs_with_demand['TotalDemand'] = 5000  # Placeholder
@@ -595,23 +771,32 @@ def process_sheet(df, sheet_name, region_name):
             logger.info(f"    Processed {rows_processed} rows, {len(result)} unique nodes")
             return result
 
-        # Try to find demand file (Excel or CSV)
-        demand_excel = RAW_DATA_DIR / "Demand_2050_all.xlsx"
-        demand_xls = RAW_DATA_DIR / "Demand_2050_all.xls"
-        # Also check for the original filename pattern
-        demand_excel_alt = RAW_DATA_DIR / "Nodes_w_results_21082025.xlsx"
-        demand_csv = INPUT_DEMAND_CSV
-
-        node_demand = {}
+        # Determine demand file from the run configuration. Excel (multi-sheet)
+        # is preferred; a single CSV is also supported. Fall back to the legacy
+        # default locations only when no demand file was configured.
+        configured_demand = Path(self.config.demand) if self.config.demand else None
 
-        try:
-            # Try Excel file first (with multiple sheets)
-            excel_file = None
-            for ef in [demand_excel, demand_xls, demand_excel_alt]:
+        excel_file = None
+        demand_csv = None
+        if configured_demand and configured_demand.exists():
+            if configured_demand.suffix.lower() in ('.xlsx', '.xls'):
+                excel_file = configured_demand
+            else:
+                demand_csv = configured_demand
+        else:
+            # Legacy fallbacks (CLI without explicit config)
+            for ef in [RAW_DATA_DIR / "Demand_2050_all.xlsx",
+                       RAW_DATA_DIR / "Demand_2050_all.xls",
+                       RAW_DATA_DIR / "Nodes_w_results_21082025.xlsx"]:
                 if ef.exists():
                     excel_file = ef
                     break
+            if excel_file is None and INPUT_DEMAND_CSV.exists():
+                demand_csv = INPUT_DEMAND_CSV
+
+        node_demand = {}
 
+        try:
             if excel_file:
                 logger.info(f"Loading demand from Excel: {excel_file}")
 
@@ -654,7 +839,7 @@ def process_sheet(df, sheet_name, region_name):
                         logger.warning(f"    Error loading sheet '{sheet}': {e}")
 
             # If no Excel or no data found, try CSV
-            elif demand_csv.exists():
+            elif demand_csv and demand_csv.exists():
                 logger.info(f"Loading demand from CSV: {demand_csv}")
                 df_demand = pd.read_csv(demand_csv, encoding='utf-8-sig')
                 logger.info(f"Loaded {len(df_demand)} rows")
@@ -666,7 +851,7 @@ def process_sheet(df, sheet_name, region_name):
 
             else:
                 logger.warning("⚠ No demand file found!")
-                logger.info(f"  Looked for: {demand_excel}, {demand_xls}, {demand_excel_alt}, {demand_csv}")
+                logger.info(f"  Configured demand file: {configured_demand}")
                 self.hubs_with_demand = self.grouped_hubs.copy()
                 self.hubs_with_demand['TotalDemand'] = 5000  # Placeholder
                 self.hubs_with_demand['TotalTransfers'] = 0
@@ -898,7 +1083,7 @@ def step_5_add_spatial_tags(self):
         hubs_proj = hubs_tagged.to_crs('EPSG:2039')
 
         # Tag with district (for region determination)
-        if self.districts is not None and not SKIP_SPATIAL_LAYERS:
+        if self.districts is not None and not self.config.skip_spatial_layers:
             logger.info("Spatial join with districts...")
             try:
                 # Find district column
@@ -963,7 +1148,7 @@ def step_6_add_demographics(self):
         logger.info("STEP 6: ADD DEMOGRAPHIC DATA")
         logger.info("="*80)
 
-        if SKIP_DEMOGRAPHICS or self.taz_zones is None:
+        if self.config.skip_demographics or self.taz_zones is None:
             logger.warning("⚠ Skipping demographics - no TAZ data")
             # Add placeholder columns
             for zone in ['zone1', 'zone2', 'zone3']:
@@ -987,17 +1172,18 @@ def step_6_add_demographics(self):
         processor = InfluenceAreaProcessor()
 
         # Save hubs temporarily
-        temp_csv = PROCESSED_DATA_DIR / f"temp_hubs_{self.timestamp}.csv"
+        temp_csv = self.config.processed_dir / f"temp_hubs_{self.timestamp}.csv"
         export_df = self.hubs_with_demand.copy()
         export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt if x else None)
         export_df.to_csv(temp_csv, index=False, encoding='utf-8-sig')
 
         # Process influence areas
         try:
+            _terminals = self.config.bus_terminals
             result_gdf = processor.process_full_pipeline(
                 hubs_csv=str(temp_csv),
-                taz_shp=str(INPUT_TAZ_ZONES),
-                terminals_shp=str(INPUT_BUS_TERMINALS) if INPUT_BUS_TERMINALS.exists() else None,
+                taz_shp=str(self.config.taz_zones),
+                terminals_shp=str(_terminals) if _terminals and Path(_terminals).exists() else None,
                 output_csv=None  # Don't save intermediate file
             )
             self.hubs_with_demographics = result_gdf
@@ -1125,13 +1311,14 @@ def step_11_run_mc_distribution(self):
         logger.info("STEP 11: MONTE CARLO DISTRIBUTION ANALYSIS (OPTIONAL)")
         logger.info("="*80)
 
-        # Check if user wants to run MC distribution
-        run_mc_dist = os.environ.get('RUN_MC_DISTRIBUTION', 'false').lower() == 'true'
+        # Check if user wants to run MC distribution (config flag or env var)
+        run_mc_dist = self.config.run_mc_distribution or \
+            os.environ.get('RUN_MC_DISTRIBUTION', 'false').lower() == 'true'
 
         if not run_mc_dist:
             logger.info("⊘ Skipping MC distribution analysis (not requested)")
-            logger.info("  To enable: Set environment variable RUN_MC_DISTRIBUTION=true")
-            logger.info("  or modify this script to set run_mc_dist = True")
+            logger.info("  To enable: set run_mc_distribution=True in the RunConfig")
+            logger.info("  or set environment variable RUN_MC_DISTRIBUTION=true")
             logger.info("✓ Step 11 complete (skipped)")
             return
 
@@ -1163,7 +1350,7 @@ def step_11_run_mc_distribution(self):
             score_matrix.index = self.scored_hubs.index
 
             # Run distribution analysis
-            mc_dist_dir = RESULTS_DIR / f'mc_distribution_{self.timestamp}'
+            mc_dist_dir = self.config.results_dir / f'mc_distribution_{self.timestamp}'
             mc_results = run_mc_distribution_analysis(
                 score_matrix=score_matrix,
                 output_dir=str(mc_dist_dir),
@@ -1197,28 +1384,175 @@ def step_12_export_results(self):
         logger.info("STEP 12: EXPORT RESULTS")
         logger.info("="*80)
 
+        results_dir = self.config.results_dir
+
         # CSV
-        csv_path = RESULTS_DIR / f"hub_prioritization_results_{self.timestamp}.csv"
+        csv_path = results_dir / f"hub_prioritization_results_{self.timestamp}.csv"
         export_df = self.scored_hubs.copy()
         export_df['geometry'] = export_df['geometry'].apply(lambda x: x.wkt if x else None)
         export_df.to_csv(csv_path, index=False, encoding='utf-8-sig')
+        self.output_files.append(str(csv_path))
         logger.info(f"✓ CSV: {csv_path}")
 
         # GeoJSON
-        geojson_path = RESULTS_DIR / f"hub_results_{self.timestamp}.geojson"
+        geojson_path = results_dir / f"hub_results_{self.timestamp}.geojson"
         self.scored_hubs.to_file(geojson_path, driver='GeoJSON')
+        self.output_files.append(str(geojson_path))
         logger.info(f"✓ GeoJSON: {geojson_path}")
 
         # Map
         try:
-            map_path = RESULTS_DIR / f"hub_map_{self.timestamp}.html"
+            map_path = results_dir / f"hub_map_{self.timestamp}.html"
             maps.create_hub_map(self.scored_hubs, color_by='final_score', output_file=str(map_path))
+            self.output_files.append(str(map_path))
             logger.info(f"✓ Map: {map_path}")
         except Exception as e:
             logger.warning(f"Could not create map: {e}")
 
         logger.info("✓ Step 12 complete")
 
+    # ------------------------------------------------------------------
+    # Run logging / manifest
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _file_info(path) -> Dict:
+        """Describe an input file: path, existence, name, size, mtime and sha256."""
+        details = {'path': str(path), 'exists': False}
+        try:
+            target = Path(path)
+            if not target.exists():
+                return details
+            st = target.stat()
+            details.update(
+                exists=True, name=target.name, size_bytes=st.st_size,
+                modified=datetime.fromtimestamp(st.st_mtime).isoformat(timespec='seconds'))
+            # Files above 500 MiB are left unhashed to keep the manifest fast
+            if st.st_size <= 500 * 1024 * 1024:
+                digest = hashlib.sha256()
+                with open(target, 'rb') as stream:
+                    block = stream.read(1024 * 1024)
+                    while block:
+                        digest.update(block)
+                        block = stream.read(1024 * 1024)
+                details['sha256'] = digest.hexdigest()
+        except Exception as exc:
+            details['error'] = str(exc)
+        return details
+
+    def write_run_manifest(self):
+        """Write run_log.json and run_log.txt into the results dir; return the manifest dict."""
+        cfg = self.config
+        finished_at = datetime.now()
+
+        inputs = {}
+        for key in ['transit_nodes', 'lines_modes', 'demand', 'metro_areas',
+                    'districts', 'taz_zones', 'bus_terminals']:
+            val = getattr(cfg, key)
+            inputs[key] = self._file_info(val) if val else None  # None = input not provided
+
+        summary = {}
+        if self.scored_hubs is not None:  # summary stays empty when there are no scored hubs
+            summary['total_hubs'] = int(len(self.scored_hubs))
+            if 'tier' in self.scored_hubs.columns:
+                summary['hubs_by_tier'] = {
+                    str(k): int(v)
+                    for k, v in self.scored_hubs['tier'].value_counts().items()
+                }
+
+        manifest = {
+            'run_id': cfg.run_id,
+            'run_by': cfg.run_by or 'unknown',
+            'remarks': cfg.remarks,
+            'status': self.status,
+            'error_message': self.error_message,
+            'started_at': self.started_at.isoformat(timespec='seconds'),
+            'finished_at': finished_at.isoformat(timespec='seconds'),
+            'duration_seconds': round((finished_at - self.started_at).total_seconds(), 1),
+            'output_dir': str(cfg.output_dir),
+            'options': {
+                'skip_demand_data': cfg.skip_demand_data,
+                'skip_spatial_layers': cfg.skip_spatial_layers,
+                'skip_demographics': cfg.skip_demographics,
+                'run_mc_distribution': cfg.run_mc_distribution,
+            },
+            'inputs': inputs,
+            'outputs': self.output_files,  # same list object: later appends show in the returned dict
+            'log_file': str(self._run_log_path),
+            'results_summary': summary,
+        }
+
+        # Machine-readable manifest (UTF-8, non-ASCII text kept as-is)
+        json_path = cfg.results_dir / "run_log.json"
+        with open(json_path, 'w', encoding='utf-8') as f:
+            json.dump(manifest, f, indent=2, ensure_ascii=False)
+
+        # Human-readable rendering of the same manifest
+        txt_path = cfg.results_dir / "run_log.txt"
+        with open(txt_path, 'w', encoding='utf-8') as f:
+            f.write("=" * 70 + "\n")
+            f.write("HUB PRIORITIZATION - RUN LOG\n")
+            f.write("=" * 70 + "\n\n")
+            f.write(f"Run ID        : {manifest['run_id']}\n")
+            f.write(f"Run by        : {manifest['run_by']}\n")
+            f.write(f"Status        : {manifest['status']}\n")
+            if self.error_message:
+                f.write(f"Error         : {self.error_message}\n")
+            f.write(f"Started       : {manifest['started_at']}\n")
+            f.write(f"Finished      : {manifest['finished_at']}\n")
+            f.write(f"Duration      : {manifest['duration_seconds']} s\n")
+            f.write(f"Output dir    : {manifest['output_dir']}\n\n")
+            f.write(f"Remarks:\n{cfg.remarks or '(none)'}\n\n")
+            f.write("-" * 70 + "\n")
+            f.write("INPUT FILES USED\n")
+            f.write("-" * 70 + "\n")
+            for key, info in inputs.items():
+                if not info:
+                    f.write(f"  {key:<16}: (not provided)\n")
+                elif info.get('exists'):
+                    f.write(f"  {key:<16}: {info['name']}  "
+                            f"({info.get('size_bytes', '?')} bytes, "
+                            f"modified {info.get('modified', '?')})\n")
+                    f.write(f"  {'':<16}  path: {info['path']}\n")
+                    if 'sha256' in info:
+                        f.write(f"  {'':<16}  sha256: {info['sha256']}\n")
+                else:
+                    f.write(f"  {key:<16}: MISSING ({info['path']})\n")
+            f.write("\n")
+            f.write("-" * 70 + "\n")
+            f.write("OPTIONS\n")
+            f.write("-" * 70 + "\n")
+            for k, v in manifest['options'].items():
+                f.write(f"  {k:<22}: {v}\n")
+            f.write("\n")
+            f.write("-" * 70 + "\n")
+            f.write("OUTPUT FILES PRODUCED\n")
+            f.write("-" * 70 + "\n")
+            for out in self.output_files:
+                f.write(f"  {out}\n")
+            f.write("\n")
+            if summary:
+                f.write("-" * 70 + "\n")
+                f.write("RESULTS SUMMARY\n")
+                f.write("-" * 70 + "\n")
+                f.write(f"  Total hubs: {summary.get('total_hubs', '?')}\n")
+                for tier, n in summary.get('hubs_by_tier', {}).items():
+                    f.write(f"    {tier}: {n}\n")
+
+        self.output_files.append(str(json_path))  # added after both files were written, so
+        self.output_files.append(str(txt_path))  # neither file lists the two run logs itself
+        logger.info(f"✓ Run log: {json_path}")
+        logger.info(f"✓ Run log: {txt_path}")
+        return manifest
+
+    def _detach_log_handler(self):
+        """Remove the per-run file handler from the shared logger and close its file."""
+        try:
+            logger.removeHandler(self._run_log_handler)
+            self._run_log_handler.close()
+        except Exception:
+            pass  # best-effort cleanup; called from run()'s finally block
+
     def run(self):
         """Run complete pipeline."""
         try:
@@ -1235,22 +1569,57 @@ def run(self):
             self.step_11_run_mc_distribution()
             self.step_12_export_results()
 
+            self.status = "success"
+
             logger.info("\n" + "="*80)
             logger.info("✅ PIPELINE COMPLETE!")
             logger.info("="*80)
             logger.info(f"\nFinal Results:")
             logger.info(f"  Total hubs: {len(self.scored_hubs)}")
-            logger.info(f"  Results: {RESULTS_DIR}")
+            logger.info(f"  Results: {self.config.results_dir}")
 
             return self.scored_hubs
 
         except Exception as e:
+            self.status = "error"
+            self.error_message = str(e)
             logger.error(f"Pipeline failed: {e}", exc_info=True)
             raise
+        finally:
+            # Always write a manifest (even on failure) and detach the log file
+            try:
+                self.write_run_manifest()
+            except Exception as e:
+                logger.error(f"Could not write run manifest: {e}")
+            self._detach_log_handler()
+
+
+def run_pipeline(config: Optional[RunConfig] = None):
+    """Validate a run configuration and execute the pipeline (shared by CLI and GUI).
+
+    Args:
+        config: The RunConfig to execute. When None, default_run_config()
+            supplies one based on the module-level RAW_DATA_DIR paths.
+
+    Returns:
+        The scored hubs returned by CompleteHubPipeline.run().
+
+    Raises:
+        FileNotFoundError: If validate() reports any problem; all are joined with "; ".
+    """
+    if config is None:
+        config = default_run_config()
+    issues = config.validate()
+    # Log every problem first, then fail once with the combined message
+    for issue in issues:
+        logger.error(f"❌ {issue}")
+    if issues:
+        raise FileNotFoundError("; ".join(issues))
+    return CompleteHubPipeline(config).run()
 
 
 def main():
-    """Main entry point."""
+    """CLI entry point - runs with default RAW_DATA_DIR paths."""
 
     # Check required files
     if not INPUT_TRANSIT_NODES.exists():
@@ -1263,9 +1632,8 @@ def main():
         logger.info("Please update INPUT_LINES_MODES path in this script")
         sys.exit(1)
 
-    # Run pipeline
-    pipeline = CompleteHubPipeline()
-    results = pipeline.run()
+    # Run pipeline with default configuration
+    results = run_pipeline(default_run_config())
 
     logger.info("\n🎉 Done! Check results in data/results/")