diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..0b6b788 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,76 @@ +name: CI + +on: + push: + branches: ["**"] + pull_request: + +jobs: + lint-and-typecheck: + name: Lint & Type Check + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + + - name: Install dev tools + run: pip install ruff mypy + + - name: ruff lint + run: ruff check scripts/ + + - name: mypy type check + run: mypy scripts/ --ignore-missing-imports + + syntax-check: + name: Script Syntax Validation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + + - name: Check Python syntax + run: | + python -m py_compile scripts/scrape_positioning.py + python -m py_compile scripts/analyze_positioning.py + python -m py_compile scripts/render_positioning.py + python -m py_compile scripts/run_pipeline.py + echo "All scripts pass syntax check" + + deps-install: + name: Dependency Installation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + cache: pip + + - name: Install system deps for WeasyPrint + run: | + sudo apt-get update -q + sudo apt-get install -y -q \ + libpango-1.0-0 libpangoft2-1.0-0 libharfbuzz0b \ + libffi-dev libjpeg-dev libopenjp2-7 + + - name: Install Python dependencies + run: pip install -r requirements.txt + + - name: Install Playwright browsers + run: playwright install chromium --with-deps + + - name: Verify imports + run: | + python -c "import playwright; print('playwright ok')" + python -c "import anthropic; print('anthropic ok')" + python -c "import openai; print('openai ok')" + python -c "import weasyprint; print('weasyprint ok')" diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..fc0e34e --- /dev/null +++ 
b/AGENTS.md @@ -0,0 +1,100 @@ +# AGENTS.md — Neobank Positioning Engine Skill + +## How Claude Code Invokes This Skill + +Claude Code reads `SKILL.md` at session start. The skill triggers on phrases like: + +- `position [company]` +- `positioning audit for [company]` +- `competitive positioning: A vs B vs C` + +Claude then drives the three-script pipeline interactively, pausing for human input at defined checkpoints. + +--- + +## Invocation Pattern + +``` +Trigger phrase → Phase 1 (Scrape) → [PAUSE] → Phase 2 (Analyze) → [PAUSE] → Phase 3 (Render) → Done +``` + +Claude calls scripts via `subprocess`-style Bash tool calls — it does NOT import them as modules. + +--- + +## Pause Points (Human Review Required) + +| After Phase | What Claude Shows | What Human Decides | +| ----------- | ------------------------------------------- | --------------------------------------------------- | +| Scrape | List of URLs scraped + data quality summary | Approve data, add missing competitors, or re-scrape | +| Analyze | Positioning map + white space findings | Confirm strategic direction before generating copy | +| Render | PDF path + brief summary | Accept output or request revisions to framework | + +Claude MUST stop and surface results at each pause point. Do not auto-chain all three stages without human confirmation. 
+ +--- + +## Error Handling Expectations + +| Error | Expected Behavior | +| --------------------------------------------- | ---------------------------------------------------------------------------------------- | +| Playwright scrape fails (bot protection, 403) | Fall back to `references/neobank-messaging-map.md`, note fallback in output | +| Thin scrape data (<200 chars body text) | Warn user, suggest manual input or app store description as supplement | +| API key missing | Exit with clear message: "Set ANTHROPIC_API_KEY or OPENROUTER_API_KEY in .env" | +| API rate limit / timeout | Retry once (MAX_RETRIES=1), then surface error with raw scraped data for manual analysis | +| WeasyPrint missing system deps | Output HTML only, note PDF generation failed with install instructions | +| `output/` directory missing | Scripts create it automatically — not an error | + +--- + +## Output Contract + +`analyze_positioning.py` MUST emit JSON matching the schema in `examples/kast-brief.json`. Fields: + +``` +company, date, competitors[], executive_summary, +positioning_elements{}, territory_map{}, white_space[], +messaging_framework{ + positioning_statements[], one_liners[], value_propositions[], + audience_messaging[], what_not_to_say[], competitive_responses[] +} +``` + +`render_positioning.py` reads this schema. Breaking changes to the schema break rendering. + +--- + +## Custom Framework Adaptation + +To adapt this skill for non-neobank verticals: + +1. **References** — Replace `references/neobank-messaging-map.md` with pre-mapped data for the new vertical. Keep `references/positioning-frameworks.md` (Moore/Dunford frameworks are universal). + +2. **Territory dimensions** — The four dimensions (audience spectrum, trust model, value prop core, brand personality) are defined in the analyze prompt inside `scripts/analyze_positioning.py`. Search for `POSITIONING_DIMENSIONS` or equivalent prompt section and rewrite for your vertical. + +3. 
**Scrape targets** — `scrape_positioning.py` scrapes generic web content — no neobank-specific logic. Works for any B2C website. + +4. **Skill trigger** — Update `SKILL.md` front matter `description` field and trigger phrases in the `## Trigger` section. + +5. **Examples** — Add a real brief JSON to `examples/` for the new vertical so Claude has a concrete output reference. + +No code changes required for new competitors — they're passed as CLI arguments. + +--- + +## Running Outside Claude Code + +The skill scripts are plain Python. Any agent or CI system can invoke them: + +```bash +# Full pipeline (non-interactive) +python scripts/run_pipeline.py "Company" "https://url.com" \ + --competitors "Rival1:https://r1.com" "Rival2:https://r2.com" + +# Stage by stage +python scripts/scrape_positioning.py "Company" "https://url.com" +python scripts/analyze_positioning.py output/company-positioning.json --competitors rival1 rival2 +python scripts/render_positioning.py output/company-brief.json +``` + +All scripts exit 0 on success, non-zero on failure. Errors go to stderr. 
diff --git a/CLAUDE.md b/CLAUDE.md index 38178f9..291d43f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,20 +1,71 @@ # Neobank Positioning Engine -## Build/Run -- Install: `pip install -r requirements.txt && playwright install chromium` -- Scrape: `python scripts/scrape_positioning.py "Company" "https://url.com"` -- Analyze: `python scripts/analyze_positioning.py output/slug-positioning.json --competitors comp1 comp2` -- Render: `python scripts/render_positioning.py output/slug-brief.json` -- Pipeline: `python scripts/run_pipeline.py "Company" "https://url.com" --competitors "Comp:https://url"` +## Run Commands + +| Phase | Command | +| ------------- | ---------------------------------------------------------------------------------------------- | +| Scrape | `python scripts/scrape_positioning.py "Company" "https://url.com"` | +| Analyze | `python scripts/analyze_positioning.py output/slug-positioning.json --competitors comp1 comp2` | +| Render | `python scripts/render_positioning.py output/slug-brief.json` | +| Full pipeline | `python scripts/run_pipeline.py "Company" "https://url.com" --competitors "Comp:https://url"` | + +## Environment Setup + +```bash +# Create virtualenv +python -m venv .venv && source .venv/bin/activate + +# Install system deps (macOS) +brew install pango cairo gdk-pixbuf libffi + +# Install system deps (Ubuntu/Debian) +sudo apt-get install -y libpango-1.0-0 libpangoft2-1.0-0 libharfbuzz0b libffi-dev libjpeg-dev libopenjp2-7 + +# Install Python deps +pip install -r requirements.txt +playwright install chromium +``` + +## API Key Configuration + +```bash +cp .env.example .env +# Edit .env and set at least one: +# ANTHROPIC_API_KEY=sk-ant-... (preferred, ~$0.10-0.20/run) +# OPENROUTER_API_KEY=sk-or-... (fallback, ~$0.15-0.35/run) +``` + +Auto-detection order: Anthropic key → OpenRouter key. Override with `--provider anthropic|openrouter` and `--model <model-id>`. ## Architecture + Three-stage pipeline: scrape (Playwright) → analyze (Claude API) → render (WeasyPrint). 
Scripts in `scripts/`, references in `references/`, examples in `examples/`, output in `output/`. Brief schema defined by `examples/kast-brief.json`. ## Key Patterns + - Scripts use `sys.argv` / `argparse` at module level with `if __name__ == "__main__"` guards - `run_pipeline.py` chains scripts via `subprocess.run()` (not imports) because of asyncio + argv patterns - `analyze_positioning.py` auto-detects API provider from env vars (Anthropic preferred, OpenRouter fallback) - `.env` loaded by a simple parser in `analyze_positioning.py`, no dotenv dependency - `slugify()` is duplicated across scripts intentionally (no shared utils module) + +## Common Issues + +| Issue | Fix | +| ---------------------------- | ----------------------------------------------------------------------------------- | +| `WeasyPrint` import error | Install system deps: `brew install pango cairo` (macOS) or apt-get equivalents | +| Playwright browser not found | `playwright install chromium` | +| Scraper returns empty body | Site uses heavy JS — increase `asyncio.sleep` in `scrape_page()` or add manual data | +| API auth error | Check `.env` exists and key has no trailing whitespace | +| `output/` not found | Scripts create it automatically on first run | +| PDF blank / CSS broken | WeasyPrint version mismatch — pin to `weasyprint~=63.0` | + +## Linting + +```bash +pip install ruff mypy +ruff check scripts/ +mypy scripts/ --ignore-missing-imports +``` diff --git a/README.md b/README.md index 5253cd8..f92af38 100644 --- a/README.md +++ b/README.md @@ -4,18 +4,46 @@ **Find out what your competitors actually claim, what territory they leave open, and exactly how to position against them.** -The Positioning Engine scrapes crypto neobank websites, extracts every positioning signal (headlines, value props, CTAs, proof points, brand voice, and — critically — what they *don't* say), maps the competitive landscape, identifies unclaimed territory, and generates a complete messaging framework with 
real copy you can use. +The Positioning Engine scrapes crypto neobank websites, extracts every positioning signal (headlines, value props, CTAs, proof points, brand voice, and — critically — what they _don't_ say), maps the competitive landscape, identifies unclaimed territory, and generates a complete messaging framework with real copy you can use. This isn't a brand strategy template. Every recommendation traces back to what competitors actually say on their websites and what positioning territory is genuinely unclaimed. --- +## Architecture + +```mermaid +flowchart LR + subgraph Phase1["Phase 1 — Scrape"] + S1["scrape_positioning.py<br/>Playwright headless browser"] + S1 -->|"output/{slug}-positioning.json"| D1[("Positioning<br/>JSON")] + end + + subgraph Phase2["Phase 2 — Analyze"] + D1 --> A1["analyze_positioning.py<br/>Claude API"] + REF["references/<br/>Frameworks + Map"] --> A1 + A1 -->|"output/{slug}-brief.json"| D2[("Brief<br/>JSON")] + end + + subgraph Phase3["Phase 3 — Render"] + D2 --> R1["render_positioning.py<br/>WeasyPrint"] + R1 --> PDF["output/{slug}-brief.pdf"] + R1 --> HTML["output/{slug}-brief.html"] + end + + Phase1 --> Phase2 --> Phase3 +``` + +`run_pipeline.py` chains all three stages in one command via `subprocess.run()`. + +--- + ## What You Get A positioning brief covering: - **Executive Summary** — The one-paragraph strategic read. Where you stand, what's working, what's not, and the single biggest opportunity. -- **Positioning Elements** — For each company analyzed: claims, target audience, benefits, proof points, brand voice, CTA language, and omissions (what they *don't* mention is often more revealing). +- **Positioning Elements** — For each company analyzed: claims, target audience, benefits, proof points, brand voice, CTA language, and omissions (what they _don't_ mention is often more revealing). 
- **Territory Map** — Every company scored on four dimensions: audience spectrum (crypto-native vs mainstream), trust model (self-custody vs custodial), value proposition core (yield vs utility), and brand personality (technical vs lifestyle). - **White Space Analysis** — Positioning territories that are unclaimed or weakly held, with evidence for each. - **Messaging Framework** — Positioning statements (Geoffrey Moore format), one-liner options, value propositions with proof points, audience-specific messaging, what NOT to say, and competitive response playbooks. @@ -24,70 +52,130 @@ The output is a structured JSON file that renders into a styled HTML/PDF brief. --- -## How It Works +## Installation -Three stages, fully automated: +### System Dependencies -``` -Scrape → Analyze → Render -``` +WeasyPrint requires native libraries. Install before `pip install`. -1. **Scrape** — A headless browser visits each company's website and pulls positioning data: headlines, subheadlines, meta descriptions, CTAs, body copy, and proof points. -2. **Analyze** — The scraped data is fed to Claude (Anthropic's AI) along with positioning frameworks and competitive intelligence. The AI performs the full analysis: extracting positioning elements, mapping territories, finding white space, and generating the messaging framework. -3. **Render** — The structured brief is rendered as a clean HTML document and PDF. +**macOS:** ---- +```bash +brew install pango cairo gdk-pixbuf libffi +``` -## Getting Started +**Ubuntu / Debian:** -### Prerequisites +```bash +sudo apt-get install -y \ + libpango-1.0-0 libpangoft2-1.0-0 libharfbuzz0b \ + libffi-dev libjpeg-dev libopenjp2-7 +``` -- **Python 3.10+** — [Download Python](https://www.python.org/downloads/) if you don't have it -- **An API key** — Either an [Anthropic API key](https://console.anthropic.com/) or an [OpenRouter API key](https://openrouter.ai/) +**Windows:** Use WSL2 with the Ubuntu instructions above. 
-### Setup +### Python Setup ```bash -# Clone the repo git clone https://github.com/growgami/Neobank-Positioning-Engine-Skill.git cd Neobank-Positioning-Engine-Skill -# Install dependencies +python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate pip install -r requirements.txt playwright install chromium +``` + +### Environment Variables -# Add your API key +```bash cp .env.example .env -# Open .env and paste your ANTHROPIC_API_KEY or OPENROUTER_API_KEY ``` -### Run It +Edit `.env`: -**One command, full pipeline:** +| Variable | Required | Description | +| -------------------- | -------------- | ------------------------------------------------- | +| `ANTHROPIC_API_KEY` | One of the two | Direct Anthropic API — preferred. ~$0.10–0.20/run | +| `OPENROUTER_API_KEY` | One of the two | OpenRouter fallback. ~$0.15–0.35/run | + +The engine auto-detects which key is available (Anthropic takes priority). Override with `--provider` and `--model` flags. + +--- + +## Usage + +### Full Pipeline (one command) ```bash python scripts/run_pipeline.py "KAST" "https://kast.xyz" \ --competitors "Revolut:https://revolut.com" "Crypto.com:https://crypto.com" ``` -This scrapes all websites, runs the analysis, and renders the brief. Output lands in `output/`. +Output lands in `output/kast-brief.json` and `output/kast-brief.pdf`. -**Or run each stage separately** (useful if you want to re-analyze without re-scraping): +### Stage by Stage ```bash -# Scrape +# 1. Scrape target + each competitor python scripts/scrape_positioning.py "KAST" "https://kast.xyz" python scripts/scrape_positioning.py "Revolut" "https://revolut.com" +python scripts/scrape_positioning.py "Crypto.com" "https://crypto.com" -# Analyze +# 2. Analyze (re-run without re-scraping to iterate on analysis) python scripts/analyze_positioning.py output/kast-positioning.json \ - --competitors revolut + --competitors revolut crypto-com -# Render +# 3. 
Render to PDF python scripts/render_positioning.py output/kast-brief.json ``` -**Cost:** Each run costs roughly **$0.10–0.35** in API fees, depending on the model and number of competitors. +Running stages separately is useful when you want to re-analyze with different instructions without re-scraping (scraping is slow; analysis is fast). + +### Optional Flags + +| Flag | Values | Default | +| ------------ | ------------------------- | ---------------------------- | +| `--provider` | `anthropic`, `openrouter` | auto-detected from env | +| `--model` | any model ID | `claude-sonnet-4-5-20250514` | + +--- + +## Output Format + +`output/{slug}-brief.json` — matches the schema in `examples/kast-brief.json`: + +```json +{ + "company": "KAST", + "date": "2026-02-17", + "competitors": ["Revolut", "Crypto.com"], + "executive_summary": "...", + "positioning_elements": { "KAST": { ... }, "Revolut": { ... } }, + "territory_map": { "dimensions": [...], "scores": { ... } }, + "white_space": [ { "territory": "...", "evidence": "..." } ], + "messaging_framework": { + "positioning_statements": [...], + "one_liners": [...], + "value_propositions": [...], + "audience_messaging": [...], + "what_not_to_say": [...], + "competitive_responses": [...] + } +} +``` + +`render_positioning.py` converts this to `output/{slug}-brief.html` and `output/{slug}-brief.pdf`. + +--- + +## Cost + +| Provider | Model | Cost per Run | +| ----------------------- | ----------------- | ------------ | +| Anthropic (recommended) | Claude Sonnet 4.5 | ~$0.10–0.20 | +| OpenRouter | Claude Sonnet 4.5 | ~$0.15–0.35 | + +Cost scales with the number of competitors (more scraped content = more tokens). 3–5 competitors is the typical range. --- @@ -109,17 +197,6 @@ Claude will scrape, analyze, and render, pausing for your input at each stage. 
T --- -## API Providers - -| Provider | Env Variable | Default Model | Cost per Run | -|----------|-------------|---------------|-------------| -| **Anthropic** (recommended) | `ANTHROPIC_API_KEY` | Claude Sonnet 4.5 | ~$0.10–0.20 | -| **OpenRouter** (alternative) | `OPENROUTER_API_KEY` | Claude Sonnet 4.5 | ~$0.15–0.35 | - -The engine auto-detects which key is available. Override with `--provider` and `--model` if needed. - ---- - ## Repo Structure ``` @@ -135,7 +212,11 @@ neobank-positioning-engine/ ├── examples/ │ ├── kast-brief.json # Example: KAST vs Revolut, Crypto.com, Wirex │ └── avici-brief.json # Example: Avici vs Bleap, KAST, RedotPay +├── .github/workflows/ci.yml # Ruff, mypy, syntax + dep install checks ├── SKILL.md # Claude Code skill definition +├── CLAUDE.md # Dev environment and run commands +├── AGENTS.md # Agent invocation guide and error handling +├── pyproject.toml # Project metadata, ruff + mypy config ├── requirements.txt ├── .env.example └── LICENSE diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..8968bbd --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,38 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[project] +name = "neobank-positioning-engine" +version = "0.1.0" +description = "Scrape competitor websites, analyze positioning, generate messaging frameworks for crypto neobanks" +readme = "README.md" +license = { file = "LICENSE" } +requires-python = ">=3.10" +dependencies = [ + "playwright~=1.50", + "weasyprint~=63.0", + "anthropic~=0.49", + "openai~=1.66", +] + +[project.optional-dependencies] +dev = [ + "ruff>=0.4", + "mypy>=1.10", +] + +[tool.ruff] +target-version = "py310" +line-length = 100 + +[tool.ruff.lint] +select = ["E", "F", "W", "I", "UP"] +ignore = ["E501"] + +[tool.mypy] +python_version = "3.10" +ignore_missing_imports = true +warn_unused_ignores = true +warn_return_any = false +check_untyped_defs = true