From 589a20bba00af91c9bb8542427f57af6ba856a5e Mon Sep 17 00:00:00 2001 From: Kyle Ryan Date: Wed, 17 Jun 2026 21:06:43 -0400 Subject: [PATCH 1/2] feat(django): support string-prefixed route literals and re_path() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The route regexes required a quote immediately after `path(`, so routes written with a Python string-literal prefix — `path(r"...")`, f-strings, bytes — were skipped. The raw-string `r"..."` form is idiomatic in Django urls.py, so affected projects lost most of their routes. - Allow optional string-literal prefixes (`[rRbBuUfF]*`) before the quote in both `directPathRe` and `pathIncludeRe`. - Match `re_path()` in addition to `path()`. - Add a django-urls fixture and django.test.ts covering raw-string routes, re_path, include() prefixing, @api_view, and a recall guard. Co-Authored-By: Claude Opus 4.8 (1M context) --- package.json | 2 +- scripts/fixtures/django-urls/blog/urls.py | 12 +++++ scripts/fixtures/django-urls/blog/views.py | 6 +++ scripts/fixtures/django-urls/blogsite/urls.py | 10 ++++ scripts/fixtures/django-urls/manage.py | 1 + src/extractors/django.test.ts | 53 +++++++++++++++++++ src/extractors/django.ts | 10 ++-- 7 files changed, 90 insertions(+), 4 deletions(-) create mode 100644 scripts/fixtures/django-urls/blog/urls.py create mode 100644 scripts/fixtures/django-urls/blog/views.py create mode 100644 scripts/fixtures/django-urls/blogsite/urls.py create mode 100644 scripts/fixtures/django-urls/manage.py create mode 100644 src/extractors/django.test.ts diff --git a/package.json b/package.json index c82b61c..1918cff 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@pensar/surface", - "version": "0.2.1", + "version": "0.2.2", "description": "White-box endpoint discovery for source code repositories.", "license": "MIT", "type": "module", diff --git a/scripts/fixtures/django-urls/blog/urls.py b/scripts/fixtures/django-urls/blog/urls.py 
new file mode 100644 index 0000000..3a18d5a --- /dev/null +++ b/scripts/fixtures/django-urls/blog/urls.py @@ -0,0 +1,12 @@ +from django.urls import path, re_path +from . import views + +# Mixes the idiomatic raw-string `r"..."` route form with plain-quoted routes +# and an re_path() regex route, so the extractor is exercised against every +# common Python string-literal style. +urlpatterns = [ + path(r"posts/", views.post_list), + path(r"posts/new/", views.post_create), + path("drafts/", views.draft_list), + re_path(r"^posts/(?P<pk>[0-9]+)/$", views.post_detail), +] diff --git a/scripts/fixtures/django-urls/blog/views.py b/scripts/fixtures/django-urls/blog/views.py new file mode 100644 index 0000000..996126d --- /dev/null +++ b/scripts/fixtures/django-urls/blog/views.py @@ -0,0 +1,6 @@ +from rest_framework.decorators import api_view + + +@api_view(["GET", "POST"]) +def feed(request): + return None diff --git a/scripts/fixtures/django-urls/blogsite/urls.py b/scripts/fixtures/django-urls/blogsite/urls.py new file mode 100644 index 0000000..60ebadf --- /dev/null +++ b/scripts/fixtures/django-urls/blogsite/urls.py @@ -0,0 +1,10 @@ +from django.contrib import admin +from django.urls import path, include +from . import views + +# Root URLconf: plain-quoted routes plus an include() into the blog app. +urlpatterns = [ + path("admin/", admin.site.urls), + path("", views.home), + path("blog/", include("blog.urls")), +] diff --git a/scripts/fixtures/django-urls/manage.py b/scripts/fixtures/django-urls/manage.py new file mode 100644 index 0000000..945ec59 --- /dev/null +++ b/scripts/fixtures/django-urls/manage.py @@ -0,0 +1 @@ +# Django project marker (used for framework detection only). 
diff --git a/src/extractors/django.test.ts b/src/extractors/django.test.ts new file mode 100644 index 0000000..dc5a1fc --- /dev/null +++ b/src/extractors/django.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, test } from "bun:test"; +import { resolve } from "path"; +import { django } from "./django.ts"; +import { createScanContext } from "../scan-context.ts"; + +const FIXTURE = resolve(import.meta.dir, "../../scripts/fixtures/django-urls"); + +function extract(fixturePath: string = FIXTURE) { + const ctx = createScanContext(fixturePath); + return django.extract(ctx); +} + +describe("django url extraction", () => { + const endpoints = extract(); + const byPath = (p: string) => endpoints.find((e) => e.path === p); + + test("extracts routes declared with string-prefixed literals (r\"...\")", () => { + // blog.urls is mounted under "blog/" via include(), so routes are prefixed. + expect(byPath("/blog/posts")).toBeDefined(); + expect(byPath("/blog/posts/new")).toBeDefined(); + }); + + test("extracts plain-quoted routes", () => { + expect(byPath("/admin")).toBeDefined(); + expect(byPath("/blog/drafts")).toBeDefined(); + }); + + test("extracts re_path() regex routes", () => { + expect(endpoints.find((e) => e.handler === "post_detail")).toBeDefined(); + }); + + test("applies include() prefix to mounted app routes", () => { + // Unprefixed it would be "/posts"; the include mounts blog.urls at /blog. 
+ expect(byPath("/posts")).toBeUndefined(); + expect(byPath("/blog/posts")).toBeDefined(); + }); + + test("extracts @api_view endpoints with their methods", () => { + const feed = endpoints.filter((e) => e.handler === "feed"); + expect(feed.map((e) => e.method).sort()).toEqual(["GET", "POST"]); + }); + + test("captures every path()/re_path() route declared (recall guard)", () => { + for (const p of [ + "/admin", + "/blog/posts", + "/blog/posts/new", + "/blog/drafts", + ]) { + expect(byPath(p)).toBeDefined(); + } + }); +}); diff --git a/src/extractors/django.ts b/src/extractors/django.ts index 2f952f4..e05c8f1 100644 --- a/src/extractors/django.ts +++ b/src/extractors/django.ts @@ -21,10 +21,14 @@ export const django: Extractor = { return name === "urls.py" || name === "routes.py"; }); - // Build prefix map from include() calls + // Build prefix map from include() calls. + // `[rRbBuUfF]*` allows Python string-literal prefixes (raw `r"..."`, + // f-strings, bytes, unicode) before the quote — these are common in + // urls.py and would otherwise be skipped. `(?:path|re_path)` covers both + // Django route helpers. 
const includePrefixes: Record<string, string> = {}; const pathIncludeRe = - /path\s*\(\s*['"]([^'"]*)['"]\s*,\s*include\s*\(\s*['"]([^'"]+)['"]/g; + /(?:path|re_path)\s*\(\s*[rRbBuUfF]*['"]([^'"]*)['"]\s*,\s*include\s*\(\s*[rRbBuUfF]*['"]([^'"]+)['"]/g; for (const f of urlFiles) { const content = ctx.readFile(f); @@ -35,7 +39,7 @@ export const django: Extractor = { } const directPathRe = - /path\s*\(\s*['"]([^'"]*)['"]\s*,\s*(?!include)(\w[\w.]*)/g; + /(?:path|re_path)\s*\(\s*[rRbBuUfF]*['"]([^'"]*)['"]\s*,\s*(?!include)(\w[\w.]*)/g; const apiViewRe = /@api_view\s*\(\s*\[([^\]]*)\]\s*\)(.*?)def\s+(\w+)\s*\(/gs; From 46b3e6384df6bd08c2e8c354eedda435dce97d54 Mon Sep 17 00:00:00 2001 From: Kyle Ryan Date: Wed, 17 Jun 2026 21:07:41 -0400 Subject: [PATCH 2/2] style: prettier format django.test.ts Co-Authored-By: Claude Opus 4.8 (1M context) --- src/extractors/django.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/extractors/django.test.ts b/src/extractors/django.test.ts index dc5a1fc..f47e915 100644 --- a/src/extractors/django.test.ts +++ b/src/extractors/django.test.ts @@ -14,7 +14,7 @@ describe("django url extraction", () => { const endpoints = extract(); const byPath = (p: string) => endpoints.find((e) => e.path === p); - test("extracts routes declared with string-prefixed literals (r\"...\")", () => { + test('extracts routes declared with string-prefixed literals (r"...")', () => { // blog.urls is mounted under "blog/" via include(), so routes are prefixed. expect(byPath("/blog/posts")).toBeDefined(); expect(byPath("/blog/posts/new")).toBeDefined();