Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@pensar/surface",
"version": "0.2.1",
"version": "0.2.2",
"description": "White-box endpoint discovery for source code repositories.",
"license": "MIT",
"type": "module",
Expand Down
12 changes: 12 additions & 0 deletions scripts/fixtures/django-urls/blog/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from django.urls import path, re_path
from . import views

# Mixes the idiomatic raw-string `r"..."` route form with a plain double-quoted
# route and an re_path() regex route, so the extractor is exercised against
# both prefixed and unprefixed string literals as well as both route helpers.
urlpatterns = [
# Raw-string literals: an `r` prefix precedes the opening quote.
path(r"posts/", views.post_list),
path(r"posts/new/", views.post_create),
# Plain double-quoted literal with no prefix.
path("drafts/", views.draft_list),
# Regex route: the pattern carries `^`/`$` anchors and a named group `pk`.
re_path(r"^posts/(?P<pk>[0-9]+)/$", views.post_detail),
]
6 changes: 6 additions & 0 deletions scripts/fixtures/django-urls/blog/views.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from rest_framework.decorators import api_view


# Fixture view: the `api_view` decorator lists GET and POST as its methods.
# The body is a stub that returns None.
@api_view(["GET", "POST"])
def feed(request):
return None
10 changes: 10 additions & 0 deletions scripts/fixtures/django-urls/blogsite/urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from django.contrib import admin
from django.urls import path, include
from . import views

# Root URLconf: plain-quoted routes plus an include() into the blog app.
urlpatterns = [
# The handler here is the dotted attribute `admin.site.urls`.
path("admin/", admin.site.urls),
# Empty route string mapped to the `home` view.
path("", views.home),
# Mounts the blog app's URLconf (module `blog.urls`) under the "blog/" prefix.
path("blog/", include("blog.urls")),
]
1 change: 1 addition & 0 deletions scripts/fixtures/django-urls/manage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Django project marker (used for framework detection only).
53 changes: 53 additions & 0 deletions src/extractors/django.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import { describe, expect, test } from "bun:test";
import { resolve } from "path";
import { django } from "./django.ts";
import { createScanContext } from "../scan-context.ts";

// Path to the `django-urls` fixture project, resolved from `import.meta.dir`
// (two levels up, then into `scripts/fixtures`).
const FIXTURE = resolve(import.meta.dir, "../../scripts/fixtures/django-urls");

/**
 * Runs the Django extractor against a project directory.
 *
 * @param fixturePath - Directory handed to `createScanContext`; defaults to
 *   the `django-urls` fixture.
 * @returns The result of `django.extract` for that scan context.
 */
function extract(fixturePath: string = FIXTURE) {
  return django.extract(createScanContext(fixturePath));
}

describe("django url extraction", () => {
  // The fixture is scanned once; every test below inspects this shared result.
  const endpoints = extract();

  // First extracted endpoint whose `path` equals `route`, or undefined.
  const endpointAt = (route: string) =>
    endpoints.find((endpoint) => endpoint.path === route);

  test('extracts routes declared with string-prefixed literals (r"...")', () => {
    // include() mounts blog.urls under "blog/", hence the "/blog" prefix.
    const listRoute = endpointAt("/blog/posts");
    expect(listRoute).toBeDefined();
    const createRoute = endpointAt("/blog/posts/new");
    expect(createRoute).toBeDefined();
  });

  test("extracts plain-quoted routes", () => {
    for (const route of ["/admin", "/blog/drafts"]) {
      expect(endpointAt(route)).toBeDefined();
    }
  });

  test("extracts re_path() regex routes", () => {
    // Looked up by handler name rather than by path.
    const detail = endpoints.find(
      (endpoint) => endpoint.handler === "post_detail",
    );
    expect(detail).toBeDefined();
  });

  test("applies include() prefix to mounted app routes", () => {
    // Without the include() prefix the route would surface as "/posts".
    expect(endpointAt("/posts")).toBeUndefined();
    expect(endpointAt("/blog/posts")).toBeDefined();
  });

  test("extracts @api_view endpoints with their methods", () => {
    const feedMethods = endpoints
      .filter((endpoint) => endpoint.handler === "feed")
      .map((endpoint) => endpoint.method);
    expect(feedMethods.sort()).toEqual(["GET", "POST"]);
  });

  test("captures every path()/re_path() route declared (recall guard)", () => {
    // NOTE(review): the regex route and the empty-string root route are not
    // listed here; the regex route is only checked by handler name above.
    const expectedRoutes = [
      "/admin",
      "/blog/posts",
      "/blog/posts/new",
      "/blog/drafts",
    ];
    expectedRoutes.forEach((route) => {
      expect(endpointAt(route)).toBeDefined();
    });
  });
});
10 changes: 7 additions & 3 deletions src/extractors/django.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,14 @@ export const django: Extractor = {
return name === "urls.py" || name === "routes.py";
});

// Build prefix map from include() calls
// Build prefix map from include() calls.
// `[rRbBuUfF]*` allows Python string-literal prefixes (raw `r"..."`,
// f-strings, bytes, unicode) before the quote — these are common in
// urls.py and would otherwise be skipped. `(?:path|re_path)` covers both
// Django route helpers.
const includePrefixes: Record<string, string> = {};
const pathIncludeRe =
/path\s*\(\s*['"]([^'"]*)['"]\s*,\s*include\s*\(\s*['"]([^'"]+)['"]/g;
/(?:path|re_path)\s*\(\s*[rRbBuUfF]*['"]([^'"]*)['"]\s*,\s*include\s*\(\s*[rRbBuUfF]*['"]([^'"]+)['"]/g;

for (const f of urlFiles) {
const content = ctx.readFile(f);
Expand All @@ -35,7 +39,7 @@ export const django: Extractor = {
}

const directPathRe =
/path\s*\(\s*['"]([^'"]*)['"]\s*,\s*(?!include)(\w[\w.]*)/g;
/(?:path|re_path)\s*\(\s*[rRbBuUfF]*['"]([^'"]*)['"]\s*,\s*(?!include)(\w[\w.]*)/g;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

re_path regex stored as URL

Medium Severity

directPathRe now matches re_path() with the same pipeline as path(), so the first capture is the full regex string (anchors, groups, quantifiers), not a URL segment. After include() prefixing and normalizePath, the reported endpoint path is a regex literal rather than a routable URL shape consumers expect for discovery and scanning.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 46b3e63. Configure here.

const apiViewRe =
/@api_view\s*\(\s*\[([^\]]*)\]\s*\)(.*?)def\s+(\w+)\s*\(/gs;

Expand Down
Loading