Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions apps/ingestion/src/app.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { Hono } from "hono";
import { type MiddlewareHandler } from "hono";
import { cors } from "hono/cors";
import { handleIngest } from "./handlers/ingest.js";
import { handleMetrics } from "./handlers/metrics.js";
Expand Down Expand Up @@ -28,10 +29,10 @@ function incrementRequestCount(req: Request) {
}

function requestCounter() {
return async function (c: any, next: any) {
return async function (c, next) {
incrementRequestCount(c.req.raw);
await next();
};
} satisfies MiddlewareHandler;
}

function getCorsOrigin(origin: string | undefined): string | undefined {
Expand Down
47 changes: 37 additions & 10 deletions apps/ingestion/src/db/client.ts
Original file line number Diff line number Diff line change
@@ -1,21 +1,48 @@
import { drizzle } from "drizzle-orm/neon-http";
import { neon } from "@neondatabase/serverless";
import { events, resume, visitors, visitorEvents } from "./schema";
import { events, visitors } from "./schema";

function getDbClient() {
function createDb(databaseUrl: string) {
const sql = neon(databaseUrl);
return drizzle(sql, { schema: { events, visitors } });
}

type DbClient = ReturnType<typeof createDb>;

function createFallbackDb(): DbClient {
return {
select() {
return {
from() {
return [];
},
};
},
insert() {
Comment on lines +12 to +21
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion (bug_risk): Fallback DB client shape diverges from the real drizzle client and may cause subtle runtime differences.

The fallback client is cast to DbClient, but its query methods are synchronous and return plain arrays/objects, while the real drizzle client returns Promises (e.g. select().from(), insert().values().returning()). This can break code that uses Promise-specific behavior (.then, Promise.all, assuming async execute()). Consider making these methods async and returning Promise-based results that mirror drizzle’s shapes to keep the async contract consistent.

return {
values() {
return {
returning() {
return [];
},
};
},
};
},
async execute() {
return { rows: [] };
},
} as unknown as DbClient;
}

function getDbClient(): DbClient {
const databaseUrl = process.env.DATABASE_URL;

if (!databaseUrl) {
// Return dummy client during build or test if url is missing, or if env not set
return {
select: () => ({ from: () => [] }),
insert: () => ({ values: () => ({ returning: () => [] }) }),
execute: async () => ({ rows: [] }),
} as any;
return createFallbackDb();
}

const sql = neon(databaseUrl);
return drizzle(sql, { schema: { events, resume, visitors, visitorEvents } });
return createDb(databaseUrl);
}

export const db = getDbClient();
13 changes: 2 additions & 11 deletions apps/ingestion/src/db/index.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,4 @@
export { events } from "./schema";
export { resume, visitors, visitorEvents } from "./schema";
export type {
Event,
NewEvent,
Resume,
NewResume,
Visitor,
NewVisitor,
VisitorEvent,
NewVisitorEvent,
} from "./schema";
export { visitors } from "./schema";
export type { Event, NewEvent, Visitor, NewVisitor } from "./schema";
export { db } from "./client";
10 changes: 10 additions & 0 deletions apps/ingestion/src/db/migrations/0001_archive_legacy_tables.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
DO $$
BEGIN
IF to_regclass('public.resume') IS NOT NULL AND to_regclass('public.old_resume') IS NULL THEN
ALTER TABLE public.resume RENAME TO old_resume;
END IF;

IF to_regclass('public.visitor_events') IS NOT NULL AND to_regclass('public.old_visitor_events') IS NULL THEN
ALTER TABLE public.visitor_events RENAME TO old_visitor_events;
END IF;
END $$;
49 changes: 0 additions & 49 deletions apps/ingestion/src/db/schema.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import {
pgTable,
bigserial,
bigint,
integer,
text,
timestamp,
Expand Down Expand Up @@ -45,27 +44,6 @@ export const events = pgTable(
}),
);

export const resume = pgTable("resume", {
id: bigserial("id", { mode: "bigint" }).primaryKey(),
event: text("event").notNull(),
ts: timestamp("ts", { withTimezone: true }).notNull().defaultNow(),
path: text("path"),
referrer: text("referrer"),
origin: text("origin"),
host: text("host"),
isLocalhost: boolean("is_localhost"),
ua: text("ua"),
lang: text("lang"),
ipHash: text("ip_hash"),
visitorId: text("visitor_id"),
country: text("country"),
region: text("region"),
city: text("city"),
deviceType: text("device_type"),
resumeVersion: text("resume_version"),
meta: jsonb("meta"),
});

export const visitors = pgTable(
"visitors",
{
Expand Down Expand Up @@ -96,34 +74,7 @@ export const visitors = pgTable(
}),
);

export const visitorEvents = pgTable(
"visitor_events",
{
id: bigserial("id", { mode: "bigint" }).primaryKey(),
visitorId: bigint("visitor_id", { mode: "bigint" })
.notNull()
.references(() => visitors.id),
eventType: text("event_type").notNull(),
ts: timestamp("ts", { withTimezone: true }).notNull().defaultNow(),
path: text("path"),
referrer: text("referrer"),
sessionId: text("session_id"),
durationMs: integer("duration_ms"),
meta: jsonb("meta"),
},
(table) => ({
visitorIdIdx: index("idx_visitor_events_visitor_id").on(table.visitorId),
eventTypeIdx: index("idx_visitor_events_event_type").on(table.eventType),
sessionIdIdx: index("idx_visitor_events_session_id").on(table.sessionId),
tsIdx: index("idx_visitor_events_ts").on(table.ts),
}),
);

export type Event = typeof events.$inferSelect;
export type NewEvent = typeof events.$inferInsert;
export type Resume = typeof resume.$inferSelect;
export type NewResume = typeof resume.$inferInsert;
export type Visitor = typeof visitors.$inferSelect;
export type NewVisitor = typeof visitors.$inferInsert;
export type VisitorEvent = typeof visitorEvents.$inferSelect;
export type NewVisitorEvent = typeof visitorEvents.$inferInsert;
48 changes: 25 additions & 23 deletions apps/ingestion/src/db/seed.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { db } from "./client";
import { events } from "./schema";
import { events, type NewEvent } from "./schema";

const PROJECTS = ["analytics-demo.io", "skriuw.dev", "personal-site.com"];
const BROWSERS = [
Expand Down Expand Up @@ -30,34 +30,46 @@ const REGIONS = {
Netherlands: ["North Holland", "South Holland", "Utrecht", "North Brabant"],
"United Kingdom": ["London", "Manchester", "Birmingham", "Scotland"],
Germany: ["Berlin", "Bavaria", "Hamburg", "Hesse"],
};
} satisfies Partial<Record<(typeof COUNTRIES)[number], readonly string[]>>;
const CITIES = {
California: ["San Francisco", "Los Angeles", "San Diego"],
"New York": ["New York City", "Buffalo", "Rochester"],
"North Holland": ["Amsterdam", "Haarlem", "Zaanstad"],
London: ["Westminster", "Camden", "Greenwich"],
};
} satisfies Record<string, readonly string[]>;
const PATHS = ["/", "/features", "/pricing", "/docs", "/blog", "/about"];
const REFERRERS = ["https://google.com", "https://twitter.com", "https://github.com", ""];
const DEVICES = ["desktop", "mobile", "tablet"];

function getRandomItem<T>(arr: T[]): T {
type SeedValue = string | number | boolean | Record<string, string | number | boolean>;
type SeedMeta = Record<string, SeedValue>;

function getRandomItem<T>(arr: readonly T[]): T {
return arr[Math.floor(Math.random() * arr.length)];
}

function getRandomInt(min: number, max: number): number {
return Math.floor(Math.random() * (max - min + 1)) + min;
}

function getRegions(country: string): readonly string[] {
if (country in REGIONS) return REGIONS[country as keyof typeof REGIONS];
return ["Unknown"];
}

function getCities(region: string): readonly string[] {
if (region in CITIES) return CITIES[region as keyof typeof CITIES];
return ["Unknown"];
}

async function seed() {
console.log("🌱 Starting advanced seed...");
console.log("Starting advanced seed...");

const now = new Date();
const thirtyDaysAgo = new Date(now.getTime() - 30 * 24 * 60 * 60 * 1000);

const entries: any[] = [];
const entries: NewEvent[] = [];

// Generate 200 visitors
for (let i = 0; i < 200; i++) {
const visitorId = `v-${Math.random().toString(36).substring(7)}`;
const projectId = getRandomItem(PROJECTS);
Expand All @@ -68,14 +80,12 @@ async function seed() {
const screenSize = deviceType === "mobile" ? "390x844" : "1920x1080";
const viewport = deviceType === "mobile" ? "390x844" : "1440x900";

// Geographic data for this visitor
const country = getRandomItem(COUNTRIES);
const regionList = (REGIONS as any)[country] || ["Unknown"];
const region = getRandomItem(regionList) as string;
const cityList = (CITIES as any)[region] || ["Unknown"];
const regionList = getRegions(country);
const region = getRandomItem(regionList);
const cityList = getCities(region);
const city = getRandomItem(cityList);

// Advanced traits for this visitor (Segmentation & A/B tests)
const userProperties = {
plan: Math.random() > 0.8 ? "pro" : "free",
role: Math.random() > 0.9 ? "admin" : "user",
Expand All @@ -84,7 +94,6 @@ async function seed() {
pricing_color: Math.random() > 0.5 ? "green" : "red",
};

// Each visitor has 1-5 sessions
const sessionCount = getRandomInt(1, 5);
for (let s = 0; s < sessionCount; s++) {
const sessionId = `s-${Math.random().toString(36).substring(7)}`;
Expand All @@ -94,15 +103,13 @@ async function seed() {
);
let lastEventTime = sessionStart;

// Force a funnel for ~30% of sessions
const isFunnelSession = Math.random() > 0.7;
let funnelStep = 0;

const pageviewCount = getRandomInt(1, 10);
for (let p = 0; p < pageviewCount; p++) {
let path = getRandomItem(PATHS);

// Force funnel path progression
if (isFunnelSession) {
if (funnelStep === 0) {
path = "/";
Expand All @@ -117,8 +124,7 @@ async function seed() {
const timestamp = new Date(lastEventTime.getTime() + getRandomInt(1000, 60000));
lastEventTime = timestamp;

// Base metadata (injecting user attributes & experiments uniformly)
const meta: any = {
const meta: SeedMeta = {
browser: browser.name,
browserVersion: browser.version,
os: os.name,
Expand All @@ -135,7 +141,6 @@ async function seed() {
meta.utmMedium = "social";
}

// 1. Pageview
entries.push({
projectId,
type: "pageview",
Expand All @@ -152,7 +157,6 @@ async function seed() {
meta,
});

// 2. Funnel conversion events
if (isFunnelSession && funnelStep === 1) {
entries.push({
projectId,
Expand Down Expand Up @@ -193,7 +197,6 @@ async function seed() {

funnelStep++;

// 3. Search events
if (Math.random() > 0.8) {
entries.push({
projectId,
Expand All @@ -215,7 +218,6 @@ async function seed() {
});
}

// 4. Performance & Engagement
if (Math.random() > 0.3) {
entries.push({
projectId,
Expand Down Expand Up @@ -268,12 +270,12 @@ async function seed() {
await db.insert(events).values(entries);
}

console.log("Advanced seed complete!");
console.log("Advanced seed complete!");
process.exit(0);
}

seed().catch((err) => {
console.error("Seed failed:");
console.error("Seed failed:");
console.error(err);
process.exit(1);
});
25 changes: 15 additions & 10 deletions apps/ingestion/src/handlers/ingest.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,26 +19,31 @@ async function getDb(): Promise<DbModule> {
return dbModule;
}

export function __setDbModule(mock: any) {
export function __setDbModule(mock: DbModule) {
dbModule = mock;
}

const ORIGIN_ALLOWLIST: string[] = process.env.ORIGIN_ALLOWLIST
? process.env.ORIGIN_ALLOWLIST.split(",").map(function (o) {
return o.trim();
})
: [];

const INTERNAL_IPS: string[] = process.env.INTERNAL_IP_HASHES
? process.env.INTERNAL_IP_HASHES.split(",").map(function (h) {
return h.trim();
})
: [];

function getOriginAllowlist(): string[] {
if (!process.env.ORIGIN_ALLOWLIST) return [];

return process.env.ORIGIN_ALLOWLIST.split(",")
.map(function (origin) {
return origin.trim();
})
.filter(Boolean);
}

function isOriginAllowed(origin: string | null): boolean {
if (ORIGIN_ALLOWLIST.length === 0) return true;
if (origin && ORIGIN_ALLOWLIST.includes(origin)) return true;
return true;
const allowlist = getOriginAllowlist();
if (allowlist.length === 0) return true;
if (origin && allowlist.includes(origin)) return true;
return false;
}

function isInternalTraffic(ipHash: string | null, localhost: boolean): boolean {
Expand Down
Loading
Loading