Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion test/cli/daemon-kubo-restart-race.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,11 @@ describe("daemon shutdown with a late signal-exit registrant (issue #70)", () =>
const shutdownRes = await fetch(`${SIGEXIT_KUBO_API_URL}/shutdown`, { method: "POST" });
expect(shutdownRes.status).toBe(200);

// This is a setup precondition, not the assertion under test: after /shutdown the
// daemon must auto-restart kubo before we can verify it dies on SIGTERM. The detect
// window must absorb the injected PKC_CLI_TEST_IPFS_READY_DELAY_MS (5s) plus restart
// time on a heavily-loaded CI runner — locally this lands in ~13s, but ubuntu CI has
// been observed taking >30s, so widen to 60s (well within the test's 120s cap, issue #77).
const kuboRestarted = await waitForCondition(
async () => {
try {
Expand All @@ -256,7 +261,7 @@ describe("daemon shutdown with a late signal-exit registrant (issue #70)", () =>
return false;
}
},
30000,
60000,
500
);
expect(kuboRestarted).toBe(true);
Expand Down
45 changes: 28 additions & 17 deletions test/cli/daemon.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -389,7 +389,7 @@ describe("bitsocial daemon kubo restart cleanup", async () => {
// without running exit hooks (asyncExitHook/process.on("exit")), so the daemon has no
// opportunity to clean up kubo. On Unix, SIGTERM is caught by the exit hook which runs
// killKuboProcess(). The normal user path (Ctrl+C/SIGINT) works on all platforms.
it.skipIf(process.platform === "win32")("stops kubo when daemon exits during a restart cycle", { timeout: 60000 }, async () => {
it.skipIf(process.platform === "win32")("stops kubo when daemon exits during a restart cycle", { timeout: 120000 }, async () => {
const previousDelay = process.env["PKC_CLI_TEST_IPFS_READY_DELAY_MS"];
process.env["PKC_CLI_TEST_IPFS_READY_DELAY_MS"] = "5000";

Expand All @@ -404,17 +404,42 @@ describe("bitsocial daemon kubo restart cleanup", async () => {
);
expect(typeof daemonProcess.pid).toBe("number");

// Diagnostics for the flaky-on-CI failures (issue #70/#77): the daemon's DEBUG output is
// redirected to its log files (not stderr), so dump those — they show which kubo pids were
// spawned, restarted and killed, and crucially the timing. Without this the CI log only
// contains the bare assertion. Fired on BOTH precondition failures below.
const dumpDiagnostics = async (reason: string) => {
const proc = daemonProcess!;
const tail = (text: string | undefined, lines: number) => (text ?? "").split("\n").slice(-lines).join("\n");
console.log(`[restart-cleanup diagnostics] ${reason}`);
console.log(`[restart-cleanup diagnostics] daemon exitCode=${proc.exitCode} signalCode=${proc.signalCode}`);
console.log(`[restart-cleanup diagnostics] daemon stdout tail:\n${tail(proc.capturedStdout, 40)}`);
console.log(`[restart-cleanup diagnostics] daemon stderr tail:\n${tail(proc.capturedStderr, 60)}`);
for (const logFile of await fsPromise.readdir(cleanupLogDir).catch(() => [] as string[])) {
const content = await fsPromise.readFile(path.join(cleanupLogDir, logFile), "utf-8").catch(() => "");
console.log(`[restart-cleanup diagnostics] log file ${logFile} tail:\n${tail(content, 250)}`);
}
};

const shutdownRes = await fetch(`${cleanupKuboApiUrl}/shutdown`, { method: "POST" });
expect(shutdownRes.status).toBe(200);

// Setup precondition (not the assertion under test): after /shutdown the daemon must
// auto-restart kubo before we can verify it dies on SIGTERM. The detect window must absorb
// the injected PKC_CLI_TEST_IPFS_READY_DELAY_MS (5s) plus restart under CI contention —
// test files run in parallel forks (fileParallelism) and a 2-vCPU ubuntu runner spawns many
// kubo nodes concurrently, so restart that lands in ~13s locally was observed taking >20s on
// CI (the old window). Widened to 45s; the test timeout was raised to 120s to match. If it
// still fails, dumpDiagnostics() prints the daemon's kubo restart timeline (issue #77).
const kuboRestarted = await waitForCondition(async () => {
try {
const res = await fetch(`${cleanupKuboApiUrl}/bitswap/stat`, { method: "POST" });
return res.ok;
} catch {
return false;
}
}, 20000, 500);
}, 45000, 500);
if (!kuboRestarted) await dumpDiagnostics(`kubo did not come back up on ${cleanupKuboApiUrl} within the restart window`);
expect(kuboRestarted).toBe(true);

const killed = daemonProcess.kill();
Expand All @@ -433,21 +458,7 @@ describe("bitsocial daemon kubo restart cleanup", async () => {
return true;
}
}, 10000, 500);
if (!kuboStoppedAfterKill) {
// Diagnostics for the flaky-on-CI failure (issue #70): the daemon's DEBUG output is
// redirected to its log files (not stderr), so dump those — they show which kubo
// pids were spawned, restarted and killed. Without this the CI log only contains
// the bare assertion.
const tail = (text: string | undefined, lines: number) => (text ?? "").split("\n").slice(-lines).join("\n");
console.log(`[restart-cleanup diagnostics] kubo still responding on ${cleanupKuboApiUrl} after daemon exit`);
console.log(`[restart-cleanup diagnostics] daemon exitCode=${daemonProcess.exitCode} signalCode=${daemonProcess.signalCode}`);
console.log(`[restart-cleanup diagnostics] daemon stdout tail:\n${tail(daemonProcess.capturedStdout, 40)}`);
console.log(`[restart-cleanup diagnostics] daemon stderr tail:\n${tail(daemonProcess.capturedStderr, 60)}`);
for (const logFile of await fsPromise.readdir(cleanupLogDir).catch(() => [] as string[])) {
const content = await fsPromise.readFile(path.join(cleanupLogDir, logFile), "utf-8").catch(() => "");
console.log(`[restart-cleanup diagnostics] log file ${logFile} tail:\n${tail(content, 250)}`);
}
}
if (!kuboStoppedAfterKill) await dumpDiagnostics(`kubo still responding on ${cleanupKuboApiUrl} after daemon exit`);
expect(kuboStoppedAfterKill).toBe(true);
} finally {
if (daemonProcess) await stopPkcDaemon(daemonProcess);
Expand Down
182 changes: 105 additions & 77 deletions test/cli/logs.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,71 @@ const runBitsocialLogs = (args: string[], logDir: string): Promise<{ stdout: str
const buildLogLine = (date: Date, message: string, stream?: "stdout" | "stderr") =>
stream ? `[${date.toISOString()}] [${stream}] ${message}` : `[${date.toISOString()}] ${message}`;

// Drive a `bitsocial logs -f ...` process by observing its output. `onChunk` runs on every stdout
// chunk (use it to trigger file writes once a marker appears). The process is SIGINT-killed as soon
// as `doneWhen(stdout, stderr)` returns true — plus an optional `graceMs` afterwards so any late or
// erroneously-unfiltered output can still surface — or after `maxMs` as a safety cap. Anchoring the
// kill to observed output rather than a fixed wall-clock window from spawn keeps these tests robust
// on slow/loaded CI runners, where CLI cold-start alone can consume several seconds and a fixed
// window would kill the process before the awaited line is ever emitted (issue #77).
const runFollowUntil = (
args: string[],
logDir: string,
opts: {
onChunk?: (stdout: string) => void;
doneWhen: (stdout: string, stderr: string) => boolean;
graceMs?: number;
maxMs?: number;
}
): Promise<{ stdout: string; stderr: string; timedOut: boolean }> => {
const { onChunk, doneWhen, graceMs = 0, maxMs = 25000 } = opts;
return new Promise((resolve, reject) => {
const proc = spawn("node", ["./bin/run", "logs", "--logPath", logDir, ...args], {
stdio: ["pipe", "pipe", "pipe"]
});

let stdout = "";
let stderr = "";
let killed = false;
let finishing = false;
let timedOut = false;
const kill = () => {
if (killed) return;
killed = true;
proc.kill("SIGINT");
};
const safety = setTimeout(() => {
timedOut = true;
kill();
}, maxMs);
const maybeFinish = () => {
if (finishing || killed) return;
if (doneWhen(stdout, stderr)) {
finishing = true;
if (graceMs > 0) setTimeout(kill, graceMs);
else kill();
}
};
proc.stdout.on("data", (data: Buffer) => {
stdout += data.toString();
onChunk?.(stdout);
maybeFinish();
});
proc.stderr.on("data", (data: Buffer) => {
stderr += data.toString();
maybeFinish();
});
proc.on("close", () => {
clearTimeout(safety);
resolve({ stdout, stderr, timedOut });
});
proc.on("error", (err) => {
clearTimeout(safety);
reject(err);
});
});
};

describe("bitsocial logs (synthetic log file tests)", () => {
let logDir: string;
let logFile: string;
Expand Down Expand Up @@ -338,38 +403,17 @@ describe("bitsocial logs -f log file rotation (synthetic)", () => {
const file1 = path.join(logDir, "bitsocial_cli_daemon_2026-03-01T00-00-00.000Z.log");
await fsPromise.writeFile(file1, buildLogLine(new Date("2026-03-01T00:00:00.000Z"), "INITIAL_MARKER") + "\n");

const result = await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
const proc = spawn("node", ["./bin/run", "logs", "--logPath", logDir, "-f"], {
stdio: ["pipe", "pipe", "pipe"]
});

let stdout = "";
let stderr = "";
let createdNewFile = false;
proc.stdout.on("data", (data: Buffer) => {
stdout += data.toString();
// Wait for initial output before creating the new file
let createdNewFile = false;
const result = await runFollowUntil(["-f"], logDir, {
// Wait for initial output before creating the new file
onChunk: (stdout) => {
if (!createdNewFile && stdout.includes("INITIAL_MARKER")) {
createdNewFile = true;
const file2 = path.join(logDir, "bitsocial_cli_daemon_2026-03-01T01-00-00.000Z.log");
fsPromise.writeFile(file2, buildLogLine(new Date("2026-03-01T01:00:00.000Z"), "NEW_FILE_MARKER") + "\n");
}
});
proc.stderr.on("data", (data: Buffer) => { stderr += data.toString(); });

// Wait long enough for the 3-second new-file check to fire, then kill
const timer = setTimeout(() => {
proc.kill("SIGINT");
}, 8000);

proc.on("close", () => {
clearTimeout(timer);
resolve({ stdout, stderr });
});
proc.on("error", (err) => {
clearTimeout(timer);
reject(err);
});
},
doneWhen: (stdout, stderr) => stdout.includes("NEW_FILE_MARKER") && stderr.includes("switched to new log file")
});

expect(result.stdout).toContain("INITIAL_MARKER");
Expand All @@ -382,16 +426,9 @@ describe("bitsocial logs -f log file rotation (synthetic)", () => {
const file1 = path.join(logDir, "bitsocial_cli_daemon_2026-04-01T00-00-00.000Z.log");
await fsPromise.writeFile(file1, buildLogLine(new Date("2026-04-01T00:00:00.000Z"), "initial stdout", "stdout") + "\n");

const result = await new Promise<{ stdout: string; stderr: string }>((resolve, reject) => {
const proc = spawn("node", ["./bin/run", "logs", "--logPath", logDir, "--stdout", "-f"], {
stdio: ["pipe", "pipe", "pipe"]
});

let stdout = "";
let stderr = "";
let createdNewFile = false;
proc.stdout.on("data", (data: Buffer) => {
stdout += data.toString();
let createdNewFile = false;
const result = await runFollowUntil(["--stdout", "-f"], logDir, {
onChunk: (stdout) => {
if (!createdNewFile && stdout.includes("initial stdout")) {
createdNewFile = true;
const file2 = path.join(logDir, "bitsocial_cli_daemon_2026-04-01T01-00-00.000Z.log");
Expand All @@ -401,21 +438,11 @@ describe("bitsocial logs -f log file rotation (synthetic)", () => {
].join("\n") + "\n";
fsPromise.writeFile(file2, content);
}
});
proc.stderr.on("data", (data: Buffer) => { stderr += data.toString(); });

const timer = setTimeout(() => {
proc.kill("SIGINT");
}, 8000);

proc.on("close", () => {
clearTimeout(timer);
resolve({ stdout, stderr });
});
proc.on("error", (err) => {
clearTimeout(timer);
reject(err);
});
},
doneWhen: (stdout) => stdout.includes("new stdout msg"),
// Both lines are written together; grace period lets an erroneously-unfiltered
// stderr line surface before we kill, so the negative assertion stays meaningful.
graceMs: 1000
});

expect(result.stdout).toContain("initial stdout");
Expand All @@ -428,36 +455,37 @@ describe("bitsocial logs -f log file rotation (synthetic)", () => {
const file1 = path.join(logDir, "bitsocial_cli_daemon_2026-05-01T00-00-00.000Z.log");
await fsPromise.writeFile(file1, buildLogLine(new Date("2026-05-01T00:00:00.000Z"), "initial line") + "\n");

const result = await new Promise<{ stdout: string }>((resolve, reject) => {
const proc = spawn("node", ["./bin/run", "logs", "--logPath", logDir, "-f"], {
stdio: ["pipe", "pipe", "pipe"]
});

let stdout = "";
let appended = false;
proc.stdout.on("data", (data: Buffer) => {
stdout += data.toString();
// Only append after initial line has been observed, so CLI startup time doesn't matter
let appended = false;
let appendError: unknown;
const result = await runFollowUntil(["-f"], logDir, {
// Only append after initial line has been observed, so CLI startup time doesn't matter
onChunk: (stdout) => {
if (!appended && stdout.includes("initial line")) {
appended = true;
fsPromise.appendFile(file1, buildLogLine(new Date("2026-05-01T00:01:00.000Z"), "APPENDED_LINE") + "\n");
fsPromise
.appendFile(file1, buildLogLine(new Date("2026-05-01T00:01:00.000Z"), "APPENDED_LINE") + "\n")
.catch((err) => {
appendError = err;
});
}
});

const timer = setTimeout(() => {
proc.kill("SIGINT");
}, 8000);

proc.on("close", () => {
clearTimeout(timer);
resolve({ stdout });
});
proc.on("error", (err) => {
clearTimeout(timer);
reject(err);
});
},
doneWhen: (stdout) => stdout.includes("APPENDED_LINE")
});

if (!result.stdout.includes("APPENDED_LINE")) {
// Diagnostics for the flaky-on-CI failure (issue #77): the test waited the full safety cap
// without ever seeing APPENDED_LINE. Distinguish the three possible causes so the next CI
// run is conclusive instead of a guess:
// - append never reached disk -> appendTriggered/appendError + on-disk content show it
// - logs -f never detected it -> on-disk has APPENDED_LINE but captured stdout does not
// - output truncated on exit -> timedOut true and the line is on disk but not captured
const onDisk = await fsPromise.readFile(file1, "utf-8").catch((err) => `<read failed: ${err}>`);
console.log(`[follow-append diagnostics] timedOut=${result.timedOut} appendTriggered=${appended} appendError=${String(appendError)}`);
console.log(`[follow-append diagnostics] file1 on disk (${file1}):\n${onDisk}`);
console.log(`[follow-append diagnostics] captured stdout (${result.stdout.length} bytes):\n${JSON.stringify(result.stdout)}`);
console.log(`[follow-append diagnostics] captured stderr:\n${JSON.stringify(result.stderr)}`);
}

expect(result.stdout).toContain("initial line");
expect(result.stdout).toContain("APPENDED_LINE");
});
Expand Down
Loading