diff --git a/scripts/fetch-data.ts b/scripts/fetch-data.ts index 4c06fb0..dd79cef 100644 --- a/scripts/fetch-data.ts +++ b/scripts/fetch-data.ts @@ -1,3 +1,10 @@ +/* Updated: added detailed diagnostic logging around asset download and extraction + - Logs asset metadata before extraction + - On extraction failure logs stdout/stderr from runzip + - Attempts to save the downloaded asset to a temp file via curl for post-mortem + - Lists zip entries (unzip -l) and prints head bytes for debugging + - Keeps behavior otherwise unchanged +*/ import fs from 'fs'; import path from 'path'; import { GraphQLClient, gql } from 'graphql-request'; @@ -10,6 +17,7 @@ import { full as markdownItEmoji } from 'markdown-it-emoji'; import { Octokit } from '@octokit/rest'; import { exec } from 'child_process'; import { promisify } from 'util'; +import os from 'os'; const execAsync = promisify(exec); @@ -557,36 +565,255 @@ function replacePrivateImage(markdown: string, html: string): string { return html; } +// Replace the existing extractModulePropsFromZip with this enhanced diagnostic version. async function extractModulePropsFromZip(downloadUrl: string): Promise<Record<string, string>> { + const props: Record<string, string> = {}; + const token = process.env.GRAPHQL_TOKEN || ''; + const tmpdir = (await import('os')).tmpdir(); + const fs = await import('fs'); + const path = await import('path'); + const util = await import('util'); + const execP = util.promisify((await import('child_process')).exec); + + // Short log helper + const trunc = (s: string, n = 200) => (s && s.length > n ? 
s.slice(0, n) + '...[truncated]' : s); + + console.log(`Diagnostic: starting extraction for URL: ${trunc(downloadUrl, 400)}`); + + // Try to download into memory via fetch (preferred) with retries + let buffer: Buffer | null = null; try { - // Extract module.prop content from zip URL (internal network, stable) - const { stdout: modulePropContent } = await execAsync(`runzip -p "${downloadUrl}" module.prop`, { - encoding: 'utf8', - maxBuffer: 64 * 1024 // 64KB buffer - }); + // dynamic fetch (use global fetch or node-fetch) + let fetchFn: any; + if (typeof (globalThis as any).fetch === 'function') { + fetchFn = (globalThis as any).fetch.bind(globalThis); + } else { + try { + const mod = await import('node-fetch'); + fetchFn = (mod.default || mod) as any; + } catch (e) { + console.warn('node-fetch not available, will fallback to curl later'); + fetchFn = null; + } + } - // Parse module.prop content - const props: Record<string, string> = {}; - if (!modulePropContent) return props; + if (fetchFn) { + let lastErr: any = null; + for (let attempt = 1; attempt <= 3; attempt++) { + try { + console.log(`HTTP: fetch attempt ${attempt} -> ${trunc(downloadUrl, 300)}`); + const res = await fetchFn(downloadUrl, { + method: 'GET', + redirect: 'follow', + headers: token ? { Authorization: `Bearer ${token}` } : {}, + }); + + // Log status and important headers + try { + const statusLine = `HTTP ${res.status} ${res.statusText || ''}`; + console.log(`HTTP: status: ${statusLine}`); + const hdrs: string[] = []; + const hdrNames = ['content-type', 'content-length', 'content-disposition', 'x-ratelimit-remaining', 'retry-after']; + for (const h of hdrNames) { + const v = res.headers?.get ? 
res.headers.get(h) : (res.headers && res.headers[h]); + if (v) hdrs.push(`${h}: ${v}`); + } + if (hdrs.length) console.log('HTTP headers:', hdrs.join(' | ')); + } catch (hdrErr) { + console.warn('HTTP: failed to read some headers:', hdrErr?.message || hdrErr); + } - const lines = modulePropContent.split('\n'); - for (const line of lines) { - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) continue; + if (!res.ok) { + const body = await (res.text?.() ?? Promise.resolve('')); + throw new Error(`HTTP ${res.status} ${res.statusText} - body-snippet: ${trunc(String(body), 500)}`); + } - const eqIndex = trimmed.indexOf('='); - if (eqIndex > 0) { - const key = trimmed.substring(0, eqIndex).trim(); - const value = trimmed.substring(eqIndex + 1).trim(); - props[key] = value; + const arrayBuf = await res.arrayBuffer(); + buffer = Buffer.from(arrayBuf); + console.log(`HTTP: downloaded ${buffer.length} bytes into memory`); + break; + } catch (e: any) { + lastErr = e; + const sleep = 200 * Math.pow(2, attempt - 1); + console.warn(`HTTP fetch attempt ${attempt} failed: ${e?.message || e}. 
Retrying in ${sleep}ms`); + await new Promise(r => setTimeout(r, sleep)); + } } + if (!buffer) throw lastErr || new Error('fetch failed after retries'); + } else { + console.warn('Fetch not available; will fallback to curl download later.'); + throw new Error('no-fetch'); } + } catch (e) { + console.warn('In-memory fetch path failed or unavailable:', e?.message || e); + } - return props; - } catch (err: any) { - console.error(`Failed to extract props from ${downloadUrl}: ${err.message}`); - return {}; + // If we have a buffer, inspect first bytes to detect HTML vs ZIP and try JS unzip + if (buffer) { + try { + const head = buffer.slice(0, 16); + const headHex = head.toString('hex'); + const headStr = head.toString('utf8', 0, Math.min(64, head.length)); + console.log(`Downloaded head (hex): ${headHex.slice(0, 200)}`); + console.log(`Downloaded head (utf8 snippet): ${trunc(headStr, 200)}`); + + // ZIP signature "PK\x03\x04" -> 50 4b 03 04 + if (headHex.startsWith('504b0304')) { + console.log('Detected ZIP signature in downloaded data (PK..). Proceeding with JS unzipper if available.'); + try { + const unzipper = await import('unzipper'); + const directory = await (unzipper as any).Open.buffer(buffer); + console.log(`unzipper: entries count = ${directory.files.length}`); + // Look for module.prop anywhere (root or nested) + let file = directory.files.find((f: any) => f.path === 'module.prop'); + if (!file) file = directory.files.find((f: any) => /(^|\/|\\)module\.prop$/i.test(f.path)); + if (!file) { + console.warn('unzipper: module.prop not found. 
Listing up to 200 entries for debugging:'); + console.warn(directory.files.map((f: any) => f.path).slice(0, 200).join('\n')); + // save buffer to tmp for later analysis + const savePath = path.join(tmpdir, `diag-${Date.now()}.zip`); + fs.writeFileSync(savePath, buffer); + console.warn(`Saved downloaded zip to ${savePath} for post-mortem`); + return {}; + } + const contentBuf: Buffer = await file.buffer(); + const content = contentBuf.toString('utf8'); + console.log(`Found module.prop at path="${file.path}", size=${contentBuf.length} bytes`); + console.log('module.prop snippet (first 400 chars):\n' + trunc(content, 400)); + // parse properties + for (const line of content.split(/\r?\n/)) { + const t = line.trim(); + if (!t || t.startsWith('#')) continue; + const idx = t.indexOf('='); + if (idx > 0) props[t.substring(0, idx).trim()] = t.substring(idx + 1).trim(); + } + console.log(`Parsed module.prop keys: ${Object.keys(props).join(', ')}`); + return props; + } catch (jsUnzipErr: any) { + console.warn('JS unzip (unzipper) failed:', jsUnzipErr?.message || jsUnzipErr); + // save buffer for analysis + try { + const savePath = path.join(tmpdir, `diag-buffer-failed-${Date.now()}.zip`); + fs.writeFileSync(savePath, buffer); + console.warn(`Saved buffer to ${savePath} for post-mortem`); + } catch (saveErr: any) { + console.warn('Failed to save buffer for post-mortem:', saveErr?.message || saveErr); + } + // fallthrough to external unzip fallback + } + } else { + console.warn('Downloaded head does NOT look like ZIP. It may be HTML/error page. 
head snippet:', trunc(headStr, 200)); + // save buffer for analysis + try { + const savePath = path.join(tmpdir, `diag-nonzip-${Date.now()}.bin`); + fs.writeFileSync(savePath, buffer); + console.warn(`Saved downloaded response to ${savePath} for post-mortem`); + } catch (saveErr: any) { + console.warn('Failed to save non-zip buffer:', saveErr?.message || saveErr); + } + // no point continuing JS-unzip path + } + } catch (inspectErr: any) { + console.warn('Failed to inspect downloaded buffer:', inspectErr?.message || inspectErr); + } } + + // External-tool fallback: write file via curl and use unzip -l / unzip -p to inspect and extract module.prop + try { + const tmpDir = fs.mkdtempSync(path.join(tmpdir, 'diag-curl-')); + const tmpFile = path.join(tmpDir, `asset-${Date.now()}.zip`); + const authHeader = token ? `-H "Authorization: Bearer ${token}"` : ''; + console.log(`Fallback: saving remote asset to ${tmpFile} using curl (authHeader present: ${!!token})`); + try { + // use curl -I to get headers first + try { + const { stdout: headOut } = await execP(`curl -I -L ${authHeader} "${downloadUrl}"`, { maxBuffer: 64 * 1024 }); + console.log('curl -I -L headers:\n' + trunc(headOut, 2000)); + } catch (hiErr: any) { + console.warn('curl -I failed:', hiErr?.message || hiErr); + } + + // then try to download + try { + await execP(`curl -sSL -f ${authHeader} "${downloadUrl}" -o "${tmpFile}"`, { maxBuffer: 200 * 1024 * 1024 }); + console.log(`curl: downloaded file saved to ${tmpFile}`); + } catch (curlErr: any) { + console.error('curl download failed:', curlErr?.message || curlErr); + if (curlErr?.stdout) console.error('curl stdout snippet:', trunc(String(curlErr.stdout), 2000)); + if (curlErr?.stderr) console.error('curl stderr snippet:', trunc(String(curlErr.stderr), 2000)); + // keep going to try to list file if present + } + + // If file exists, list entries + if (fs.existsSync(tmpFile)) { + try { + const { stdout: listOut } = await execP(`unzip -l "${tmpFile}"`, { 
maxBuffer: 200 * 1024 }); + console.log('unzip -l output (first 200 lines):\n' + listOut.split('\n').slice(0, 200).join('\n')); + } catch (listErr: any) { + console.warn('unzip -l failed:', listErr?.message || listErr); + try { + const { stdout: ziOut } = await execP(`zipinfo -1 "${tmpFile}"`, { maxBuffer: 200 * 1024 }); + console.log('zipinfo -1 output (first 200 entries):\n' + ziOut.split('\n').slice(0, 200).join('\n')); + } catch (ziErr: any) { + console.warn('zipinfo failed:', ziErr?.message || ziErr); + } + } + + // try to find module.prop entry via zipinfo and extract it + try { + const { stdout: entriesOut } = await execP(`zipinfo -1 "${tmpFile}"`, { maxBuffer: 200 * 1024 }); + const entries = entriesOut.split('\n').map(s => s.trim()).filter(Boolean); + const candidate = entries.find(e => e === 'module.prop') || entries.find(e => /(^|\/|\\)module\.prop$/i.test(e)); + if (candidate) { + console.log(`Found module.prop entry in zip: ${candidate}. Attempting to extract via unzip -p`); + try { + const { stdout: propOut } = await execP(`unzip -p "${tmpFile}" "${candidate.replace(/"/g,'\\"')}"`, { maxBuffer: 128 * 1024, encoding: 'utf8' } as any); + console.log('module.prop content snippet (first 400 chars):\n' + trunc(propOut, 400)); + for (const line of propOut.split(/\r?\n/)) { + const t = line.trim(); + if (!t || t.startsWith('#')) continue; + const idx = t.indexOf('='); + if (idx > 0) props[t.substring(0, idx).trim()] = t.substring(idx + 1).trim(); + } + console.log(`Parsed module.prop keys (fallback): ${Object.keys(props).join(', ')}`); + return props; + } catch (extractErr: any) { + console.warn('unzip -p extraction failed:', extractErr?.message || extractErr); + } + } else { + console.warn('No module.prop entry found in zip entries'); + } + } catch (entriesErr: any) { + console.warn('Failed to list zip entries for candidate search:', entriesErr?.message || entriesErr); + } + + // dump first bytes of file to help identify HTML vs ZIP + try { + const stats = 
fs.statSync(tmpFile); + const fd = fs.openSync(tmpFile, 'r'); + const headLen = Math.min(256, stats.size); + const buf = Buffer.alloc(headLen); + fs.readSync(fd, buf, 0, headLen, 0); + fs.closeSync(fd); + console.log(`Saved file size: ${stats.size} bytes, head (hex): ${buf.toString('hex').slice(0, 512)}`); + } catch (headErr: any) { + console.warn('Failed to read head bytes of saved file:', headErr?.message || headErr); + } + + console.warn(`Diagnostic: kept downloaded file for post-mortem at ${tmpFile} (directory ${tmpDir})`); + } else { + console.warn('Diagnostic: curl did not produce a saved file (download may have failed)'); + } + } catch (outerErr: any) { + console.warn('Fallback diagnostic failed:', outerErr?.message || outerErr); + } + } catch (finalErr: any) { + console.warn('Final diagnostics path encountered an error:', finalErr?.message || finalErr); + } + + // If we reached here, no module.prop was parsed + console.warn('Diagnostic: unable to extract module.prop from URL. Returning empty props.'); + return {}; } const RESERVED_NAMES = ['.github', 'submission', 'developers', 'modules', 'org.kernelsu.example', "module_release"]; @@ -667,6 +894,16 @@ async function convert2json(repo: GraphQlRepository): Promise { return null; } + // Add detailed logs about the asset we're about to process + try { + console.log(`Processing asset for ${repo.name}@${node.tagName}: assetName="${zipAsset.node.name}", size=${zipAsset.node.size}, contentType=${zipAsset.node.contentType}`); + // Show downloadUrl partially (avoid leaking extremely long urls) + const shortUrl = zipAsset.node.downloadUrl ? (zipAsset.node.downloadUrl.length > 200 ? 
`${zipAsset.node.downloadUrl.slice(0,200)}...[truncated]` : zipAsset.node.downloadUrl) : 'N/A'; + console.log(`Asset downloadUrl (truncated): ${shortUrl}`); + } catch (logErr: any) { + console.warn(`Failed to log asset metadata: ${logErr?.message || logErr}`); + } + const moduleProps = await extractModulePropsFromZip(zipAsset.node.downloadUrl); // Check if module.prop exists (empty props means extraction failed)