/**
 * Extracts `module.prop` from a remote release ZIP and parses it into
 * key/value pairs.
 *
 * The archive is read by invoking the `runzip` command as
 * `runzip -p <url> module.prop`; whatever it writes to stdout is treated as
 * the file content. The command is started without a shell (argument array),
 * so the URL can never be interpreted as shell syntax.
 *
 * Each non-empty line that does not start with `#` and contains `=` after at
 * least one character is split at the first `=`; key and value are trimmed.
 *
 * @param downloadUrl Asset URL. Must start with `https://github.com/` or
 *   `https://objects.githubusercontent.com/` and consist only of the
 *   characters `a-z A-Z 0-9 / : . - _ ? = & %`.
 * @returns The parsed properties, or an empty object when the URL is
 *   rejected, the command fails, or it produces no output. This function
 *   never throws; failures are logged via `console.error`.
 */
async function extractModulePropsFromZip(downloadUrl: string): Promise<Record<string, string>> {
  const props: Record<string, string> = {};
  const MAX_LOG_LENGTH = 500; // Maximum length for truncated stderr/stdout log output
  const MAX_URL_LOG_LENGTH = 100; // URLs may carry signed query parameters; never log them in full

  // Coerce defensively so a non-string value coming from upstream data is
  // rejected by validation below instead of throwing outside the try block.
  const urlText = String(downloadUrl);
  const shortUrl =
    urlText.length > MAX_URL_LOG_LENGTH ? `${urlText.substring(0, MAX_URL_LOG_LENGTH)}...` : urlText;

  try {
    console.log(`Extracting module.prop from: ${shortUrl}`);

    // Only accept assets served from the expected GitHub hosts.
    const allowedPrefixes = ['https://github.com/', 'https://objects.githubusercontent.com/'];
    if (!allowedPrefixes.some((prefix) => urlText.startsWith(prefix))) {
      throw new Error('Invalid download URL: must be from github.com or githubusercontent.com');
    }

    // Defense in depth: whitelist of plain URL characters (alphanumeric, /, :, ., -, _, ?, =, &, %).
    if (!/^[a-zA-Z0-9/:.\-_?=&%]+$/.test(urlText)) {
      throw new Error('Invalid download URL: contains disallowed characters');
    }

    // Function-scope imports keep this block self-contained.
    const { execFile } = await import('child_process');
    const { promisify } = await import('util');

    // execFile passes arguments directly to the process (no shell involved),
    // so no quoting or escaping of the URL is needed.
    const { stdout: modulePropContent } = await promisify(execFile)(
      'runzip',
      ['-p', urlText, 'module.prop'],
      { encoding: 'utf8', maxBuffer: 128 * 1024 },
    );

    if (!modulePropContent) {
      console.warn('runzip returned empty content for module.prop');
      return props;
    }

    console.log(`Successfully extracted module.prop (${modulePropContent.length} bytes)`);

    // Parse module.prop content: `key=value` per line, `#` starts a comment line.
    for (const line of modulePropContent.split(/\r?\n/)) {
      const trimmed = line.trim();
      if (!trimmed || trimmed.startsWith('#')) continue;

      const eqIndex = trimmed.indexOf('=');
      if (eqIndex > 0) {
        props[trimmed.substring(0, eqIndex).trim()] = trimmed.substring(eqIndex + 1).trim();
      }
    }

    const propKeys = Object.keys(props);
    console.log(`Parsed ${propKeys.length} properties from module.prop: ${propKeys.join(', ')}`);
    return props;
  } catch (err: unknown) {
    const message = err instanceof Error ? err.message : String(err);
    console.error(`Failed to extract props from ${shortUrl}: ${message}`);

    // Child-process failures attach code/stderr/stdout to the error object;
    // surface them (truncated) to help diagnose CI failures.
    if (typeof err === 'object' && err !== null) {
      const details = err as { code?: unknown; stderr?: unknown; stdout?: unknown };
      if (details.code) console.error(`Error code: ${String(details.code)}`);
      if (details.stderr) console.error(`stderr: ${String(details.stderr).substring(0, MAX_LOG_LENGTH)}`);
      if (details.stdout) console.error(`stdout: ${String(details.stdout).substring(0, MAX_LOG_LENGTH)}`);
    }

    return {};
  }
}

const RESERVED_NAMES = ['.github', 'submission', 'developers', 'modules', 'org.kernelsu.example', "module_release"];
`${zipAsset.node.downloadUrl.slice(0, 200)}...[truncated]` : zipAsset.node.downloadUrl) : 'N/A'; console.log(`Asset downloadUrl (truncated): ${shortUrl}`); } catch (logErr: any) { console.warn(`Failed to log asset metadata: ${logErr?.message || logErr}`);