diff --git a/home/dot_local/bin/executable_commit-helper b/home/dot_local/bin/executable_commit-helper
index 894ccdf..ec6e9d9 100644
--- a/home/dot_local/bin/executable_commit-helper
+++ b/home/dot_local/bin/executable_commit-helper
@@ -26,15 +26,26 @@ const IGNORE_PATTERNS = [
   /\.min\.(js|css)$/,
   /\.bundle\.(js|css)$/,
   /dist\/.*\.map$/,
-  /\.svg$/, // Often generated or binary-like
 ];
 
 // Binary/large file extensions to skip in diffs
 const BINARY_EXTENSIONS = [
-  '.png', '.jpg', '.jpeg', '.gif', '.ico', '.webp',
-  '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot',
-  '.mp4', '.mp3', '.wav', '.avi', '.mov',
-  '.so', '.dylib', '.dll', '.exe',
+  '.png', '.jpg', '.jpeg', '.gif', '.ico', '.webp', '.avif', '.bmp',
+  '.svg', // Often base64-encoded or huge
+  '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot', '.otf',
+  '.mp4', '.mp3', '.wav', '.avi', '.mov', '.webm',
+  '.so', '.dylib', '.dll', '.exe', '.bin',
+  '.wasm', '.pyc', '.class',
+  '.db', '.sqlite', '.sqlite3',
+  '.lockb', // Bun binary lockfile
+];
+
+// Lockfiles - always truncate to first 100 lines
+const LOCKFILE_PATTERNS = [
+  /\.lock$/,
+  /[-.]lock\.(json|ya?ml)$/, // package-lock.json, pnpm-lock.yaml; not clock.json
+  /(^|\/)(package|pnpm|yarn|bun|composer|Cargo|Gemfile|Pipfile|poetry)[-.]lock/, // any directory
+  /(^|\/)go\.sum$/, // any directory, not only the repo root
 ];
 
 interface ChangeStats {
@@ -72,6 +83,84 @@ function parseNumstat(numstat: string): FileChange[] {
   });
 }
 
+/**
+ * Check if content is predominantly base64-encoded
+ */
+function hasBase64Pattern(content: string): boolean {
+  const base64Chunks = content.match(/[A-Za-z0-9+/=]{100,}/g) || [];
+  const base64Length = base64Chunks.reduce((sum, chunk) => sum + chunk.length, 0);
+  return base64Length > content.length * 0.3; // >30% base64
+}
+
+/**
+ * Determine if a file should be aggressively truncated
+ */
+function shouldCullAggressively(file: string, content: string): boolean {
+  const lines = content.split('\n');
+  const avgLineLength = content.length / Math.max(lines.length, 1);
+
+  return (
+    // Known lockfiles
+    LOCKFILE_PATTERNS.some(p => p.test(file)) ||
+
+    // Extremely long average line length (minified/generated)
+    avgLineLength > 200 ||
+
+    // Any single line over 5000 chars
+    lines.some(line => line.length > 5000) ||
+
+    // Predominantly base64 content
+    hasBase64Pattern(content) ||
+
+    // Known generated patterns
+    /\.generated\./i.test(file) ||
+    /\.min\./i.test(file) ||
+    /\.bundle\./i.test(file) ||
+    content.includes('/* @generated */') ||
+    content.includes('// Auto-generated') ||
+    content.includes('@autogenerated')
+  );
+}
+
+/**
+ * Check if file is binary based on extension
+ */
+function isBinaryFile(path: string): boolean {
+  return BINARY_EXTENSIONS.some(ext => path.toLowerCase().endsWith(ext));
+}
+
+/**
+ * Format file size in human-readable format
+ */
+function formatBytes(bytes: number): string {
+  if (bytes < 1024) return `${bytes} B`;
+  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
+  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
+}
+
+/**
+ * Count lines in text
+ */
+function countLines(text: string): number {
+  return text.split('\n').length;
+}
+
+/**
+ * Truncate text to maximum number of lines
+ */
+function truncateToLines(text: string, maxLines: number): string {
+  const lines = text.split('\n');
+  if (lines.length <= maxLines) return text;
+  return lines.slice(0, maxLines).join('\n');
+}
+
+/**
+ * Format binary file summary for diff
+ */
+function formatBinarySummary(filePath: string): string {
+  return `Binary file: ${filePath}\n(content omitted)`;
+}
+
 /**
  * Get overall change statistics
  */
@@ -139,16 +228,11 @@ function getFileSummary(files: FileChange[]): string {
  * Get filtered diff output (excluding ignored files)
  */
 async function getFilteredDiff(staged: boolean): Promise<string> {
-  const command = staged
-    ? 'git diff --staged'
-    : 'git diff HEAD~1..HEAD';
-
   // Get list of files to exclude
-  const numstatCmd = staged
-    ? 'git diff --staged --numstat'
-    : 'git diff HEAD~1..HEAD --numstat';
+  const numstat = staged
+    ? await $`git diff --staged --numstat`.text()
+    : await $`git diff HEAD~1..HEAD --numstat`.text();
 
-  const numstat = await $`sh -c ${numstatCmd}`.text();
   const files = parseNumstat(numstat);
   const filesToExclude = files
     .filter(f => f.shouldIgnore || f.isBinary)
@@ -156,23 +240,41 @@ async function getFilteredDiff(staged: boolean): Promise<string> {
 
   // Build diff command with exclusions
   if (filesToExclude.length === 0) {
-    return await $`sh -c ${command}`.text();
+    return staged
+      ? await $`git diff --staged`.text()
+      : await $`git diff HEAD~1..HEAD`.text();
   }
 
-  // Git diff with pathspec exclusions
-  const excludeArgs = filesToExclude.map(f => `:(exclude)${f}`).join(' ');
-  const fullCommand = `${command} -- . ${excludeArgs}`;
-
+  // Git diff with pathspec exclusions - construct as array to avoid shell quoting issues
   try {
-    return await $`sh -c ${fullCommand}`.text();
+    const baseArgs = staged ? ['diff', '--staged'] : ['diff', 'HEAD~1..HEAD'];
+    const args = [...baseArgs, '--', '.', ...filesToExclude.map(f => `:(exclude)${f}`)];
+
+    // Use Bun.spawn to call git with proper argument handling
+    const proc = Bun.spawn(['git', ...args], {
+      cwd: process.cwd(),
+      stdout: 'pipe',
+      stderr: 'ignore', // never read here; an unread pipe could stall git
+    });
+
+    const output = await new Response(proc.stdout).text();
+    const exitCode = await proc.exited;
+    if (exitCode !== 0) {
+      // Bun.spawn resolves on failure too; throw so the fallback below runs
+      throw new Error(`git diff exited with code ${exitCode}`);
+    }
+
+    return output;
   } catch {
-    // If exclusion fails, just return full diff
-    return await $`sh -c ${command}`.text();
+    // If exclusion fails, return diff without exclusions
+    return staged
+      ? await $`git diff --staged`.text()
+      : await $`git diff HEAD~1..HEAD`.text();
   }
 }
 
 /**
- * Truncate diff to fit within line budget
+ * Truncate diff with per-file character/line limits and smart culling
  */
 function truncateDiff(diff: string, maxLines: number, filesInfo: string): string {
   const lines = diff.split('\n');
@@ -181,9 +283,14 @@ function truncateDiff(diff: string, maxLines: number, filesInfo: string): string
     return diff;
   }
 
-  // Try to include complete file diffs rather than cutting mid-file
-  const fileDiffs: Array<{ header: string; content: string; lineCount: number }> = [];
-  let currentFile: { header: string; lines: string[] } | null = null;
+  // Parse into individual file diffs
+  const fileDiffs: Array<{
+    header: string;
+    content: string;
+    lineCount: number;
+    path: string;
+  }> = [];
+  let currentFile: { header: string; lines: string[]; path: string } | null = null;
 
   for (const line of lines) {
     if (line.startsWith('diff --git')) {
@@ -192,9 +299,13 @@ function truncateDiff(diff: string, maxLines: number, filesInfo: string): string
           header: currentFile.header,
           content: currentFile.lines.join('\n'),
           lineCount: currentFile.lines.length,
+          path: currentFile.path,
         });
       }
-      currentFile = { header: line, lines: [line] };
+      // Extract file path from "diff --git a/path b/path"
+      const match = line.match(/diff --git a\/(.*?) b\//);
+      const path = match ? match[1] : 'unknown';
+      currentFile = { header: line, lines: [line], path };
     } else if (currentFile) {
       currentFile.lines.push(line);
     }
@@ -205,25 +316,70 @@ function truncateDiff(diff: string, maxLines: number, filesInfo: string): string
       header: currentFile.header,
       content: currentFile.lines.join('\n'),
       lineCount: currentFile.lines.length,
+      path: currentFile.path,
     });
   }
 
-  // Include files until we hit the limit
-  let includedLines = 0;
+  // Process each file with per-file limits and smart culling
+  let totalLines = 0;
   const includedDiffs: string[] = [];
-  const omittedFiles: string[] = [];
+  const omittedFiles: Array<{file: string, reason: string}> = [];
 
   for (const fileDiff of fileDiffs) {
-    if (includedLines + fileDiff.lineCount <= maxLines - 10) { // Reserve space for summary
-      includedDiffs.push(fileDiff.content);
-      includedLines += fileDiff.lineCount;
-    } else {
-      // Extract filename from diff header
-      const match = fileDiff.header.match(/diff --git a\/(.*?) b\//);
-      if (match) {
-        omittedFiles.push(match[1]);
+    // Check if binary file
+    if (isBinaryFile(fileDiff.path)) {
+      const summary = formatBinarySummary(fileDiff.path);
+      includedDiffs.push(summary);
+      totalLines += countLines(summary);
+      continue;
+    }
+
+    let content = fileDiff.content;
+    const fileLines = fileDiff.lineCount;
+    const fileChars = content.length;
+    let truncationNotice = '';
+
+    // Apply per-file safety limits
+    const CHAR_THRESHOLD = 10000;
+    const LINE_THRESHOLD = 1500;
+
+    if (fileChars > CHAR_THRESHOLD || fileLines > LINE_THRESHOLD) {
+      // File exceeded threshold - check if it should be culled
+      if (shouldCullAggressively(fileDiff.path, content)) {
+        // Check if it's a lockfile (special handling)
+        if (LOCKFILE_PATTERNS.some(p => p.test(fileDiff.path))) {
+          content = truncateToLines(content, 100);
+          truncationNotice = `\n... (lockfile truncated - showing first 100 of ${fileLines} lines)`;
+        } else {
+          // Other noise - aggressive truncation. Cap characters as well:
+          // 30 minified/base64 lines can still be megabytes
+          content = truncateToLines(content, 30).slice(0, CHAR_THRESHOLD);
+          truncationNotice = `\n... (generated/noisy file truncated - showing first 30 of ${fileLines} lines, ${formatBytes(fileChars)} total)`;
+        }
+      } else {
+        // Legitimate large file - more generous truncation
+        content = truncateToLines(content, 300);
+        truncationNotice = `\n... (large file truncated - showing first 300 of ${fileLines} lines, ${formatBytes(fileChars)} total)`;
       }
     }
+
+    // The thresholds can trip without anything being cut; do not claim a
+    // truncation that did not happen
+    if (content.length === fileChars) {
+      truncationNotice = '';
+    }
+
+    // Check if it fits in global budget
+    const contentLines = countLines(content);
+    if (totalLines + contentLines <= maxLines - 10) { // Reserve space for summary
+      includedDiffs.push(content + truncationNotice);
+      totalLines += contentLines;
+    } else {
+      omittedFiles.push({
+        file: fileDiff.path,
+        reason: 'global line budget exceeded'
+      });
+    }
   }
 
   let result = includedDiffs.join('\n\n');
@@ -231,7 +387,7 @@ function truncateDiff(diff: string, maxLines: number, filesInfo: string): string
   if (omittedFiles.length > 0) {
     result += '\n\n---\n';
     result += `**Note:** ${omittedFiles.length} file(s) omitted due to output size limit:\n`;
-    result += omittedFiles.map(f => ` - ${f}`).join('\n');
+    result += omittedFiles.map(f => ` - ${f.file} (${f.reason})`).join('\n');
     result += '\n\n_Full changes visible in git status/stat output above._';
   }
 
@@ -239,7 +395,7 @@ function truncateDiff(diff: string, maxLines: number, filesInfo: string): string
 }
 
 /**
- * Get preview of new files being added
+ * Get preview of new files being added (with per-file and total limits)
  */
 async function getNewFilesPreviews(maxFiles: number = 5, maxLinesPerFile: number = 50): Promise<string> {
   try {
@@ -253,34 +409,63 @@ async function getNewFilesPreviews(maxFiles: number = 5, maxLinesPerFile: number
 
     const previews: string[] = [];
     const filesToShow = files.slice(0, maxFiles);
+    let totalChars = 0;
+    let budgetExhausted = false;
+    const MAX_TOTAL_CHARS = 30000;
+    const MAX_CHARS_PER_FILE = 10000;
 
     for (const file of filesToShow) {
       // Skip binary files
-      if (BINARY_EXTENSIONS.some(ext => file.endsWith(ext))) {
+      if (isBinaryFile(file)) {
         previews.push(`=== ${file} ===\n(binary file)`);
         continue;
       }
 
       try {
         const content = await Bun.file(file).text();
-        const lines = content.split('\n').slice(0, maxLinesPerFile);
-        const truncated = lines.length < content.split('\n').length
-          ? `\n... (${content.split('\n').length - lines.length} more lines)`
+
+        // Apply per-file char limit FIRST (prevents single-line disasters)
+        const oversized = content.length > MAX_CHARS_PER_FILE;
+        if (oversized && shouldCullAggressively(file, content)) {
+          previews.push(`=== ${file} ===\n(generated/noisy file - preview omitted)\nSize: ${formatBytes(content.length)}`);
+          continue;
+        }
+
+        // Oversized files are cut by characters, everything else by lines;
+        // both then go through the shared total-budget check below
+        const lines = content.split('\n');
+        const truncatedLines = lines.slice(0, maxLinesPerFile);
+        const truncated = oversized
+          ? content.slice(0, MAX_CHARS_PER_FILE)
+          : truncatedLines.join('\n');
+        const notice = oversized
+          ? `\n... (truncated from ${formatBytes(content.length)})`
+          : lines.length > maxLinesPerFile
+          ? `\n... (${lines.length - maxLinesPerFile} more lines)`
           : '';
 
-        previews.push(`=== ${file} ===\n${lines.join('\n')}${truncated}`);
+        const preview = `=== ${file} ===\n${truncated}${notice}`;
+
+        // Check total budget
+        if (totalChars + preview.length > MAX_TOTAL_CHARS) {
+          const remaining = files.length - previews.length;
+          previews.push(`\n... (${remaining} more file(s) omitted - preview size limit reached)`);
+          budgetExhausted = true;
+          break;
+        }
+
+        previews.push(preview);
+        totalChars += preview.length;
       } catch {
         previews.push(`=== ${file} ===\n(unreadable)`);
       }
     }
 
-    let result = previews.join('\n\n');
-
-    if (files.length > maxFiles) {
-      result += `\n\n_... and ${files.length - maxFiles} more new file(s)_`;
+    if (files.length > maxFiles && !budgetExhausted) {
+      previews.push(`\n... (${files.length - maxFiles} more new file(s) not shown)`);
     }
 
-    return result;
+    return previews.join('\n\n');
   } catch {
     return '';
   }
@@ -345,6 +530,17 @@ async function stagedContext(maxLines: number): Promise<string> {
 
   output += '## Recent Commit Style\n```\n' + recentCommits.trim() + '\n```\n';
 
+  // Final safety: ensure total output doesn't exceed safe limit
+  const MAX_TOTAL_OUTPUT = 150000; // 150K chars, leaves 50K headroom
+  if (output.length > MAX_TOTAL_OUTPUT) {
+    const lastNewline = output.slice(0, MAX_TOTAL_OUTPUT).lastIndexOf('\n');
+    // No newline in range gives -1, and slice(0, -1) would barely cut anything
+    const cutAt = lastNewline > 0 ? lastNewline : MAX_TOTAL_OUTPUT;
+    output = output.slice(0, cutAt) +
+      '\n\n[OUTPUT TRUNCATED - Exceeds safe character limit for AI context]\n' +
+      `(Shown ${cutAt.toLocaleString()} of ${output.length.toLocaleString()} chars)`;
+  }
+
   return output;
 }
 