#!/usr/bin/env bun
/**
 * commit-helper - Efficient git context gathering for AI-assisted commits
 *
 * Provides optimized git context with smart truncation and filtering.
 * Designed to give AI assistants the right amount of context without overwhelming them.
 *
 * Usage:
 *   commit-helper --staged [maxLines]   # For committing staged changes
 *   commit-helper --amend [maxLines]    # For amending last commit
 *
 * Default maxLines: 1000
 */

import { $ } from "bun";

// Files to ignore in diffs (lockfiles, generated files, etc.)
const IGNORE_PATTERNS = [
  /package-lock\.json$/,
  /yarn\.lock$/,
  /pnpm-lock\.yaml$/,
  /Cargo\.lock$/,
  /poetry\.lock$/,
  /bun\.lockb?$/,
  /\.min\.(js|css)$/,
  /\.bundle\.(js|css)$/,
  /dist\/.*\.map$/,
];

// Binary/large file extensions to skip in diffs (all lowercase; compared
// against the lowercased path in isBinaryFile)
const BINARY_EXTENSIONS = [
  '.png', '.jpg', '.jpeg', '.gif', '.ico', '.webp', '.avif', '.bmp',
  '.svg', // Often base64-encoded or huge
  '.pdf', '.zip', '.tar', '.gz',
  '.woff', '.woff2', '.ttf', '.eot', '.otf',
  '.mp4', '.mp3', '.wav', '.avi', '.mov', '.webm',
  '.so', '.dylib', '.dll', '.exe', '.bin', '.wasm',
  '.pyc', '.class',
  '.db', '.sqlite', '.sqlite3',
  '.lockb', // Bun binary lockfile
];

// Lockfiles - always truncate to first 100 lines.
// Several patterns are anchored with ^, so they are matched against the
// file's basename as well as the full path (see isLockfile).
const LOCKFILE_PATTERNS = [
  /\.lock$/,
  /lock\.(json|yaml)$/,
  /^(package|pnpm|yarn|bun|composer|Cargo|Gemfile|Pipfile|poetry)[-.]lock/,
  /^go\.sum$/,
];

/** Aggregate totals over a set of changed files. */
interface ChangeStats {
  files: number;
  additions: number;
  deletions: number;
}

/** One row of `git diff --numstat`, plus the filtering flags derived from it. */
interface FileChange {
  path: string;
  additions: number;
  deletions: number;
  /** True when git reported `-` for both counts (no line stats available). */
  isBinary: boolean;
  /** True when the path matches IGNORE_PATTERNS or has a binary extension. */
  shouldIgnore: boolean;
}

/** A single file's section of a unified diff. */
interface FileDiff {
  path: string;
  content: string;
  lineCount: number;
}

/**
 * Return the last '/'-separated component of a path.
 * The paths handled here come from git output, which uses '/' separators.
 */
function baseName(path: string): string {
  return path.slice(path.lastIndexOf('/') + 1);
}

/**
 * Check whether a path names a known lockfile.
 *
 * Tests both the full path and the basename so that the ^-anchored entries in
 * LOCKFILE_PATTERNS also match lockfiles located in subdirectories.
 */
function isLockfile(path: string): boolean {
  const name = baseName(path);
  return LOCKFILE_PATTERNS.some(pattern => pattern.test(path) || pattern.test(name));
}

/**
 * Parse git diff numstat output into structured data
 *
 * @param numstat Raw `git diff --numstat` text (tab-separated: added, deleted, path).
 * @returns One FileChange per non-blank line. Counts reported as `-` (or
 *   otherwise unparseable) become 0.
 */
function parseNumstat(numstat: string): FileChange[] {
  return numstat
    .split('\n')
    .filter(line => line.trim())
    .map(line => {
      const [added = '', deleted = '', ...rest] = line.split('\t');
      // Re-join in case the path field itself contained a tab.
      const path = rest.join('\t');
      const isBinary = added === '-' && deleted === '-';
      // `|| 0` covers both the '-' marker and any NaN from malformed input.
      const additions = Number.parseInt(added, 10) || 0;
      const deletions = Number.parseInt(deleted, 10) || 0;
      const shouldIgnore =
        IGNORE_PATTERNS.some(pattern => pattern.test(path)) ||
        // Shared helper so extension matching is case-insensitive everywhere.
        isBinaryFile(path);

      return { path, additions, deletions, isBinary, shouldIgnore };
    });
}

/**
 * Check if content is predominantly base64-encoded
 *
 * @returns true when runs of 100+ base64-alphabet characters make up more
 *   than 30% of the content length.
 */
function hasBase64Pattern(content: string): boolean {
  const base64Chunks = content.match(/[A-Za-z0-9+/=]{100,}/g) || [];
  const base64Length = base64Chunks.reduce((sum, chunk) => sum + chunk.length, 0);
  return base64Length > content.length * 0.3; // >30% base64
}

/**
 * Determine if a file should be aggressively truncated
 *
 * @param file Path of the file the content belongs to.
 * @param content The text to inspect (a file's contents or its diff section).
 * @returns true for lockfiles, minified/generated-looking content (very long
 *   lines, mostly base64) and files carrying generated-code markers.
 */
function shouldCullAggressively(file: string, content: string): boolean {
  const lines = content.split('\n');
  const avgLineLength = content.length / Math.max(lines.length, 1);

  return (
    // Known lockfiles
    isLockfile(file) ||
    // Extremely long average line length (minified/generated)
    avgLineLength > 200 ||
    // Any single line over 5000 chars
    lines.some(line => line.length > 5000) ||
    // Predominantly base64 content
    hasBase64Pattern(content) ||
    // Known generated patterns
    /\.generated\./i.test(file) ||
    /\.min\./i.test(file) ||
    /\.bundle\./i.test(file) ||
    content.includes('/* @generated */') ||
    content.includes('// Auto-generated') ||
    content.includes('@autogenerated')
  );
}

/**
 * Check if file is binary based on extension (case-insensitive)
 */
function isBinaryFile(path: string): boolean {
  const lowered = path.toLowerCase();
  return BINARY_EXTENSIONS.some(ext => lowered.endsWith(ext));
}

/**
 * Format file size in human-readable format (B / KB / MB, one decimal place)
 */
function formatBytes(bytes: number): string {
  if (bytes < 1024) return `${bytes} B`;
  if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
  return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
}

/**
 * Count lines in text (number of '\n'-separated segments; '' counts as 1)
 */
function countLines(text: string): number {
  return text.split('\n').length;
}

/**
 * Truncate text to maximum number of lines
 *
 * @returns The text unchanged when it already fits, otherwise its first
 *   `maxLines` lines.
 */
function truncateToLines(text: string, maxLines: number): string {
  const lines = text.split('\n');
  if (lines.length <= maxLines) return text;
  return lines.slice(0, maxLines).join('\n');
}

/**
 * Format binary file summary for diff
 */
function formatBinarySummary(filePath: string): string {
  return `Binary file: ${filePath}\n(content omitted)`;
}

/**
 * Get overall change statistics (file count plus summed additions/deletions)
 */
function getChangeStats(files: FileChange[]): ChangeStats {
  return files.reduce<ChangeStats>(
    (acc, file) => ({
      files: acc.files + 1,
      additions: acc.additions + file.additions,
      deletions: acc.deletions + file.deletions,
    }),
    { files: 0, additions: 0, deletions: 0 },
  );
}

/**
 * Get file type distribution summary
 *
 * @returns One line per extension, most frequent first. Files whose basename
 *   has no dot are grouped under "(no extension)".
 */
function getFileTypeDistribution(files: FileChange[]): string {
  const counts = new Map<string, number>();

  for (const file of files) {
    // Match on the basename so a dot in a directory name is not mistaken for
    // the start of an extension.
    const match = baseName(file.path).match(/\.([^.]+)$/);
    const label = match ? `.${match[1]}` : '(no extension)';
    counts.set(label, (counts.get(label) ?? 0) + 1);
  }

  return Array.from(counts.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([label, count]) => ` ${count.toString().padStart(3)} ${label}`)
    .join('\n');
}

/**
 * Get categorized file changes summary
 *
 * Produces up to three markdown sections (included, ignored, binary),
 * separated by blank lines. Sections with no files are left out.
 */
function getFileSummary(files: FileChange[]): string {
  const included = files.filter(f => !f.shouldIgnore && !f.isBinary);
  const ignored = files.filter(f => f.shouldIgnore);
  const binary = files.filter(f => f.isBinary && !f.shouldIgnore);

  const sections: string[] = [];

  if (included.length > 0) {
    const rows = included.map(f => {
      const changes = `(+${f.additions}/-${f.deletions})`;
      return ` ${changes.padEnd(12)} ${f.path}`;
    });
    sections.push('**Included changes:**\n' + rows.join('\n'));
  }

  if (ignored.length > 0) {
    sections.push(
      '**Ignored files (lockfiles/generated):**\n' +
        ignored.map(f => ` ${f.path}`).join('\n') +
        '\n _(Changes to these files are omitted from diff output)_',
    );
  }

  if (binary.length > 0) {
    sections.push('**Binary files:**\n' + binary.map(f => ` ${f.path}`).join('\n'));
  }

  // Joining (instead of prefixing each section with blank lines) keeps the
  // summary from starting with empty lines when there are no included files.
  return sections.join('\n\n');
}

/**
 * Get filtered diff output (excluding ignored files)
 *
 * @param staged true to diff the index (`git diff --staged`), false to diff
 *   the last commit against its parent (`HEAD~1..HEAD`).
 * @returns The diff text with ignored/binary files excluded. If the filtered
 *   git invocation fails, the unfiltered diff is returned instead.
 */
async function getFilteredDiff(staged: boolean): Promise<string> {
  // Get list of files to exclude
  const numstat = staged
    ? await $`git diff --staged --numstat`.text()
    : await $`git diff HEAD~1..HEAD --numstat`.text();

  const filesToExclude = parseNumstat(numstat)
    .filter(f => f.shouldIgnore || f.isBinary)
    .map(f => f.path);

  const unfilteredDiff = (): Promise<string> =>
    staged ? $`git diff --staged`.text() : $`git diff HEAD~1..HEAD`.text();

  if (filesToExclude.length === 0) {
    return unfilteredDiff();
  }

  // Git diff with pathspec exclusions - construct as array to avoid shell quoting issues
  try {
    const baseArgs = staged ? ['diff', '--staged'] : ['diff', 'HEAD~1..HEAD'];
    const args = [...baseArgs, '--', '.', ...filesToExclude.map(f => `:(exclude)${f}`)];

    const proc = Bun.spawn(['git', ...args], {
      cwd: process.cwd(),
      stdout: 'pipe',
      // stderr is not used; ignoring it avoids leaving an unread pipe behind.
      stderr: 'ignore',
    });

    const [output, exitCode] = await Promise.all([
      new Response(proc.stdout).text(),
      proc.exited,
    ]);

    // A failed git process does not reject on its own, so the fallback below
    // only works if the exit code is checked explicitly.
    if (exitCode !== 0) {
      throw new Error(`git diff exited with code ${exitCode}`);
    }

    return output;
  } catch {
    // If exclusion fails, return diff without exclusions
    return unfilteredDiff();
  }
}

/**
 * Split unified diff lines into per-file sections.
 * Lines appearing before the first "diff --git" header are dropped.
 */
function splitDiffByFile(lines: string[]): FileDiff[] {
  const groups: Array<{ path: string; lines: string[] }> = [];

  for (const line of lines) {
    if (line.startsWith('diff --git')) {
      // Extract file path from "diff --git a/path b/path"
      const match = line.match(/diff --git a\/(.*?) b\//);
      groups.push({ path: match?.[1] ?? 'unknown', lines: [line] });
    } else {
      groups[groups.length - 1]?.lines.push(line);
    }
  }

  return groups.map(group => ({
    path: group.path,
    content: group.lines.join('\n'),
    lineCount: group.lines.length,
  }));
}

/**
 * Truncate diff with per-file character/line limits and smart culling
 *
 * @param diff Full unified diff text.
 * @param maxLines Global line budget; a diff within the budget is returned untouched.
 * @param filesInfo Currently unused; kept so existing callers keep working.
 * @returns The diff with oversized files shortened and, if the budget runs
 *   out, a trailing note listing the files that were omitted.
 */
function truncateDiff(diff: string, maxLines: number, filesInfo: string): string {
  const lines = diff.split('\n');

  if (lines.length <= maxLines) {
    return diff;
  }

  // Per-file safety limits
  const CHAR_THRESHOLD = 10000;
  const LINE_THRESHOLD = 1500;
  // Lines held back from the budget for the omitted-files summary
  const SUMMARY_RESERVE = 10;

  let totalLines = 0;
  const includedDiffs: string[] = [];
  const omittedFiles: Array<{ file: string; reason: string }> = [];

  for (const fileDiff of splitDiffByFile(lines)) {
    // Binary files are replaced by a two-line summary
    if (isBinaryFile(fileDiff.path)) {
      const summary = formatBinarySummary(fileDiff.path);
      includedDiffs.push(summary);
      totalLines += countLines(summary);
      continue;
    }

    let content = fileDiff.content;
    const fileLines = fileDiff.lineCount;
    const fileChars = content.length;
    let truncationNotice = '';

    if (fileChars > CHAR_THRESHOLD || fileLines > LINE_THRESHOLD) {
      const noisy = shouldCullAggressively(fileDiff.path, content);
      const lockfile = noisy && isLockfile(fileDiff.path);

      // Lockfiles: 100 lines; other noise: 30 lines; legitimate large files: 300 lines
      const lineLimit = lockfile ? 100 : noisy ? 30 : 300;
      content = truncateToLines(content, lineLimit);

      // A line limit alone cannot shrink minified/base64 content that lives
      // on a handful of huge lines, so noisy files also get a character cap.
      let capNote = '';
      if (noisy && content.length > CHAR_THRESHOLD) {
        content = content.slice(0, CHAR_THRESHOLD);
        capNote = `, output capped at ${formatBytes(CHAR_THRESHOLD)}`;
      }

      // Only announce truncation when something was actually cut, and report
      // the number of lines really shown rather than the nominal limit.
      if (content !== fileDiff.content) {
        const label = lockfile
          ? 'lockfile truncated'
          : noisy
            ? 'generated/noisy file truncated'
            : 'large file truncated';
        const sizeNote = lockfile ? '' : `, ${formatBytes(fileChars)} total`;
        truncationNotice =
          `\n... (${label} - showing first ${countLines(content)} of ${fileLines} lines${sizeNote}${capNote})`;
      }
    }

    // Check if it fits in global budget
    const contentLines = countLines(content);
    if (totalLines + contentLines <= maxLines - SUMMARY_RESERVE) {
      includedDiffs.push(content + truncationNotice);
      totalLines += contentLines;
    } else {
      omittedFiles.push({
        file: fileDiff.path,
        reason: 'global line budget exceeded',
      });
    }
  }

  let result = includedDiffs.join('\n\n');

  if (omittedFiles.length > 0) {
    result += '\n\n---\n';
    result += `**Note:** ${omittedFiles.length} file(s) omitted due to output size limit:\n`;
    result += omittedFiles.map(f => ` - ${f.file} (${f.reason})`).join('\n');
    result += '\n\n_Full changes visible in git status/stat output above._';
  }

  return result;
}

/**
 * Build the preview text for one newly added file.
 * Reads the file from disk via Bun.file, i.e. the working-tree copy.
 */
async function buildNewFilePreview(
  file: string,
  maxLinesPerFile: number,
  maxCharsPerFile: number,
): Promise<string> {
  if (isBinaryFile(file)) {
    return `=== ${file} ===\n(binary file)`;
  }

  try {
    const content = await Bun.file(file).text();

    // Apply per-file char limit FIRST (prevents single-line disasters)
    if (content.length > maxCharsPerFile) {
      if (shouldCullAggressively(file, content)) {
        return `=== ${file} ===\n(generated/noisy file - preview omitted)\nSize: ${formatBytes(content.length)}`;
      }
      const truncated = content.slice(0, maxCharsPerFile);
      return `=== ${file} ===\n${truncated}\n... (truncated from ${formatBytes(content.length)})`;
    }

    // Apply line limit
    const lines = content.split('\n');
    const truncated = lines.slice(0, maxLinesPerFile).join('\n');
    const notice = lines.length > maxLinesPerFile
      ? `\n... (${lines.length - maxLinesPerFile} more lines)`
      : '';
    return `=== ${file} ===\n${truncated}${notice}`;
  } catch {
    return `=== ${file} ===\n(unreadable)`;
  }
}

/**
 * Get preview of new files being added (with per-file and total limits)
 *
 * @param maxFiles Maximum number of new files to preview.
 * @param maxLinesPerFile Maximum lines shown per file.
 * @returns Concatenated previews, or '' when there are no newly added files
 *   or the file list cannot be obtained (best-effort section).
 */
async function getNewFilesPreviews(maxFiles: number = 5, maxLinesPerFile: number = 50): Promise<string> {
  try {
    // Get list of new files (A = added)
    const newFiles = await $`git diff --staged --name-only --diff-filter=A`.text();
    const files = newFiles.trim().split('\n').filter(f => f);

    if (files.length === 0) {
      return '';
    }

    const MAX_TOTAL_CHARS = 30000;
    const MAX_CHARS_PER_FILE = 10000;

    const previews: string[] = [];
    let totalChars = 0;
    // Explicit flag instead of searching the last preview for the word
    // "omitted", which also occurs in the noisy-file placeholder text.
    let budgetExhausted = false;

    for (const [index, file] of files.slice(0, maxFiles).entries()) {
      const preview = await buildNewFilePreview(file, maxLinesPerFile, MAX_CHARS_PER_FILE);

      // Every preview counts against the total budget, including previews of
      // files that were cut down by the per-file character limit.
      if (totalChars + preview.length > MAX_TOTAL_CHARS) {
        const remaining = files.length - index;
        previews.push(`\n... (${remaining} more file(s) omitted - preview size limit reached)`);
        budgetExhausted = true;
        break;
      }

      previews.push(preview);
      totalChars += preview.length;
    }

    if (!budgetExhausted && files.length > maxFiles) {
      previews.push(`\n... (${files.length - maxFiles} more new file(s) not shown)`);
    }

    return previews.join('\n\n');
  } catch {
    return '';
  }
}

/**
 * Generate context for staged changes
 *
 * @param maxLines Overall line budget; roughly 50 lines are reserved for
 *   headers and the rest (at least 100) goes to the diff.
 * @returns Markdown document with status, change summary, diff, new-file
 *   previews and recent commit subjects.
 * @throws Error when nothing is staged or when the staged-changes check fails.
 */
async function stagedContext(maxLines: number): Promise<string> {
  // `git diff --quiet` exits 0 when there are no differences and 1 when there
  // are. The exit code is inspected directly: throwing from inside a
  // try/catch around the command would let the catch swallow the
  // "no staged changes" error as well.
  const check = await $`git diff --staged --quiet`.nothrow().quiet();
  if (check.exitCode === 0) {
    throw new Error('No staged changes to commit');
  }
  if (check.exitCode !== 1) {
    const detail = check.stderr.toString().trim();
    throw new Error(
      `git diff --staged --quiet failed with exit code ${check.exitCode}` +
        (detail ? `: ${detail}` : ''),
    );
  }

  // Gather all git information
  const [status, numstat, recentCommits] = await Promise.all([
    $`git status`.text(),
    $`git diff --staged --numstat`.text(),
    // Best-effort: git log fails when the repository has no commits yet,
    // which must not block preparing the very first commit.
    $`git log --format='%h %s' -10`.nothrow().text(),
  ]);

  const files = parseNumstat(numstat);
  const stats = getChangeStats(files);
  const fileSummary = getFileSummary(files);
  const fileTypes = getFileTypeDistribution(files);

  // Calculate how many lines we can use for diff
  const headerLines = 50; // Approximate lines for headers/summaries
  const diffMaxLines = Math.max(100, maxLines - headerLines);

  const diff = await getFilteredDiff(true);
  const truncatedDiff = truncateDiff(diff, diffMaxLines, fileSummary);
  const newFilesPreviews = await getNewFilesPreviews(5, 50);

  // Build output
  let output = '# Git Commit Context (Staged Changes)\n\n';
  output += '## Status\n```\n' + status.trim() + '\n```\n\n';
  output += '## Change Summary\n';
  output += `**Files:** ${stats.files} | **Additions:** ${stats.additions} | **Deletions:** ${stats.deletions}\n\n`;
  output += '## Files Changed\n' + fileSummary + '\n\n';
  output += '## File Types Modified\n```\n' + fileTypes + '\n```\n\n';
  output += '## Staged Changes (Diff)\n';
  output += '```diff\n' + truncatedDiff.trim() + '\n```\n\n';

  if (newFilesPreviews) {
    output += '## New Files Preview\n```\n' + newFilesPreviews + '\n```\n\n';
  }

  output += '## Recent Commit Style\n```\n' + (recentCommits.trim() || '(no commits yet)') + '\n```\n';

  // Final safety: ensure total output doesn't exceed safe limit
  const MAX_TOTAL_OUTPUT = 150000; // 150K chars, leaves 50K headroom
  if (output.length > MAX_TOTAL_OUTPUT) {
    const fullLength = output.length;
    const lastNewline = output.slice(0, MAX_TOTAL_OUTPUT).lastIndexOf('\n');
    // Cut at a line boundary when one exists; otherwise cut at the hard limit.
    const cut = lastNewline > 0 ? lastNewline : MAX_TOTAL_OUTPUT;
    output = output.slice(0, cut) +
      '\n\n[OUTPUT TRUNCATED - Exceeds safe character limit for AI context]\n' +
      `(Shown ${cut.toLocaleString()} of ${fullLength.toLocaleString()} chars)`;
  }

  return output;
}

/**
 * Generate context for amending last commit
 *
 * @param maxLines Currently unused; accepted so both modes share a signature.
 * @returns Markdown document with the staged diffstat, the last commit's
 *   diffstat and recent history.
 * @throws Error('No commits to amend') when HEAD cannot be resolved.
 */
async function amendContext(maxLines: number): Promise<string> {
  // Check if we have any commits
  try {
    await $`git rev-parse HEAD`;
  } catch {
    throw new Error('No commits to amend');
  }

  // Gather git information
  const [stagedStat, rawCommitStat, recentCommits] = await Promise.all([
    $`git diff --staged --stat`.text(),
    $`git show --stat --pretty=format: HEAD`.text(),
    $`git log --oneline -5`.text(),
  ]);

  // Drop the blank lines left behind by the empty --pretty format
  const lastCommitStat = rawCommitStat
    .split('\n')
    .filter(line => line.trim())
    .join('\n');

  let output = '# Git Commit Context (Amend)\n\n';

  output += '## Current Staged Changes\n';
  if (stagedStat.trim()) {
    output += '```\n' + stagedStat.trim() + '\n```\n\n';
  } else {
    output += '_No staged changes (message-only amendment)_\n\n';
  }

  output += '## Files in Most Recent Commit\n';
  output += '```\n' + lastCommitStat.trim() + '\n```\n\n';

  output += '## Recent Commit History (for style reference)\n';
  output += '```\n' + recentCommits.trim() + '\n```\n';

  return output;
}

/**
 * Main entry point
 *
 * Parses `--staged|--amend [maxLines]`, prints the generated context to
 * stdout, and exits with status 1 on usage errors or failures.
 */
async function main() {
  const [mode, maxLinesArg] = Bun.argv.slice(2);

  // Exact match: a prefix check would let values such as "--stagedfoo" through.
  if (mode !== '--staged' && mode !== '--amend') {
    console.error('Usage: commit-helper --staged [maxLines] | --amend [maxLines]');
    console.error(' Default maxLines: 1000');
    process.exit(1);
  }

  const maxLines = maxLinesArg ? Number.parseInt(maxLinesArg, 10) : 1000;

  if (Number.isNaN(maxLines) || maxLines < 100) {
    console.error('Error: maxLines must be a number >= 100');
    process.exit(1);
  }

  try {
    const output = mode === '--staged'
      ? await stagedContext(maxLines)
      : await amendContext(maxLines);

    console.log(output);
  } catch (error) {
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
      if (error.stack) {
        console.error('\nStack trace:');
        console.error(error.stack);
      }
    } else if (error && typeof error === 'object') {
      console.error('Error details:', JSON.stringify(error, null, 2));
    } else {
      console.error('Error: Unknown error occurred:', error);
    }
    process.exit(1);
  }
}

main();