Files
dotfiles/home/dot_local/bin/executable_commit-helper
Xevion fa4ffe8dce improve commit-helper: add smart truncation for large/generated files and binary detection
- Add per-file character/line limits to prevent massive diffs
- Detect and aggressively truncate lockfiles, minified, and base64-heavy files
- Expand binary file detection and format binary summaries
- Add global output safety limit (150K chars) to prevent context overflow
2026-01-06 12:59:16 -06:00

616 lines
19 KiB
Plaintext

#!/usr/bin/env bun
/**
* commit-helper - Efficient git context gathering for AI-assisted commits
*
* Provides optimized git context with smart truncation and filtering.
* Designed to give AI assistants the right amount of context without overwhelming them.
*
* Usage:
* commit-helper --staged [maxLines] # For committing staged changes
* commit-helper --amend [maxLines] # For amending last commit
*
* Default maxLines: 1000
*/
import { $ } from "bun";
// Files to ignore in diffs (lockfiles, generated files, etc.)
// Patterns are tested against the repo-relative path from `git diff --numstat`.
const IGNORE_PATTERNS = [
  /package-lock\.json$/,
  /yarn\.lock$/,
  /pnpm-lock\.yaml$/,
  /Cargo\.lock$/,
  /poetry\.lock$/,
  /bun\.lockb?$/, // matches both bun.lock (text) and bun.lockb (binary)
  /\.min\.(js|css)$/, // minified assets
  /\.bundle\.(js|css)$/, // bundler output
  /dist\/.*\.map$/, // source maps in build output
];
// Binary/large file extensions to skip in diffs
// Consumed via endsWith() checks (see isBinaryFile / parseNumstat).
const BINARY_EXTENSIONS = [
  '.png', '.jpg', '.jpeg', '.gif', '.ico', '.webp', '.avif', '.bmp',
  '.svg', // Often base64-encoded or huge
  '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot', '.otf',
  '.mp4', '.mp3', '.wav', '.avi', '.mov', '.webm',
  '.so', '.dylib', '.dll', '.exe', '.bin',
  '.wasm', '.pyc', '.class',
  '.db', '.sqlite', '.sqlite3',
  '.lockb', // Bun binary lockfile
];
// Lockfiles - always truncate to first 100 lines
const LOCKFILE_PATTERNS = [
  /\.lock$/,
  /lock\.(json|yaml)$/,
  // NOTE(review): anchored at start of the whole path, so this only matches
  // lockfiles at the repo root, not in subdirectories - confirm intended.
  /^(package|pnpm|yarn|bun|composer|Cargo|Gemfile|Pipfile|poetry)[-.]lock/,
  /^go\.sum$/,
];
/** Aggregate totals across a set of changed files. */
interface ChangeStats {
  files: number;     // number of files changed
  additions: number; // total lines added
  deletions: number; // total lines deleted
}
/** One file's entry parsed from `git diff --numstat` output. */
interface FileChange {
  path: string;          // repo-relative path
  additions: number;     // lines added (0 when git reports "-")
  deletions: number;     // lines deleted (0 when git reports "-")
  isBinary: boolean;     // git reported "-" for both counts
  shouldIgnore: boolean; // matched IGNORE_PATTERNS or a binary extension
}
/**
 * Parse git diff numstat output into structured data.
 *
 * Each numstat line is "<added>\t<deleted>\t<path>"; git prints "-" for
 * both counts on binary files.
 *
 * @param numstat Raw stdout of `git diff --numstat`.
 * @returns One FileChange per non-empty line.
 */
function parseNumstat(numstat: string): FileChange[] {
  return numstat
    .split('\n')
    .filter(line => line.trim())
    .map(line => {
      const parts = line.split('\t');
      // "-" (binary) or malformed counts parse to NaN; coerce to 0 so
      // downstream arithmetic never sees NaN.
      const additions = parseInt(parts[0], 10) || 0;
      const deletions = parseInt(parts[1], 10) || 0;
      const path = parts[2] || '';
      const isBinary = parts[0] === '-' && parts[1] === '-';
      // Fix: compare extensions case-insensitively, consistent with
      // isBinaryFile() (the original missed e.g. "IMAGE.PNG" here).
      const lowerPath = path.toLowerCase();
      const shouldIgnore = IGNORE_PATTERNS.some(pattern => pattern.test(path)) ||
        BINARY_EXTENSIONS.some(ext => lowerPath.endsWith(ext));
      return { path, additions, deletions, isBinary, shouldIgnore };
    });
}
/**
 * Heuristic check for predominantly base64-encoded text.
 * True when runs of 100+ base64 characters account for more than 30%
 * of the total content length.
 */
function hasBase64Pattern(content: string): boolean {
  const runs = content.match(/[A-Za-z0-9+/=]{100,}/g);
  let encodedChars = 0;
  if (runs) {
    for (const run of runs) {
      encodedChars += run.length;
    }
  }
  return encodedChars > content.length * 0.3; // >30% base64
}
/**
 * Decide whether a file's content deserves aggressive truncation.
 * Triggers on lockfiles, minified/generated line shapes, base64-heavy
 * content, generated-looking filenames, or explicit generated markers.
 */
function shouldCullAggressively(file: string, content: string): boolean {
  const contentLines = content.split('\n');
  const avgLineLength = content.length / Math.max(contentLines.length, 1);
  // Known lockfiles
  if (LOCKFILE_PATTERNS.some(p => p.test(file))) return true;
  // Extremely long average line length (minified/generated)
  if (avgLineLength > 200) return true;
  // Any single line over 5000 chars
  if (contentLines.some(line => line.length > 5000)) return true;
  // Predominantly base64 content
  if (hasBase64Pattern(content)) return true;
  // Known generated filename patterns
  if (/\.generated\./i.test(file) || /\.min\./i.test(file) || /\.bundle\./i.test(file)) {
    return true;
  }
  // Explicit generated-code markers in the content
  return (
    content.includes('/* @generated */') ||
    content.includes('// Auto-generated') ||
    content.includes('@autogenerated')
  );
}
/**
 * True when the path's extension (compared case-insensitively) is one
 * of the known binary extensions.
 */
function isBinaryFile(path: string): boolean {
  const lower = path.toLowerCase();
  for (const ext of BINARY_EXTENSIONS) {
    if (lower.endsWith(ext)) return true;
  }
  return false;
}
/**
 * Render a byte count as a human-readable size: B below 1 KiB,
 * one-decimal KB below 1 MiB, otherwise one-decimal MB.
 */
function formatBytes(bytes: number): string {
  const KB = 1024;
  const MB = KB * 1024;
  if (bytes >= MB) return `${(bytes / MB).toFixed(1)} MB`;
  if (bytes >= KB) return `${(bytes / KB).toFixed(1)} KB`;
  return `${bytes} B`;
}
/**
 * Count newline-separated lines in text.
 * Matches String.split('\n').length semantics: '' counts as 1 line and
 * a trailing newline adds a final empty line.
 */
function countLines(text: string): number {
  let total = 1;
  for (const ch of text) {
    if (ch === '\n') total += 1;
  }
  return total;
}
/**
 * Keep at most maxLines lines of text; any remainder is dropped without
 * appending a marker (callers add their own truncation notices).
 */
function truncateToLines(text: string, maxLines: number): string {
  const allLines = text.split('\n');
  if (allLines.length > maxLines) {
    return allLines.slice(0, maxLines).join('\n');
  }
  return text;
}
/**
 * Two-line placeholder emitted in place of a binary file's diff.
 */
function formatBinarySummary(filePath: string): string {
  const header = `Binary file: ${filePath}`;
  return header + '\n(content omitted)';
}
/**
 * Sum per-file additions/deletions into overall change totals.
 */
function getChangeStats(files: FileChange[]): ChangeStats {
  const totals: ChangeStats = { files: 0, additions: 0, deletions: 0 };
  for (const change of files) {
    totals.files += 1;
    totals.additions += change.additions;
    totals.deletions += change.deletions;
  }
  return totals;
}
/**
 * Summarize changed files by extension, most frequent first.
 * Files without a dot are grouped under "(no extension)". Each row is
 * " <count padded to 3> .<ext>".
 */
function getFileTypeDistribution(files: FileChange[]): string {
  const counts = new Map<string, number>();
  for (const f of files) {
    const dotMatch = f.path.match(/\.([^.]+)$/);
    const ext = dotMatch ? dotMatch[1] : '(no extension)';
    counts.set(ext, (counts.get(ext) ?? 0) + 1);
  }
  // Stable sort keeps first-seen order among equal counts.
  const ranked = [...counts.entries()].sort((a, b) => b[1] - a[1]);
  const rows: string[] = [];
  for (const [ext, count] of ranked) {
    rows.push(` ${count.toString().padStart(3)} .${ext}`);
  }
  return rows.join('\n');
}
/**
 * Build a markdown summary of changed files in three groups: included
 * (shown in the diff), ignored (lockfiles/generated), and binary.
 */
function getFileSummary(files: FileChange[]): string {
  const included: FileChange[] = [];
  const ignored: FileChange[] = [];
  const binary: FileChange[] = [];
  // Single-pass partition: shouldIgnore wins over isBinary.
  for (const f of files) {
    if (f.shouldIgnore) {
      ignored.push(f);
    } else if (f.isBinary) {
      binary.push(f);
    } else {
      included.push(f);
    }
  }
  let summary = '';
  if (included.length > 0) {
    const rows = included.map(f => {
      const delta = `(+${f.additions}/-${f.deletions})`;
      return ` ${delta.padEnd(12)} ${f.path}`;
    });
    summary += '**Included changes:**\n' + rows.join('\n');
  }
  if (ignored.length > 0) {
    summary += '\n\n**Ignored files (lockfiles/generated):**\n';
    summary += ignored.map(f => ` ${f.path}`).join('\n');
    summary += '\n _(Changes to these files are omitted from diff output)_';
  }
  if (binary.length > 0) {
    summary += '\n\n**Binary files:**\n' + binary.map(f => ` ${f.path}`).join('\n');
  }
  return summary;
}
/**
 * Get filtered diff output (excluding ignored files).
 *
 * Runs `git diff` (index when staged, otherwise HEAD~1..HEAD) and uses
 * git pathspec `:(exclude)` entries to drop ignored/binary files. Falls
 * back to the unfiltered diff if the exclusion invocation throws.
 *
 * @param staged true = diff the index (--staged); false = last commit.
 * @returns Raw diff text.
 */
async function getFilteredDiff(staged: boolean): Promise<string> {
  // Get list of files to exclude
  // NOTE(review): HEAD~1..HEAD fails in a repo with a single commit - confirm acceptable.
  const numstat = staged
    ? await $`git diff --staged --numstat`.text()
    : await $`git diff HEAD~1..HEAD --numstat`.text();
  const files = parseNumstat(numstat);
  const filesToExclude = files
    .filter(f => f.shouldIgnore || f.isBinary)
    .map(f => f.path);
  // Build diff command with exclusions; nothing to exclude -> plain diff.
  if (filesToExclude.length === 0) {
    return staged
      ? await $`git diff --staged`.text()
      : await $`git diff HEAD~1..HEAD`.text();
  }
  // Git diff with pathspec exclusions - construct as array to avoid shell quoting issues
  try {
    const baseArgs = staged ? ['diff', '--staged'] : ['diff', 'HEAD~1..HEAD'];
    const args = [...baseArgs, '--', '.', ...filesToExclude.map(f => `:(exclude)${f}`)];
    // Use Bun.spawn to call git with proper argument handling
    const proc = Bun.spawn(['git', ...args], {
      cwd: process.cwd(),
      stdout: 'pipe',
      stderr: 'pipe',
    });
    const output = await new Response(proc.stdout).text();
    // NOTE(review): exit code is awaited but never checked - a failing git
    // run here returns empty/partial output instead of hitting the fallback.
    await proc.exited;
    return output;
  } catch {
    // If exclusion fails, return diff without exclusions
    return staged
      ? await $`git diff --staged`.text()
      : await $`git diff HEAD~1..HEAD`.text();
  }
}
/**
 * Truncate diff with per-file character/line limits and smart culling.
 *
 * Splits the unified diff on "diff --git" boundaries, then per file:
 * binary files become a short placeholder; files over 10K chars or 1500
 * lines are truncated (lockfiles -> first 100 lines, generated/noisy
 * files -> first 30, other large files -> first 300); once the global
 * line budget is spent, whole files are dropped and listed in a
 * trailing note.
 *
 * @param diff      Full diff text.
 * @param maxLines  Global output budget in lines.
 * @param filesInfo NOTE(review): currently unused - kept to preserve the
 *                  call signature; confirm whether it was meant to feed
 *                  the omitted-files note.
 */
function truncateDiff(diff: string, maxLines: number, filesInfo: string): string {
  const lines = diff.split('\n');
  // Fast path: whole diff already fits the budget.
  if (lines.length <= maxLines) {
    return diff;
  }
  // Parse into individual file diffs
  const fileDiffs: Array<{
    header: string;
    content: string;
    lineCount: number;
    path: string;
  }> = [];
  let currentFile: { header: string; lines: string[]; path: string } | null = null;
  for (const line of lines) {
    if (line.startsWith('diff --git')) {
      // Flush the previous file's accumulated lines before starting a new one.
      if (currentFile) {
        fileDiffs.push({
          header: currentFile.header,
          content: currentFile.lines.join('\n'),
          lineCount: currentFile.lines.length,
          path: currentFile.path,
        });
      }
      // Extract file path from "diff --git a/path b/path"
      // NOTE(review): the lazy match misparses paths containing " b/" - confirm acceptable.
      const match = line.match(/diff --git a\/(.*?) b\//);
      const path = match ? match[1] : 'unknown';
      currentFile = { header: line, lines: [line], path };
    } else if (currentFile) {
      currentFile.lines.push(line);
    }
  }
  // Flush the final file.
  if (currentFile) {
    fileDiffs.push({
      header: currentFile.header,
      content: currentFile.lines.join('\n'),
      lineCount: currentFile.lines.length,
      path: currentFile.path,
    });
  }
  // Process each file with per-file limits and smart culling
  let totalLines = 0;
  const includedDiffs: string[] = [];
  const omittedFiles: Array<{file: string, reason: string}> = [];
  for (const fileDiff of fileDiffs) {
    // Check if binary file: replace its diff with a placeholder summary.
    if (isBinaryFile(fileDiff.path)) {
      const summary = formatBinarySummary(fileDiff.path);
      includedDiffs.push(summary);
      totalLines += countLines(summary);
      continue;
    }
    let content = fileDiff.content;
    const fileLines = fileDiff.lineCount;
    const fileChars = content.length;
    let truncationNotice = '';
    // Apply per-file safety limits
    const CHAR_THRESHOLD = 10000;
    const LINE_THRESHOLD = 1500;
    if (fileChars > CHAR_THRESHOLD || fileLines > LINE_THRESHOLD) {
      // File exceeded threshold - check if it should be culled
      if (shouldCullAggressively(fileDiff.path, content)) {
        // Check if it's a lockfile (special handling)
        if (LOCKFILE_PATTERNS.some(p => p.test(fileDiff.path))) {
          content = truncateToLines(content, 100);
          truncationNotice = `\n... (lockfile truncated - showing first 100 of ${fileLines} lines)`;
        } else {
          // Other noise - aggressive truncation
          content = truncateToLines(content, 30);
          truncationNotice = `\n... (generated/noisy file truncated - showing first 30 of ${fileLines} lines, ${formatBytes(fileChars)} total)`;
        }
      } else {
        // Legitimate large file - more generous truncation
        content = truncateToLines(content, 300);
        truncationNotice = `\n... (large file truncated - showing first 300 of ${fileLines} lines, ${formatBytes(fileChars)} total)`;
      }
    }
    // Check if it fits in global budget
    const contentLines = countLines(content);
    if (totalLines + contentLines <= maxLines - 10) { // Reserve space for summary
      includedDiffs.push(content + truncationNotice);
      totalLines += contentLines;
    } else {
      // Out of budget: drop this file entirely and record it for the note.
      omittedFiles.push({
        file: fileDiff.path,
        reason: 'global line budget exceeded'
      });
    }
  }
  let result = includedDiffs.join('\n\n');
  if (omittedFiles.length > 0) {
    result += '\n\n---\n';
    result += `**Note:** ${omittedFiles.length} file(s) omitted due to output size limit:\n`;
    result += omittedFiles.map(f => ` - ${f.file} (${f.reason})`).join('\n');
    result += '\n\n_Full changes visible in git status/stat output above._';
  }
  return result;
}
/**
 * Get preview of newly added (staged) files.
 *
 * Applies three limits: at most `maxFiles` files, at most
 * `maxLinesPerFile` lines per file, and per-file/global character caps.
 *
 * Fixes vs. original: every preview (binary placeholders and oversized-
 * file placeholders included) now counts against the 30K global budget -
 * the original's `continue` branches bypassed the accounting entirely -
 * and the trailing "more files" note uses an explicit flag instead of
 * sniffing the last preview's text for the word "omitted".
 *
 * @param maxFiles        Maximum number of files to preview.
 * @param maxLinesPerFile Maximum lines shown per file.
 * @returns Formatted previews, or '' when there are no new files.
 */
async function getNewFilesPreviews(maxFiles: number = 5, maxLinesPerFile: number = 50): Promise<string> {
  try {
    // Get list of new files (A = added)
    const newFiles = await $`git diff --staged --name-only --diff-filter=A`.text();
    const files = newFiles.trim().split('\n').filter(f => f);
    if (files.length === 0) {
      return '';
    }
    const previews: string[] = [];
    const filesToShow = files.slice(0, maxFiles);
    let totalChars = 0;
    let budgetExhausted = false;
    const MAX_TOTAL_CHARS = 30000;
    const MAX_CHARS_PER_FILE = 10000;
    for (const file of filesToShow) {
      let preview: string;
      if (isBinaryFile(file)) {
        // Skip binary file content
        preview = `=== ${file} ===\n(binary file)`;
      } else {
        try {
          const content = await Bun.file(file).text();
          if (content.length > MAX_CHARS_PER_FILE) {
            // Apply per-file char limit FIRST (prevents single-line disasters)
            if (shouldCullAggressively(file, content)) {
              preview = `=== ${file} ===\n(generated/noisy file - preview omitted)\nSize: ${formatBytes(content.length)}`;
            } else {
              const truncated = content.slice(0, MAX_CHARS_PER_FILE);
              preview = `=== ${file} ===\n${truncated}\n... (truncated from ${formatBytes(content.length)})`;
            }
          } else {
            // Apply line limit
            const lines = content.split('\n');
            const truncated = lines.slice(0, maxLinesPerFile).join('\n');
            const notice = lines.length > maxLinesPerFile
              ? `\n... (${lines.length - maxLinesPerFile} more lines)`
              : '';
            preview = `=== ${file} ===\n${truncated}${notice}`;
          }
        } catch {
          preview = `=== ${file} ===\n(unreadable)`;
        }
      }
      // Check total budget - covers every preview kind.
      if (totalChars + preview.length > MAX_TOTAL_CHARS) {
        const remaining = files.length - previews.length;
        previews.push(`\n... (${remaining} more file(s) omitted - preview size limit reached)`);
        budgetExhausted = true;
        break;
      }
      previews.push(preview);
      totalChars += preview.length;
    }
    if (files.length > maxFiles && !budgetExhausted) {
      previews.push(`\n... (${files.length - maxFiles} more new file(s) not shown)`);
    }
    return previews.join('\n\n');
  } catch {
    // Best-effort: previews are optional context, never fatal.
    return '';
  }
}
/**
 * Generate context for staged changes.
 *
 * Assembles git status, change summary, filtered/truncated diff,
 * new-file previews, and recent commit subjects into one markdown report.
 *
 * @param maxLines Global line budget (headers reserve ~50 lines; the
 *                 diff section gets the remainder, minimum 100).
 * @throws Error when nothing is staged, or on unexpected git failures.
 */
async function stagedContext(maxLines: number): Promise<string> {
  // `git diff --staged --quiet` exits 0 when nothing is staged and 1 when
  // there are changes.
  //
  // Fix: the original threw 'No staged changes to commit' from inside the
  // same try block whose catch filters on `exitCode`; a plain Error has no
  // exitCode property, so the catch silently swallowed it and the guard
  // never fired. Decide via a flag and throw outside the try instead.
  let hasStagedChanges = true;
  try {
    await $`git diff --staged --quiet`;
    hasStagedChanges = false; // exit 0 => no staged changes
  } catch (err) {
    // Exit code 1 means there are changes (expected); rethrow anything else.
    if (err && typeof err === 'object' && 'exitCode' in err && err.exitCode !== 1) {
      throw err;
    }
  }
  if (!hasStagedChanges) {
    throw new Error('No staged changes to commit');
  }
  // Gather all git information in parallel
  const [status, numstat, recentCommits] = await Promise.all([
    $`git status`.text(),
    $`git diff --staged --numstat`.text(),
    $`git log --format='%h %s' -10`.text(),
  ]);
  const files = parseNumstat(numstat);
  const stats = getChangeStats(files);
  const fileSummary = getFileSummary(files);
  const fileTypes = getFileTypeDistribution(files);
  // Calculate how many lines we can use for diff
  const headerLines = 50; // Approximate lines for headers/summaries
  const diffMaxLines = Math.max(100, maxLines - headerLines);
  const diff = await getFilteredDiff(true);
  const truncatedDiff = truncateDiff(diff, diffMaxLines, fileSummary);
  const newFilesPreviews = await getNewFilesPreviews(5, 50);
  // Build output
  let output = '# Git Commit Context (Staged Changes)\n\n';
  output += '## Status\n```\n' + status.trim() + '\n```\n\n';
  output += '## Change Summary\n';
  output += `**Files:** ${stats.files} | **Additions:** ${stats.additions} | **Deletions:** ${stats.deletions}\n\n`;
  output += '## Files Changed\n' + fileSummary + '\n\n';
  output += '## File Types Modified\n```\n' + fileTypes + '\n```\n\n';
  output += '## Staged Changes (Diff)\n';
  output += '```diff\n' + truncatedDiff.trim() + '\n```\n\n';
  if (newFilesPreviews) {
    output += '## New Files Preview\n```\n' + newFilesPreviews + '\n```\n\n';
  }
  output += '## Recent Commit Style\n```\n' + recentCommits.trim() + '\n```\n';
  // Final safety: ensure total output doesn't exceed safe limit
  const MAX_TOTAL_OUTPUT = 150000; // 150K chars, leaves 50K headroom
  if (output.length > MAX_TOTAL_OUTPUT) {
    // Cut at the last newline inside the limit so we never split a line.
    const lastNewline = output.slice(0, MAX_TOTAL_OUTPUT).lastIndexOf('\n');
    output = output.slice(0, lastNewline) +
      '\n\n[OUTPUT TRUNCATED - Exceeds safe character limit for AI context]\n' +
      `(Shown ${lastNewline.toLocaleString()} of ${output.length.toLocaleString()} chars)`;
  }
  return output;
}
/**
 * Generate context for amending the most recent commit: current staged
 * stat, the last commit's file stat, and recent history for style.
 *
 * @param maxLines Accepted for signature parity with stagedContext;
 *                 NOTE(review): not consulted here - confirm intended.
 * @throws Error when the repository has no commits.
 */
async function amendContext(maxLines: number): Promise<string> {
  // Amending requires at least one existing commit.
  try {
    await $`git rev-parse HEAD`;
  } catch {
    throw new Error('No commits to amend');
  }
  // Gather git information in parallel.
  const [stagedStat, lastCommitStat, recentCommits] = await Promise.all([
    $`git diff --staged --stat`.text(),
    $`git show --stat --pretty=format: HEAD`.text().then(s => s.split('\n').filter(l => l.trim()).join('\n')),
    $`git log --oneline -5`.text(),
  ]);
  const stagedSection = stagedStat.trim()
    ? '```\n' + stagedStat.trim() + '\n```\n\n'
    : '_No staged changes (message-only amendment)_\n\n';
  return (
    '# Git Commit Context (Amend)\n\n' +
    '## Current Staged Changes\n' +
    stagedSection +
    '## Files in Most Recent Commit\n' +
    '```\n' + lastCommitStat.trim() + '\n```\n\n' +
    '## Recent Commit History (for style reference)\n' +
    '```\n' + recentCommits.trim() + '\n```\n'
  );
}
/**
 * Main entry point.
 *
 * Usage: commit-helper --staged [maxLines] | --amend [maxLines]
 * Prints the assembled context to stdout; exits 1 with a usage or error
 * message on invalid arguments or git failures.
 */
async function main() {
  const args = Bun.argv.slice(2);
  const mode = args[0];
  // Fix: the original validated with startsWith(), so inputs like
  // "--staged=5" passed validation only to die later as "Unknown mode".
  // Exact matching routes every bad mode to the usage message, which also
  // makes the later "Unknown mode" branch unreachable.
  if (mode !== '--staged' && mode !== '--amend') {
    console.error('Usage: commit-helper --staged [maxLines] | --amend [maxLines]');
    console.error(' Default maxLines: 1000');
    process.exit(1);
  }
  const maxLines = args[1] ? parseInt(args[1], 10) : 1000;
  if (isNaN(maxLines) || maxLines < 100) {
    console.error('Error: maxLines must be a number >= 100');
    process.exit(1);
  }
  try {
    const output = mode === '--staged'
      ? await stagedContext(maxLines)
      : await amendContext(maxLines);
    console.log(output);
  } catch (error) {
    // Surface as much detail as possible; shell errors may be plain objects.
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
      if (error.stack) {
        console.error('\nStack trace:');
        console.error(error.stack);
      }
    } else if (error && typeof error === 'object') {
      console.error('Error details:', JSON.stringify(error, null, 2));
    } else {
      console.error('Error: Unknown error occurred:', error);
    }
    process.exit(1);
  }
}
main();