#!/usr/bin/env bun
// Mirror of https://github.com/Xevion/dotfiles.git
// Synced 2026-01-31 14:24:09 -06:00
//
// Changes: per-file character/line limits to prevent massive diffs; detect and
// aggressively truncate lockfiles, minified, and base64-heavy files; expanded
// binary file detection with binary summaries; global output safety limit
// (150K chars) to prevent context overflow.
/**
 * commit-helper - Efficient git context gathering for AI-assisted commits
 *
 * Provides optimized git context with smart truncation and filtering.
 * Designed to give AI assistants the right amount of context without
 * overwhelming them.
 *
 * Usage:
 *   commit-helper --staged [maxLines]   # For committing staged changes
 *   commit-helper --amend [maxLines]    # For amending last commit
 *
 * Default maxLines: 1000
 */
import { $ } from "bun";

// Files whose diffs are noise (lockfiles, minified/bundled output, source
// maps). Matched against the full path reported by `git diff --numstat`.
const IGNORE_PATTERNS = [
  /package-lock\.json$/,
  /yarn\.lock$/,
  /pnpm-lock\.yaml$/,
  /Cargo\.lock$/,
  /poetry\.lock$/,
  /bun\.lockb?$/,
  /\.min\.(js|css)$/,
  /\.bundle\.(js|css)$/,
  /dist\/.*\.map$/,
];
// File extensions treated as binary: their diff content is skipped and
// replaced with a short placeholder. All entries are lowercase; callers
// compare against a lowercased path (see isBinaryFile).
const BINARY_EXTENSIONS = [
  // Images
  '.png', '.jpg', '.jpeg', '.gif', '.ico', '.webp', '.avif', '.bmp',
  '.svg', // Often base64-encoded or huge
  // Documents, archives, fonts
  '.pdf', '.zip', '.tar', '.gz', '.woff', '.woff2', '.ttf', '.eot', '.otf',
  // Audio/video
  '.mp4', '.mp3', '.wav', '.avi', '.mov', '.webm',
  // Native binaries
  '.so', '.dylib', '.dll', '.exe', '.bin',
  // Compiled/intermediate artifacts
  '.wasm', '.pyc', '.class',
  // Databases
  '.db', '.sqlite', '.sqlite3',
  '.lockb', // Bun binary lockfile
];
// Lockfile name patterns - matching files are always truncated to their
// first 100 lines in diff output (see truncateDiff).
const LOCKFILE_PATTERNS = [
  /\.lock$/,
  /lock\.(json|yaml)$/,
  /^(package|pnpm|yarn|bun|composer|Cargo|Gemfile|Pipfile|poetry)[-.]lock/,
  /^go\.sum$/,
];
// Aggregate totals across all changed files.
interface ChangeStats {
  files: number;     // number of files changed
  additions: number; // total lines added
  deletions: number; // total lines removed
}
// Per-file change record parsed from `git diff --numstat`.
interface FileChange {
  path: string;          // path as reported by git
  additions: number;     // lines added (0 for binary files)
  deletions: number;     // lines removed (0 for binary files)
  isBinary: boolean;     // git reported "-" in both count columns
  shouldIgnore: boolean; // matches IGNORE_PATTERNS or a binary extension
}
/**
|
|
* Parse git diff numstat output into structured data
|
|
*/
|
|
function parseNumstat(numstat: string): FileChange[] {
|
|
return numstat
|
|
.split('\n')
|
|
.filter(line => line.trim())
|
|
.map(line => {
|
|
const parts = line.split('\t');
|
|
const additions = parts[0] === '-' ? 0 : parseInt(parts[0], 10);
|
|
const deletions = parts[1] === '-' ? 0 : parseInt(parts[1], 10);
|
|
const path = parts[2] || '';
|
|
|
|
const isBinary = parts[0] === '-' && parts[1] === '-';
|
|
const shouldIgnore = IGNORE_PATTERNS.some(pattern => pattern.test(path)) ||
|
|
BINARY_EXTENSIONS.some(ext => path.endsWith(ext));
|
|
|
|
return { path, additions, deletions, isBinary, shouldIgnore };
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Check if content is predominantly base64-encoded
|
|
*/
|
|
function hasBase64Pattern(content: string): boolean {
|
|
const base64Chunks = content.match(/[A-Za-z0-9+/=]{100,}/g) || [];
|
|
const base64Length = base64Chunks.reduce((sum, chunk) => sum + chunk.length, 0);
|
|
return base64Length > content.length * 0.3; // >30% base64
|
|
}
|
|
|
|
/**
|
|
* Determine if a file should be aggressively truncated
|
|
*/
|
|
function shouldCullAggressively(file: string, content: string): boolean {
|
|
const lines = content.split('\n');
|
|
const avgLineLength = content.length / Math.max(lines.length, 1);
|
|
|
|
return (
|
|
// Known lockfiles
|
|
LOCKFILE_PATTERNS.some(p => p.test(file)) ||
|
|
|
|
// Extremely long average line length (minified/generated)
|
|
avgLineLength > 200 ||
|
|
|
|
// Any single line over 5000 chars
|
|
lines.some(line => line.length > 5000) ||
|
|
|
|
// Predominantly base64 content
|
|
hasBase64Pattern(content) ||
|
|
|
|
// Known generated patterns
|
|
/\.generated\./i.test(file) ||
|
|
/\.min\./i.test(file) ||
|
|
/\.bundle\./i.test(file) ||
|
|
content.includes('/* @generated */') ||
|
|
content.includes('// Auto-generated') ||
|
|
content.includes('@autogenerated')
|
|
);
|
|
}
|
|
|
|
/**
|
|
* Check if file is binary based on extension
|
|
*/
|
|
function isBinaryFile(path: string): boolean {
|
|
return BINARY_EXTENSIONS.some(ext => path.toLowerCase().endsWith(ext));
|
|
}
|
|
|
|
/**
|
|
* Format file size in human-readable format
|
|
*/
|
|
function formatBytes(bytes: number): string {
|
|
if (bytes < 1024) return `${bytes} B`;
|
|
if (bytes < 1024 * 1024) return `${(bytes / 1024).toFixed(1)} KB`;
|
|
return `${(bytes / (1024 * 1024)).toFixed(1)} MB`;
|
|
}
|
|
|
|
/**
|
|
* Count lines in text
|
|
*/
|
|
function countLines(text: string): number {
|
|
return text.split('\n').length;
|
|
}
|
|
|
|
/**
|
|
* Truncate text to maximum number of lines
|
|
*/
|
|
function truncateToLines(text: string, maxLines: number): string {
|
|
const lines = text.split('\n');
|
|
if (lines.length <= maxLines) return text;
|
|
return lines.slice(0, maxLines).join('\n');
|
|
}
|
|
|
|
/**
|
|
* Format binary file summary for diff
|
|
*/
|
|
function formatBinarySummary(filePath: string): string {
|
|
return `Binary file: ${filePath}\n(content omitted)`;
|
|
}
|
|
|
|
/**
|
|
* Get overall change statistics
|
|
*/
|
|
function getChangeStats(files: FileChange[]): ChangeStats {
|
|
return files.reduce((acc, file) => ({
|
|
files: acc.files + 1,
|
|
additions: acc.additions + file.additions,
|
|
deletions: acc.deletions + file.deletions,
|
|
}), { files: 0, additions: 0, deletions: 0 });
|
|
}
|
|
|
|
/**
|
|
* Get file type distribution summary
|
|
*/
|
|
function getFileTypeDistribution(files: FileChange[]): string {
|
|
const extensions = files.map(f => {
|
|
const match = f.path.match(/\.([^.]+)$/);
|
|
return match ? match[1] : '(no extension)';
|
|
});
|
|
|
|
const counts = new Map<string, number>();
|
|
for (const ext of extensions) {
|
|
counts.set(ext, (counts.get(ext) || 0) + 1);
|
|
}
|
|
|
|
return Array.from(counts.entries())
|
|
.sort((a, b) => b[1] - a[1])
|
|
.map(([ext, count]) => ` ${count.toString().padStart(3)} .${ext}`)
|
|
.join('\n');
|
|
}
|
|
|
|
/**
|
|
* Get categorized file changes summary
|
|
*/
|
|
function getFileSummary(files: FileChange[]): string {
|
|
const included = files.filter(f => !f.shouldIgnore && !f.isBinary);
|
|
const ignored = files.filter(f => f.shouldIgnore);
|
|
const binary = files.filter(f => f.isBinary && !f.shouldIgnore);
|
|
|
|
let summary = '';
|
|
|
|
if (included.length > 0) {
|
|
summary += '**Included changes:**\n';
|
|
summary += included.map(f => {
|
|
const changes = `(+${f.additions}/-${f.deletions})`;
|
|
return ` ${changes.padEnd(12)} ${f.path}`;
|
|
}).join('\n');
|
|
}
|
|
|
|
if (ignored.length > 0) {
|
|
summary += '\n\n**Ignored files (lockfiles/generated):**\n';
|
|
summary += ignored.map(f => ` ${f.path}`).join('\n');
|
|
summary += '\n _(Changes to these files are omitted from diff output)_';
|
|
}
|
|
|
|
if (binary.length > 0) {
|
|
summary += '\n\n**Binary files:**\n';
|
|
summary += binary.map(f => ` ${f.path}`).join('\n');
|
|
}
|
|
|
|
return summary;
|
|
}
|
|
|
|
/**
|
|
* Get filtered diff output (excluding ignored files)
|
|
*/
|
|
async function getFilteredDiff(staged: boolean): Promise<string> {
|
|
// Get list of files to exclude
|
|
const numstat = staged
|
|
? await $`git diff --staged --numstat`.text()
|
|
: await $`git diff HEAD~1..HEAD --numstat`.text();
|
|
|
|
const files = parseNumstat(numstat);
|
|
const filesToExclude = files
|
|
.filter(f => f.shouldIgnore || f.isBinary)
|
|
.map(f => f.path);
|
|
|
|
// Build diff command with exclusions
|
|
if (filesToExclude.length === 0) {
|
|
return staged
|
|
? await $`git diff --staged`.text()
|
|
: await $`git diff HEAD~1..HEAD`.text();
|
|
}
|
|
|
|
// Git diff with pathspec exclusions - construct as array to avoid shell quoting issues
|
|
try {
|
|
const baseArgs = staged ? ['diff', '--staged'] : ['diff', 'HEAD~1..HEAD'];
|
|
const args = [...baseArgs, '--', '.', ...filesToExclude.map(f => `:(exclude)${f}`)];
|
|
|
|
// Use Bun.spawn to call git with proper argument handling
|
|
const proc = Bun.spawn(['git', ...args], {
|
|
cwd: process.cwd(),
|
|
stdout: 'pipe',
|
|
stderr: 'pipe',
|
|
});
|
|
|
|
const output = await new Response(proc.stdout).text();
|
|
await proc.exited;
|
|
|
|
return output;
|
|
} catch {
|
|
// If exclusion fails, return diff without exclusions
|
|
return staged
|
|
? await $`git diff --staged`.text()
|
|
: await $`git diff HEAD~1..HEAD`.text();
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Truncate diff with per-file character/line limits and smart culling
|
|
*/
|
|
function truncateDiff(diff: string, maxLines: number, filesInfo: string): string {
|
|
const lines = diff.split('\n');
|
|
|
|
if (lines.length <= maxLines) {
|
|
return diff;
|
|
}
|
|
|
|
// Parse into individual file diffs
|
|
const fileDiffs: Array<{
|
|
header: string;
|
|
content: string;
|
|
lineCount: number;
|
|
path: string;
|
|
}> = [];
|
|
let currentFile: { header: string; lines: string[]; path: string } | null = null;
|
|
|
|
for (const line of lines) {
|
|
if (line.startsWith('diff --git')) {
|
|
if (currentFile) {
|
|
fileDiffs.push({
|
|
header: currentFile.header,
|
|
content: currentFile.lines.join('\n'),
|
|
lineCount: currentFile.lines.length,
|
|
path: currentFile.path,
|
|
});
|
|
}
|
|
// Extract file path from "diff --git a/path b/path"
|
|
const match = line.match(/diff --git a\/(.*?) b\//);
|
|
const path = match ? match[1] : 'unknown';
|
|
currentFile = { header: line, lines: [line], path };
|
|
} else if (currentFile) {
|
|
currentFile.lines.push(line);
|
|
}
|
|
}
|
|
|
|
if (currentFile) {
|
|
fileDiffs.push({
|
|
header: currentFile.header,
|
|
content: currentFile.lines.join('\n'),
|
|
lineCount: currentFile.lines.length,
|
|
path: currentFile.path,
|
|
});
|
|
}
|
|
|
|
// Process each file with per-file limits and smart culling
|
|
let totalLines = 0;
|
|
const includedDiffs: string[] = [];
|
|
const omittedFiles: Array<{file: string, reason: string}> = [];
|
|
|
|
for (const fileDiff of fileDiffs) {
|
|
// Check if binary file
|
|
if (isBinaryFile(fileDiff.path)) {
|
|
const summary = formatBinarySummary(fileDiff.path);
|
|
includedDiffs.push(summary);
|
|
totalLines += countLines(summary);
|
|
continue;
|
|
}
|
|
|
|
let content = fileDiff.content;
|
|
const fileLines = fileDiff.lineCount;
|
|
const fileChars = content.length;
|
|
let truncationNotice = '';
|
|
|
|
// Apply per-file safety limits
|
|
const CHAR_THRESHOLD = 10000;
|
|
const LINE_THRESHOLD = 1500;
|
|
|
|
if (fileChars > CHAR_THRESHOLD || fileLines > LINE_THRESHOLD) {
|
|
// File exceeded threshold - check if it should be culled
|
|
if (shouldCullAggressively(fileDiff.path, content)) {
|
|
// Check if it's a lockfile (special handling)
|
|
if (LOCKFILE_PATTERNS.some(p => p.test(fileDiff.path))) {
|
|
content = truncateToLines(content, 100);
|
|
truncationNotice = `\n... (lockfile truncated - showing first 100 of ${fileLines} lines)`;
|
|
} else {
|
|
// Other noise - aggressive truncation
|
|
content = truncateToLines(content, 30);
|
|
truncationNotice = `\n... (generated/noisy file truncated - showing first 30 of ${fileLines} lines, ${formatBytes(fileChars)} total)`;
|
|
}
|
|
} else {
|
|
// Legitimate large file - more generous truncation
|
|
content = truncateToLines(content, 300);
|
|
truncationNotice = `\n... (large file truncated - showing first 300 of ${fileLines} lines, ${formatBytes(fileChars)} total)`;
|
|
}
|
|
}
|
|
|
|
// Check if it fits in global budget
|
|
const contentLines = countLines(content);
|
|
if (totalLines + contentLines <= maxLines - 10) { // Reserve space for summary
|
|
includedDiffs.push(content + truncationNotice);
|
|
totalLines += contentLines;
|
|
} else {
|
|
omittedFiles.push({
|
|
file: fileDiff.path,
|
|
reason: 'global line budget exceeded'
|
|
});
|
|
}
|
|
}
|
|
|
|
let result = includedDiffs.join('\n\n');
|
|
|
|
if (omittedFiles.length > 0) {
|
|
result += '\n\n---\n';
|
|
result += `**Note:** ${omittedFiles.length} file(s) omitted due to output size limit:\n`;
|
|
result += omittedFiles.map(f => ` - ${f.file} (${f.reason})`).join('\n');
|
|
result += '\n\n_Full changes visible in git status/stat output above._';
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/**
|
|
* Get preview of new files being added (with per-file and total limits)
|
|
*/
|
|
async function getNewFilesPreviews(maxFiles: number = 5, maxLinesPerFile: number = 50): Promise<string> {
|
|
try {
|
|
// Get list of new files (A = added)
|
|
const newFiles = await $`git diff --staged --name-only --diff-filter=A`.text();
|
|
const files = newFiles.trim().split('\n').filter(f => f);
|
|
|
|
if (files.length === 0) {
|
|
return '';
|
|
}
|
|
|
|
const previews: string[] = [];
|
|
const filesToShow = files.slice(0, maxFiles);
|
|
let totalChars = 0;
|
|
const MAX_TOTAL_CHARS = 30000;
|
|
const MAX_CHARS_PER_FILE = 10000;
|
|
|
|
for (const file of filesToShow) {
|
|
// Skip binary files
|
|
if (isBinaryFile(file)) {
|
|
previews.push(`=== ${file} ===\n(binary file)`);
|
|
continue;
|
|
}
|
|
|
|
try {
|
|
const content = await Bun.file(file).text();
|
|
|
|
// Apply per-file char limit FIRST (prevents single-line disasters)
|
|
if (content.length > MAX_CHARS_PER_FILE) {
|
|
if (shouldCullAggressively(file, content)) {
|
|
previews.push(`=== ${file} ===\n(generated/noisy file - preview omitted)\nSize: ${formatBytes(content.length)}`);
|
|
} else {
|
|
const truncated = content.slice(0, MAX_CHARS_PER_FILE);
|
|
previews.push(`=== ${file} ===\n${truncated}\n... (truncated from ${formatBytes(content.length)})`);
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// Apply line limit
|
|
const lines = content.split('\n');
|
|
const truncatedLines = lines.slice(0, maxLinesPerFile);
|
|
const truncated = truncatedLines.join('\n');
|
|
const notice = lines.length > maxLinesPerFile
|
|
? `\n... (${lines.length - maxLinesPerFile} more lines)`
|
|
: '';
|
|
|
|
const preview = `=== ${file} ===\n${truncated}${notice}`;
|
|
|
|
// Check total budget
|
|
if (totalChars + preview.length > MAX_TOTAL_CHARS) {
|
|
const remaining = files.length - previews.length;
|
|
previews.push(`\n... (${remaining} more file(s) omitted - preview size limit reached)`);
|
|
break;
|
|
}
|
|
|
|
previews.push(preview);
|
|
totalChars += preview.length;
|
|
} catch {
|
|
previews.push(`=== ${file} ===\n(unreadable)`);
|
|
}
|
|
}
|
|
|
|
if (files.length > maxFiles && previews[previews.length - 1]?.includes('omitted') === false) {
|
|
previews.push(`\n... (${files.length - maxFiles} more new file(s) not shown)`);
|
|
}
|
|
|
|
return previews.join('\n\n');
|
|
} catch {
|
|
return '';
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Generate context for staged changes
|
|
*/
|
|
async function stagedContext(maxLines: number): Promise<string> {
|
|
// Check if there are staged changes
|
|
try {
|
|
await $`git diff --staged --quiet`;
|
|
// If command succeeds (exit 0), there are no changes
|
|
throw new Error('No staged changes to commit');
|
|
} catch (err) {
|
|
// Exit code 1 means there are changes (expected)
|
|
// Any other error will be re-thrown
|
|
if (err && typeof err === 'object' && 'exitCode' in err && err.exitCode !== 1) {
|
|
throw err;
|
|
}
|
|
}
|
|
|
|
// Gather all git information
|
|
const [status, numstat, recentCommits] = await Promise.all([
|
|
$`git status`.text(),
|
|
$`git diff --staged --numstat`.text(),
|
|
$`git log --format='%h %s' -10`.text(),
|
|
]);
|
|
|
|
const files = parseNumstat(numstat);
|
|
const stats = getChangeStats(files);
|
|
const fileSummary = getFileSummary(files);
|
|
const fileTypes = getFileTypeDistribution(files);
|
|
|
|
// Calculate how many lines we can use for diff
|
|
const headerLines = 50; // Approximate lines for headers/summaries
|
|
const diffMaxLines = Math.max(100, maxLines - headerLines);
|
|
|
|
const diff = await getFilteredDiff(true);
|
|
const truncatedDiff = truncateDiff(diff, diffMaxLines, fileSummary);
|
|
|
|
const newFilesPreviews = await getNewFilesPreviews(5, 50);
|
|
|
|
// Build output
|
|
let output = '# Git Commit Context (Staged Changes)\n\n';
|
|
|
|
output += '## Status\n```\n' + status.trim() + '\n```\n\n';
|
|
|
|
output += '## Change Summary\n';
|
|
output += `**Files:** ${stats.files} | **Additions:** ${stats.additions} | **Deletions:** ${stats.deletions}\n\n`;
|
|
|
|
output += '## Files Changed\n' + fileSummary + '\n\n';
|
|
|
|
output += '## File Types Modified\n```\n' + fileTypes + '\n```\n\n';
|
|
|
|
output += '## Staged Changes (Diff)\n';
|
|
output += '```diff\n' + truncatedDiff.trim() + '\n```\n\n';
|
|
|
|
if (newFilesPreviews) {
|
|
output += '## New Files Preview\n```\n' + newFilesPreviews + '\n```\n\n';
|
|
}
|
|
|
|
output += '## Recent Commit Style\n```\n' + recentCommits.trim() + '\n```\n';
|
|
|
|
// Final safety: ensure total output doesn't exceed safe limit
|
|
const MAX_TOTAL_OUTPUT = 150000; // 150K chars, leaves 50K headroom
|
|
if (output.length > MAX_TOTAL_OUTPUT) {
|
|
const lastNewline = output.slice(0, MAX_TOTAL_OUTPUT).lastIndexOf('\n');
|
|
output = output.slice(0, lastNewline) +
|
|
'\n\n[OUTPUT TRUNCATED - Exceeds safe character limit for AI context]\n' +
|
|
`(Shown ${lastNewline.toLocaleString()} of ${output.length.toLocaleString()} chars)`;
|
|
}
|
|
|
|
return output;
|
|
}
|
|
|
|
/**
|
|
* Generate context for amending last commit
|
|
*/
|
|
async function amendContext(maxLines: number): Promise<string> {
|
|
// Check if we have any commits
|
|
try {
|
|
await $`git rev-parse HEAD`;
|
|
} catch {
|
|
throw new Error('No commits to amend');
|
|
}
|
|
|
|
// Gather git information
|
|
const [stagedStat, lastCommitStat, recentCommits] = await Promise.all([
|
|
$`git diff --staged --stat`.text(),
|
|
$`git show --stat --pretty=format: HEAD`.text().then(s => s.split('\n').filter(l => l.trim()).join('\n')),
|
|
$`git log --oneline -5`.text(),
|
|
]);
|
|
|
|
let output = '# Git Commit Context (Amend)\n\n';
|
|
|
|
output += '## Current Staged Changes\n';
|
|
if (stagedStat.trim()) {
|
|
output += '```\n' + stagedStat.trim() + '\n```\n\n';
|
|
} else {
|
|
output += '_No staged changes (message-only amendment)_\n\n';
|
|
}
|
|
|
|
output += '## Files in Most Recent Commit\n';
|
|
output += '```\n' + lastCommitStat.trim() + '\n```\n\n';
|
|
|
|
output += '## Recent Commit History (for style reference)\n';
|
|
output += '```\n' + recentCommits.trim() + '\n```\n';
|
|
|
|
return output;
|
|
}
|
|
|
|
/**
|
|
* Main entry point
|
|
*/
|
|
async function main() {
|
|
const args = Bun.argv.slice(2);
|
|
|
|
if (args.length === 0 || (!args[0].startsWith('--staged') && !args[0].startsWith('--amend'))) {
|
|
console.error('Usage: commit-helper --staged [maxLines] | --amend [maxLines]');
|
|
console.error(' Default maxLines: 1000');
|
|
process.exit(1);
|
|
}
|
|
|
|
const mode = args[0];
|
|
const maxLines = args[1] ? parseInt(args[1], 10) : 1000;
|
|
|
|
if (isNaN(maxLines) || maxLines < 100) {
|
|
console.error('Error: maxLines must be a number >= 100');
|
|
process.exit(1);
|
|
}
|
|
|
|
try {
|
|
let output: string;
|
|
|
|
if (mode === '--staged') {
|
|
output = await stagedContext(maxLines);
|
|
} else if (mode === '--amend') {
|
|
output = await amendContext(maxLines);
|
|
} else {
|
|
throw new Error(`Unknown mode: ${mode}`);
|
|
}
|
|
|
|
console.log(output);
|
|
} catch (error) {
|
|
if (error instanceof Error) {
|
|
console.error(`Error: ${error.message}`);
|
|
if (error.stack) {
|
|
console.error('\nStack trace:');
|
|
console.error(error.stack);
|
|
}
|
|
} else if (error && typeof error === 'object') {
|
|
console.error('Error details:', JSON.stringify(error, null, 2));
|
|
} else {
|
|
console.error('Error: Unknown error occurred:', error);
|
|
}
|
|
process.exit(1);
|
|
}
|
|
}
|
|
|
|
main();
|