-
Notifications
You must be signed in to change notification settings - Fork 41
/
Copy pathindex.js
executable file
·380 lines (327 loc) · 12 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
#!/usr/bin/env node
/**
* git2txt - A command-line tool to convert GitHub repositories into readable text files
*
* This tool clones a GitHub repository, processes its text files, and combines them
* into a single output file. It's useful for code review, analysis, and documentation
* purposes.
*
* Features:
* - Supports public GitHub repositories
* - Filters binary and large files
* - Customizable file size threshold
* - Debug mode for troubleshooting
* - Progress indicators for long operations
*
* @module git2txt
*/
import { exec, execFile } from 'child_process';
import fs from 'fs/promises';
import os from 'os';
import path from 'path';
import { fileURLToPath } from 'url';
import { promisify } from 'util';

import chalk from 'chalk';
import { filesize as formatFileSize } from 'filesize';
import { glob } from 'glob';
import { isBinaryFile } from 'isbinaryfile';
import meow from 'meow';
import ora from 'ora';
// Promise-returning wrapper around child_process.exec
const execAsync = promisify(exec);
// CLI help text with usage instructions and examples.
// The default shown for --threshold must stay in sync with the `threshold`
// flag default passed to meow (0.1 MB).
const helpText = `
${chalk.bold('Usage')}
$ git2txt <repository-url>
${chalk.bold('Options')}
--output, -o Specify output file path
--threshold, -t Set file size threshold in MB (default: 0.1)
--include-all Include all files regardless of size or type
--debug Enable debug mode with verbose logging
--help Show help
--version Show version
${chalk.bold('Examples')}
$ git2txt https://github.com/username/repository
$ git2txt https://github.com/username/repository --output=output.txt
`;
/**
 * Terminates the process, or raises instead when running under tests.
 *
 * With NODE_ENV set to 'test' the process is never killed; an Error carrying
 * the requested exit code is thrown so a test can observe the exit attempt.
 *
 * @param {number} code - Exit code to hand to process.exit
 * @throws {Error} When NODE_ENV is 'test', instead of exiting
 */
const exit = (code) => {
    const underTest = process.env.NODE_ENV === 'test';
    if (!underTest) {
        process.exit(code);
        return;
    }
    throw new Error(`Exit called with code: ${code}`);
};
// Set up the meow-based CLI parser; the parsed positional input and flags are
// read elsewhere in this file through `cli.input` and `cli.flags`.
export const cli = meow(helpText, {
    importMeta: import.meta,
    flags: {
        // -o / --output: destination file path (no default defined here)
        output: { shortFlag: 'o', type: 'string' },
        // -t / --threshold: file size limit, expressed in MB
        threshold: { shortFlag: 't', type: 'number', default: 0.1 },
        // --include-all: disable the size and binary-file filters
        includeAll: { type: 'boolean', default: false },
        // --debug: verbose diagnostic output
        debug: { type: 'boolean', default: false },
    },
});
/**
 * Normalizes various GitHub URL formats to a consistent format
 *
 * Accepted inputs (after stripping trailing slashes):
 * - SSH form:   git@github.com:owner/repo(.git)   -> returned as-is
 * - HTTPS form: https://github.com/owner/repo     -> returned as-is
 * - Shorthand:  owner/repo                        -> expanded to the HTTPS form
 *
 * @param {string} url - The GitHub repository URL to normalize
 * @returns {string} Normalized GitHub URL
 * @throws {Error} If URL format is invalid (message: "Invalid GitHub URL: <url>")
 */
function normalizeGitHubUrl(url) {
    // Non-string input used to surface as a TypeError that was caught and
    // re-labelled; reject it directly with the same message instead.
    if (typeof url !== 'string') {
        throw new Error(`Invalid GitHub URL: ${url}`);
    }

    // Remove trailing slashes
    const trimmed = url.replace(/\/+$/, '');

    // SSH and full HTTPS URLs are already in a form git can clone
    if (trimmed.startsWith('git@github.com:') || trimmed.startsWith('https://github.com/')) {
        return trimmed;
    }

    // Short format (owner/repo); repository names may contain dots, e.g. "next.js"
    if (/^[\w-]+\/[\w.-]+$/.test(trimmed)) {
        return `https://github.com/${trimmed}`;
    }

    throw new Error(`Invalid GitHub URL: ${trimmed}`);
}
/**
 * Validates the command line input
 *
 * Only the first positional argument is inspected. It is accepted when it
 * mentions "github.com" or looks like the "owner/repo" shorthand (repository
 * names may contain dots, e.g. "vercel/next.js"). The value is returned
 * unchanged; no normalization happens here.
 *
 * @param {string[]} input - Command line arguments
 * @returns {Promise<string>} Validated repository URL
 * @throws {Error} If input is missing or invalid
 */
export async function validateInput(input) {
    if (!input || input.length === 0) {
        throw new Error('Repository URL is required');
    }
    const url = input[0];
    // Guard against a non-string first argument, which would otherwise crash
    // on `.includes` with an unrelated TypeError.
    if (typeof url !== 'string') {
        throw new Error('Repository URL is required');
    }
    const looksLikeShorthand = /^[\w-]+\/[\w.-]+$/.test(url);
    if (!url.includes('github.com') && !looksLikeShorthand) {
        throw new Error('Only GitHub repositories are supported');
    }
    return url;
}
/**
 * Downloads a GitHub repository to a temporary directory
 *
 * Runs a shallow (`--depth 1`) `git clone` into a timestamped directory under
 * the OS temp folder. Git is started without a shell and receives the URL and
 * target path as separate arguments, so neither shell metacharacters in the
 * URL nor spaces in the temp path can alter the command.
 *
 * On failure the temp directory is removed and the original error is rethrown.
 *
 * @param {string} url - GitHub repository URL
 * @returns {Promise<Object>} Object containing temporary directory path (`tempDir`) and repository name (`repoName`)
 * @throws {Error} If the URL is invalid, the clone fails, or the clone is empty
 */
export async function downloadRepository(url) {
    const spinner = process.env.NODE_ENV !== 'test' ? ora('Downloading repository...').start() : null;
    const tempDir = path.join(os.tmpdir(), `git2txt-${Date.now()}`);
    try {
        // Normalize the GitHub URL
        const normalizedUrl = normalizeGitHubUrl(url);
        // Take the name from the normalized URL (trailing slashes already
        // stripped) and drop only a trailing ".git" suffix, so names such as
        // "user.github.io" are left intact.
        const repoName = normalizedUrl.split('/').pop().replace(/\.git$/, '');
        if (cli.flags.debug) {
            console.log(chalk.blue('Debug: Normalized URL:'), normalizedUrl);
            console.log(chalk.blue('Debug: Temp directory:'), tempDir);
        }
        // Create temp directory
        await fs.mkdir(tempDir, { recursive: true });
        // Clone the repository; "--" ends option parsing before the URL
        const cloneArgs = ['clone', '--depth', '1', '--', normalizedUrl, tempDir];
        if (cli.flags.debug) {
            console.log(chalk.blue('Debug: Executing command:'), `git ${cloneArgs.join(' ')}`);
        }
        await promisify(execFile)('git', cloneArgs, {
            maxBuffer: 1024 * 1024 * 100 // 100MB buffer
        });
        // Verify the download
        const files = await fs.readdir(tempDir);
        if (files.length === 0) {
            throw new Error('Repository appears to be empty');
        }
        if (spinner) spinner.succeed('Repository downloaded successfully');
        return { tempDir, repoName };
    } catch (error) {
        if (spinner) spinner.fail('Failed to download repository');
        if (cli.flags.debug) {
            console.log(chalk.blue('Debug: Full error:'), error);
        }
        if (process.env.NODE_ENV !== 'test') {
            console.error(chalk.red('Error: Could not access the repository. Please check:'));
            console.error(chalk.yellow(' 1. The repository exists and is public'));
            console.error(chalk.yellow(' 2. You have the correct repository URL'));
            console.error(chalk.yellow(' 3. GitHub is accessible from your network'));
            console.error(chalk.yellow(' 4. Git is installed and accessible from command line'));
        }
        // Do not leave a partial clone behind
        await cleanup(tempDir);
        throw error;
    }
}
/**
 * Walks a repository directory and concatenates its files into one text blob.
 *
 * Directories named `node_modules` and `.git` are not descended into. Unless
 * `options.includeAll` is set, files larger than the threshold and files
 * detected as binary are skipped. Each included file is preceded by a header
 * with its path (relative to `directory`) and formatted size. A failure on a
 * single file counts as a skip; a failure reading a directory aborts the run.
 * Per-file diagnostics are printed only when the DEBUG environment variable
 * is set.
 *
 * @param {string} directory - Path to the repository directory
 * @param {Object} options - Processing options
 * @param {number} options.threshold - File size threshold in MB
 * @param {boolean} options.includeAll - Whether to include all files regardless of size/type
 * @returns {Promise<string>} Combined content of all processed files
 * @throws {Error} If directory traversal fails
 */
export async function processFiles(directory, options) {
    const spinner = process.env.NODE_ENV === 'test' ? null : ora('Processing files...').start();
    const maxBytes = options.threshold * 1024 * 1024;
    const divider = '='.repeat(80);
    const chunks = [];
    const tally = { processed: 0, skipped: 0 };

    /**
     * Appends one file to the output, or records it as skipped
     * @param {string} name - Base name of the file (used in diagnostics)
     * @param {string} filePath - Full path of the file
     */
    async function collectFile(name, filePath) {
        try {
            const info = await fs.stat(filePath);
            if (!options.includeAll) {
                // Size filter first, then the binary check
                if (info.size > maxBytes) {
                    if (process.env.DEBUG) console.log(`Skipping large file: ${name}`);
                    tally.skipped += 1;
                    return;
                }
                if (await isBinaryFile(filePath)) {
                    if (process.env.DEBUG) console.log(`Skipping binary file: ${name}`);
                    tally.skipped += 1;
                    return;
                }
            }
            const text = await fs.readFile(filePath, 'utf8');
            const relativePath = path.relative(directory, filePath);
            chunks.push(`\n${divider}\n`);
            chunks.push(`File: ${relativePath}\n`);
            chunks.push(`Size: ${formatFileSize(info.size)}\n`);
            chunks.push(`${divider}\n\n`);
            chunks.push(`${text}\n`);
            tally.processed += 1;
            if (process.env.DEBUG) {
                console.log(`Processed file: ${relativePath}`);
            }
        } catch (error) {
            if (process.env.DEBUG) {
                console.error(`Error processing ${name}:`, error);
            }
            tally.skipped += 1;
        }
    }

    /**
     * Depth-first traversal of a directory
     * @param {string} currentDir - Directory to traverse
     */
    async function walk(currentDir) {
        const entries = await fs.readdir(currentDir, { withFileTypes: true });
        for (const entry of entries) {
            const entryPath = path.join(currentDir, entry.name);
            if (entry.isDirectory()) {
                const ignored = entry.name === 'node_modules' || entry.name === '.git';
                if (!ignored) {
                    await walk(entryPath);
                }
                continue;
            }
            if (entry.isFile()) {
                await collectFile(entry.name, entryPath);
            }
        }
    }

    try {
        await walk(directory);
    } catch (error) {
        if (spinner) {
            spinner.fail('Failed to process files');
        }
        throw error;
    }

    if (spinner) {
        spinner.succeed(`Processed ${tally.processed} files successfully (${tally.skipped} skipped)`);
    }
    if (tally.processed === 0 && process.env.DEBUG) {
        console.warn('Warning: No files were processed');
    }
    return chunks.join('');
}
/**
 * Saves the combined text to disk.
 *
 * Outside the test environment a spinner reports progress and a failed write
 * is also logged to stderr; the underlying error is always rethrown.
 *
 * @param {string} content - Content to write
 * @param {string} outputPath - Path to the output file
 * @returns {Promise<void>}
 * @throws {Error} If writing fails
 */
export async function writeOutput(content, outputPath) {
    const spinner = process.env.NODE_ENV === 'test' ? null : ora('Writing output file...').start();
    try {
        await fs.writeFile(outputPath, content);
        spinner?.succeed(`Output saved to ${chalk.green(outputPath)}`);
    } catch (error) {
        spinner?.fail('Failed to write output file');
        const underTest = process.env.NODE_ENV === 'test';
        if (!underTest) {
            console.error(chalk.red('Write error:'), error);
        }
        throw error;
    }
}
/**
 * Removes a temporary directory tree on a best-effort basis.
 *
 * Removal errors never propagate: they are reduced to a warning on stderr,
 * and even that is suppressed when NODE_ENV is 'test'.
 *
 * @param {string} directory - Directory to clean up
 * @returns {Promise<void>}
 */
export async function cleanup(directory) {
    const removalOptions = { recursive: true, force: true };
    try {
        await fs.rm(directory, removalOptions);
    } catch {
        const underTest = process.env.NODE_ENV === 'test';
        if (!underTest) {
            console.error(chalk.yellow('Warning: Failed to clean up temporary files'));
        }
    }
}
/**
 * Main application function that orchestrates the entire process
 *
 * Flow: validate the CLI input, clone the repository, combine its files, and
 * write the result to `--output` (or `<repoName>.txt`). When NODE_ENV is
 * 'test' only the input validation runs and any error is rethrown to the
 * caller; otherwise errors are printed and the process exits with code 1.
 *
 * The exit is deliberately deferred until after the `finally` block:
 * process.exit() terminates immediately, so calling it from `catch` would
 * skip the temp-directory cleanup and leak the clone on every failure.
 *
 * @returns {Promise<void>}
 */
export async function main() {
    let tempDir;
    let failed = false;
    try {
        const url = await validateInput(cli.input);
        if (process.env.NODE_ENV !== 'test') {
            const result = await downloadRepository(url);
            tempDir = result.tempDir;
            const outputPath = cli.flags.output || `${result.repoName}.txt`;
            const content = await processFiles(tempDir, {
                threshold: cli.flags.threshold,
                includeAll: cli.flags.includeAll
            });
            if (!content) {
                throw new Error('No content was generated from the repository');
            }
            await writeOutput(content, outputPath);
        }
    } catch (error) {
        if (process.env.NODE_ENV === 'test') {
            throw error;
        }
        console.error(chalk.red('\nAn unexpected error occurred:'));
        console.error(error.message || error);
        failed = true;
    } finally {
        if (tempDir) {
            await cleanup(tempDir);
        }
    }
    // Exit only once cleanup has completed
    if (failed) {
        exit(1);
    }
}
// Entry point: launch the CLI automatically on load, except when NODE_ENV is
// 'test' so the module can be imported without starting a run.
const startedUnderTest = process.env.NODE_ENV === 'test';
if (!startedUnderTest) {
    main();
}