-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregexpp.js
executable file
·2514 lines (2325 loc) · 158 KB
/
regexpp.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env node
//Streaming Regex-based Macro Preprocessor (aka Fun With Regex Macros)
//- allows regex-based macros to be applied to a source file; allows DSL to be created using regex macros
//Copyright (c) 2018-2019 djulien
//rev history:
// 0.8 4/5/19 DJ rework vm / #directive eval
// 0.81 8/10/19 DJ fixes to work with MPASM (non-C src), use #if 0/#endif for language-neutral comments
// 0.82 8/16/19 DJ expand macros first before checking for preprocessor commands: allows caller-defined out-commenting to bypass #directives, don't expand #defines within strings, fix #define expr arg substitution, show eval/macro results in output
// 0.84 9/7/19 DJ TODO: reduce to 1 built-in #directive: #definex; define other directives using #definex (to create a DSL ukernel)
//to debug:
// node inspect scripts/rexpp.js <args>
// c, n, s, bt, .exit, repl
//alternate approach: https://stackoverflow.com/questions/3545875/custom-gcc-preprocessor
//custom control structures: https://www.chiark.greenend.org.uk/~sgtatham/mp/
//other choices:
//https://codegolf.stackexchange.com/questions/20721/create-a-c-preprocessor
//https://github.com/parksprojets/C-Preprocessor
//regex notes:
//https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions
//https://stackoverflow.com/questions/7376238/javascript-regex-look-behind-alternative
//(?:x) non-capturing match
//x(?=y) positive lookahead
//x(?!y) negative lookahead
//(?<=y)x positive lookbehind
//(?<!y)x negative lookbehind
//can't use variable len look-behinds :( work-arounds:
//https://stackoverflow.com/questions/9030305/regular-expression-lookbehind-doesnt-work-with-quantifiers-or
//https://stackoverflow.com/questions/17286667/regular-expression-using-negative-lookbehind-not-working-in-notepad/48727748#48727748
//https://javascript.info/regexp-backreferences
//string matching:
//https://stackoverflow.com/questions/6462578/regex-to-match-all-instances-not-inside-quotes/23667311#23667311
//capture up until a string:
//https://stackoverflow.com/questions/8584697/javascript-regular-expression-match-anything-up-until-something-if-there-it-ex
//nested regex:
//https://stackoverflow.com/questions/14952113/how-can-i-match-nested-brackets-using-regex/25489964
////const str = "1ab1cd1ef2221ab1cd1ef222";
////const re = /(1(?:\1??[^1]*?2))+/;
//const str = "(ab(cd(ef)))(ab(cd(ef)))";
//const re = /(\((?:\(??[^(]*?\)))+/;
//console.error(JSON.stringify(str.match(re)));
//XRegExp plug-in: http://xregexp.com/api/#matchRecursive
//no worky:
//const str = "(ab(cd(ef)))(ab(cd(ef)))";
////const re = /\(((?>[^()]+)|(?R))*\)/g; //not impl in Javascript
//const re = /(?=\()(?:(?=(?(1).*?(?=\1)).*?\((.*))(?=(?(2).*?(?=\2)).*?\)(.*)).)+?(?>.*?(?=\1))[^(]*?(?=\2$)/g; //http://www.drregex.com/2017/11/match-nested-brackets-with-regex-new.html
//console.error(JSON.stringify(str.match(re)));
//process.exit();
//works* https://stackoverflow.com/questions/6462578/regex-to-match-all-instances-not-inside-quotes/23667311#23667311
"use strict";
require("magic-globals"); //__file, __line, __stack, __func, etc
require("colors").enabled = true; //for easier to read console output; https://github.com/Marak/colors.js/issues/127
//process.on('uncaughtException', (err) => fatal(`[UNCAUGHT] ${err}`));
//const fs = require("fs");
//const vm = require("vm"); //https://nodejs.org/api/vm.html
//const glob = require("glob");
const pathlib = require("path"); //NOTE: called it something else to reserve "path" for other var names
//const JSON5 = require("json5"); //more reader-friendly JSON; https://github.com/json5/json5
const XRegExp = require("xregexp"); //https://github.com/slevithan/xregexp
//const safe = require('safe-regex'); //https://github.com/substack/safe-regex; https://regex101.com/
//const CaptureConsole = require("capture-console"); //https://github.com/joepie91/node-combined-stream2
const nodeCleanup = require('node-cleanup'); //https://github.com/jtlapp/node-cleanup, https://stackoverflow.com/questions/14031763/doing-a-cleanup-action-just-before-node-js-exits
//const debug = //TODO
const CircularJSON = require('circular-json');
//streams:
const EventEmitter = require('events');
//TODO? const miss = require("mississippi"); //stream utils
const thru2 = require("through2"); //https://www.npmjs.com/package/through2
//const byline = require('byline'); //, {LineStream} = byline;
//kludge: LineStream() generates extra linefeeds when piped, so use wrapper function API instead:
//NO WORKY: function LineStream(opts)
//{
// if (!(this instanceof LineStream)) return new LineStream(opts);
//// .pipe(createStream(new shebang(opts), {keepEmptyLines: true}))
// const instrm = new PassThrough(); //inner end-point
// const outstrm = createStream(instrm, {keepEmptyLines: true, });
// return new Duplex(outstrm, instrm); //return end-point wrapper so caller can use pipe(); CAUTION: swap in + out
//}
//const RequireFromString = require('require-from-string');
//const DuplexStream = require("duplex-stream"); //https://github.com/samcday/node-duplex-stream
//const CombinedStream = require("combined-stream2"); //
const {Readable, /*Writable, Duplex,*/ PassThrough} = require("stream");
//const Duplex = DuplexStream; //TODO: API is different
//see also https://medium.freecodecamp.org/node-js-streams-everything-you-need-to-know-c9141306be93
//const {echo_stream} = require("./dsl.js");
extensions(); //hoist to allow in-line usage further down
////////////////////////////////////////////////////////////////////////////////
////
/// Regex preprocessor (stream):
//
//common regex sub-patterns:
//using regex as lexer :)
//captures must be unnamed to allow multiple occurrences; caller can enclose within capture group if desired
//(?:x) non-capturing match
//x(?=y) positive lookahead
//x(?!y) negative lookahead
//(?<=y)x positive lookbehind
//(?<!y)x negative lookbehind
//Building blocks for the regex-based lexer.
//Each value below is a regex *source string* (not a RegExp object) that gets spliced into larger template-literal patterns.
//The blanks inside the patterns are for readability only; the patterns are compiled with the "x" flag (see the .XRE("gx") calls further down).
//NOTE(review): escre() is defined elsewhere in this file; presumably it escapes regex metachars in a literal string - confirm
const ESC = `\\\\`; //escape char; CAUTION: double esc here
//NOTE: the string above holds 2 backslash chars, which is the regex source for 1 literal backslash
const VisibleSpace = "\u00b7"; //String.fromCharCode(0xa4); //easier for debug of len/spacing
const ANYCHAR = `[\\s\\S]`; //`[^]`;
const WHITE = `(?: \\s+ )`; //ignore white space; caller must append "?" if optional; NOTE: matches \n
const WHITE_END = `(?: [^\\S\\n]+ )`; //white space at end of line *excluding newline*; see https://stackoverflow.com/questions/3469080/match-whitespace-but-not-newlines
//const SEP_re = /\s*,\s*/g; //new RegExp(`${WHITE},${WHITE}`, "g");
const NOT_ESC = `(?<! ${ESC} )`; //negative look-behind for escape char
const NUMBER = `(?: 0x [A-Fa-f\\d]+ | \\d+ )`; //hex or dec
//quoted string: "...", '...' or `...` whose opening quote is not preceded by the escape char; the body accepts escaped quotes or any char other than the closing quote type
const QUO_STR = `(?: ${NOT_ESC} " (?: ${ESC} " | [^"] )* " | ${NOT_ESC} ' (?: ${ESC} ' | [^'] )* ' | ${NOT_ESC} \` (?: ${ESC} \` | [^\`] )* \` )`; //don't use named capture; might occur multiple times in pattern
//const MACRO_NAME = "\w+"; //word chars: [a-z0-9_] //TODO: allow $ or @ in name?; allow regex pattern in place of name?
//const IDENT = `(?! \\d ) [\\w\\$\\@]+`; //allow "$" and "@" in identifiers; //`[A-Za-z$_][\\w\\$]*`; //`[A-Za-z\\$_][A-Za-z0-9\\$_]*`; //"\w+"; //\w[\w\d$_]*
//allow special chars to reduce need for parameterized regex macros:
//identifier: 1 or more of (escape char + any char) or word char / "@" / "$"; the leading look-ahead rejects a digit in first position
const IDENT = `(?! ${NOT_ESC} \\d ) (?: ${ESC} ${ANYCHAR} | [\\w@$] )+`; //allow "$" and "@" in identifiers; can't start with digit; //`[A-Za-z$_][\\w\\$]*`; //`[A-Za-z\\$_][A-Za-z0-9\\$_]*`; //"\w+"; //\w[\w\d$_]*
//file name: either a quoted string or a run of escaped chars / chars other than ? : < > | &
const FILENAME = `(?: ${QUO_STR} | (?: ${ESC} ${ANYCHAR} | [^?:<>|&] )+ )`;
// const QUO_STR = `\\\\ ${ANYCHAR} | " (?: \\\\ " | [^"] )* " | ' (?: \\\\ ' | [^'] )* '`; //\\\\ ['"] //match escaped quotes or quoted strings; based on https://stackoverflow.com/questions/6462578/regex-to-match-all-instances-not-inside-quotes/23667311#23667311a
//const QUO_STR = `(?<quotype> ${NOT_ESC} ['"] ) (?: ${ANYCHAR} (?! ${NOT_ESC} \\k<quotype> ) )* .?`;
//named capture only allows one str :(
//not good for >1 match: const QUO_STR = `(?<quotype> ${NOT_ESC} ['"] ) (?: ${ESC} ${ANYCHAR} | (?! \\k<quotype> ) ${ANYCHAR} )*? \\k<quotype>`; //match non-escaped quote followed by anything escaped or non-quote char up until closing quote (lazy); NOTE: can't use negative look-behind with varible len pattern; use look-ahead instead
//const QUO_STR = `( ${NOT_ESC} " (?: ${ESC} " | [^"] )* " | ${NOT_ESC} ' (?: ${ESC} ' | [^'] )* ' )`; //don't use named capture; this
//alternation prefix: place in front of another pattern so that a quoted string is tried (and consumed) before that pattern
const NOT_QUOTED = `${QUO_STR} |`; //`(?<! ${QUO_STR})`;
//const NOT_BRACED = `${NOT_ESC} \\{ (?: ${NOT_QUOTED} ${ESC} ${ANYCHAR} | [^}] )* \\} |`; //`(<! ...)`;
//function TEMPLATED(str) { return `${NOT_ESC} \` (?: ${ESC} \` | \\$ \\{ (?: ${NOT_QUOTED} ( \\b${name}\\b ) | ${ESC} \\} | [^}] )* \\} | [^\`] )* \` |`; } //enclosed within "`...${}...`"; CAUTION: only handles first one?
//optional "( ... )" list: named group "hasargs" captures the open paren, "params" captures the comma-separated names; every name after the first may also be "..."
const PARAM_LIST = `(?: (?<hasargs> \\( ) ${WHITE}? (?<params> ${IDENT} (?: ${WHITE}? , ${WHITE}? (?: ${IDENT} | ${escre("...")} ) )* )? ${WHITE}? \\) )?`; //optional param list; C preproc expects no space between macro name and param list, so follow that convention here as well
//const NESTED_EXPR = ` #regex for macro param; accepts nested expr syntax with helper function
//# ( #needs to be captured by group# so it can be replaced by position in param list; can't use named capture (names might occur multiple times)
//# (?:
// ${NOT_QUOTED} ${ESC} ${ANYCHAR} | #any quoted or escaped char?
//# (?<pushexpr> [({[] ) | | (?<popexpr> [)}\\]] ) #\( ... \) | \{ ... \} | \[ ... \] #nested expr
//#? ${NOT_ESC} \\( (?: ${ESC} [()] | [^()] )* \\) |
//# \\( (?: [^({[] TBD )* \\) |
//# \\{ (?: [^({[] TBD )* \\} |
//# \\[ (?: [^({[] TBD )* \\] |
//# ( [()] ) | #nested expr begin/end (must be captured); helper function used for nesting/matchup
// ${TAGCHAR("(", 1)} (?: ${NOT_QUOTED} [^${TAGCHAR(")", 1)}] )*? ${TAGCHAR(")", 1)}
//# [^,] #any param non-delimiter
//#? [^,(){}[\\]]+ #any char that does not cause expr nesting or param delimiter (comma)
//# )*?
// )`; //.*?
//Regex source for one macro argument; an alternation of 3 cases:
// 1. a quoted string or an escaped char
// 2. a group delimited by TAGCHAR("(", 1) ... TAGCHAR(")", 1), whose contents are quoted strings or any char other than the tagged close
// 3. any other chars (lazy)
//CAUTION: the "#" remarks and newlines inside the template are part of the string value; they rely on the "x" flag, and cre_macro() passes this through xre_tidy() before embedding it in a parent pattern.
//NOTE(review): TAGCHAR() is defined elsewhere in this file; presumably it returns the char tagged with a nesting level - confirm
const NESTED_EXPR = `#regex for macro param; accepts nested expr syntax with helper function
(?:
${NOT_QUOTED} ${ESC} ${ANYCHAR} | #any quoted or escaped char?
${/*escu*/(TAGCHAR("(", 1)/*, 2*/)} (?: ${NOT_QUOTED} [^${/*escu*/(TAGCHAR(")", 1)/*, 2*/)}] )* ${/*escu*/(TAGCHAR(")", 1)/*, 2*/)} | #expr nested within "()"
${ANYCHAR}*? #any other chars (non-greedy)
)`;//.xre_tidy; //tidy() to avoid "#" and \n interaction in parent pattern
//const BODY = `( (?: \\s+ ) (?! ${escre(opts.EOL)} ) (?<body> (?: [^] (?! ${escre(opts.EOL)} ) ) [^]*? ) )?`; //skip white space; body can't start with eol delimiter
// const BODY = `(?<body> ( ${QUO_STR} | (?! \\s* ${NOT_ESC} ${escre(opts.EOL)} ) ${ANYCHAR} )*? )`; //skip white space; body can't start with eol delimiter
// const STMT = `(?: ${QUO_STR} | ${ESC} ${ANYCHAR} | [^;] )*?`;
// const BLOCK = `(?: ${NOT_ESC} \\{ \\s* ${STMT} (?: \\s* ; \\s* ${STMT} )*? \\s* ${NOT_ESC} \\} )`;
// const BODY = `(?<body> (?: ${QUO_STR} | ${NOT_ESC} \\{ (?: ${ESC} [{}] | [^{}] | ( [{}] ) ) (?! \\s* ${escre(opts.EOL)} ) ${ANYCHAR} )*? )`; //skip white space; body can't start with eol delimiter
//const BODY = `(?: ${WHITE} ${NOT_ESC} \\{ ( ${NOT_QUOTED} ${ESC} ${ANYCHAR} | [^}] )* \\} | ${WHITE} ( ${NOT_QUOTED} ${ANYCHAR} )*? )`; //skip white space; body can't start with eol delimiter
//const BODY = `(?: (?<isfunc> ${NOT_ESC} \\{ ) (?: ${NOT_QUOTED} ${ESC} ${ANYCHAR} | [^}] )* \\} | ${NOT_QUOTED} ${ESC} ${ANYCHAR} | ${ANYCHAR} )*?`; //caller should skip white space; body can't start with eol delimiter; non-greedy unbraced body allows caller to match eol
//End-of-line handling.
//regexproc() swaps each occurrence of opts.EOL in the input for EOL_ph, and swaps EOL_ph back (opts.EOL or "//") on output, so internal patterns only need to know the placeholder.
const EOL_ph = "%EOL%"; //placeholder for live EOL option value
//pattern tail: optional white space, then optional named group "eol_junk" = unescaped EOL placeholder + everything after it, then end of input
const EOL_JUNK = `${WHITE}? (?<eol_junk> ${NOT_ESC} ${escre(EOL_ph)} ${ANYCHAR}* )? $`; //optional trailing comment
//pattern builder: lazily captures quoted strings / any chars into a named group ("keep" unless the caller passes another name), followed by EOL_JUNK
const EOL_KEEP = (keep) => `${WHITE}? (?<${keep || "keep"}> (?: ${NOT_QUOTED} ${ANYCHAR} )*? ) ${EOL_JUNK}`; //expr/body + optional trailing comment
//const EOL_BODY = EOL_KEEP.replace(/?<keep>/g, "?<body>");
//TODO: function EOL_xre(eol) { return new XRegExp(`${WHITE} (?<keep> (?: ${NOT_QUOTED} ${NOT_BRACED} ${ANYCHAR} )*? ) ${EOL_JUNK(eol)}`.anchorRE, "x"); } //optional trailing comment
//stand-in that regexproc() substitutes for each line continuation before splitting on newlines; it is turned back into "\n" after the split
const NEWLINE_SAVEJOIN = TAGCHAR("\n", 1); //kludge: preserve newlines during line joins when splitting regular newlines
//const ENDCOLOR = unesc(xre_tidy(ANSI_COLOR(false), 0)); //.replace(/\s/g, "");
//colors a "%" marker, then strips everything up to and including the marker so only what follows it remains
//NOTE(review): .blue_lt is not defined in the visible part of this file; presumably a color getter added by extensions() - confirm
const ENDCOLOR = "%".blue_lt.replace(/^.*%/, ""); //extract color-end control seq
//common regex ("g" flag for all matches):
//NOTE(review): .XRE() and .anchorRE are String extensions defined elsewhere in this file; presumably .XRE(flags) compiles the pattern via XRegExp and .anchorRE anchors it to the whole string - confirm
//NOTE(review): ANSI_COLOR() is also defined elsewhere; presumably it returns the pattern for a color start (true) or color end (false) control seq - confirm
const ANYCHAR_re = /[^]/g;
const NEWLINE_re = /\r?\n/g; //used for spliting lines
const UNICODE_re = /[\u0100-\uffff]/g; //gu; // /(?:[(){}])#\d+/g;
//named groups "end" / "start"; "end" is listed first so it is tried first
const COLOR_DETECT_xre = `(?<end> ${ANSI_COLOR(false)} ) | (?<start> ${ANSI_COLOR(true)} )`.XRE("gx"); //CAUTION: need to check end before start
//source location of the form "@file:line" or "@file:line.column"; named groups "file", "line" and "column" (NOTE: the "line" group spans the optional ".column" part as well)
const SRCLINE_xre = `@ (?<file> .*? ) : (?<line> ${NUMBER} (?: \\. (?<column> ${NUMBER} ) )? )`.anchorRE.XRE("gx"); //split(":"); //NOTE: need "g" for multi-srcline
//XRE.debug = true;
//line continuation: "\" + newline, together with optional non-newline white space before the "\", after it, and at the start of the next line
const LINEJOIN_xre = `${WHITE_END}? ${escre("\\")} ${WHITE_END}? \\r?\\n ${WHITE_END}?`.XRE("gx"); //CAUTION: non-std behavior: trim white space before, allow white space after "\"
//line separator: newline (optional \r) together with adjacent non-newline white space on both sides, so split() also trims line ends and line starts
const LINESPLIT_re = /[^\S\n]*\r?\n[^\S\n]*/g; //no worky: `${WHITE_END}? (?! ${escre("\\")} ${WHITE_END}? ) \\r?\\n ${WHITE_END}?`.XRE("gx"); //only split if no line continuation char found
//console.error(LINESPLIT_xre.xregexp.source.escnp.escnl, srcline());
//const regexproc =
//module.exports.regexproc =
/**
 * Create the regex macro preprocessor as a through2 transform stream.
 * Incoming chunks are cut into logical lines, each line is run through expand(), and the result is sent downstream with pushline().
 * Must be called as a plain function; calling with "new" is reported via fatal().
 * CAUTION: opts also used as vm context.
 * @param {object} [opts] - options; this function reads opts.infilename (source name for line tracking, "stdin" if falsy) and opts.EOL (end-of-line comment delimiter)
 * @returns {object} the stream returned by thru2(), with lineproc() attached as a property
 */
function regexproc(opts)
{
    /**
     * Transform handler: split a chunk into line fragments and feed them to lineproc().
     * The last fragment of each chunk might be incomplete, so it is held in this.remainder and joined with the next chunk.
     */
    function xform(chunk, enc, cb)
    {
        if (!++this.numchunks) this.numchunks = 1; //first chunk: counter was not set yet
        //reserved markers must not be present in the raw input:
        const reject_newline_tag = (...args) =>
        {
            const ofs = args.at(-2); //match offset as passed to a replacer function
            return fatal(`conflict: input stream already contains tagged newline at ${expand.context.where}.${ofs}: ${highlight(chunk, ofs)}`);
        };
        const reject_eol_tag = (...args) =>
        {
            const ofs = args.at(-2);
            return fatal(`conflict: input stream already contains tagged EOL at ${expand.context.where}.${ofs}: ${highlight(chunk, ofs)}`);
        };
        //step 1: protect line continuations so they survive the newline split
        const joined = tostr(chunk)
            .replaceAll(NEWLINE_SAVEJOIN, reject_newline_tag)
            .replace(LINEJOIN_xre, NEWLINE_SAVEJOIN);
        //step 2: use a generic EOL marker internally so system regexs/macros don't depend on the live EOL value
        const generic = joined
            .replaceAll(EOL_ph, reject_eol_tag)
            .replaceAll(opts.EOL, EOL_ph);
        //step 3: cut into fragments at the real newlines
        const frags = generic.split(LINESPLIT_re);
        debug(`chunk[${this.numchunks}] len ${chunk.length} with ${commas(plural(numlines(chunk)))} line${plural.suffix} -> ${commas(plural(frags.length))} frag${plural.suffix}`.cyan_lt, trunc(chunk, 120));
        //step 4: process line-by-line so #directives work correctly; joined lines get their \n back to keep line#s and in-line comments correct
        const last_inx = frags.length - 1;
        for (const [inx, frag] of frags.entries())
        {
            const line = (this.remainder || "") + frag.replaceAll(NEWLINE_SAVEJOIN, "\n");
            this.remainder = this.lineproc(line, inx == last_inx);
        }
        cb();
    }

    /**
     * Flush handler: process whatever fragment is still pending, then report the line count taken from the current source location.
     */
    function flush(cb)
    {
        this.lineproc(this.remainder);
        const loc = expand.context.where.match(SRCLINE_xre);
        if (!loc) fatal(`can't get srcline parts from '${expand.context.where}'`);
        debug(`${commas(plural(Math.floor(+loc.line)))} line${plural.suffix} processed from '${loc.file}'`);
        cb();
    }

    /**
     * Expand one logical line and emit the result.
     * @param {string} linebuf - line text; contains \n if lines were joined
     * @param {boolean} [keep_frag] - truthy = don't process; hand the fragment back so it can be joined with the next chunk
     * @returns linebuf as-is when keep_frag is set, else the text left in expand.context.inbuf (the caller prepends it to the next line)
     */
    function lineproc(linebuf, keep_frag)
    {
        if (keep_frag) return linebuf;
        debugger;
        expand.debug = true;
        const expanded = expand(linebuf, expand.context.where);
        if (!isundef(expanded))
        {
            const outbuf = tostr(expanded).replaceAll(EOL_ph, opts.EOL || "//"); //put the live EOL delimiter back
            this.pushline(outbuf);
        }
        //advance the run-time source location; line#s are relative so macros can change them, and the column is dropped for the next line
        const loc = expand.context.where.match(SRCLINE_xre);
        if (!loc) fatal(`can't get srcline parts from '${expand.context.where}'`);
        expand.context.where = `@${loc.file}:${+loc.line + numlines(linebuf)}`;
        //hand over (and reset) any text that macros injected into the stream
        const injected = expand.context.inbuf;
        expand.context.inbuf = "";
        return injected;
    }

    debug.once(`expand.debug = ${expand.debug = -1} (sticky)`.red_lt);
    opts = opts || {};
    if (this instanceof regexproc) fatal(`don't use "new" with regexproc()`);
    debug.once("node.js versions:".cyan_lt, process.versions);
    debug("regexproc opts:", opts);
    expand.context.where = `@${opts.infilename || "stdin"}:1`; //run-time srcline
    const strm = thru2(xform, flush);
    strm.lineproc = lineproc;
    return strm;
}
//public API: stream factory + version string
Object.assign(module.exports, {regexproc, version: "0.9.19"});
//main dictionary of defined macros (also exported):
//- 2 types of macros: named or regex
//- both types can use functions with named param lists or simple text substitutions
//- text substitutions are expanded before functions
//CAUTION: store other macro housekeeping somewhere else to avoid conflicts with user-defined macros
const macros = module.exports.macros = {};
//define a new macro:
//2 types: regular C pp style #define and new regex #definex
//either type can have params and use static text substitutions or a Javascript function
//macros.create =
function cre_macro(parts, /*EOL,*/ where) //, body_inx, argbuf)
{
//debug("cre macro:", JSON.stringify(parts));
const TYPE = parts.named? "named": "regex"; //macro type: named vs. regex
const ignore_dups = false; //~tostr(parts.flags).indexOf("D");
const body_ofs = parts.input.indexOf(parts.body);
if (!~body_ofs) fatal("can't find body");
//debug("input:", parts.input.escnp.escnl); //console.error(parts.input.escnp.escnl);
//debug("body start:", parts.body.indexOf("{")); //console.error(parts.body.indexOf("{"));
//debug("body:", parts.body.escnp.escnl.color_fixup); //console.error(parts.body.escnp.escnl.color_fixup);
//debug("highlight:", highlight(parts.body, parts.body.indexOf("{"))); //console.error(highlight(parts.body, parts.body.indexOf("{")));
// if (!parts.isfunc && ~parts.body.indexOf("{")) warn(`isfunc might be wrong in body at ${where.replace(/\.\d+$/, "")}.${body_ofs}+${parts.body.indexOf("{")}: '${parts.input.slice(0, body_ofs).escnp.escnl}${highlight(parts.body, parts.body.indexOf("{"))}${parts.input.slice(body_ofs + parts.body.length).escnp.escnl}'`); //parts.isfunc = true
// const desc = parts.name? "macro": parts.regex? "regex macro": "?UNKN TYPE?";
//xre_fixup.debug = true;
// xre_fixup(DEFINE_xre, parts);
//debug(JSON.stringify(parts.body));
//debug(JSON.stringify(parts[3]));
// [parts.body, parts.regex] = [pre_parse.undo(parts.body), pre_parse.undo(parts.regex)]; //undo nested pre-parse; need correct syntax to compile body
// [parts.body, parts.regex] = [TAG_CHAR(parts.body), TAGCHAR(parts.regex)]; //undo nested pre-parse; need correct syntax to compile body
// parts.body = pre_parse.undo(parts.body); //undo nested pre-parse; need correct syntax to compile body
// if (body_inx && (parts.body != parts[body_inx])) warn(`body ${(parts.body || "").length}:'${parts.body}' != parts[${body_inx}] ${(parts[body_inx] || "").length}:'${parts[body_inx]}'`); //XRegExp bug?
// if (parts.body) parts.body = TAGCHAR(parts.body); //undo nested pre-parse
// if (parts.regex) parts.regex = TAGCHAR(parts.regex); //undo nested pre-parse
// [parts.body, parts.regex] = [pre_parse.undo(parts.body), pre_parse.undo(parts.regex)]; //undo nested pre-parse
parts.regex = xre_tidy(TAGCHAR(parts.regex, -1)); //promote nested "(){}"
parts.body = TAGCHAR(parts.body); //untag and let JavaScript handle it
const FUNCBODY_xre = `${WHITE}? \\{ ${ANYCHAR}* \\} ${WHITE}?`.anchorRE.XRE();
parts.isfunc = parts.body.match(FUNCBODY_xre); //!parts.body.indexOf("{");
//debug.once("func xre:", xre_tidy(FUNCBODY_xre));
if (cre_macro.debug) debug(`cre ${TYPE} macro[${numkeys(macros)}]:`, parts[TYPE].cyan_lt, "delim:", parts.delim || "", "flags:", parts.flags || "", "hasargs?", !!parts.hasargs, "params:", parts.params || "", "isfunc?", !!parts.isfunc, "body:", parts.body || "", "eol junk:", parts.eol_junk || "", "where:", where); //"eol:", /*this.opts.*/EOL.escnl, srcline);
// const param_re = (parts.params || []).map((param) => new RegExp(`\\b${param}\\b`));
// const body = parts.params? (...params) => `${parts.body}`.replace(): parts.body; //body must be a text replace
//? if (parts.body) parts.body = parts.body.trim(); //drop trailing spaces
// if (parts.params) parts.body = eval(`(function ${parts.params} ${parts.body})`); //treat body as a function, include params with macro body/function; see also https://stackoverflow.com/questions/2573548/given-a-string-describing-a-javascript-function-convert-it-to-a-javascript-func
// else parts.body = eval(`(function() { return "${parts.body || ""}"; })`);
// parts.body = !parts.params? parts.body: //|| "": //simple string replacement
// eval(debug(`(function ${parts.params || "()"} { ${parts.body || ""} })`, "<:regex macro body function")); //treat body as a function, include params with macro body/function; see also https://stackoverflow.com/questions/2573548/given-a-string-describing-a-javascript-function-convert-it-to-a-javascript-func
// if (parts.params) parts.params = `match, ${parts.params}`;
// return `function ${parts[1]}${parts[2] || "()"} { ${parts[4]} }`; //convert to function def
// return `//define ${parts.name}`.pink_lt; //annotate source file (mainly for debug)
// return; //no output from this line
// opts.macros[parts.name] = {name: parts.name, args: parts.params.split(/\s*,\s*/)), body: parts.body | ""};
// opts.macros[parts.name] = {/*pattern: new Regexp("[^a-z0-9_]" + parts[1],*/ arglist: parts.params, body: parts.body, srcline: this.srcline};
// const EXPAND_xre = new XRegExp(`
// ${QUO_STR} | #match and ignore to prevent matches within strings; must be first option to override other patterns
//# (?: \\s* ${escre(opts.EOL)} .* $ ) | #match and ignore junk/comment at end of line (not captured); must match before macro name
// (?: ${EOL_JUNK} $ ) | #match and ignore junk/comment at end of line (not captured); must match before macro name
// (?<occurrence>
// \\b ${escre(parts.name)} \\b #don't match partial names
// ${parts.params? `\\( \\s* ${NESTED_EXPR} ( \\s* , \\s* ${NESTED_EXPR} )* \\s* \\)`: ""} #just use var args always
// )`.replace(/<(push|pop)expr>/g, (name, prefix) => `<${prefix}${++num_expr >> 1}>`), "gx"); //kludge: XRegExp wants unique capture names
// let sv_rex;
// let num_expr = 0;
//#no worky for var #args :( ${parts.params? `\\( ${WHITE} (?: ( ${NESTED_EXPR} ) (?: ${WHITE} , ${WHITE} ( ${NESTED_EXPR} ) )* )? ${WHITE} \\)`: ""} #just use var args always; NOTE: can't use named capture due to multiple occurrences
// const EXPAND_xre = new XRegExp(sv_rex = xre_tidy(parts.regex || `#regex to match named macro with params
//XRE.debug = true;
//const PARAM_LIST = `(?: (?<hasargs> \\( ) ${WHITE}? (?<params> ${IDENT} (?: ${WHITE}? , ${WHITE}? (?: ${IDENT} | ${escre("...")} ) )* )? ${WHITE}? \\) )?`; //optional param list; C preproc expects no space between macro name and param list, so follow that
//debug.once("nested expr:", tostr(NESTED_EXPR));
//debug.once("nested expr:", escu(xre_tidy(NESTED_EXPR)).escnp.escnl);
//console.error("nested expr:", escu(xre_tidy(NESTED_EXPR)).escnp.escnl);
//function show(pattern) { console.error("pattern:", escu(xre_tidy(pattern)).escnp.escnl, srcline(+1)); return pattern; }
//console.error("params", xre_tidy(parts.hasargs? `\\( ${WHITE}? (?: ${(parts.params || "").split(/\s*,\s*/).map((arg) => `( ${escu(xre_tidy(NESTED_EXPR))/*.escnp.escnl*/} )`).join(` (?: ${WHITE}? , ${WHITE}? ) `)} ) ${WHITE}? \\)`: ""));
//debug("matcher:", parts.hasargs? `\\( ws ${parts.params? `(?: ${parts.params.split(/\s*,\s*/).map((arg) => arg && `( nested )`).join(` (?: ws , ws ) `)} ws )`: ""} \\)`: "(no params)");
const EXPAND_xre = (parts.regex || `#regex to match named macro with params
#? (?: ${"QUO_STR"} ) | #ignore anything within a quoted string
# (?: ${"EOL_JUNK"} ) | #ignore comments
${parts[TYPE].match(/^\w/)? "\\b": ""} ${/*escre*/(parts[TYPE])} ${parts[TYPE].match(/\w$/)? "\\b": ""} #don't match partial names (if they are alphanumeric)
#NOTE: nested "()" are tagged with nesting level so only need to match top-level "()" here; avoids need for recursive expr helper function
${parts.hasargs? `\\( ${WHITE}? ${parts.params? `(?: ${parts.params.split(/\s*,\s*/).map((arg) => arg && `( ${escu(xre_tidy(NESTED_EXPR), false)/*.escnp.escnl*/} )`).join(` (?: ${WHITE}? , ${WHITE}? ) `)} ${WHITE}? )`: ""} \\)`: ""} #just use var args always; NOTE: can't use named capture due to multiple occurrences
#? | (?: ${"ANYCHAR"} ) #ignore anything else
`).XRE(tostr(parts.flags)/*.replace("D", "", "!global")*/ || "gx"); //.replace(/<(push|pop)expr>/g, (name, prefix) => "<${prefix}expr${++num_expr >> 1}>"), "gx"); //kludge: XRegExp wants unique capture names
//# ${parts.params? `\\( ${WHITE} (?: ( ${NESTED_EXPR} ) (?: ${WHITE} , ${WHITE} ( ${NESTED_EXPR} ) )* )? ${WHITE} \\)`: ""} #just use var args always; NOTE: can't use named capture due to multiple occurrences
//#fixed #args: ${parts.params? `\\( \\s* ${parts.params.split(/\s*,\s*/).map((param) => (param == "...")? `${NESTED_EXPR} ( \\s* , \\s* ${NESTED_EXPR} )*`: NESTED_EXPR).join(" \\s* , \\s* ")} \\s* \\)`: ""}
// !parts.params.length? `${parts.name} \\s* \\( \\s* \\)`:
// const body = parts.params? (...params) => `${parts.body}`.replace(): parts.body; //body must be a text replace
// const params_re = parts.params? parts.params/*.slice(1, -1).trim()*/.split(/\s*,\s*/).map((param) => new RegExp(`${QUO_STR} | \\b ${param} \\b`)): null;
// if (parts.params)
//debug("params:", JSON.stringify(parts.params));
// parts.body = !parts.params? parts.body: //|| "": //simple string replacement
// eval(`(function(match, ...args) //param substitution
// {
// return "${parts.body || ""}" ${(parts.params || "").split(/\s*,\s*/).map((arg, i) => arg? `.replace(/\\b${arg}\\b/g, args[${i}] || "")`: "").join("")};
// })`); //xlate body to function, include params with macro body/function; see also https://stackoverflow.com/questions/2573548/given-a-string-describing-a-javascript-function-convert-it-to-a-javascript-func
//params body macro
//- str str
//(...) str { return str.replace(param, arg); }
//- {} func
//(...) {} func
// if (parts.params && (parts.body[0] != "{")) parts.body = `{ return ${parts.body}; }`;
if (parts.isfunc && parts.hasargs && !(parts.params || "").match(/^match\b/)) parts.params = `match${parts.params? `, ${parts.params}`: ""}`; //placeholder for entire match if caller did not include one
//function TEMPLATED(str) { return `${NOT_ESC} \` (?: ${ESC} \` | \\$ \\{ (?: ${NOT_QUOTED} ( \\b${name}\\b ) | ${ESC} \\} | [^}] )* \\} | [^\`] )* \` |`; } //enclosed within "`...${}...`"; CAUTION: only handles first one?
const TEMPLATED = `TBD!`.XRE("gx");
debug.once("TBD".red_lt);
const params_re = parts.params? parts.params.split(/\s*,\s*/).map((name) => `${TEMPLATED(name)} | ${NOT_QUOTED} ( \\b${name}\\b)`.XRE("gx")): []; //new RegExp(`\\b${name}\\b`, "g")): []; //RE used for param substitutions; only used for text (non-function) macros
//debug(JSON.stringify(parts.params), JSON.stringify(params_re));
//CAUTION: if macro body uses dependent macros, those must be defined first or don't use braces (else need to defer eval until actual usage)
//no- lazy compile: don't compile macro until first use; this allows dependent macros to be defined after function macro
// parts.body = parts.isfunc? eval_try(debug(`(function (${parts.params || ""}) ${parts.body})`, "<:macro body function")): //use body function as-is
// parts.params? eval_try(debug(`(function (${parts.params}) { return "${parts.body || ""}"${parts.params.split(/\s*,\s*/).slice(1-1).map((arg, inx) => `.replace(/\\b${arg}\\b/g, ${arg})`).join("")}; })`, "<:parameterized macro string")): //param substitution
// parts.body || ""; //simple string replacement
// const args = get_paramlist(match) || []; //use helper function to extract possibly recursive parameters
// else parts.body = eval(`(function() { return "${parts.body || ""}"; }`);
if (cre_macro.debug) debug(`define ${TYPE} macro[${numkeys(macros)}]`.cyan_lt, xre_abbrev(parts[TYPE])/*.escnp.escnl*/, "id:", cre_macro.seqnum + 1, "matcher:", xre_tidy(EXPAND_xre), "func?:", !!parts.isfunc, `${typeof parts.body} body:`, parts.body/*.escnl*/ || "", `${plural(params_re.length)} param${plural.suffix}:`, parts.params || "", "where:", where); //`'${this.linebuf}' => define macro '${parts.name}', body '${(parts.body || "(none)").escnl}'`);
// if (opts.macros[parts.name]) return warn(`ignoring duplicate macro '${parts.name}' definition on ${this.srcline}, previous was on ${opts.macros[parts.name].srcline}`);
const dup = /*defined.*/macros[parts[TYPE]];
if (dup /*&& !ignore_dups*/) ((dup.body == parts.body)? warn: error)(`${TYPE} macro '${parts[TYPE]}' redefined at ${where} (previous was at ${dup.where})`); //if value didn't change just make it a warning (this occurs in some of the Microchip files)
// defined.ignore_dups = false; //auto-reset for next time
if (dup !== null) cre_macro.order.splice(cre_macro.order.length - cre_macro.num_funcs + (parts.isfunc? cre_macro.num_funcs++: 0), 0, parts[TYPE]); //defined.order.length); //put function macros after text macros; otherwise preserve order of definition
cre_macro.debug = false; //auto-reset
++cre_macro.seqnum; //detect when function macros need to be re-compiled
return macros[parts[TYPE]] = {xre: EXPAND_xre, body: /*(parts.body || "").trim(), params_re*/parts.body, isfunc: parts.isfunc, params: parts.params, params_re, where, seqnum: cre_macro.seqnum, ID: cre_macro.seqnum, type: TYPE, key: parts[TYPE], undefine: function() { undef(this.key); }}; //convert to regex for macro expander
// return opts.EOL + `'${this.linebuf}' => define regex macro '${JSON.stringify(opts.macros[parts.regex].xre).pink_lt}'`.yellow_lt;
function undef(key)
{
debug("undef macro:".cyan_lt, xre_abbrev(key));
const retval = macros[key];
macros[key] = null; //NOTE: null performs better than delete()
++cre_macro.seqnum; //|| (cre_macro.seqnum = 1); //detect when function macros need to be re-compiled \\
return retval;
}
}//;
//Object.defineProperties(cre_macro,
//{
//!enumerable:
// seqnum: { value: 0, writable: true, }, //allows checking for stale function macros
// order: { value: [], writable: true, }, //text macros are applied first, then function macros
// num_funcs: { value: 0, writable: true, }, //for updating order faster
//});
//bookkeeping kept as properties on cre_macro itself (stale checking and expansion order for macro functions):
cre_macro.seqnum = 0; //bumped whenever a macro is defined or undefined
cre_macro.order = []; //macro keys in the order they should be applied
cre_macro.num_funcs = 0; //how many entries of order[] are function macros
module.exports.cre_macro = cre_macro;
//expand macros:
//repeats until nothing changes
//string macros are expanded first (pass 1), then function macros (pass 2), in order of definition
//#directives are just function macros
//NOTE: need to call expand() on empty lines as well; macros might append/replace linebuf
//params:
//  linebuf = text to expand
//  where = source location of linebuf; only used in messages and stats
//  want_funcs = false to leave function macros alone (used below when a function macro body is expanded before being compiled)
//returns: the expanded text
//side effects: updates the perf stats kept on expand.* (count, perf_time, substs, max_iter/maxwhere, max_subst, compiles_*);
//  auto-resets expand.debug unless it is -1 (sticky)
//macros.expand =
function expand(linebuf, where, want_funcs = true) //, where)
{
    ++expand.count;
    //CAUTION: hrtime.bigint() returns a BigInt and JS throws TypeError if BigInt is mixed with Number in arithmetic;
    //the stats are initialized with a plain Number 0, so coerce the accumulator before using it
    expand.perf_time = BigInt(expand.perf_time || 0) - process.hrtime.bigint();
//keep expanding until no more changes:
    let num_iter;
    for (num_iter = 0; /*num_iter >= 0*/; ++num_iter) //apply macros until no more matches (not changes)
    {
        if (num_iter > 20) fatal("inf loop?");
        const svbuf = linebuf;
        linebuf = pre_parse(linebuf, where); //tag nested expr for easier regex matching; compensates for no recursive regex
        if (expand.debug) debug(`expand[${expand.count}] iter[${num_iter}] apply ${commas(plural(cre_macro.order.length) - cre_macro.num_funcs)}${want_funcs? "+" + commas(cre_macro.num_funcs): ""} macro${plural.suffix} to linebuf:`.blue_lt, linebuf, `from ${where} ${srcline(+1)}`.blue_lt);
//expand macros; every() stops at the first macro that returns CANCEL
        const more_expand = cre_macro.order.every(expand_macro); //TODO? maybe use indexOf to kick-start regex match; OTOH, regex perf is good, don't really need to
        linebuf = TAGCHAR(linebuf); //presumably removes the tags added by pre_parse() (was: pre_parse.undo) - TODO confirm
        if (linebuf == svbuf) break; //stop when no further changes; just compare result rather than trying to count substitutions
        if (!more_expand) break; //a function macro returned a final result
        if (expand.debug) debug(`old linebuf[${num_iter}]:`.cyan_lt, svbuf, where) && debug(`new linebuf[${num_iter}]:`.cyan_lt, linebuf, "highlt:", highlight(linebuf, strcmp(linebuf, svbuf), 0, strcmp.difflen), where); //separate lines for easier visual compare
        //NOTE(review): looks like a debugging stop; it fires after the first pass that changed linebuf, so a second pass is only reached if fatal() returns - confirm before removing
        fatal("quit");
    }
    expand.perf_time += process.hrtime.bigint();
    if (num_iter > expand.max_iter) [expand.max_iter, expand.maxwhere] = [num_iter, where];
    if (expand.debug != -1) expand.debug = false; //auto-reset for next time if non-sticky
    return linebuf;

    //apply one macro (looked up by key) to linebuf
    //called via cre_macro.order.every(), so ret val CONTINUE = go on to next macro, CANCEL = stop applying macros
    function expand_macro(key, inx, all)
    {
        const CONTINUE = true, CANCEL = false;
        const macro = macros[key];
        if (macro === null) return CONTINUE; //deleted (#undefine'd) macro
        if (!macro.xre) fatal(`${macros[key].isfunc? "function": "text"} macro[${inx}/${all.length}] '${trunc(key, 50)}' at ${where} invalid def?`); //not a macro (vm context contains other globals)
        let found = 0; //#matches handed to the replacer (also counts matches that were left unchanged)
        let want_cancel = false;
        if (expand.debug && !linebuf.match(macro.xre)) debug(`macro[${inx}/${all.length}]:`, escu(xre_tidy(macro.xre)), "!match:", escu(linebuf));
        linebuf = linebuf.replace(macro.xre, (match, ...args) => //TODO: fix "g" flag
        {
            if (want_cancel) return match; //no more changes
            if (macro.isfunc && !want_funcs) return match; //don't apply function macros to macro bodies (could interfere with JS syntax - JS is outside DSL)
            args.pop(); //drop src str
            const ofs = args.pop(); //drop match ofs; remaining args are the capture groups
            if (expand.debug) debug(`match[${found}] with macro[${inx}] ${macro.named || xre_abbrev(macro.xre).quoted}:`.cyan_lt, `${typeof match}:`, match, "ofs:", ofs, `${plural(args.length)} repl arg${plural.suffix}:`, args, "xre flags:", macro.xre.xregexp.flags, `apply ${!macro.isfunc? "text": macro.cached_body? "func": "uncompiled"} macro:`, trunc(key, 50), "stale?", `${macro.seqnum != cre_macro.seqnum} (${macro.seqnum} vs. ${cre_macro.seqnum})`, "match str:", match);
//lazy compile: don't (re)compile macro until first time used; this allows dependent macros to be defined after function macro (and improves performance)
            ++found;
            if (found > 20) fatal("possible loop"); //paranoid
            if (macro.isfunc && !macro.cached_body) //need to compile macro body
            {
                const refresh = macro.cached_body? "re": ""; //always "" here since cached_body is empty; selects compiles_* vs. recompiles_* counter
                macros[key] = null; //don't try to apply self to self; TODO: allow recursive macros?
                expand.debug = true; //NOTE(review): forces debug output for the nested expand() below; looks like a debugging leftover - confirm
                const expanded_body = expand(macro.body, macro.where, false); //text macros only; function macros are not applied to JS code
                macro.cached_body = eval_try(`function macro_func_${inx}_${macro.named || xre_abbrev(macro.xre, 1)}(${macro.params || ""}) ${expanded_body}`); //use body function as-is (after dependent macros expanded)
                debug("compiled ok?", !!macro.cached_body);
                if (macro.cached_body) macro.seqnum = cre_macro.seqnum; //remember which macro generation this was compiled against
                macros[key] = macro; //restore self
                ++expand[`${refresh}compiles_${macro.cached_body? "ok": "err"}`];
                if (!macro.cached_body) fatal(`compile macro ${macro.named || xre_abbrev(macro.xre, 1)} failed`); //complain because macro is needed now (lazy compile)
            }
            const expanded = macro.cached_body?
                macro.cached_body.call(expand.context, match.toString(), ...(args.map((arg) => arg && TAGCHAR(arg, -1)))): //function call to replace text; remain unchanged if retval undef (caller should return "" if wanted)
                macro.params_re.reduce((result, arg_re, arg_inx) => result.replace(arg_re, (submatch, argname) => argname? escrepl(TAGCHAR(args[arg_inx], -1)): submatch), macro.body || ""); //text macro: substitute each param name within body by the corresponding arg
            if (expand.debug && (expanded != match.toString())) debug(`before expand ${typeof match}:`, match.toString()) && debug(`after expand ${typeof expanded}:`, expanded);
            const retval = isundef(expanded, match.toString()); //preserve original line unless macro function returns one
            const MOREDIR_xre = `^ ${WHITE}? ${ESC}? \\# ${WHITE}? \\w`.XRE(); //result starts with "#" + word char (looks like another directive)
            want_cancel = macro.cached_body && !tostr(retval).match(MOREDIR_xre); //stop expansion if function returned result, unless that result needs further processing
            return retval;
        });
        if (!want_cancel && ~macro.xre.xregexp.flags.indexOf("g") && linebuf.match(macro.xre)) fatal("didn't replace all matches");
        debug(`${plural(found)} substitution${plural.suffix} for this macro`);
        expand.substs += found;
        //CAUTION: ">" binds tighter than "||"; parens are needed so the first macro is compared against 0 rather than undefined
        if (found > ((expand.max_subst || {}).count || 0)) expand.max_subst = {count: found, where};
        return want_cancel? CANCEL: CONTINUE;
    }
}//;
module.exports.expand = expand;
//init perf stats (kept as properties on expand() itself):
Object.assign(expand,
{
    context: /*module.exports*/ {}, //"this" object handed to compiled function macros
    count: 0, //#calls to expand()
    perf_time: 0, //elapsed time accumulator; expand() adds process.hrtime.bigint() deltas to it
    substs: 0, //total #substitutions
    max_iter: 0, //highest #passes needed for one expand() call
    compiles_ok: 0,
    recompiles_ok: 0,
    compiles_err: 0,
    recompiles_err: 0,
});
/////////////////////////////////////////////////////////////////////////////////
////
/// Command-line interface (usable in shebangs):
//
//command-line driver:
//turns process.argv into preprocessor source text (system macros first, then one line per command-line arg),
//pipes that text through regexproc() and returns the resulting stream (caller can .pipe() it somewhere)
//params:
//  opts = option defaults; deep-copied (via JSON) so caller's object is not altered
//CAUTION: must be called as a plain function; "new CLI()" is rejected
//CAUTION: process.argv is altered in place (argv[2] is replaced by its shebang-split parts)
function CLI(opts) //= {})
{
    if (this instanceof CLI) fatal(`don't use "new" with CLI()`);
    const files_seen = []; //source files to process (in order)
    const startup_code = []; //generated source lines: sys macros, then #define/#undef/#incl_folder/#include from args
    const downstream_args = []; //options that were not consumed here as macros/folders/files
    sys_macros(startup_code);
    const num_sysmacr = startup_code.length; //remember how many lines belong to sys macros
    opts = JSON.parse(JSON.stringify(opts || {})); //deep copy; don't alter caller's data
    opts.debug = true; //NOTE(review): forces debug on unless a command-line option overrides it; looks like a debugging leftover - confirm
    process.argv.splice_fluent(2, 1, ...(shebang_args("#!CLI " + process.argv[2]) || process.argv.slice(2, 3))) //split shebang into args and strip comments
        .forEach((arg, inx, all) =>
        {
            debug(`arg[${inx}/${all.length}]:`, arg.blue_lt);
            if (inx < 2) { debug.more("=>".blue_lt, "SKIP".blue_lt); return; } //skip node + script file names
            startup_code.push(`\/\/arg[${inx}/${all.length}]: '${arg}'`); //kludge: maintain arg count for error messages
            set_opt(arg);
        });
    debug.enabled = opts.debug; //flush or discard now that caller's intention is known; gets turned off here if caller didn't specify
    debug(`${__file}:`.green_lt, `${commas(plural(files_seen.length))} source file${plural.suffix} to process:`.green_lt, files_seen.join(", "));
    if (!files_seen.length) startup_code.push("#include -"); //read from stdin if no other input files specified
    startup_code.splice(num_sysmacr, startup_code.length - num_sysmacr, ...language_neutral(startup_code.slice(num_sysmacr))); //keep sys macros at start
    return str2strm(startup_code.join("\n").replaceAll(EOL_ph, opts.EOL || "//"))
        .pipe(regexproc(opts))
        .emit_fluent_delayed("args", downstream_args)
        .on(SBEVT, more_opts) //no worky
        .on("finish", () => eof("finish"))
        .on("close", () => eof("close"))
        .on("done", () => eof("done"))
        .on("end", () => eof("end"))
        .on("error", err => { eof(`ERROR ${err}`.red_lt); process.exit(1); }); //non-zero exit code so calling shell/script can detect the failure

    //report (then reset) warning/error counts; hooked to several stream events, so the reset keeps totals from being shown again
    function eof(why)
    {
        debug(`${__file} stream: ${why || "eof"}`.green_lt);
        if (warn.count) console.error(`${commas(plural(warn.count || 0))} warning${plural.suffix}`.yellow_lt.color_fixup);
        if (error.count) console.error(`${commas(plural(error.count || 0))} error${plural.suffix}`.red_lt.color_fixup);
        warn.count = error.count = 0;
    }

    //parse additional options that arrive after startup
    //params:
    //  opts = array of option strings (shadows CLI's opts within this function)
    //  inject = callback that receives the generated #define/#include text
    function more_opts(opts, inject) //no worky from here, but allow it to be called from elsewhere
    {
        debug("more opts:", opts);
        const sv_debug = debug.enabled;
        debug.enabled = []; //kludge: debug.more() within set_opt() requires buffering
        startup_code.length = 0; //clear previously processed #define/#include
        //was get_arg(), which is not defined within CLI; set_opt() is the parser that fills startup_code (checked below)
        (opts || []).forEach((arg, inx, all) => debug(`more args[${inx}/${all.length}]:`, arg) && set_opt(arg)); //parse new options
        debug.enabled = sv_debug;
        if (startup_code.length) inject(startup_code.join("\n")); //inject #includes and #defines into stream
    }

    //classify one command-line arg and act on it:
    //  -Uname / -Dname=value  => "#undef" / "#define" line added to startup_code
    //  -Ifolder               => "#incl_folder" line added to startup_code
    //  file name (no +/-)     => remembered in files_seen + "#include" line added to startup_code
    //  +/-echo|dead|linenums|debug => boolean stored in opts, arg also passed downstream
    //  debug=value, eol=value => value stored in opts.debug / opts.EOL, arg also passed downstream
    //anything else is reported as an invalid option (and still passed downstream)
    function set_opt(str)
    {
//might as well abuse (er, leverage) regex as much as possible: :)
        const OPTION_xre = `
# (?<lhs>
# (?<onoff> [-+] )? #turn option on/off (optional); allows caller to override either orientation of defaults
# (?<name> (?<first> \\S ) (?<other> [^]*? ) ) #[^=\\s]+ ) #name of option; non-greedy to allow spaces within name (needed for Windows path names)
# )
# (?:
# \\s*
# = #assign non-boolean value (optional)
# \\s*
# (?<value> .+ )
# )?
#yes/no options:
(?<onoff> [-+] ) #turn option on/off allows caller to override either orientation of defaults
(?<yesno_opt> echo | dead | linenums | debug )
|
#valued options:
(?: [-+] )? #optional
(?<val_opt> (?<debug_opt> debug ) | eol )
${WHITE}? = ${WHITE}? (?<value> .+ )
|
#macros:
(?: [-+] ) #required but ignored
(?: U ) (?<undef_name> ${IDENT} )
|
(?: [-+] ) #required but ignored
(?: D ) (?<def_name> ${IDENT} )
(?: = ) (?<def_value> .+ )
|
#includes:
(?: [-+] ) #required but ignored
(?: I ) ${WHITE}? (?<incl_value> .+ )
|
#files:
(?! [-+] ) #disambiguate file names from options
(?<file_name> ${FILENAME} )
`/*.xre_tidy*/.anchorRE.XRE();
        const parts = tostr(str).match(OPTION_xre) || {}; //no match => all parts undefined => falls thru to "invalid option"
        if (parts.def_name || parts.undef_name)
        {
            const [dir, cmd] = parts.def_name? ["#define", "D"]: ["#undef", "U"];
            startup_code.push(`${dir} ${(parts.undef_name || `${parts.def_name} ${parts.def_value}`).unesc}`);
            return debug.more(`${cmd} ${(parts.undef_name || `${parts.def_name} = ${parts.def_value}`).unesc}`.cyan_lt, "MACRO".blue_lt);
        }
        if (parts.incl_value)
        {
            startup_code.push(`#incl_folder ${quote(parts.incl_value.unesc)}`);
            return debug.more(parts.incl_value.unesc.cyan_lt, "INCL_FOLDER".blue_lt);
        }
        if (parts.file_name)
        {
            files_seen.push(parts.file_name.unesc);
            startup_code.push(`#include ${quote(parts.file_name.unesc)}`);
            return debug.more(parts.file_name.unesc.cyan_lt, "FILE".blue_lt);
        }
        downstream_args.push(str); //pass +/-/= options down stream
        if (parts.onoff) return debug.more(`${parts.yesno_opt.unesc} = ${opts[parts.yesno_opt.unesc] = (parts.onoff == "+")}`.cyan_lt, "OPTS".blue_lt);
        if (parts.value) return debug.more(`${(parts.debug_opt || "EOL").unesc} = ${opts[(parts.debug_opt || "EOL").unesc] = parts.value.unesc}`.cyan_lt, "OPTS".blue_lt); //"eol" is stored under "EOL"
        error(`invalid option: '${str}'`);
        return debug.more(str.cyan_lt, "OPTION?".blue_lt); //unknown/unhandled option
    }
}
module.exports.CLI = CLI;
//auto-run CLI after other init:
//only when module.parent is falsy; deferred by one tick so the rest of this file's top-level code runs first
if (!module.parent)
{
    process.nextTick(() => { CLI().pipe(process.stdout); });
}
//pattern for a macro definition line: optional leading whitespace and optional ESC, then either
//  #definex <delim>regex<delim>[flags]   (delim = any single non-space char; flags = any of "gmsix")
//or
//  #define name
//followed by PARAM_LIST and EOL_KEEP("body") (both defined elsewhere in this file)
//named captures visible here: delim, regex, flags, named
//the same alternation is repeated within the #definex bootstrap macro generated by sys_macros() below
//NOTE(review): where META_xre itself is consumed is not visible from this part of the file - presumably the bootstrap path; confirm
const META_xre = `
^ ${WHITE}? ${ESC}?
(?:
\\#definex ${WHITE}
(?<delim> \\S ) (?<regex> (?: ${ESC} ${ANYCHAR} | \\# [^\\n]+ \\n | (?! \\k<delim> ) ${ANYCHAR} )*? ) \\k<delim>
(?<flags> [gmsix]+ )?
|
\\#define ${WHITE} (?<named> ${IDENT} )
)
${PARAM_LIST}
${EOL_KEEP("body")}
`.unindent/*.anchorRE*/.XRE(); // #$-{WHITE}? (?<body> \\{ [^}]*? \\} ) $-{WHITE}? //CAUTION: simplified regex here; won't handle every regex; only needs to handle bootstrap macro
//generate system macros:
//uKernel: #definex is the only built-in directive; use it to define all the other directives (rexpp is extensible DSL)
//other directives are defined to closely match standard C preprocessor behavior
//NOTE: nested/recursive expr handling needs helper function because Javascript doesn't support recursive regex
//CAUTION: at least first line of function macros needs to end with "\" so body will be attached
//CAUTION: need to regen whenever EOL changes
//TODO: move to external file; how to define WHITE, EOL_JUNK, etc?
//global.debug = debug;
function sys_macros(src_code) //, EOL)
{
// debug("globals:".red_lt, typeof global, /*JSON.stringify(global)*/ Object.keys(global).join(","), "${srcline()}".red_lt);
// debug("this:".red_lt, typeof this, /*JSON.stringify(this)*/ Object.keys(this).join(","), "${srcline()}".red_lt);
// if (sys_macros.ran) fatal("system macros only need to be generated 1x");
// cre_macro.ignore_dups = true;
// const defs = [];
// const src_start = src_code.length;
// if (src_code.length) fatal("system macros aren't created soon enough");
const bootstrap = []; //some macros need to pre-exist before creating other macros
//helper macros:
src_code.push(`#define paranoid() if (!this.is_rexpp) fatal("lost context \${srcline()}")`);
bootstrap.push(src_code.top);
src_code.push(`#define commented(str) "${EOL_ph}" + \`\${match.toString().escnp/*.escnl*/} => \${str}\`.yellow_lt.color_fixup`);//CAUTION: "str" must be outside quotes or within braces to be expanded
bootstrap.push(src_code.top);
src_code.push(`#define copyof(thing) JSON.parse(JSON.stringify(thing))`);
src_code.push(`#define abspath(relpath) relpath.replace(/^~(?=\/)|^~$/, process.env.HOME).replace(/^\.(?=\/)|^\.$/, process.cwd())`); //or require("os").homedir(); //resolve home dir; NOTE: only works for current user
//messages:
src_code.push(`\\
#definex /^ ${WHITE}? \\#((?:pragma\\s+)?message|warning|error) ${EOL_KEEP("raw_msg")}/x(msgtype, raw_msg, eol_junk) \\
{ \\
paranoid(); \\
const MsgTypes = {message: [console.error, "cyan_lt"], warning: [warn, "yellow_lt"], error: [error /*fatal*/, "red_lt"]}; \\
const [func, color] = MsgTypes[msgtype]; \\
const msg = isundef(eval_try(raw_msg, "${srcline()}"), raw_msg); \\
func(\`[\${msgtype.toUpperCase()}] \${raw_msg}\${(msg != raw_msg)? \\\` ("\${msg}")\\\`: ""} \${eol_junk} @\${this/*.file_stack.top*/.srcline}\`[color].color_fixup); \\
return commented(new regex macro[\${numkeys(macros) - 1}]); \\
// return "${EOL_ph}" + \`\${match} => new regex macro[\${numkeys(macros) - 1}]\`.yellow_lt.color_fixup; \\
}`.unindent);
//conditional directives:
let priority = 0; //CAUTION: these macros must be executed before others
// const IFDEF_xre = `
// (?: \\b defined )
// (?<name>
// (?: \\( ) ${WHITE}? ${IDENT} ${WHITE}? (?: \\) ) #name is enclosed within "()"
// |
// ${WHITE} ${IDENT} #no "()"
// )`.unindent.XRE();
//// module.exports.IFDEF_xre = IFDEF_xre;
// global.IFDEF_xre = IFDEF_xre; //allow eval() to find it
//module.exports.IFDEF_xre = IFDEF_xre;
src_code.splice(priority++, 0, `\\
#definex /^ ${WHITE}? \\#(el)?if ${EOL_KEEP("raw_expr")}/x(elif, raw_expr) \\
{ \\
paranoid(); \\
const IFDEF_xre = \` \\
(?: \\b defined ) \\
(?<name> \\
(?: \\( ) ${WHITE}? ${IDENT} ${WHITE}? (?: \\) ) #name is enclosed within "()" \\
| \\
${WHITE} ${IDENT} #no "()" \\
)\`.unindent.XRE(); \\
const expr = raw_expr.replace(IFDEF_xre, (match, name) => !!macros[name]); //check defined *before* eval/expand \\
const active = eval_try(expr, "${srcline()}"); \\
if (isundef(active)) error(\`can't eval '\${expr}'\`); \\
if (elif) this.if_stack.top = (this.if_stack.top === false)? !!active: null; \\
else this.if_stack.push((this.if_stack.top !== null)? !!active: null); \\
// this.not_active = !active; \\
return commented(conditional push (\${!!active})); \\
// return "${EOL_ph}" + \`\${match} => conditional push (\${!!active})\`.yellow_lt.color_fixup; \\
}`.unindent);
// src_code.splice(0, 0, `\\
src_code.splice(priority++, 0, `\\
#definex /^ ${WHITE}? \\#else ${EOL_KEEP("ignored")}/x(ignored) \\
{ \\
paranoid(); \\
if (ignored) warn(\`ignoring junk: '\${ignored}'\`); \\
this.if_stack.top = (this.if_stack.top !== null)? !this.if_stack.top: null; \\
// this.not_active = !this.not_active; \\
return commented(conditional flip (\${this.if_stack.top})); \\
// return "${EOL_ph}" + \`\${match} => conditional flip (\${this.if_stack.top})\`.yellow_lt.color_fixup; \\
}`.unindent);
// src_code.splice(0, 0, `\\
src_code.splice(priority++, 0, `\\
#definex /^ ${WHITE}? \\#endif ${EOL_KEEP("ignored")}/x(ignored) \\
{ \\
paranoid(); \\
if (ignored) warn(\`ignoring junk: '\${ignored}'\`); \\
this.if_stack.pop(); \\
// this.not_active = !this.if_stack.top; \\
return commented(conditional pop (\${this.if_stack.top || "empty/on"})); \\
// return "${EOL_ph}" + \`\${match} => conditional pop (\${this.if_stack.top || "empty/on"})\`.yellow_lt.color_fixup; \\
}`.unindent);
src_code.splice(priority++, 0, `\\
#definex /^ ${EOL_KEEP("line")} $/x(line) \\
{ \\
paranoid(); \\
if (this.if_stack.length && !this.if_stack.top) \\
return "${EOL_ph}" + match/*.nocolors*/.gray_dk; //disabled line \\
}`.unindent);
//macro mgmt:
// const IS_IDENT_xre = `^${IDENT}`.XRE();
// global.IS_IDENT_xre = IS_IDENT_xre; //allow eval() to find it
src_code.push(`\\
#definex /^ ${WHITE}? \\#if(n)?def ${EOL_KEEP("raw_expr")}/x(ifndef, raw_expr, eol_junk) \\
{ \\
paranoid(); \\
const IS_IDENT_xre = \`^${IDENT}\`.XRE(); \\
const expr = raw_expr.replace(IS_IDENT_xre, \`\${ifndef? "!": ""}defined($&)\`); //rewrite as "#if defined()" \\
// return "${EOL_ph}" + \`\${match}\`.yellow_lt; \\
return \`#if \${expr} \${eol_junk} \${EOL_ph}rewrote as regular #if\`; //use normal #if eval \\
}`.unindent);
// src_code.push(`\\
// #definex /^ ${WHITE}? \\#define ${WHITE} (?<named> ${IDENT} ) ${PARAM_LIST} ${EOL_KEEP("body")}/x(named, params, body) \\
// { \\
// paranoid(); \\
//cre_macro.debug = true; \\
// cre_macro({named, params, body, input: match}, srcline(+1)); \\
// return commented(new named macro[\${numkeys(macros) - 1}]); \\
//// return "${EOL_ph}" + \`\${match} => new named macro[\${numkeys(macros) - 1}]\`.yellow_lt.color_fixup; \\
// }`.unindent);
// bootstrap.push(src_code.top);
//bootstrap:
//meta-macro to define all other directives:
// eval_try(`{ const {cre_macro, debug} = require("${__filename}"); console.error(typeof cre_macro, JSON.stringify(cre_macro)); }`);
// console.error(typeof global, /*JSON.stringify(global)*/ Object.keys(global).join(","), srcline().red_lt);
// const global = (function(){ return this || (1, eval)("this"); }()); \\
// src_code.push(`\\
// #definex /^ ${WHITE}? ${ESC}? \\#definex ${WHITE} (?<delim> \\S ) #any non-space char as delim (typically "/") \\
// (?<regex> (?: ${ESC} ${ANYCHAR} | \\# [^\\n]+ \\n | (?! \\k<delim> ) ${ANYCHAR} )*? ) #collect anything up to unescaped delim; include comments \\
// \\k<delim> #trailing delim same as leading delim \\
// (?<flags> [gmsix]+ )? #optional flags \\
// ${PARAM_LIST} #optional param list \\
// # $-{WHITE} (?<body> $-{BODY} ) \\
// ${EOL_KEEP("body")} #body/replacement string and optional trailing comment \\
// /x(delim, regex, flags, params, body) \\
// { \\
//// console.error(typeof global, /*JSON.stringify(global)*/ Object.keys(global).join(","), "${srcline()}".red_lt); \\
// Object.assign(this, {is_rexpp: true, if_stack: [], file_stack: [], folders: []}); \\
//// console.error(typeof this, /*JSON.stringify(this)*/ Object.keys(this).join(","), "${srcline()}".red_lt); \\
// cre_macro({regex, flags, params, body, input: match}, srcline(+1)); \\
// debug("bootstrap:".red_lt, typeof bootstrap_macro, bootstrap_macro.where, "${srcline()}"); \\
// bootstrap_macro.undefine(); //replace bootstrap with this one \\
// return \`${EOL_ph}\${match} => new regex macro[\${numkeys(macros) - 1}]\`.yellow_lt; \\
// }`.unindent);
//system macro source for the "#definex" and "#define" directives:
//- the pattern accepts either "#definex <delim>regex<delim>[flags]" or "#define NAME", followed by PARAM_LIST and the rest of the line captured as "body"
//- the macro body initializes is_rexpp/if_stack/folders on "this" the first time only, then calls cre_macro() with the captured pieces
//- the returned commented(...) text says whether a "named" or "regex" macro was created
//NOTE: unescaped ${...} (WHITE, ESC, srcline(), ...) is substituted when this string is built; \${...} stays as literal text inside the macro source
//NOTE: the debug() calls are chained with &&, so the later ones only run if the earlier ones return a truthy value
src_code.push(`\\
#definex /^ ${WHITE}? ${ESC}? \\
(?: \\
\\#definex ${WHITE} \\
(?<delim> \\S ) #any non-space char as delim (typically "/") \\
(?<regex> (?: ${ESC} ${ANYCHAR} | \\# [^\\n]+ \\n | (?! \\k<delim> ) ${ANYCHAR} )*? ) #collect anything up to unescaped delim; include comments \\
\\k<delim> #trailing delim same as leading delim \\
(?<flags> [gmsix]+ )? #optional flags \\
| \\
\\#define ${WHITE} (?<named> ${IDENT} ) \\
) \\
${PARAM_LIST} #optional param list \\
# $-{WHITE} (?<body> $-{BODY} ) \\
${EOL_KEEP("body")} #body/replacement string and optional trailing comment \\
/x(delim, regex, flags, named, hasargs, params, body) \\
{ \\
debug("named:", named) && debug("params:", params) && debug("body:", body); \\
// console.error(typeof global, /*JSON.stringify(global)*/ Object.keys(global).join(","), "${srcline()}".red_lt); \\
if (!this.is_rexpp) Object.assign(this, {is_rexpp: true, if_stack: [], /*file_stack: [],*/ folders: []}); \\
paranoid(); \\
// console.error(typeof this, /*JSON.stringify(this)*/ Object.keys(this).join(","), "${srcline()}".red_lt); \\
// if (~regex.indexOf("#definex")) bootstrap_macro.undefine(); //replace temp bootstrap macro with new one \\
cre_macro.debug = true; \\
cre_macro({named, regex, flags, hasargs, params, body, input: match}, srcline(+1)); \\
//debug("bootstrap:".red_lt, typeof bootstrap_macro, bootstrap_macro.where, "${srcline()}"); \\
return commented(new \${named? "named": "regex"} macro[\${numkeys(macros) - 1}]); \\
// return "${EOL_ph}" + \`\${match.escnp/*.escnl*/} => new regex macro[\${numkeys(macros) - 1}]\`.yellow_lt.color_fixup; \\
}`.unindent);
//also queue this macro's source for the manual bootstrap parse further down (presumably .top is the entry just pushed - TODO confirm)
bootstrap.push(src_code.top);
// const META_xre = `^ ${WHITE}? ${ESC}? \\#definex ${WHITE} (?<delim> / ) ${WHITE}? (?<regex> ${ANYCHAR}+? ) / (?<flags> x ) ${PARAM_LIST} ${EOL_KEEP("body")}`/*.anchorRE*/.XRE(); // #$-{WHITE}? (?<body> \\{ [^}]*? \\} ) $-{WHITE}? //CAUTION:
// const DEFINEX_xre = `
// ${WHITE}? \\#definex ${WHITE} (?<delim> \\S ) #any non-space char as delim (typically "/")
// (?<regex> (?: ${ESC} ${ANYCHAR} | (?! \\k<delim> ) ${ANYCHAR} )*? ) #collect anything up to unescaped delim
// \\k<delim> #trailing delim same as leading delim
// (?<flags> [gmsixD]+ )? #optional flags; custom: "D" to silently allow duplicates
// ${PARAM_LIST} #optional param list
//# ${WHITE} (?<body> ${BODY} )
// ${EOL_KEEP} #body/replacement string and optional trailing comment
// `.anchorRE.XRE();
// defs.push(`\\
// #definex / ${DEFINEX_xre} /xD()
// `)
//system macro source for the "#undef" / "#undefine" directive:
//- the rest of the line is captured as "name"
//- if macros[name] is missing, error() is called and the type is reported as "unknown"
//- otherwise macros[name].undefine() is called and the "type" property of its result is reported
//NOTE: \${...} is escaped so it stays in the macro source; only ${WHITE}, ${EOL_KEEP(...)} and ${EOL_ph} are substituted here
src_code.push(`\\
#definex /^ ${WHITE}? \\#undef(?:ine)? ${WHITE} ${EOL_KEEP("name")}/x(name) #(?<name> ${IDENT} ) \\
{ \\
paranoid(); \\
const mactype = !macros[name]? (error(\`undefined macro '\${name}' at \${this/*.file_stack.top*/.srcline}\`), "unknown"): \\
macros[name].undefine().type; \\
// return "${EOL_ph}" + \`\${match} => undefine \`.yellow_lt; \\
return commented(undefine \${mactype} macro); \\
// return "${EOL_ph}" + \`\${match} => undefine \${mactype} macro\`.yellow_lt.color_fixup; \\
}`.unindent);
//system macro source for the "#dump" directive (lists the currently known macros):
//- anything after "#dump" on the line is captured as "ignored" and only triggers a warning
//- the output starts with a count line, then one line per entry of "macros" giving func/text kind, index, key, body and origin
//- an entry whose "xre" property is falsy is shown as "(deleted)" instead of its body
//- the lines are joined with a newline followed by the EOL placeholder and appended to commented()
src_code.push(`\\
#definex /^ ${WHITE}? \\#dump ${EOL_KEEP("ignored")}/x(ignored) \\
{ \\
const dumpout = []; \\
if (ignored) warn(\`ignoring junk: '\${ignored}'\`); \\
paranoid(); \\
dumpout.push(\`\${commas(plural(numkeys(macros)) || "no")} macro\${plural.suffix}:\`.yellow_lt); \\
Object.entries(macros).map(([key, macro], inx, all) => dumpout.push(\`\${macro.isfunc? "func": "text"} macro[\${inx}/\${all.length}]: \${key.cyan_lt} => \${macro.xre? macro.body: "(deleted)"} from \${macro.where}\`.yellow_lt.color_fixup)); \\
// return dumpout.map((line) => \`${EOL_ph}\${line}\\n\`).join(""); \\
// return "${EOL_ph}" + \`\${match} => \${dumpout.join("\\n${EOL_ph}".reset)}\`.yellow_lt.color_fixup; \\
return commented() + dumpout.join("\\n${EOL_ph}"); //CAUTION: no color to left of EOL \\
// return "${EOL_ph}" + \`\${match} => \`.yellow_lt + dumpout.join("\\n${EOL_ph}"); //CAUTION: no color to left of EOL \\
}`.unindent);
//source file mgmt directives:
//system macro source for a line marker of the form "# <number> <filename>":
//- "linenum" is the number after "#"; the rest of the line is captured as "filename"
//- this.srcline is rewritten as "@<filename>:<linenum>", reusing the filename parsed from the current this.srcline (via SRCLINE_xre) when none is given
//NOTE(review): the pattern has no literal "line" keyword, so "#line N ..." text presumably needs a separate handler - confirm
src_code.push(`\\
#definex /^ ${WHITE}? \\# ${WHITE}? (?<linenum> ${NUMBER} ) ${WHITE} ${EOL_KEEP("filename")}/x(linenum, filename) #(?<filename> ${QUO_STR} )? \\
{ \\
paranoid(); \\
const srcparts = this.srcline.match(SRCLINE_xre); \\
this/*.file_stack.top*/.srcline = \`@\${filename || srcparts.filename}:\${linenum}\`; \\
return commented(srcline \${this/*.file_stack.top*/.srcline}); \\
// return "${EOL_ph}" + \`\${match} => srcline \${this/*.file_stack.top*/.srcline}\`.yellow_lt.color_fixup; \\
}`.unindent);
//system macro source for the "#include <expr>" directive:
//- expr is passed through eval_try(); isundef(..., expr) presumably falls back to the raw text when that yields nothing - TODO confirm
//- a name written as <file> has its angle brackets stripped
//- search order: caller-defined folders (this.folders) first, then __dirname, "/include", "/usr/include"; the first existing match wins
//- the file text is placed in this.inbuf between "#line" markers; the trailing marker carries the line number after the directive
//CAUTION: every line inside the template must end with the "\\" joiner, otherwise the macro body is cut short
src_code.push(`\\
#definex /^ ${WHITE}? \\#include ${EOL_KEEP("expr")}/x(expr) \\
{ \\
paranoid(); \\
const incl_name = isundef(eval_try(expr, "${srcline()}"), expr).unquoted; \\
// global.iostr.pause(); \\
// this.file_stack.push(filename); \\
// global.iostr.resume(); \\
const try_folders = copyof(this.folders); \\
try_folders.push(__dirname, "/include", "/usr/include"); //add code folder and default system/global locations *after* caller-defined folders to allow caller to override system-defined files \\
const filename = incl_name.replace(/^<(.*)>$/, (all, inner) => inner); //strip angle brackets; replacer function avoids any special replacement-pattern chars \\
//console.once(\`#incl using \${try_folders.join(", ")}\`.red_lt); \\
// folders.forEach((folder) => debug(folder) && debug(pathlib.resolve(folder, filename), fs.existsSync(pathlib.resolve(folder, filename)))); \\
const filepath = try_folders.map((folder) => pathlib.resolve(folder, filename)).find((path) => fs.existsSync(path)) || ""; //first match wins so caller-defined folders take precedence \\
if (!filepath) return error(\`file '\${filename}' not found in \${plural(try_folders.length).toString().replace(/^1$/, "current")} folder\${plural.suffix}:\${try_folders.join(", ")}\`); \\
// fs.createReadStream(filepath), //pathlib.resolve(CWD, filename)), \\
// { \\
// linebuf: this.opts.dead? this.opts.EOL + \`\${this.linebuf} \${this.opts.EOL}depth \${this.opts.file_depth}: '\${filepath}'\`.green_lt: "", \\
// filename: pathlib.relative(CWD, filename), \\
// }); \\
// return "${EOL_ph}" + \`\${match} => file '\${filename}' depth \${this.file_stack.length}\`.yellow_lt.color_fixup; \\
const srcparts = this.srcline.match(SRCLINE_xre); \\
this.inbuf = \`#line 1 "\${filename}"\\n\` + fs.readFileSync(filepath) + \`\\n#line \${1 + Number(srcparts.linenum)} "\${srcparts.filename}"\`; //Number(): captured linenum is text, plain "+" would concatenate \\
//TODO: stream + skip shebang; \\
return commented(file '\${filename}'); \\
// return "${EOL_ph}" + \`\${match} => file '\${filename}'\`.yellow_lt.color_fixup; \\
}`.unindent);
//system macro source for the "#incl_folder <expr>" directive (adds a folder to this.folders):
//- expr is passed through eval_try(); isundef(..., expr) presumably falls back to the raw text when that yields nothing - TODO confirm
//- the folder is made absolute with abspath() and looked up with glob.sync(); the first match is used
//- no match: only a warning is issued; otherwise the match is pushed onto this.folders
//NOTE(review): the returned text always reports the last entry of this.folders, even when the lookup failed and nothing was added
//NOTE(review): fold_ok is always truthy in the else branch, so the "|| abs_folder" fallback never applies
src_code.push(`\\
#definex /^ ${WHITE}? \\#incl_folder ${EOL_KEEP("expr")}/x(expr) \\
{ \\
paranoid(); \\
const folder = isundef(eval_try(expr, "${srcline()}"), expr); \\
const abs_folder = abspath(folder); \\
const fold_ok = (glob.sync(abs_folder) || [])[0]; \\
//debug(\`incl folder '\${folder}', exists? \${fold_ok || false}\`); \\
if (!fold_ok) warn(\`folder not found: '\${folder}'\`); \\
else this.folders.push(fold_ok || abs_folder); \\
return commented(folder[\${this.folders.length - 1}] '\${this.folders.top}'); \\
// return "${EOL_ph}" + \`\${match} => folder[\${this.folders.length - 1}] '\${this.folders.top}'\`.yellow_lt.color_fixup; \\
}`.unindent);
//misc:
//not needed: (just define regular macro + execute it)
// src_code.push(`\\
// #definex /^ ${WHITE}? \\#exec } ${EOL_KEEP("body")} $/xD(body) \\
// { \\
// return eval_try(\`(${body || ""})\`); \\
// }`.unindent);
// src_code.push("#dump"); //check sys macros were defined correctly
//manually build bootstrap macros:
//trying to reuse #definex regex
//CAUTION: EOL placeholders are left as-is; no need to substitute correct value since everything uses EOL_ph during bootstrap
//XRE.debug = true;
//const frags = tostr(chunk).replace(LINEJOIN_xre, PRESERVE).split(LINESPLIT_re);
//debug("raw bootstrap macro:", src_code.top.escnp.escnl);
//break each bootstrap macro's source into fragments:
//matches of LINEJOIN_xre are replaced with NEWLINE_SAVEJOIN before splitting on LINESPLIT_re,
//then every NEWLINE_SAVEJOIN left inside a fragment is turned into a real newline
//CAUTION: need to preserve \n until comments stripped; really no need to strip comments
const prepped = bootstrap.map((macro_src) =>
{
    const joined = macro_src.replace(LINEJOIN_xre, NEWLINE_SAVEJOIN);
    const pieces = joined.split(LINESPLIT_re);
    return pieces.map((piece) => piece.replaceAll(NEWLINE_SAVEJOIN, "\n"));
});
//bootstrap.forEach((mac, inx, all) => debug(`bootstrap macro[${inx}/${all.length}]:`, mac.escnp.escnl));
// const SKIP_DEFINEX_xre = `^ ( ${LINEJOIN_xre.raw_src || LINEJOIN_xre.source} )* ${ESC}? \\#definex ${WHITE}`.XRE(); //skip directive in lieu of input stream handling
// const bootstrap = xre_tidy(defs/*.shift()*/[0].replace(SKIP_DEFINEX_xre, "").replace(EOL_ph, escre(EOL, EOL))).replace(LINEJOIN_xre, " "); //, "<: defx src").match(META_xre); //CAUTION: can't strip newlines until regex comments are removed; esc EOL to avoid conflict with special chars
// const meta = pre_parse(defs.shift().replace(EOL_ph, escre(eol)).replace(LINEJOIN_xre, "")); //, "<: defx src").match(META_xre); //NOTE: don't need line joiners
//XRE.debug = true;
// const META_xre = `^ ${WHITE}? ${ESC}? \\#definex ${WHITE} (?<delim> / ) ${WHITE}? (?<regex> ${ANYCHAR}+? ) / (?<flags> x ) ${PARAM_LIST} ${EOL_KEEP("body")}`/*.anchorRE*/.XRE(); // #$-{WHITE}? (?<body> \\{ [^}]*? \\} ) $-{WHITE}? //CAUTION: simplified regex here; won't handle every regex; only needs to handle bootstrap macros
//hand-parse each prepared bootstrap macro with META_xre and register it through cre_macro();
//a macro must arrive as exactly one fragment, and the one whose regex mentions "definex" is
//published as global.bootstrap_macro
for (const [inx, frags] of prepped.entries())
{
    const label = `bootstrap macro[${inx}/${prepped.length}]`;
    if (frags.length > 1)
    {
        const summary = frags.map((str) => `${str.length}:'${trunc(str, 40).escnp/*.escnl*/}'`);
        fatal(`${label} > 1 frag (${frags.length}): ${summary}`);
    }
    debug(`${label}:`, frags[0]);
    const parts = frags[0].match(META_xre);
    if (!parts) fatal(`can't parse ${label}: '${frags[0].escnp/*.escnl*/}'`);
    cre_macro.debug = true;
//no worky: module.exports.bootstrap_macro = cre_macro(parts, "sys_macros:1");
    const macro = cre_macro(parts, "sys_macros:1");
    const is_definex = (parts.regex || "").includes("definex");
    if (is_definex) global.bootstrap_macro = macro;
}
//debug("globals:".red_lt, typeof global, /*JSON.stringify(global)*/ Object.keys(global).join(","));
// bootstrap_macro
// src_code.push(`\\
// #define dsl_init() \\
// { \\
// debug("here1".red_lt); \\
// const if_stack = []; \\
// const file_stack = []; \\
// }
// dsl_init();
// `.unindent);
// return src_code.splice_fluent(0, 0, ...defs); //inject system macros at start
}