Skip to content

Commit

Permalink
Merge pull request #222 from veelo/longest_match
Browse files Browse the repository at this point in the history
Implement longest match parser. Closes #218.
  • Loading branch information
PhilippeSigaud authored Feb 26, 2017
2 parents 25a8dd8 + 6aa1374 commit 0901d95
Show file tree
Hide file tree
Showing 5 changed files with 321 additions and 18 deletions.
5 changes: 4 additions & 1 deletion pegged/examples/peggedgrammar.d
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ Pegged:
# Syntactic rules:
Grammar <- Spacing GrammarName Definition+ :eoi
Definition <- LhsName Arrow Expression
Expression <- :OR? Sequence (:OR Sequence)*
Expression <- FirstExpression / LongestExpression
FirstExpression <- :OR? Sequence (:OR Sequence)+
LongestExpression <- :(OR / LONGEST_OR)? Sequence (:LONGEST_OR Sequence)*
Sequence <- Prefix+
Prefix <- (POS / NEG / FUSE / DISCARD / KEEP / DROP / PROPAGATE)* Suffix
Suffix <- Primary (OPTION / ZEROORMORE / ONEORMORE / Action)*
Expand Down Expand Up @@ -66,6 +68,7 @@ SPACEARROW <- '<' Spacing
ACTIONARROW <- '<' Action Spacing
OR <- '/' Spacing
LONGEST_OR <- '|' Spacing
POS <- '&' Spacing
NEG <- '!' Spacing
Expand Down
25 changes: 15 additions & 10 deletions pegged/grammar.d
Original file line number Diff line number Diff line change
Expand Up @@ -771,7 +771,11 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
result = p.matches[0] ~ " = " ~ generateCode(p.children[1]);
break;
case "Pegged.Expression":
if (p.children.length > 1) // OR expression
result ~= generateCode(p.children[0]);
break;
case "Pegged.FirstExpression",
"Pegged.LongestExpression":
if (p.children.length > 1) // [LONGEST_]OR expression
{
// Keyword list detection: "abstract"/"alias"/...
bool isLiteral(ParseTree p)
Expand Down Expand Up @@ -800,13 +804,13 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
}
else
{
result = "pegged.peg.or!(";
result = p.name == "Pegged.FirstExpression" ? "pegged.peg.or!(" : "pegged.peg.longest_match!(";
foreach(seq; p.children)
result ~= generateCode(seq) ~ ", ";
result = result[0..$-2] ~ ")";
}
}
else // One child -> just a sequence, no need for a or!( , )
else // One child -> just a sequence, no need for an or!( , )
{
result = generateCode(p.children[0]);
}
Expand Down Expand Up @@ -906,7 +910,7 @@ string grammar(Memoization withMemo = Memoization.yes)(string definition)
result ~= generateCode(seq) ~ ", ";
result = result[0..$-2] ~ ")";
}
else // One child -> just a sequence, no need for a or!( , )
else // One child -> just a sequence, no need for an or!( , )
{
result = generateCode(p.children[0]);
}
Expand Down Expand Up @@ -2889,12 +2893,13 @@ unittest // Test lambda syntax in semantic actions

assert(p.successful);

auto action = p.children[0].children[1]
.children[2]
.children[0]
.children[0]
.children[0]
.children[1];
auto action = p.children[0].children[1] // Pegged.Definition
.children[2] // Pegged.Expression
.children[0] // Pegged.FirstExpression
.children[0] // Pegged.Sequence
.children[0] // Pegged.Prefix
.children[0] // Pegged.Suffix
.children[1]; // Pegged.Action

assert(action.matches.length == results[idx].length);
foreach(i, s; action.matches)
Expand Down
10 changes: 8 additions & 2 deletions pegged/introspection.d
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,10 @@ pure GrammarInfo grammarInfo(ParseTree p)
{
switch (p.name)
{
case "Pegged.Expression": // choice expressions null-match whenever one of their components can null-match
case "Pegged.Expression":
return nullMatching(p.children[0]);
case "Pegged.FirstExpression",
"Pegged.LongestExpression": // choice expressions null-match whenever one of their components can null-match
foreach(seq; p.children)
if (nullMatching(seq) == NullMatch.yes)
return NullMatch.yes;
Expand Down Expand Up @@ -294,7 +297,10 @@ pure GrammarInfo grammarInfo(ParseTree p)
import std.algorithm.searching: countUntil;
switch (p.name)
{
case "Pegged.Expression": // Choices are left-recursive if any choice is left-recursive
case "Pegged.Expression":
return leftRecursion(p.children[0], cycle);
case "Pegged.FirstExpression",
"Pegged.LongestExpression": // Choices are left-recursive if any choice is left-recursive
// Because memoized left-recursion handling needs to know about all left-recursive cycles,
// we consider all choices, not just one.
auto any_lr = LeftRecursive.no;
Expand Down
97 changes: 92 additions & 5 deletions pegged/parser.d
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ Pegged:
# Syntactic rules:
Grammar <- Spacing GrammarName Definition+ :eoi
Definition <- LhsName Arrow Expression
Expression <- :OR? Sequence (:OR Sequence)*
Expression <- FirstExpression / LongestExpression
FirstExpression <- :OR? Sequence (:OR Sequence)+
LongestExpression <- :(OR / LONGEST_OR)? Sequence (:LONGEST_OR Sequence)*
Sequence <- Prefix+
Prefix <- (POS / NEG / FUSE / DISCARD / KEEP / DROP / PROPAGATE)* Suffix
Suffix <- Primary (OPTION / ZEROORMORE / ONEORMORE / Action)*
Expand Down Expand Up @@ -65,6 +67,7 @@ SPACEARROW <- '<' Spacing
ACTIONARROW <- '<' Action Spacing
OR <- '/' Spacing
LONGEST_OR <- '|' Spacing
POS <- '&' Spacing
NEG <- '!' Spacing
Expand Down Expand Up @@ -136,7 +139,9 @@ import std.functional: toDelegate;

struct GenericPegged(TParseTree)
{
import std.functional : toDelegate;
import pegged.dynamic.grammar;
static import pegged.peg;
struct Pegged
{
enum name = "Pegged";
Expand All @@ -148,6 +153,8 @@ struct GenericPegged(TParseTree)
rules["Grammar"] = toDelegate(&Grammar);
rules["Definition"] = toDelegate(&Definition);
rules["Expression"] = toDelegate(&Expression);
rules["FirstExpression"] = toDelegate(&FirstExpression);
rules["LongestExpression"] = toDelegate(&LongestExpression);
rules["Sequence"] = toDelegate(&Sequence);
rules["Prefix"] = toDelegate(&Prefix);
rules["Suffix"] = toDelegate(&Suffix);
Expand Down Expand Up @@ -176,6 +183,7 @@ struct GenericPegged(TParseTree)
rules["SPACEARROW"] = toDelegate(&SPACEARROW);
rules["ACTIONARROW"] = toDelegate(&ACTIONARROW);
rules["OR"] = toDelegate(&OR);
rules["LONGEST_OR"] = toDelegate(&LONGEST_OR);
rules["POS"] = toDelegate(&POS);
rules["NEG"] = toDelegate(&NEG);
rules["FUSE"] = toDelegate(&FUSE);
Expand Down Expand Up @@ -248,6 +256,7 @@ struct GenericPegged(TParseTree)

/// Returns true when `s` begins with the grammar prefix "Pegged.".
static bool isRule(string s)
{
    import std.algorithm : startsWith;

    // Prefix carried by every qualified rule name this grammar produces.
    enum grammarPrefix = "Pegged.";
    return startsWith(s, grammarPrefix);
}
mixin decimateTree;
Expand Down Expand Up @@ -308,28 +317,80 @@ struct GenericPegged(TParseTree)
{
if(__ctfe)
{
return pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(OR)), Sequence, pegged.peg.zeroOrMore!(pegged.peg.and!(pegged.peg.discard!(OR), Sequence))), "Pegged.Expression")(p);
return pegged.peg.defined!(pegged.peg.or!(FirstExpression, LongestExpression), "Pegged.Expression")(p);
}
else
{
return hooked!(pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(OR)), Sequence, pegged.peg.zeroOrMore!(pegged.peg.and!(pegged.peg.discard!(OR), Sequence))), "Pegged.Expression"), "Expression")(p);
return hooked!(pegged.peg.defined!(pegged.peg.or!(FirstExpression, LongestExpression), "Pegged.Expression"), "Expression")(p);
}
}
static TParseTree Expression(string s)
{
if(__ctfe)
return pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(OR)), Sequence, pegged.peg.zeroOrMore!(pegged.peg.and!(pegged.peg.discard!(OR), Sequence))), "Pegged.Expression")(TParseTree("", false,[], s));
return pegged.peg.defined!(pegged.peg.or!(FirstExpression, LongestExpression), "Pegged.Expression")(TParseTree("", false,[], s));
else
{
forgetMemo();
return hooked!(pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(OR)), Sequence, pegged.peg.zeroOrMore!(pegged.peg.and!(pegged.peg.discard!(OR), Sequence))), "Pegged.Expression"), "Expression")(TParseTree("", false,[], s));
return hooked!(pegged.peg.defined!(pegged.peg.or!(FirstExpression, LongestExpression), "Pegged.Expression"), "Expression")(TParseTree("", false,[], s));
}
}
/// Name-query overload: yields the qualified name of the Expression rule.
/// The `GetName` argument is only used to select this overload.
static string Expression(GetName g)
{
    enum qualifiedName = "Pegged.Expression";
    return qualifiedName;
}

/// Applies the rule `FirstExpression <- :OR? Sequence (:OR Sequence)+`
/// to the parse tree `p` and returns the resulting tree.
static TParseTree FirstExpression(TParseTree p)
{
    // Single spelling of the rule's combinator expression, shared by both paths.
    alias rule = pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(OR)), Sequence, pegged.peg.oneOrMore!(pegged.peg.and!(pegged.peg.discard!(OR), Sequence))), "Pegged.FirstExpression");

    // Under CTFE the rule is invoked directly; otherwise it is routed through `hooked!`.
    if (__ctfe)
        return rule(p);

    return hooked!(rule, "FirstExpression")(p);
}
/// String entry point for `FirstExpression <- :OR? Sequence (:OR Sequence)+`.
/// Builds a `TParseTree("", false, [], s)` from the input and applies the rule to it.
static TParseTree FirstExpression(string s)
{
    // Single spelling of the rule's combinator expression, shared by both paths.
    alias rule = pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(OR)), Sequence, pegged.peg.oneOrMore!(pegged.peg.and!(pegged.peg.discard!(OR), Sequence))), "Pegged.FirstExpression");

    // Under CTFE the rule is invoked directly, with no memo reset and no hook.
    if (__ctfe)
        return rule(TParseTree("", false,[], s));

    // Outside CTFE: forgetMemo() runs before the hooked rule is applied.
    forgetMemo();
    return hooked!(rule, "FirstExpression")(TParseTree("", false,[], s));
}
/// Name-query overload: yields the qualified name of the FirstExpression rule.
/// The `GetName` argument is only used to select this overload.
static string FirstExpression(GetName g)
{
    enum qualifiedName = "Pegged.FirstExpression";
    return qualifiedName;
}

/// Applies the rule `LongestExpression <- :(OR / LONGEST_OR)? Sequence (:LONGEST_OR Sequence)*`
/// to the parse tree `p` and returns the resulting tree.
static TParseTree LongestExpression(TParseTree p)
{
    // Single spelling of the rule's combinator expression, shared by both paths.
    alias rule = pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(pegged.peg.or!(OR, LONGEST_OR))), Sequence, pegged.peg.zeroOrMore!(pegged.peg.and!(pegged.peg.discard!(LONGEST_OR), Sequence))), "Pegged.LongestExpression");

    // Under CTFE the rule is invoked directly; otherwise it is routed through `hooked!`.
    if (__ctfe)
        return rule(p);

    return hooked!(rule, "LongestExpression")(p);
}
/// String entry point for
/// `LongestExpression <- :(OR / LONGEST_OR)? Sequence (:LONGEST_OR Sequence)*`.
/// Builds a `TParseTree("", false, [], s)` from the input and applies the rule to it.
static TParseTree LongestExpression(string s)
{
    // Single spelling of the rule's combinator expression, shared by both paths.
    alias rule = pegged.peg.defined!(pegged.peg.and!(pegged.peg.discard!(pegged.peg.option!(pegged.peg.or!(OR, LONGEST_OR))), Sequence, pegged.peg.zeroOrMore!(pegged.peg.and!(pegged.peg.discard!(LONGEST_OR), Sequence))), "Pegged.LongestExpression");

    // Under CTFE the rule is invoked directly, with no memo reset and no hook.
    if (__ctfe)
        return rule(TParseTree("", false,[], s));

    // Outside CTFE: forgetMemo() runs before the hooked rule is applied.
    forgetMemo();
    return hooked!(rule, "LongestExpression")(TParseTree("", false,[], s));
}
/// Name-query overload: yields the qualified name of the LongestExpression rule.
/// The `GetName` argument is only used to select this overload.
static string LongestExpression(GetName g)
{
    enum qualifiedName = "Pegged.LongestExpression";
    return qualifiedName;
}

static TParseTree Sequence(TParseTree p)
{
if(__ctfe)
Expand Down Expand Up @@ -1058,6 +1119,32 @@ struct GenericPegged(TParseTree)
return "Pegged.OR";
}

/// Applies the rule `LONGEST_OR <- '|' Spacing` to the parse tree `p`
/// and returns the resulting tree.
static TParseTree LONGEST_OR(TParseTree p)
{
    // Single spelling of the rule's combinator expression, shared by both paths.
    alias rule = pegged.peg.defined!(pegged.peg.and!(pegged.peg.literal!("|"), Spacing), "Pegged.LONGEST_OR");

    // Under CTFE the rule is invoked directly; otherwise it is routed through `hooked!`.
    if (__ctfe)
        return rule(p);

    return hooked!(rule, "LONGEST_OR")(p);
}
/// String entry point for `LONGEST_OR <- '|' Spacing`.
/// Builds a `TParseTree("", false, [], s)` from the input and applies the rule to it.
static TParseTree LONGEST_OR(string s)
{
    // Single spelling of the rule's combinator expression, shared by both paths.
    alias rule = pegged.peg.defined!(pegged.peg.and!(pegged.peg.literal!("|"), Spacing), "Pegged.LONGEST_OR");

    // Under CTFE the rule is invoked directly, with no memo reset and no hook.
    if (__ctfe)
        return rule(TParseTree("", false,[], s));

    // Outside CTFE: forgetMemo() runs before the hooked rule is applied.
    forgetMemo();
    return hooked!(rule, "LONGEST_OR")(TParseTree("", false,[], s));
}
/// Name-query overload: yields the qualified name of the LONGEST_OR rule.
/// The `GetName` argument is only used to select this overload.
static string LONGEST_OR(GetName g)
{
    enum qualifiedName = "Pegged.LONGEST_OR";
    return qualifiedName;
}

static TParseTree POS(TParseTree p)
{
if(__ctfe)
Expand Down
Loading

0 comments on commit 0901d95

Please sign in to comment.