Skip to content

Commit

Permalink
Merge pull request #230 from microsoft/new_parser
Browse files Browse the repository at this point in the history
Now using pyparsing and supporting infix notation!
  • Loading branch information
slundberg authored Jun 15, 2023
2 parents d6b855a + 6d72ff8 commit 89289d8
Show file tree
Hide file tree
Showing 34 changed files with 772 additions and 368 deletions.
6 changes: 2 additions & 4 deletions .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,9 @@ name: Unit tests

on:
push:
branches:
- master
branches: [main]
pull_request:
branches:
- master
branches: [main]

jobs:
build:
Expand Down
291 changes: 216 additions & 75 deletions guidance/_grammar.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,218 @@
import parsimonious

# define the Guidance language grammar
grammar = parsimonious.grammar.Grammar(
r"""
template = template_chunk*
template_chunk = comment / slim_comment / escaped_command / unrelated_escape / command / command_block / content
comment = comment_start comment_content* comment_end
comment_start = "{{!--"
comment_content = not_comment_end / ~r"[^-]*"
not_comment_end = "-" !"-}}"
comment_end = "--}}"
slim_comment = slim_comment_start slim_comment_content* slim_comment_end
slim_comment_start = "{{" "~"? "!"
slim_comment_content = not_slim_comment_end / ~r"[^}]*"
not_slim_comment_end = "}" !"}"
slim_comment_end = "}}"
command = command_start command_content command_end
command_block = command_block_open template (command_block_sep template)* command_block_close
command_block_open = command_start "#" block_command_call command_end
command_block_sep = command_start ("or" / "else") command_end
command_block_close = command_start "/" command_name command_end
command_start = "{{" !"!" "~"?
not_command_start = "{" !"{"
not_command_escape = "\\" !"{{"
command_end = "~"? "}}"
command_contents = ~'[^{]*'
block_command_call = command_name command_args?
command_content = command_call / variable_ref
command_call = command_name command_args
command_args = command_arg_and_ws+
command_arg_and_ws = ws command_arg
command_arg = named_command_arg / positional_command_arg
positional_command_arg = command_arg_group / literal / variable_ref
named_command_arg = variable_name "=" (literal / variable_ref)
command_arg_group = "(" command_content ")"
ws = ~r'\s+'
command_contentasdf = ~"[a-z 0-9]*"i
command_name = ~r"[a-z][a-z_0-9\.]*"i / "<" / ">" / "==" / "!=" / ">=" / "<="
variable_ref = not_exact_or not_exact_else ~r"[@a-z][a-z_0-9\.\[\]\"'-]*"i
not_exact_or = ~r"or[@a-z][a-z_0-9\.\[\]\"'-]"i / !"or"
not_exact_else = ~r"else[@a-z][a-z_0-9\.\[\]\"'-]"i / !"else"
variable_name = ~r"[@a-z][a-z_0-9]*"i
contentw = ~r'.*'
content = (not_command_start / not_command_escape / ~r"[^{\\]")+
unrelated_escape = "\\" !command_start
escaped_command = "\\" command_start ~r"[^}]*" command_end
literal = string_literal / number_literal / boolean_literal / array_literal / object_literal
string_literal = ~r'"[^\"]*"' / ~r"'[^\']*'"
number_literal = ~r"[0-9\.]+"
boolean_literal = "True" / "False"
array_literal = empty_array / single_item_array / multi_item_array
empty_array = array_start ws? array_end
single_item_array = array_start ws? array_item ws? array_end
array_sep = ws? "," ws?
multi_item_array = array_start ws? array_item (array_sep array_item)* ws? array_end
array_start = "["
array_end = "]"
from typing import Any
import pyparsing as pp

# Turn on pyparsing's packrat memoization for the parser elements in this module.
pp.ParserElement.enable_packrat()
# Debugging aids, left disabled:
# pp.enable_diag(pp.Diagnostics.enable_debug_on_named_expressions)
# pp.autoname_elements()

# Forward declarations: the block grammar refers to `program` before its
# definition is attached with `<<=` at the bottom of this module.
program = pp.Forward()
program_chunk = pp.Forward()

## whitespace ##

# `ws` is a mandatory run of whitespace; `opt_ws` is the same run made optional.
ws = pp.White()
opt_ws = pp.Optional(ws)


## comments ##

# long-form comments {{!-- my comment --}}
# `command_end` closes every tag form: optional whitespace then "}}", or
# optional whitespace, "~}}" and any whitespace after it. The matched text is
# suppressed from the results.
command_end = pp.Suppress(opt_ws + "}}") | pp.Suppress(opt_ws + "~}}" + opt_ws)
long_comment_start = pp.Suppress(pp.Literal("{{!--"))
long_comment_end = pp.Suppress(pp.Literal("--") + command_end)
# A single "-" that does not begin the closing "--}}" / "--~}}" sequence.
not_long_comment_end = "-" + ~pp.FollowedBy("-}}") + ~pp.FollowedBy("-~}}")
long_comment_content = not_long_comment_end | pp.OneOrMore(pp.CharsNotIn("-"))
# The comment body is combined into one string and grouped under the results
# name "long_comment"; the delimiters themselves are suppressed.
long_comment = pp.Group(pp.Combine(long_comment_start + pp.ZeroOrMore(long_comment_content) + long_comment_end))("long_comment").set_name("long_comment")

# short-form comments {{! my comment }}
comment_start = pp.Suppress("{{" + pp.Optional("~") + "!")
# Either a lone "}" (not followed by another "}") or a "~" that is not
# immediately followed by "}}"; `+` binds tighter than `|` here.
not_comment_end = "}" + ~pp.FollowedBy("}") | "~" + ~pp.FollowedBy("}}")
comment_content = not_comment_end | pp.OneOrMore(pp.CharsNotIn("~}"))
# Grouped under the results name "comment".
comment = pp.Group(pp.Combine(comment_start + pp.ZeroOrMore(comment_content) + command_end))("comment")


## literals ##

# `literal` is recursive (arrays and objects contain literals), so it is
# declared first and defined once all of its alternatives exist.
literal = pp.Forward().set_name("literal")

# basic literals
# Double- or single-quoted string; the quote characters are suppressed. The
# body is any run of characters other than the delimiting quote, so there is
# no escape mechanism for embedding that quote.
string_literal = pp.Group(pp.Suppress('"') + pp.ZeroOrMore(pp.CharsNotIn('"')) + pp.Suppress('"') | pp.Suppress("'") + pp.ZeroOrMore(pp.CharsNotIn("'")) + pp.Suppress("'"))("string_literal")
# Any run of digits and dots; the grammar does not check that the run is a
# well-formed number.
number_literal = pp.Group(pp.Word(pp.srange("[0-9.]")))("number_literal")
boolean_literal = pp.Group("True" | pp.Literal("False"))("boolean_literal")

# object literal
object_literal = pp.Forward().set_name("object_literal")
object_start = pp.Suppress("{")
object_end = pp.Suppress("}")
empty_object = object_start + object_end
# Keys must be string literals; values may be any literal.
object_item = string_literal + pp.Suppress(":") + literal
single_item_object = object_start + object_item + object_end
object_sep = pp.Suppress(",")
# With zero repetitions this matches the same input as `single_item_object`.
multi_item_object = object_start + object_item + pp.ZeroOrMore(object_sep + object_item) + object_end
object_literal <<= pp.Group(empty_object | single_item_object | multi_item_object)("object_literal")

# array literal
array_literal = pp.Forward().set_name("array_literal")
array_start = pp.Suppress("[")
array_end = pp.Suppress("]")
array_item = literal
empty_array = array_start + array_end
single_item_array = array_start + array_item + array_end
array_sep = pp.Suppress(",")
# With zero repetitions this matches the same input as `single_item_array`.
multi_item_array = array_start + array_item + pp.ZeroOrMore(array_sep + array_item) + array_end
array_literal <<= pp.Group(empty_array | single_item_array | multi_item_array)("array_literal")

# Alternatives are tried in the order written.
literal <<= string_literal | number_literal | boolean_literal | array_literal | object_literal


## infix operators ##

# Operand rule for the infix table; declared here and defined later in this
# module with `<<=`, once the command and variable rules exist.
code_chunk_no_infix = pp.Forward().set_name("code_chunk_no_infix")

class OpNode:
    """Base class for operator nodes built by infix parse actions.

    Subclasses set ``operator`` and ``name`` in ``__init__``. The node offers
    ``node["attr"]`` item access and ``get_name()`` on top of its attributes.
    """

    def __repr__(self):
        return "{}({})".format(self.__class__.__name__, self.operator)

    def __getitem__(self, item):
        # Item access is mapped straight onto attribute access.
        return getattr(self, item)

    def get_name(self):
        """Return the node kind (``"unary_operator"`` / ``"binary_operator"``)."""
        return self.name


class UnOp(OpNode):
    """Unary operator node built from a ``[[operator, operand]]`` token group."""

    def __init__(self, tokens):
        group = tokens[0]
        self.operator = group[0]
        self.value = group[1]
        self.name = "unary_operator"


class BinOp(OpNode):
    """Binary operator node built from an infix token group.

    ``pyparsing.infix_notation`` hands a parse action one flat group per
    precedence level, so a chain such as ``a + b - c`` arrives as
    ``[[a, '+', b, '-', c]]``. The chain is folded left-associatively: the
    last operator and operand become this node's ``operator`` and ``rhs``,
    and everything before them becomes a nested node stored in ``lhs``.
    For a plain ``[[lhs, op, rhs]]`` group the three tokens are used as is.
    """

    def __init__(self, tokens):
        group = tokens[0]
        self.operator = group[-2]
        self.rhs = group[-1]
        if len(group) > 3:
            # More than one operator at this level: nest the leading part so
            # no operand is dropped and evaluation order stays left-to-right.
            self.lhs = type(self)([group[:-2]])
        else:
            self.lhs = group[0]
        self.name = "binary_operator"

# Infix expression grammar over `code_chunk_no_infix` operands. Each tuple is
# one precedence level: (operator expression, arity, associativity[, parse
# action]). Levels are listed from tightest-binding to loosest.
infix_operator_block = pp.infix_notation(code_chunk_no_infix, [
# Unary minus. NOTE(review): no parse action is attached on this level, so
# the UnOp class defined above is not used here - confirm this is intended.
('-', 1, pp.OpAssoc.RIGHT),
(pp.one_of('* /'), 2, pp.OpAssoc.LEFT, BinOp),
(pp.one_of('+ -'), 2, pp.OpAssoc.LEFT, BinOp),
# Comparison, identity and membership operators share one level.
(pp.one_of('< > <= >= == != is in'), 2, pp.OpAssoc.LEFT, BinOp),
(pp.one_of('and'), 2, pp.OpAssoc.LEFT, BinOp),
(pp.one_of('or'), 2, pp.OpAssoc.LEFT, BinOp),
])


## commands ##

# `code_chunk` is recursive (arguments contain code chunks), so it is declared
# here and defined after the command-call rules.
code_chunk = pp.Forward().set_name("code_chunk")
# Negative lookahead: the upcoming word must not be one of the block separator
# keywords, so they are never taken for a command or variable name.
not_keyword = ~pp.FollowedBy(pp.Keyword("or") | pp.Keyword("else") | pp.Keyword("elif"))
# Identifier: letter or underscore first, then letters, digits or underscores.
command_name = pp.Combine(not_keyword + pp.Word(pp.srange("[A-Za-z_]"), pp.srange("[A-Za-z_0-9]")))
# Like `command_name` but may start with "@" and has no keyword lookahead.
variable_name = pp.Word(pp.srange("[@A-Za-z_]"), pp.srange("[A-Za-z_0-9]"))
# Variable reference: after the first character it may also contain dots,
# square brackets, quotes and hyphens. The backslashes are written as "\\" so
# the literal has no invalid escape sequences; the string value passed to
# `srange` is unchanged.
variable_ref = not_keyword + pp.Group(pp.Word(pp.srange("[@A-Za-z_]"), pp.srange("[A-Za-z_0-9\\.\\[\\]\"'-]")))("variable_ref").set_name("variable_ref")
# Bare loop-control keywords.
keyword = pp.Group(pp.Keyword("break") | pp.Keyword("continue"))("keyword")

class SavedTextNode:
    """A node that saves the text it matches.

    Used as a parse action taking ``(s, loc, tokens)``. ``tokens`` is expected
    to be ``[start, [inner], end]`` (``end`` may be absent, in which case
    ``loc`` is used). The slice ``s[start:end]`` is stored as ``text`` and the
    single inner result as ``tokens``. Indexing, length, membership, calls,
    ``get_name()`` and any other attribute lookup are forwarded to ``tokens``.
    """

    def __init__(self, s, loc, tokens):
        start_pos = tokens[0]
        end_pos = tokens[2] if len(tokens) == 3 else loc
        self.text = s[start_pos:end_pos]
        # The wrapped expression must have produced exactly one result.
        assert len(tokens[1]) == 1
        self.tokens = tokens[1][0]

    def __repr__(self):
        return "SavedTextNode({})".format(self.text) + self.tokens.__repr__()

    def __getitem__(self, item):
        return self.tokens[item]

    def __len__(self):
        return len(self.tokens)

    def get_name(self):
        return self.tokens.get_name()

    def __contains__(self, item):
        return item in self.tokens

    def __getattr__(self, name):
        # __getattr__ only runs when normal lookup fails. If `tokens` itself is
        # missing (an instance created without __init__), forwarding to
        # `self.tokens` would re-enter this method until RecursionError, so
        # report the attribute as missing instead.
        if name == "tokens":
            raise AttributeError(name)
        return getattr(self.tokens, name)

    def __call__(self, *args, **kwds):
        return self.tokens(*args, **kwds)
def SavedText(node):
    """Wrap ``node`` in ``pp.Located`` with ``SavedTextNode`` as a parse action.

    Returns the ``Located`` element, whose results are handed to
    ``SavedTextNode`` so the matched source text is kept with the node.
    """
    located = pp.Located(node)
    return located.add_parse_action(SavedTextNode)

# command arguments
# An argument is either `name=value` or a bare value; the named form is tried
# first. Each is grouped under its own results name.
command_arg = pp.Forward()
named_command_arg = variable_name + "=" + code_chunk
command_arg <<= pp.Group(named_command_arg)("named_command_arg").set_name("named_command_arg") | pp.Group(code_chunk)("positional_command_arg").set_name("positional_command_arg")

# whitespace command format {{my_command arg1 arg2=val}}
ws_command_call = pp.Forward().set_name("ws_command_call")
# Every argument must be preceded by explicit whitespace, and at least one
# argument is required.
command_arg_and_ws = pp.Suppress(ws) + command_arg
ws_command_args = pp.OneOrMore(command_arg_and_ws)
# note that we have to list out all the operators here because we match before the infix operator checks
# NOTE(review): this lookahead list (repeated twice below) does not contain
# "in", although the infix table defined earlier accepts it - confirm intended.
ws_command_call <<= command_name("name") + ~pp.FollowedBy(pp.one_of("+ - * / or not is and <= == >= != < >")) + ws_command_args

# paren command format {{my_command(arg1, arg2=val)}}
paren_command_call = pp.Forward().set_name("paren_command_call")
command_arg_and_comma_ws = pp.Suppress(",") + command_arg
# Zero or more comma-separated arguments.
paren_command_args = pp.Optional(command_arg) + pp.ZeroOrMore(command_arg_and_comma_ws)
# The name and "(" are matched with leave_whitespace() applied. The `-`
# operator (instead of `+`) is pyparsing's no-backtrack sequence: once
# `name(` has matched, a failure in the arguments is reported as an error.
paren_command_call <<= (command_name("name") + pp.Suppress("(")).leave_whitespace() - paren_command_args + pp.Suppress(")")

# code chunks
# Two parallel families are defined: `enclosed_code_chunk` (used directly
# inside {{...}} and inside parentheses) additionally allows whitespace-style
# command calls, while `code_chunk` (used for arguments) does not.
enclosed_code_chunk = pp.Forward().set_name("enclosed_code_chunk")
paren_group = (pp.Suppress("(") - enclosed_code_chunk + pp.Suppress(")")).set_name("paren_group")
# A non-infix chunk is only accepted when no operator follows it; otherwise
# the infix alternative on the next line is tried.
enclosed_code_chunk_cant_infix = (pp.Group(ws_command_call)("command_call") | pp.Group(paren_command_call)("command_call") | literal | keyword | variable_ref | paren_group) + ~pp.FollowedBy(pp.one_of("+ - * / or not is and <= == >= != < >"))
enclosed_code_chunk <<= enclosed_code_chunk_cant_infix | infix_operator_block
code_chunk_no_infix <<= (paren_group | pp.Group(paren_command_call)("command_call") | literal | keyword | variable_ref) # used by infix_operator_block
code_chunk_cant_infix = code_chunk_no_infix + ~pp.FollowedBy(pp.one_of("+ - * / or not is and <= == >= != < >")) # don't match infix operators so we can run this before infix_operator_block
code_chunk_cant_infix.set_name("code_chunk_cant_infix")
code_chunk <<= code_chunk_cant_infix | infix_operator_block

# command/variable
# "{{" that does not start a comment ("{{!"), with an optional "~" marker.
command_start = pp.Suppress("{{" + ~pp.FollowedBy("!") + pp.Optional("~"))
# Same opener, additionally refusing "#", "/" and ">" so that block openers,
# block closers and partials are not parsed as simple commands.
simple_command_start = pp.Suppress("{{" + ~pp.FollowedBy("!") + pp.Optional("~")) + ~pp.FollowedBy(pp.one_of("# / >"))
command = SavedText(pp.Group(simple_command_start + enclosed_code_chunk + command_end)("command"))

# partial
# A call in either paren form or whitespace form; unlike `ws_command_call`
# the whitespace form may have no arguments at all.
always_call = pp.Group(paren_command_call | command_name("name") + pp.Optional(ws_command_args))
partial = pp.Group(pp.Suppress(pp.Combine(command_start + ">")) + always_call("command_call") + command_end)("partial")

# block command {{#my_command arg1 arg2=val}}...{{/my_command}}
# Separators allowed between content chunks of a block: {{or}}, {{else}} or
# {{elif <args>}}.
separator = pp.Group(pp.Keyword("or") | pp.Keyword("else") | (pp.Keyword("elif") + ws_command_args))("separator").set_name("separator")
block_command = pp.Forward()
block_command_call = always_call("command_call")
block_command_open = pp.Suppress(pp.Combine(command_start + "#")) + block_command_call + command_end
block_command_sep = (command_start + separator + command_end)("block_command_sep").set_name("block_command_sep")
# The closing tag accepts any command name; this rule does not compare it
# with the name used in the opening tag.
block_command_close = SavedText(pp.Group(command_start + pp.Suppress("/") + command_name + command_end)("block_command_close").set_name("block_command_close"))
# One or more nested programs separated by `block_command_sep`.
block_command_content = (pp.Group(program)("block_content_chunk") + pp.ZeroOrMore(block_command_sep + pp.Group(program)("block_content_chunk"))).set_name("block_content")
block_command <<= (block_command_open + SavedText(pp.Group(block_command_content)("block_content")) + block_command_close).leave_whitespace()
# Rebind the name to a SavedText wrapper around the Forward defined above, so
# later references use the wrapped version.
block_command = SavedText(pp.Group(block_command)("block_command")).set_name("block_command")

# block partial {{#>my_command arg1 arg2=val}}...{{/my_command}}
block_partial = pp.Forward()
block_partial_call = always_call("command_call")
block_partial_open = pp.Combine(command_start + pp.Suppress("#>")) + block_partial_call + command_end
block_partial_close = command_start + pp.Suppress("/") + command_name + command_end
# The whole closing tag is suppressed from the results.
block_partial <<= block_partial_open + program + pp.Suppress(block_partial_close)
# Same rebind-to-wrapper pattern as for `block_command`.
block_partial = SavedText(pp.Group(block_partial)("block_partial"))

# escaped commands \{{ not a command }}
# A single "}" that is not part of the closing "}}".
not_command_end = "}" + ~pp.FollowedBy("}")
# Backslash followed by a complete {{...}} tag; the tag body is combined into
# one string instead of being parsed as a command.
escaped_command = SavedText(pp.Group(pp.Suppress("\\") + command_start + pp.Combine(pp.ZeroOrMore(pp.CharsNotIn("}") | not_command_end)) + command_end)("escaped_command"))
# A backslash that is not escaping a command opener.
unrelated_escape = "\\" + ~pp.FollowedBy(command_start)


## content ##

# A single "{" that does not begin a "{{" tag.
not_command_start = "{" + ~pp.FollowedBy("{")
# A backslash that is not immediately followed by "{{".
not_command_escape = "\\" + ~pp.FollowedBy("{{")
# Whitespace directly before a "{{~" tag is matched but suppressed from the
# results; this is how the "~" marker removes preceding whitespace.
stripped_whitespace = pp.Suppress(pp.Word(" \t\r\n")) + pp.FollowedBy("{{~")
unstripped_whitespace = pp.Word(" \t\r\n") # no need for a negative FollowedBy because stripped_whitespace will match first
# Plain template text: the longest run of the pieces above plus any characters
# other than "{", backslash and whitespace, combined into one string and
# grouped under the results name "content".
content = pp.Group(pp.Combine(pp.OneOrMore(stripped_whitespace | unstripped_whitespace | not_command_start | not_command_escape | pp.CharsNotIn("{\\ \t\r\n"))))("content").set_name("content")

# Disabled alternatives kept from earlier iterations:
# keyword_command = SavedText(pp.Group(command_start + keyword + ws_command_args + command_end)("keyword_command"))
# block_content_chunk = long_comment | comment | escaped_command | unrelated_escape | block_partial | block_command | partial | command | content
# block_content <<= pp.ZeroOrMore(block_content_chunk)("program").leave_whitespace()

## global program ##

object_literal = empty_object / single_item_object / multi_item_object
empty_object = object_start ws? object_end
single_item_object = object_start ws? object_item ws? object_end
object_sep = ws? "," ws?
multi_item_object = object_start ws? object_item (object_sep object_item)* ws? object_end
object_start = "{"
object_end = "}"
object_item = string_literal ws? ":" ws? literal
""")
# One chunk of a template. Alternatives are tried in the order written, so
# comments and escapes are tried before block/partial/command tags and plain
# content is the last resort. leave_whitespace() turns off pyparsing's
# automatic whitespace skipping so that whitespace stays part of the content.
program_chunk <<= (long_comment | comment | escaped_command | unrelated_escape | block_partial | block_command | partial | command | content).leave_whitespace()
# A program is any number of chunks, collected under the results name "program".
program <<= pp.ZeroOrMore(program_chunk)("program").leave_whitespace().set_name("program")
# Top-level entry point: a program that must extend to the end of the input.
# parse_with_tabs() keeps pyparsing from expanding tab characters before
# parsing.
grammar = (program + pp.StringEnd()).parse_with_tabs()
27 changes: 17 additions & 10 deletions guidance/_program.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,18 @@
import html
import uuid
import sys
import parsimonious
# import parsimonious
import logging
import copy
import asyncio
import pathlib
import os
import traceback
import importlib
import time
import datetime
import nest_asyncio
from .llms import _openai
# from .llms import _openai
from . import _utils
from ._program_executor import ProgramExecutor
from . import library
Expand Down Expand Up @@ -163,10 +164,10 @@ def __init__(self, text, llm=None, cache_seed=0, logprobs=None, silent=None, asy
self.update_display = DisplayThrottler(self._update_display, self.display_throttle_limit)

# see if we are in an ipython environment
try:
from IPython import get_ipython
# check if get_ipython variable exists
if hasattr(__builtins__, "get_ipython"):
self._ipython = get_ipython()
except:
else:
self._ipython = None

# if we are echoing in ipython we assume we can display html
Expand Down Expand Up @@ -424,7 +425,7 @@ async def execute(self):
else:
with self.llm.session(asynchronous=True) as llm_session:
await self._executor.run(llm_session)
self._text = self._variables["_prefix"]
self._text = self._variables["@raw_prefix"]

# delete the executor and so mark the program as not executing
self._executor = None
Expand Down Expand Up @@ -471,7 +472,7 @@ def text(self):
@property
def marked_text(self):
if self._executor is not None:
return self._variables["_prefix"]
return self._variables["@raw_prefix"]
else:
return self._text

Expand Down Expand Up @@ -681,7 +682,11 @@ def add_spaces(s):
"if": library.if_,
"unless": library.unless,
"add": library.add,
"BINARY_OPERATOR_+": library.add,
"subtract": library.subtract,
"BINARY_OPERATOR_-": library.subtract,
"multiply": library.multiply,
"BINARY_OPERATOR_*": library.multiply,
"strip": library.strip,
"block": library.block,
"set": library.set,
Expand All @@ -692,11 +697,13 @@ def add_spaces(s):
"assistant": library.assistant,
"break": library.break_,
"equal": library.equal,
"==": library.equal,
"BINARY_OPERATOR_==": library.equal,
"notequal": library.notequal,
"BINARY_OPERATOR_!=": library.notequal,
"greater": library.greater,
">": library.greater,
"BINARY_OPERATOR_>": library.greater,
"less": library.less,
"<": library.less,
"BINARY_OPERATOR_<": library.less,
"contains": library.contains,
"parse": library.parse
}
Expand Down
Loading

0 comments on commit 89289d8

Please sign in to comment.