From 9788275b0772ce200fa6bdfea4c7463a61f309a1 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Thu, 29 Feb 2024 21:46:40 +0530 Subject: [PATCH 1/5] Implement fstrings and add test --- src/lpython/parser/parser.yy | 16 ++++++++ src/lpython/parser/semantics.h | 44 +++++++++++++++++++++ src/lpython/parser/tokenizer.re | 16 ++++++-- src/lpython/semantics/python_ast_to_asr.cpp | 13 ++++++ tests/fstring1.py | 8 ++++ 5 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 tests/fstring1.py diff --git a/src/lpython/parser/parser.yy b/src/lpython/parser/parser.yy index 7d773b962f..a9e1e3278b 100644 --- a/src/lpython/parser/parser.yy +++ b/src/lpython/parser/parser.yy @@ -103,6 +103,9 @@ void yyerror(YYLTYPE *yyloc, LCompilers::LPython::Parser &p, const std::string & %token TK_CARET "^" %token TK_AT "@" %token TK_STRING +%token TK_FSTRING_START +%token TK_FSTRING_MIDDLE +%token TK_FSTRING_END %token TK_COMMENT %token TK_EOLCOMMENT %token TK_TYPE_COMMENT @@ -260,6 +263,8 @@ void yyerror(YYLTYPE *yyloc, LCompilers::LPython::Parser &p, const std::string & %type sep_one %type type_comment %type string +%type fstring +%type fstring_middle %type ternary_if_statement %type comprehension %type id_list @@ -1106,8 +1111,19 @@ subscript string : string TK_STRING { $$ = STRING2($1, $2, @$); } // TODO | string KW_STR_PREFIX TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); } + | string fstring { $$ = CONCAT_FSTRING($1, $2, @$); } | TK_STRING { $$ = STRING1($1, @$); } | KW_STR_PREFIX TK_STRING { $$ = STRING3($1, $2, @$); } + | fstring + ; + +fstring_middle + : fstring_middle TK_FSTRING_MIDDLE expr { $$ = FSTRING_MIDDLE($1, $2, $3, @$); } + | expr { $$ = FSTRING_MIDDLE1($1, @$); } + ; + +fstring + : TK_FSTRING_START fstring_middle TK_FSTRING_END { $$ = FSTRING($1, $2, $3, @$); } ; lambda_parameter diff --git a/src/lpython/parser/semantics.h b/src/lpython/parser/semantics.h index 9a41278783..ba26c4aed3 100644 --- a/src/lpython/parser/semantics.h +++ b/src/lpython/parser/semantics.h @@ -802,10 +802,54 @@ static inline ast_t* concat_string(Allocator &al, Location &l, #define STRING2(x, y, l) concat_string(p.m_a, l, EXPR(x), str_unescape_c(p.m_a, y), nullptr) #define STRING3(prefix, x, l) PREFIX_STRING(p.m_a, l, prefix.c_str(p.m_a), x.c_str(p.m_a)) #define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s)) +#define FSTRING(s, m, e, l) fstring(p.m_a, l, s.c_str(p.m_a), EXPR(m), e.c_str(p.m_a)) +#define FSTRING_MIDDLE(s, m, e, l) fstring_middle(p.m_a, l, s, m.c_str(p.m_a), EXPR(e)) +#define FSTRING_MIDDLE1(x, l) fstring_middle1(p.m_a, l, EXPR(x)) +#define CONCAT_FSTRING(x, y, l) make_BinOp_t(p.m_a, l, EXPR(x), operatorType::Add, EXPR(y)) #define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr) #define COMPLEX(x, l) make_ConstantComplex_t(p.m_a, l, 0, x, nullptr) #define BOOL(x, l) make_ConstantBool_t(p.m_a, l, x, nullptr) +static inline ast_t *fstring_middle1(Allocator &al, Location &l, expr_t *start){ + return make_FormattedValue_t(al, l, start, -1, nullptr); +} + +static inline ast_t *fstring_middle(Allocator &al, Location &l, ast_t* start, + char *middle, expr_t *end) { + ast_t *tmp = nullptr; + tmp = start; + if(middle != nullptr && end != nullptr){ + ast_t *middle_string = make_ConstantStr_t(al, l, middle, nullptr); + tmp = make_BinOp_t(al, l, EXPR(tmp), operatorType::Add, EXPR(middle_string)); + + ast_t *right = make_FormattedValue_t(al, l, end, -1, nullptr); + tmp = make_BinOp_t(al, l, EXPR(tmp), operatorType::Add, EXPR(right)); + } + return tmp; +} + +static inline ast_t 
*fstring(Allocator &al, Location &l, char *start, + expr_t *middle, char *end) { + size_t k = 0; + while(start[k] != '"' && k < sizeof(start)) k++; + //discard the start prefix & quote & end brace characters, prefix can be 'r'|'f' + size_t len = strlen(start)-k++-1; + char *start_str = (char *)malloc(len*sizeof(char)); + strncpy(start_str, start+k, len-1); + start_str[len-1] = '\0'; + + ast_t* tmp = nullptr; + // add left string and middle expr + ast_t *left = make_ConstantStr_t(al, l, start_str, nullptr); + ast_t *value = make_FormattedValue_t(al, l, middle, -1, nullptr); + tmp = make_BinOp_t(al, l, EXPR(left), operatorType::Add, EXPR(value)); + // add the resultant to the end string + ast_t *right = make_ConstantStr_t(al, l, end, nullptr); + tmp = make_BinOp_t(al, l, EXPR(tmp), operatorType::Add, EXPR(right)); + + return tmp; +} + static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, char *s){ Vec exprs; exprs.reserve(al, 4); diff --git a/src/lpython/parser/tokenizer.re b/src/lpython/parser/tokenizer.re index 64b0488b76..27b1fbf8a2 100644 --- a/src/lpython/parser/tokenizer.re +++ b/src/lpython/parser/tokenizer.re @@ -290,6 +290,11 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost | ("''" | "''" "\\"+) [^'\x00\\] | [^'\x00\\] )* "'''"; + + fstring_start = ([fF] | [fF][rR] | [rR][fF]) '"' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{'; + fstring_middle = '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{'; + fstring_end = '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '"'; + type_ignore = "#" whitespace? "type:" whitespace? "ignore" [^\n\x00]*; type_comment = "#" whitespace? "type:" whitespace? [^\n\x00]*; comment = "#" [^\n\x00]*; @@ -434,10 +439,8 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(TK_NAME); } } - [rR][bB] | [bB][rR] - | [fF][rR] | [rR][fF] - | [rR] | [bB] | [fF] | [uU] + | [rR] | [bB] | [uU] { if(cur[0] == '\'' || cur[0] == '"'){ KW(STR_PREFIX); @@ -601,6 +604,10 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost string3 { token_str3(yylval.string); RET(TK_STRING) } string4 { token_str3(yylval.string); RET(TK_STRING) } + fstring_start { token(yylval.string); RET(TK_FSTRING_START) } + fstring_middle { token_str(yylval.string); RET(TK_FSTRING_MIDDLE) } + fstring_end { token_str(yylval.string); RET(TK_FSTRING_END) } + name { token(yylval.string); RET(TK_NAME) } */ } @@ -700,6 +707,9 @@ std::string token2text(const int token) T(TK_AT, "@") T(TK_STRING, "string") + T(TK_FSTRING_START, "fstring_start") + T(TK_FSTRING_MIDDLE, "fstring_middle") + T(TK_FSTRING_END, "fstring_end") T(TK_COMMENT, "comment") T(TK_EOLCOMMENT, "eolcomment") T(TK_TYPE_COMMENT, "type_comment") diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index 4e11fe01ed..f3edd90c3c 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -3352,6 +3352,19 @@ class CommonVisitor : public AST::BaseVisitor { } tmp = ASR::make_LogicalBinOp_t(al, x.base.base.loc, lhs, op, rhs, dest_type, value); } + + void visit_FormattedValue(const AST::FormattedValue_t &x){ + this->visit_expr(*x.m_value); + // converting x as call_arg for the handle_intrinsic_str function + ASR::expr_t* expr = ASRUtils::EXPR(tmp); + ASR::call_arg_t arg; + arg.loc = expr->base.loc; + arg.m_value = expr; + Vec call_args; + call_args.reserve(al, 1); + call_args.push_back(al, arg); + tmp = intrinsic_node_handler.handle_intrinsic_str(al, call_args, 
x.base.base.loc); + } void visit_BinOp(const AST::BinOp_t &x) { this->visit_expr(*x.m_left); diff --git a/tests/fstring1.py b/tests/fstring1.py new file mode 100644 index 0000000000..7caf9c925b --- /dev/null +++ b/tests/fstring1.py @@ -0,0 +1,8 @@ +a : str = "FooBar" +b : i32 = 10 +c : i32 = 11 +print(f"{b} + 1 = {c}") # int inside fstring +print(f"Say something! {a}") # string inside fstring +print(f"do some calculation: {b*7+c}") # expression inside fstring +print("9..." f"{b}...{c}") # concatenation of normal string with fstring +print(f"{b} " f"{c}") # concatenation of fstrings \ No newline at end of file From 0282c3093a97c3e3b53e835d2549710d69bbab1b Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Sat, 2 Mar 2024 12:49:08 +0530 Subject: [PATCH 2/5] Improve implement f-string and add tests --- build0.sh | 2 +- src/lpython/parser/parser.yy | 2 +- src/lpython/parser/semantics.h | 98 ++---- src/lpython/parser/tokenizer.h | 2 +- src/lpython/parser/tokenizer.re | 300 +++++++++++------- src/lpython/semantics/python_ast_to_asr.cpp | 11 + tests/fstring1.py | 12 +- .../ast_new-match_stmt1-9e84d24.json | 2 +- .../ast_new-match_stmt1-9e84d24.stdout | 8 +- tests/reference/ast_new-string1-96b90b3.json | 2 +- .../reference/ast_new-string1-96b90b3.stdout | 228 ++++++++----- 11 files changed, 390 insertions(+), 277 deletions(-) diff --git a/build0.sh b/build0.sh index 7218d748b7..5f96fd08f3 100755 --- a/build0.sh +++ b/build0.sh @@ -15,7 +15,7 @@ python src/libasr/asdl_cpp.py src/libasr/ASR.asdl src/libasr/asr.h python src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -(cd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp) +(cd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp -c) (cd src/lpython/parser && bison -Wall -d -r all parser.yy) python -c "file = 'src/lpython/parser/parser.tab.cc' diff --git a/src/lpython/parser/parser.yy b/src/lpython/parser/parser.yy index a9e1e3278b..cd2b0ed95a 100644 --- a/src/lpython/parser/parser.yy +++ b/src/lpython/parser/parser.yy @@ -1111,7 +1111,7 @@ subscript string : string TK_STRING { $$ = STRING2($1, $2, @$); } // TODO | string KW_STR_PREFIX TK_STRING { $$ = STRING4($1, STRING3($2, $3, @$), @$); } - | string fstring { $$ = CONCAT_FSTRING($1, $2, @$); } + | string fstring { $$ = STRING4($1, $2, @$); } | TK_STRING { $$ = STRING1($1, @$); } | KW_STR_PREFIX TK_STRING { $$ = STRING3($1, $2, @$); } | fstring diff --git a/src/lpython/parser/semantics.h b/src/lpython/parser/semantics.h index ba26c4aed3..8270b48bb1 100644 --- a/src/lpython/parser/semantics.h +++ b/src/lpython/parser/semantics.h @@ -803,9 +803,8 @@ static inline ast_t* concat_string(Allocator &al, Location &l, #define STRING3(prefix, x, l) PREFIX_STRING(p.m_a, l, prefix.c_str(p.m_a), x.c_str(p.m_a)) #define STRING4(x, s, l) concat_string(p.m_a, l, EXPR(x), "", EXPR(s)) #define FSTRING(s, m, e, l) fstring(p.m_a, l, s.c_str(p.m_a), EXPR(m), e.c_str(p.m_a)) -#define FSTRING_MIDDLE(s, m, e, l) fstring_middle(p.m_a, l, s, m.c_str(p.m_a), EXPR(e)) +#define FSTRING_MIDDLE(s, m, e, l) fstring_middle(p.m_a, l, EXPR(s), m.c_str(p.m_a), EXPR(e)) #define FSTRING_MIDDLE1(x, l) fstring_middle1(p.m_a, l, EXPR(x)) -#define CONCAT_FSTRING(x, y, l) make_BinOp_t(p.m_a, l, EXPR(x), operatorType::Add, EXPR(y)) #define FLOAT(x, l) make_ConstantFloat_t(p.m_a, l, x, nullptr) #define COMPLEX(x, l) make_ConstantComplex_t(p.m_a, l, 0, x, nullptr) #define BOOL(x, l) make_ConstantBool_t(p.m_a, l, x, nullptr) @@ -814,39 +813,39 @@ static inline ast_t *fstring_middle1(Allocator 
&al, Location &l, expr_t *start){ return make_FormattedValue_t(al, l, start, -1, nullptr); } -static inline ast_t *fstring_middle(Allocator &al, Location &l, ast_t* start, +static inline ast_t *fstring_middle(Allocator &al, Location &l, expr_t *start, char *middle, expr_t *end) { - ast_t *tmp = nullptr; - tmp = start; - if(middle != nullptr && end != nullptr){ - ast_t *middle_string = make_ConstantStr_t(al, l, middle, nullptr); - tmp = make_BinOp_t(al, l, EXPR(tmp), operatorType::Add, EXPR(middle_string)); - - ast_t *right = make_FormattedValue_t(al, l, end, -1, nullptr); - tmp = make_BinOp_t(al, l, EXPR(tmp), operatorType::Add, EXPR(right)); - } + Vec exprs; + exprs.reserve(al, 3); + exprs.push_back(al, start); + ast_t *tmp = make_ConstantStr_t(al, l, middle, nullptr); + exprs.push_back(al, EXPR(tmp)); + tmp = make_FormattedValue_t(al, l, end, -1, nullptr); + exprs.push_back(al, EXPR(tmp)); + tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size()); return tmp; } static inline ast_t *fstring(Allocator &al, Location &l, char *start, expr_t *middle, char *end) { - size_t k = 0; - while(start[k] != '"' && k < sizeof(start)) k++; + size_t p = 0, q = 0; + while(isalpha(start[p]) && p < strlen(start)) p++; + q = p; + while(start[q] == start[p] && q < strlen(start)) q++; //discard the start prefix & quote & end brace characters, prefix can be 'r'|'f' - size_t len = strlen(start)-k++-1; - char *start_str = (char *)malloc(len*sizeof(char)); - strncpy(start_str, start+k, len-1); - start_str[len-1] = '\0'; + std::string str = std::string(start).substr(q, strlen(start)-q-1); + start = LCompilers::s2c(al, str); + str = std::string(end).substr(1, strlen(end)-(q-p)-1); + end = LCompilers::s2c(al, str); - ast_t* tmp = nullptr; - // add left string and middle expr - ast_t *left = make_ConstantStr_t(al, l, start_str, nullptr); - ast_t *value = make_FormattedValue_t(al, l, middle, -1, nullptr); - tmp = make_BinOp_t(al, l, EXPR(left), operatorType::Add, EXPR(value)); - // add the resultant to the end string - ast_t *right = make_ConstantStr_t(al, l, end, nullptr); - tmp = make_BinOp_t(al, l, EXPR(tmp), operatorType::Add, EXPR(right)); - + Vec exprs; + exprs.reserve(al, 3); + ast_t* tmp = make_ConstantStr_t(al, l, start, nullptr); + exprs.push_back(al, EXPR(tmp)); + exprs.push_back(al, middle); + tmp = make_ConstantStr_t(al, l, end, nullptr); + exprs.push_back(al, EXPR(tmp)); + tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size()); return tmp; } @@ -862,49 +861,8 @@ static inline ast_t *PREFIX_STRING(Allocator &al, Location &l, char *prefix, cha } if (strcmp(prefix, "f") == 0 || strcmp(prefix, "fr") == 0 || strcmp(prefix, "rf") == 0) { - std::string str = std::string(s); - std::string s1 = "\""; - std::string id; - std::vector strs; - bool open_paren = false; - for (size_t i = 0; i < str.length(); i++) { - if(str[i] == '{') { - if(s1 != "\"") { - s1.push_back('"'); - strs.push_back(s1); - s1 = "\""; - } - open_paren = true; - } else if (str[i] != '}' && open_paren) { - id.push_back(s[i]); - } else if (str[i] == '}') { - if(id != "") { - strs.push_back(id); - id = ""; - } - open_paren = false; - } else if (!open_paren) { - s1.push_back(s[i]); - } - if(i == str.length()-1 && s1 != "\"") { - s1.push_back('"'); - strs.push_back(s1); - } - } - - for (size_t i = 0; i < strs.size(); i++) { - if (strs[i][0] == '"') { - strs[i] = strs[i].substr(1, strs[i].length() - 2); - tmp = make_ConstantStr_t(al, l, LCompilers::s2c(al, strs[i]), nullptr); - exprs.push_back(al, down_cast(tmp)); - } else { - tmp = make_Name_t(al, l, - 
LCompilers::s2c(al, strs[i]), expr_contextType::Load); - tmp = make_FormattedValue_t(al, l, EXPR(tmp), -1, nullptr); - exprs.push_back(al, down_cast(tmp)); - } - } - tmp = make_JoinedStr_t(al, l, exprs.p, exprs.size()); + // ignore 'f', assuming it is handled by fstring + tmp = make_ConstantStr_t(al, l, s, nullptr); } else if (strcmp(prefix, "b") == 0) { LCompilers::Str s_; s_.from_str(al, std::string(s)); diff --git a/src/lpython/parser/tokenizer.h b/src/lpython/parser/tokenizer.h index 3fe00226bb..184e277104 100644 --- a/src/lpython/parser/tokenizer.h +++ b/src/lpython/parser/tokenizer.h @@ -20,7 +20,7 @@ class Tokenizer uint32_t prev_loc; // The previous file ended at this location. int last_token=-1; - + int cond; // variable to store re2c conditions bool indent = false; // Next line is expected to be indented int dedent = 0; // Allowed values: 0, 1, 2, see the code below the meaning of this state variable bool colon_actual_last_token = false; // If the actual last token was a colon diff --git a/src/lpython/parser/tokenizer.re b/src/lpython/parser/tokenizer.re index 27b1fbf8a2..8688f26b14 100644 --- a/src/lpython/parser/tokenizer.re +++ b/src/lpython/parser/tokenizer.re @@ -171,6 +171,7 @@ void Tokenizer::record_paren(Location &loc, char c) { #define KW(x) token(yylval.string); RET(KW_##x); #define RET(x) token_loc(loc); last_token=yytokentype::x; return yytokentype::x; +/*!conditions:re2c*/ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnostics &/*diagnostics*/) { if(dedent == 1) { @@ -262,6 +263,10 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost // re2c:define:YYCTXMARKER = ctxmar; re2c:yyfill:enable = 0; re2c:define:YYCTYPE = "unsigned char"; + re2c:define:YYGETCONDITION = "cond"; + re2c:define:YYGETCONDITION:naked = 1; + re2c:define:YYSETCONDITION = "cond = @@;"; + re2c:define:YYSETCONDITION:naked = 1; end = "\x00"; whitespace = [ \t\v]+; @@ -290,10 +295,66 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost | ("''" | "''" "\\"+) [^'\x00\\] | [^'\x00\\] )* "'''"; - - fstring_start = ([fF] | [fF][rR] | [rR][fF]) '"' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{'; - fstring_middle = '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{'; - fstring_end = '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '"'; + + // fstring1 -> "'" + ([fF] | [fF][rR] | [rR][fF]) '"' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{' => fstring1 { + token(yylval.string); RET(TK_FSTRING_START) + } + '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{' { + token_str(yylval.string); RET(TK_FSTRING_MIDDLE) + } + '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '"' => init { + token(yylval.string); RET(TK_FSTRING_END) + } + // fstring2 -> "'" + ([fF] | [fF][rR] | [rR][fF]) "'" ("\\"[^\x00{}] | [^'\x00\n\\{}])* '{' => fstring2 { + token(yylval.string); RET(TK_FSTRING_START) + } + '}' ("\\"[^\x00{}] | [^'\x00\n\\{}])* '{' { + token_str(yylval.string); RET(TK_FSTRING_MIDDLE) + } + '}' ("\\"[^\x00{}] | [^'\x00\n\\{}])* "'" => init { + token(yylval.string); RET(TK_FSTRING_END) + } + // fstring3 -> '"""' + ([fF] | [fF][rR] | [rR][fF]) '"""' ( '\\'[^\x00{}] + | ('"' | '"' '\\'+ '"' | '"' '\\'+) [^"\x00\\{}] + | ('""' | '""' '\\'+) [^"\x00\\{}] + | [^"\x00\\{}] )* '{' => fstring3 { + token(yylval.string); RET(TK_FSTRING_START) + } + '}' ( '\\'[^\x00{}] + | ('"' | '"' '\\'+ '"' | '"' '\\'+) [^"\x00\\{}] + | ('""' | '""' '\\'+) [^"\x00\\{}] + | [^"\x00\\{}] )* '{' { + token_str(yylval.string); RET(TK_FSTRING_MIDDLE) + } + '}' ( '\\'[^\x00{}] + | ('"' | '"' '\\'+ '"' | '"' '\\'+) 
[^"\x00\\{}] + | ('""' | '""' '\\'+) [^"\x00\\{}] + | [^"\x00\\{}] )* '"""' => init { + token(yylval.string); RET(TK_FSTRING_END) + } + // fstring4 -> "'''" + ([fF] | [fF][rR] | [rR][fF]) "'''" ( "\\"[^\x00{}] + | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] + | ("''" | "''" "\\"+) [^'\x00\\{}] + | [^'\x00\\{}] )* '{' => fstring4 { + token(yylval.string); RET(TK_FSTRING_START) + } + '}' ( "\\"[^\x00{}] + | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] + | ("''" | "''" "\\"+) [^'\x00\\{}] + | [^'\x00\\{}] )* '{' { + token_str(yylval.string); RET(TK_FSTRING_MIDDLE) + } + '}' ( "\\"[^\x00{}] + | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] + | ("''" | "''" "\\"+) [^'\x00\\{}] + | [^'\x00\\{}] )* "'''" => init { + token(yylval.string); RET(TK_FSTRING_END) + } + type_ignore = "#" whitespace? "type:" whitespace? "ignore" [^\n\x00]*; type_comment = "#" whitespace? "type:" whitespace? [^\n\x00]*; @@ -301,7 +362,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost // docstring = newline whitespace? string1 | string2; ws_comment = whitespace? comment? newline; - * { token_loc(loc); + <*> * { token_loc(loc); std::string t = token(); throw parser_local::TokenizerError(diag::Diagnostic( "Token '" + t + "' is not recognized", @@ -310,7 +371,8 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost }) ); } - end { + + <*> end { token_loc(loc); if(parenlevel) { throw parser_local::TokenizerError( @@ -319,7 +381,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(END_OF_FILE); } - whitespace { + <*> whitespace { if(cur[0] == '#') { continue; } if(last_token == yytokentype::TK_NEWLINE && cur[0] == '\n') { continue; @@ -370,40 +432,40 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } // Keywords - "as" { KW(AS) } - "assert" { KW(ASSERT) } - "async" { KW(ASYNC) } - "await" { KW(AWAIT) } - "break" { KW(BREAK) } - "class" { KW(CLASS) } - "continue" { KW(CONTINUE) } - "def" { KW(DEF) } - "del" { KW(DEL) } - "elif" { KW(ELIF) } - "else" { KW(ELSE) } - "except" { KW(EXCEPT) } - "finally" { KW(FINALLY) } - "for" { KW(FOR) } - "from" { KW(FROM) } - "global" { KW(GLOBAL) } - "if" { KW(IF) } - "import" { KW(IMPORT) } - "in" { KW(IN) } - "is" { KW(IS) } - "lambda" { KW(LAMBDA) } - "None" { KW(NONE) } - "nonlocal" { KW(NONLOCAL) } - "pass" { KW(PASS) } - "raise" { KW(RAISE) } - "return" { KW(RETURN) } - "try" { KW(TRY) } - "while" { KW(WHILE) } - "with" { KW(WITH) } - "yield" { KW(YIELD) } - "yield" whitespace "from" whitespace { KW(YIELD_FROM) } + <*> "as" { KW(AS) } + <*> "assert" { KW(ASSERT) } + <*> "async" { KW(ASYNC) } + <*> "await" { KW(AWAIT) } + <*> "break" { KW(BREAK) } + <*> "class" { KW(CLASS) } + <*> "continue" { KW(CONTINUE) } + <*> "def" { KW(DEF) } + <*> "del" { KW(DEL) } + <*> "elif" { KW(ELIF) } + <*> "else" { KW(ELSE) } + <*> "except" { KW(EXCEPT) } + <*> "finally" { KW(FINALLY) } + <*> "for" { KW(FOR) } + <*> "from" { KW(FROM) } + <*> "global" { KW(GLOBAL) } + <*> "if" { KW(IF) } + <*> "import" { KW(IMPORT) } + <*> "in" { KW(IN) } + <*> "is" { KW(IS) } + <*> "lambda" { KW(LAMBDA) } + <*> "None" { KW(NONE) } + <*> "nonlocal" { KW(NONLOCAL) } + <*> "pass" { KW(PASS) } + <*> "raise" { KW(RAISE) } + <*> "return" { KW(RETURN) } + <*> "try" { KW(TRY) } + <*> "while" { KW(WHILE) } + <*> "with" { KW(WITH) } + <*> "yield" { KW(YIELD) } + <*> "yield" whitespace "from" whitespace { KW(YIELD_FROM) } // Soft Keywords - "match" / [^:\n\x00] { + <*> "match" / [^:\n\x00] { if 
((last_token == -1 || last_token == yytokentype::TK_DEDENT || last_token == yytokentype::TK_INDENT @@ -422,7 +484,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(TK_NAME); } } - "case" / [^:\n\x00] { + <*> "case" / [^:\n\x00] { if ((last_token == yytokentype::TK_INDENT || last_token == yytokentype::TK_DEDENT) && parenlevel == 0) { @@ -439,8 +501,10 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(TK_NAME); } } - [rR][bB] | [bB][rR] - | [rR] | [bB] | [uU] + + <*> [rR][bB] | [bB][rR] + | [fF][rR] | [rR][fF] + | [rR] | [fF] | [bB] | [uU] { if(cur[0] == '\'' || cur[0] == '"'){ KW(STR_PREFIX); @@ -452,7 +516,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } // Tokens - newline { + <*> newline { if(parenlevel) { continue; } if(cur[0] == '#') { RET(TK_NEWLINE); } if (last_token == yytokentype::TK_COLON @@ -467,100 +531,100 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(TK_NEWLINE); } - "\\" newline { continue; } + <*> "\\" newline { continue; } // Single character symbols - "(" { token_loc(loc); record_paren(loc, '('); RET(TK_LPAREN) } - "[" { token_loc(loc); record_paren(loc, '['); RET(TK_LBRACKET) } - "{" { token_loc(loc); record_paren(loc, '{'); RET(TK_LBRACE) } - ")" { token_loc(loc); record_paren(loc, ')'); RET(TK_RPAREN) } - "]" { token_loc(loc); record_paren(loc, ']'); RET(TK_RBRACKET) } - "}" { token_loc(loc); record_paren(loc, '}'); RET(TK_RBRACE) } - "+" { RET(TK_PLUS) } - "-" { RET(TK_MINUS) } - "=" { RET(TK_EQUAL) } - ":" { + <*> "(" { token_loc(loc); record_paren(loc, '('); RET(TK_LPAREN) } + <*> "[" { token_loc(loc); record_paren(loc, '['); RET(TK_LBRACKET) } + <*> "{" { token_loc(loc); record_paren(loc, '{'); RET(TK_LBRACE) } + <*> ")" { token_loc(loc); record_paren(loc, ')'); RET(TK_RPAREN) } + <*> "]" { token_loc(loc); record_paren(loc, ']'); RET(TK_RBRACKET) } + <*> "}" { token_loc(loc); record_paren(loc, '}'); RET(TK_RBRACE) } + <*> "+" { RET(TK_PLUS) } + <*> "-" { RET(TK_MINUS) } + <*> "=" { RET(TK_EQUAL) } + <*> ":" { if(cur[0] == '\n' && !parenlevel){ colon_actual_last_token = true; } RET(TK_COLON); } - ";" { RET(TK_SEMICOLON) } - "/" { RET(TK_SLASH) } - "%" { RET(TK_PERCENT) } - "," { RET(TK_COMMA) } - "*" { RET(TK_STAR) } - "|" { RET(TK_VBAR) } - "&" { RET(TK_AMPERSAND) } - "." { RET(TK_DOT) } - "~" { RET(TK_TILDE) } - "^" { RET(TK_CARET) } - "@" { RET(TK_AT) } + <*> ";" { RET(TK_SEMICOLON) } + <*> "/" { RET(TK_SLASH) } + <*> "%" { RET(TK_PERCENT) } + <*> "," { RET(TK_COMMA) } + <*> "*" { RET(TK_STAR) } + <*> "|" { RET(TK_VBAR) } + <*> "&" { RET(TK_AMPERSAND) } + <*> "." { RET(TK_DOT) } + <*> "~" { RET(TK_TILDE) } + <*> "^" { RET(TK_CARET) } + <*> "@" { RET(TK_AT) } // Multiple character symbols - ">>" { RET(TK_RIGHTSHIFT) } - "<<" { RET(TK_LEFTSHIFT) } - "**" { RET(TK_POW) } - "//" { RET(TK_FLOOR_DIV) } - "+=" { RET(TK_PLUS_EQUAL) } - "-=" { RET(TK_MIN_EQUAL) } - "*=" { RET(TK_STAR_EQUAL) } - "/=" { RET(TK_SLASH_EQUAL) } - "%=" { RET(TK_PERCENT_EQUAL) } - "&=" { RET(TK_AMPER_EQUAL) } - "|=" { RET(TK_VBAR_EQUAL) } - "^=" { RET(TK_CARET_EQUAL) } - "@=" { RET(TK_ATEQUAL) } - "->" { RET(TK_RARROW) } - ":=" { RET(TK_COLONEQUAL) } - "..." 
{ RET(TK_ELLIPSIS) } - "<<=" { RET(TK_LEFTSHIFT_EQUAL) } - ">>=" { RET(TK_RIGHTSHIFT_EQUAL) } - "**=" { RET(TK_POW_EQUAL) } - "//=" { RET(TK_DOUBLESLASH_EQUAL) } + <*> ">>" { RET(TK_RIGHTSHIFT) } + <*> "<<" { RET(TK_LEFTSHIFT) } + <*> "**" { RET(TK_POW) } + <*> "//" { RET(TK_FLOOR_DIV) } + <*> "+=" { RET(TK_PLUS_EQUAL) } + <*> "-=" { RET(TK_MIN_EQUAL) } + <*> "*=" { RET(TK_STAR_EQUAL) } + <*> "/=" { RET(TK_SLASH_EQUAL) } + <*> "%=" { RET(TK_PERCENT_EQUAL) } + <*> "&=" { RET(TK_AMPER_EQUAL) } + <*> "|=" { RET(TK_VBAR_EQUAL) } + <*> "^=" { RET(TK_CARET_EQUAL) } + <*> "@=" { RET(TK_ATEQUAL) } + <*> "->" { RET(TK_RARROW) } + <*> ":=" { RET(TK_COLONEQUAL) } + <*> "..." { RET(TK_ELLIPSIS) } + <*> "<<=" { RET(TK_LEFTSHIFT_EQUAL) } + <*> ">>=" { RET(TK_RIGHTSHIFT_EQUAL) } + <*> "**=" { RET(TK_POW_EQUAL) } + <*> "//=" { RET(TK_DOUBLESLASH_EQUAL) } // Relational operators - "==" { RET(TK_EQ) } - "!=" { RET(TK_NE) } - "<" { RET(TK_LT) } - "<=" { RET(TK_LE) } - ">" { RET(TK_GT) } - ">=" { RET(TK_GE) } + <*> "==" { RET(TK_EQ) } + <*> "!=" { RET(TK_NE) } + <*> "<" { RET(TK_LT) } + <*> "<=" { RET(TK_LE) } + <*> ">" { RET(TK_GT) } + <*> ">=" { RET(TK_GE) } // Logical operators - "not" { RET(TK_NOT) } - "and" { RET(TK_AND) } - "or" { RET(TK_OR) } - "is" whitespace "not" whitespace { RET(TK_IS_NOT) } - "is" whitespace? "\\" newline whitespace? "not" whitespace { RET(TK_IS_NOT) } - "not" whitespace "in" "\\" newline { RET(TK_NOT_IN) } - "not" whitespace "in" whitespace { RET(TK_NOT_IN) } - "not" whitespace "in" newline { RET(TK_NOT_IN) } - "not" whitespace? "\\" newline whitespace? "in" "\\" newline { RET(TK_NOT_IN) } - "not" whitespace? "\\" newline whitespace? "in" whitespace { RET(TK_NOT_IN) } + <*> "not" { RET(TK_NOT) } + <*> "and" { RET(TK_AND) } + <*> "or" { RET(TK_OR) } + <*> "is" whitespace "not" whitespace { RET(TK_IS_NOT) } + <*> "is" whitespace? "\\" newline whitespace? "not" whitespace { RET(TK_IS_NOT) } + <*> "not" whitespace "in" "\\" newline { RET(TK_NOT_IN) } + <*> "not" whitespace "in" whitespace { RET(TK_NOT_IN) } + <*> "not" whitespace "in" newline { RET(TK_NOT_IN) } + <*> "not" whitespace? "\\" newline whitespace? "in" "\\" newline { RET(TK_NOT_IN) } + <*> "not" whitespace? "\\" newline whitespace? 
"in" whitespace { RET(TK_NOT_IN) } // True/False - "True" { RET(TK_TRUE) } - "False" { RET(TK_FALSE) } + <*> "True" { RET(TK_TRUE) } + <*> "False" { RET(TK_FALSE) } - real { + <*> real { yylval.f = std::strtod(remove_underscore(token()).c_str(), 0); RET(TK_REAL) } - integer { + <*> integer { BigInt::BigInt n; token_loc(loc); lex_int(al, tok, cur, n, loc); yylval.n = n.n; RET(TK_INTEGER) } - imag_number { + <*> imag_number { yylval.f = std::strtod(remove_underscore(token()).c_str(), 0); RET(TK_IMAG_NUM) } - type_ignore { + <*> type_ignore { if (last_token == yytokentype::TK_COLON && !parenlevel) { indent = true; } @@ -569,7 +633,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost return yytokentype::TK_TYPE_IGNORE; } - type_comment { + <*> type_comment { if (last_token == yytokentype::TK_COLON && !parenlevel) { indent = true; } @@ -578,7 +642,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost return yytokentype::TK_TYPE_COMMENT; } - comment { + <*> comment { if(last_token == -1) { RET(TK_COMMENT); } if(parenlevel) { continue; } line_num++; cur_line=cur; @@ -599,16 +663,12 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } //docstring { RET(TK_DOCSTRING) } - string1 { token_str(yylval.string); RET(TK_STRING) } - string2 { token_str(yylval.string); RET(TK_STRING) } - string3 { token_str3(yylval.string); RET(TK_STRING) } - string4 { token_str3(yylval.string); RET(TK_STRING) } - - fstring_start { token(yylval.string); RET(TK_FSTRING_START) } - fstring_middle { token_str(yylval.string); RET(TK_FSTRING_MIDDLE) } - fstring_end { token_str(yylval.string); RET(TK_FSTRING_END) } + <*> string1 { token_str(yylval.string); RET(TK_STRING) } + <*> string2 { token_str(yylval.string); RET(TK_STRING) } + <*> string3 { token_str3(yylval.string); RET(TK_STRING) } + <*> string4 { token_str3(yylval.string); RET(TK_STRING) } - name { token(yylval.string); RET(TK_NAME) } + <*> name { token(yylval.string); RET(TK_NAME) } */ } } @@ -804,6 +864,7 @@ Result> tokens(Allocator &al, const std::string &input, t.set_string(input, 0); std::vector tst; int token = yytokentype::END_OF_FILE + 1; // Something different from EOF + t.cond = yycinit; while (token != yytokentype::END_OF_FILE) { YYSTYPE y; Location l; @@ -848,7 +909,8 @@ std::string pickle_token(int token, const YYSTYPE &yystype) t += " " + std::to_string(yystype.f); } else if (token == yytokentype::TK_IMAG_NUM) { t += " " + std::to_string(yystype.f) + "j"; - } else if (token == yytokentype::TK_STRING) { + } else if (token == yytokentype::TK_STRING || token == yytokentype::TK_FSTRING_START + || token == yytokentype::TK_FSTRING_MIDDLE || token == yytokentype::TK_FSTRING_END) { t = t + " " + "\"" + str_escape_c(yystype.string.str()) + "\""; } else if (token == yytokentype::TK_TYPE_COMMENT) { t = t + " " + "\"" + yystype.string.str() + "\""; diff --git a/src/lpython/semantics/python_ast_to_asr.cpp b/src/lpython/semantics/python_ast_to_asr.cpp index f3edd90c3c..f238eb0c41 100644 --- a/src/lpython/semantics/python_ast_to_asr.cpp +++ b/src/lpython/semantics/python_ast_to_asr.cpp @@ -3366,6 +3366,17 @@ class CommonVisitor : public AST::BaseVisitor { tmp = intrinsic_node_handler.handle_intrinsic_str(al, call_args, x.base.base.loc); } + void visit_JoinedStr(const AST::JoinedStr_t &x){ + this->visit_expr(*x.m_values[0]); + ASR::expr_t *left = ASRUtils::EXPR(tmp); + for(size_t i = 1; i < x.n_values; i++){ + this->visit_expr(*x.m_values[i]); + ASR::expr_t *right = 
ASRUtils::EXPR(tmp); + make_BinOp_helper(left, right, ASR::binopType::Add, x.base.base.loc); + left = ASRUtils::EXPR(tmp); + } + } + void visit_BinOp(const AST::BinOp_t &x) { this->visit_expr(*x.m_left); ASR::expr_t *left = ASRUtils::EXPR(tmp); diff --git a/tests/fstring1.py b/tests/fstring1.py index 7caf9c925b..697e400ca8 100644 --- a/tests/fstring1.py +++ b/tests/fstring1.py @@ -5,4 +5,14 @@ print(f"Say something! {a}") # string inside fstring print(f"do some calculation: {b*7+c}") # expression inside fstring print("9..." f"{b}...{c}") # concatenation of normal string with fstring -print(f"{b} " f"{c}") # concatenation of fstrings \ No newline at end of file +print(f"{b}...{c}..." "12" ) # concatenation of normal string with fstring +print(f"{b} " f"{c}") # concatenation of fstrings +print(fR"Hello! {a}") +print(rF""" + Something fun! {a*2} +""") +print(Fr''' + Something fun! {c} +''') +print(r"LEFT " f"RIGHT") +print(f"THIS " r"THAT") \ No newline at end of file diff --git a/tests/reference/ast_new-match_stmt1-9e84d24.json b/tests/reference/ast_new-match_stmt1-9e84d24.json index 6e096f25ea..dae186aea1 100644 --- a/tests/reference/ast_new-match_stmt1-9e84d24.json +++ b/tests/reference/ast_new-match_stmt1-9e84d24.json @@ -6,7 +6,7 @@ "outfile": null, "outfile_hash": null, "stdout": "ast_new-match_stmt1-9e84d24.stdout", - "stdout_hash": "8e43bb4b05ebab0df9520dac9908702af0d2e7f63ddb42bf93baf0a0", + "stdout_hash": "db40dd3e23c2d07d8348a6c09d60b4feb44a9decbb4b8d4dc059227b", "stderr": null, "stderr_hash": null, "returncode": 0 diff --git a/tests/reference/ast_new-match_stmt1-9e84d24.stdout b/tests/reference/ast_new-match_stmt1-9e84d24.stdout index f368aa9399..6b0258f4a4 100644 --- a/tests/reference/ast_new-match_stmt1-9e84d24.stdout +++ b/tests/reference/ast_new-match_stmt1-9e84d24.stdout @@ -1701,8 +1701,12 @@ () ) (FormattedValue - (Name - cls.__name__ + (Attribute + (Name + cls + Load + ) + __name__ Load ) -1 diff --git a/tests/reference/ast_new-string1-96b90b3.json b/tests/reference/ast_new-string1-96b90b3.json index 49a78caf38..0e76f28484 100644 --- a/tests/reference/ast_new-string1-96b90b3.json +++ b/tests/reference/ast_new-string1-96b90b3.json @@ -6,7 +6,7 @@ "outfile": null, "outfile_hash": null, "stdout": "ast_new-string1-96b90b3.stdout", - "stdout_hash": "51806e5893017a386c0ce7a4f3260c7605cabd5ea4e6a16aa300d8c2", + "stdout_hash": "e501ed60d013bcbd95182958e0c49f4a15083c2601230259958c25b6", "stderr": null, "stderr_hash": null, "returncode": 0 diff --git a/tests/reference/ast_new-string1-96b90b3.stdout b/tests/reference/ast_new-string1-96b90b3.stdout index cc011f29f2..3eaafeaceb 100644 --- a/tests/reference/ast_new-string1-96b90b3.stdout +++ b/tests/reference/ast_new-string1-96b90b3.stdout @@ -5,37 +5,41 @@ "Hello, " () ) - (FormattedValue - (Name - first_name - Load - ) - -1 - () - ) - (ConstantStr - " " - () - ) - (FormattedValue - (Name - last_name - Load + (JoinedStr + [(JoinedStr + [(FormattedValue + (Name + first_name + Load + ) + -1 + () + ) + (ConstantStr + " " + () + ) + (FormattedValue + (Name + last_name + Load + ) + -1 + () + )] ) - -1 - () - ) - (ConstantStr - ". You are " - () - ) - (FormattedValue - (Name - age - Load + (ConstantStr + ". You are " + () ) - -1 - () + (FormattedValue + (Name + age + Load + ) + -1 + () + )] ) (ConstantStr " years old." 
@@ -50,25 +54,31 @@ Load ) [(JoinedStr - [(FormattedValue - (Name - __file__ - Load - ) - -1 - () - ) - (ConstantStr - " executed in " + [(ConstantStr + "" () ) - (FormattedValue - (Name - elapsed - Load + (JoinedStr + [(FormattedValue + (Name + __file__ + Load + ) + -1 + () ) - -1 - () + (ConstantStr + " executed in " + () + ) + (FormattedValue + (Name + elapsed + Load + ) + -1 + () + )] ) (ConstantStr " seconds." @@ -145,6 +155,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -161,6 +175,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -177,6 +195,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -193,6 +215,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -209,6 +235,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -245,6 +275,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -261,6 +295,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -325,6 +363,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -334,24 +376,30 @@ "Text" () ) - (FormattedValue - (Name - a - Load + (JoinedStr + [(FormattedValue + (Name + a + Load + ) + -1 + () ) - -1 - () + (ConstantStr + ", " + () + ) + (FormattedValue + (Name + b + Load + ) + -1 + () + )] ) (ConstantStr - ", " - () - ) - (FormattedValue - (Name - b - Load - ) - -1 + "" () ) (ConstantStr @@ -365,6 +413,10 @@ ) -1 () + ) + (ConstantStr + "" + () )] ) ) @@ -374,24 +426,30 @@ "Text" () ) - (FormattedValue - (Name - a - Load + (JoinedStr + [(FormattedValue + (Name + a + Load + ) + -1 + () ) - -1 - () + (ConstantStr + ", " + () + ) + (FormattedValue + (Name + b + Load + ) + -1 + () + )] ) (ConstantStr - ", " - () - ) - (FormattedValue - (Name - b - Load - ) - -1 + "" () ) (ConstantStr @@ -406,6 +464,10 @@ "Text" () ) + (ConstantStr + "" + () + ) (FormattedValue (Name b @@ -434,6 +496,10 @@ -1 () ) + (ConstantStr + "" + () + ) (ConstantStr "Text" () @@ -458,6 +524,10 @@ "Text" () ) + (ConstantStr + "" + () + ) (FormattedValue (Name a @@ -497,11 +567,9 @@ ) ) (Expr - (JoinedStr - [(ConstantStr - "\\n" - () - )] + (ConstantStr + "\\n" + () ) ) (Expr From 3333e20e92555bde337075184bab3c1a2fd951d7 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Sat, 2 Mar 2024 12:57:05 +0530 Subject: [PATCH 3/5] Update build flags in CI --- build0.sh | 2 +- build0_win.xsh | 2 +- ci/build.xsh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/build0.sh b/build0.sh index 5f96fd08f3..35af05fc6d 100755 --- a/build0.sh +++ b/build0.sh @@ -15,7 +15,7 @@ python src/libasr/asdl_cpp.py src/libasr/ASR.asdl src/libasr/asr.h python src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -(cd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp -c) +(cd src/lpython/parser && re2c -W -b -c tokenizer.re -o tokenizer.cpp) (cd src/lpython/parser && bison -Wall -d -r all parser.yy) python -c "file = 'src/lpython/parser/parser.tab.cc' diff --git a/build0_win.xsh b/build0_win.xsh index 8530d835ba..fef935b99d 100644 --- a/build0_win.xsh +++ b/build0_win.xsh @@ -14,5 +14,5 @@ python src/libasr/asdl_cpp.py src/libasr/ASR.asdl src/libasr/asr.h python src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -pushd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp && popd +pushd src/lpython/parser && re2c -W -b -c tokenizer.re -o tokenizer.cpp && popd pushd src/lpython/parser && bison -Wall -d -r all parser.yy && popd diff --git a/ci/build.xsh b/ci/build.xsh index fb2d9233c2..ff8cd13c61 100755 --- a/ci/build.xsh +++ b/ci/build.xsh @@ -36,7 +36,7 @@ python grammar/asdl_py.py python 
src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -pushd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp && popd +pushd src/lpython/parser && re2c -W -b -c tokenizer.re -o tokenizer.cpp && popd pushd src/lpython/parser && bison -Wall -d -r all parser.yy && popd $lpython_version=$(cat version).strip() From 1215c66943bd8c504417a607cf3768d7e88d2e71 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Sat, 2 Mar 2024 13:20:44 +0530 Subject: [PATCH 4/5] Fix CI: changing conditions directive to type directive re2c --- src/lpython/parser/tokenizer.re | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lpython/parser/tokenizer.re b/src/lpython/parser/tokenizer.re index 8688f26b14..eb9c07ffd4 100644 --- a/src/lpython/parser/tokenizer.re +++ b/src/lpython/parser/tokenizer.re @@ -171,7 +171,7 @@ void Tokenizer::record_paren(Location &loc, char c) { #define KW(x) token(yylval.string); RET(KW_##x); #define RET(x) token_loc(loc); last_token=yytokentype::x; return yytokentype::x; -/*!conditions:re2c*/ +/*!types:re2c*/ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnostics &/*diagnostics*/) { if(dedent == 1) { From 50509f05967717c69b9e2c6b53ce0f618a6e7b49 Mon Sep 17 00:00:00 2001 From: Vaibhav Pathak Date: Sun, 3 Mar 2024 18:12:40 +0530 Subject: [PATCH 5/5] Re-implement fstring efficiently using functions and blocks, remove -c flags --- build0.sh | 2 +- build0_win.xsh | 2 +- ci/build.xsh | 2 +- src/lpython/parser/tokenizer.h | 3 +- src/lpython/parser/tokenizer.re | 395 +++++++------ tests/fstring1.py | 1 + tests/reference/asr-fstring1-d96758f.json | 13 + tests/reference/asr-fstring1-d96758f.stdout | 522 ++++++++++++++++++ tests/reference/ast-fstring1-8d426c9.json | 13 + tests/reference/ast-fstring1-8d426c9.stdout | 445 +++++++++++++++ tests/reference/runtime-fstring1-20a9192.json | 13 + .../reference/runtime-fstring1-20a9192.stdout | 16 + tests/tests.toml | 6 + 13 files changed, 1261 insertions(+), 172 deletions(-) create mode 100644 tests/reference/asr-fstring1-d96758f.json create mode 100644 tests/reference/asr-fstring1-d96758f.stdout create mode 100644 tests/reference/ast-fstring1-8d426c9.json create mode 100644 tests/reference/ast-fstring1-8d426c9.stdout create mode 100644 tests/reference/runtime-fstring1-20a9192.json create mode 100644 tests/reference/runtime-fstring1-20a9192.stdout diff --git a/build0.sh b/build0.sh index 35af05fc6d..7218d748b7 100755 --- a/build0.sh +++ b/build0.sh @@ -15,7 +15,7 @@ python src/libasr/asdl_cpp.py src/libasr/ASR.asdl src/libasr/asr.h python src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -(cd src/lpython/parser && re2c -W -b -c tokenizer.re -o tokenizer.cpp) +(cd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp) (cd src/lpython/parser && bison -Wall -d -r all parser.yy) python -c "file = 'src/lpython/parser/parser.tab.cc' diff --git a/build0_win.xsh b/build0_win.xsh index fef935b99d..8530d835ba 100644 --- a/build0_win.xsh +++ b/build0_win.xsh @@ -14,5 +14,5 @@ python src/libasr/asdl_cpp.py src/libasr/ASR.asdl src/libasr/asr.h python src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -pushd src/lpython/parser && re2c -W -b -c tokenizer.re -o tokenizer.cpp && popd +pushd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp && popd pushd src/lpython/parser && bison -Wall -d -r all parser.yy && popd diff --git a/ci/build.xsh b/ci/build.xsh index ff8cd13c61..fb2d9233c2 100755 --- a/ci/build.xsh +++ 
b/ci/build.xsh @@ -36,7 +36,7 @@ python grammar/asdl_py.py python src/libasr/wasm_instructions_visitor.py # Generate the tokenizer and parser -pushd src/lpython/parser && re2c -W -b -c tokenizer.re -o tokenizer.cpp && popd +pushd src/lpython/parser && re2c -W -b tokenizer.re -o tokenizer.cpp && popd pushd src/lpython/parser && bison -Wall -d -r all parser.yy && popd $lpython_version=$(cat version).strip() diff --git a/src/lpython/parser/tokenizer.h b/src/lpython/parser/tokenizer.h index 184e277104..e42d4c39db 100644 --- a/src/lpython/parser/tokenizer.h +++ b/src/lpython/parser/tokenizer.h @@ -20,7 +20,7 @@ class Tokenizer uint32_t prev_loc; // The previous file ended at this location. int last_token=-1; - int cond; // variable to store re2c conditions + int fstring_flag = 0; bool indent = false; // Next line is expected to be indented int dedent = 0; // Allowed values: 0, 1, 2, see the code below the meaning of this state variable bool colon_actual_last_token = false; // If the actual last token was a colon @@ -79,6 +79,7 @@ class Tokenizer void lex_match_or_case(Location &loc, unsigned char *cur, bool &is_match_or_case_keyword); + int lex_fstring(Location &loc, unsigned char * &cur, YYSTYPE &yylval); }; std::string token2text(const int token); diff --git a/src/lpython/parser/tokenizer.re b/src/lpython/parser/tokenizer.re index eb9c07ffd4..a6445c2f5e 100644 --- a/src/lpython/parser/tokenizer.re +++ b/src/lpython/parser/tokenizer.re @@ -171,7 +171,6 @@ void Tokenizer::record_paren(Location &loc, char c) { #define KW(x) token(yylval.string); RET(KW_##x); #define RET(x) token_loc(loc); last_token=yytokentype::x; return yytokentype::x; -/*!types:re2c*/ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnostics &/*diagnostics*/) { if(dedent == 1) { @@ -263,10 +262,6 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost // re2c:define:YYCTXMARKER = ctxmar; re2c:yyfill:enable = 0; re2c:define:YYCTYPE = "unsigned char"; - re2c:define:YYGETCONDITION = "cond"; - re2c:define:YYGETCONDITION:naked = 1; - re2c:define:YYSETCONDITION = "cond = @@;"; - re2c:define:YYSETCONDITION:naked = 1; end = "\x00"; whitespace = [ \t\v]+; @@ -296,73 +291,43 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost | [^'\x00\\] )* "'''"; - // fstring1 -> "'" - ([fF] | [fF][rR] | [rR][fF]) '"' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{' => fstring1 { - token(yylval.string); RET(TK_FSTRING_START) - } - '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '{' { - token_str(yylval.string); RET(TK_FSTRING_MIDDLE) - } - '}' ('\\'[^\x00{}] | [^"\x00\n\\{}])* '"' => init { - token(yylval.string); RET(TK_FSTRING_END) - } - // fstring2 -> "'" - ([fF] | [fF][rR] | [rR][fF]) "'" ("\\"[^\x00{}] | [^'\x00\n\\{}])* '{' => fstring2 { - token(yylval.string); RET(TK_FSTRING_START) - } - '}' ("\\"[^\x00{}] | [^'\x00\n\\{}])* '{' { - token_str(yylval.string); RET(TK_FSTRING_MIDDLE) - } - '}' ("\\"[^\x00{}] | [^'\x00\n\\{}])* "'" => init { - token(yylval.string); RET(TK_FSTRING_END) - } - // fstring3 -> '"""' - ([fF] | [fF][rR] | [rR][fF]) '"""' ( '\\'[^\x00{}] - | ('"' | '"' '\\'+ '"' | '"' '\\'+) [^"\x00\\{}] - | ('""' | '""' '\\'+) [^"\x00\\{}] - | [^"\x00\\{}] )* '{' => fstring3 { - token(yylval.string); RET(TK_FSTRING_START) - } - '}' ( '\\'[^\x00{}] + fstring_format1 = ('\\'[^\x00{}] | [^"\x00\n\\{}])*; + fstring_format2 = ("\\"[^\x00{}] | [^'\x00\n\\{}])*; + fstring_format3 = ( '\\'[^\x00{}] | ('"' | '"' '\\'+ '"' | '"' '\\'+) [^"\x00\\{}] | ('""' | '""' '\\'+) [^"\x00\\{}] 
- | [^"\x00\\{}] )* '{' { - token_str(yylval.string); RET(TK_FSTRING_MIDDLE) - } - '}' ( '\\'[^\x00{}] - | ('"' | '"' '\\'+ '"' | '"' '\\'+) [^"\x00\\{}] - | ('""' | '""' '\\'+) [^"\x00\\{}] - | [^"\x00\\{}] )* '"""' => init { - token(yylval.string); RET(TK_FSTRING_END) - } - // fstring4 -> "'''" - ([fF] | [fF][rR] | [rR][fF]) "'''" ( "\\"[^\x00{}] + | [^"\x00\\{}] )*; + fstring_format4 = ( "\\"[^\x00{}] | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] | ("''" | "''" "\\"+) [^'\x00\\{}] - | [^'\x00\\{}] )* '{' => fstring4 { - token(yylval.string); RET(TK_FSTRING_START) + | [^'\x00\\{}] )*; + + fstring_prefix = ([fF] | [fF][rR] | [rR][fF]); + fstring_start1 = fstring_prefix '"' fstring_format1 '{'; + fstring_start2 = fstring_prefix "'" fstring_format2 '{'; + fstring_start3 = fstring_prefix '"""' fstring_format3 '{'; + fstring_start4 = fstring_prefix "'''" fstring_format4 '{'; + + fstring_start1 { + fstring_flag = 1; token(yylval.string); RET(TK_FSTRING_START) } - '}' ( "\\"[^\x00{}] - | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] - | ("''" | "''" "\\"+) [^'\x00\\{}] - | [^'\x00\\{}] )* '{' { - token_str(yylval.string); RET(TK_FSTRING_MIDDLE) + fstring_start2 { + fstring_flag = 2; token(yylval.string); RET(TK_FSTRING_START) } - '}' ( "\\"[^\x00{}] - | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] - | ("''" | "''" "\\"+) [^'\x00\\{}] - | [^'\x00\\{}] )* "'''" => init { - token(yylval.string); RET(TK_FSTRING_END) + fstring_start3 { + fstring_flag = 3; token(yylval.string); RET(TK_FSTRING_START) } - - + fstring_start4 { + fstring_flag = 4; token(yylval.string); RET(TK_FSTRING_START) + } + type_ignore = "#" whitespace? "type:" whitespace? "ignore" [^\n\x00]*; type_comment = "#" whitespace? "type:" whitespace? [^\n\x00]*; comment = "#" [^\n\x00]*; // docstring = newline whitespace? string1 | string2; ws_comment = whitespace? comment? 
newline; - <*> * { token_loc(loc); + * { token_loc(loc); std::string t = token(); throw parser_local::TokenizerError(diag::Diagnostic( "Token '" + t + "' is not recognized", @@ -372,7 +337,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost ); } - <*> end { + end { token_loc(loc); if(parenlevel) { throw parser_local::TokenizerError( @@ -381,7 +346,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(END_OF_FILE); } - <*> whitespace { + whitespace { if(cur[0] == '#') { continue; } if(last_token == yytokentype::TK_NEWLINE && cur[0] == '\n') { continue; @@ -432,40 +397,40 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } // Keywords - <*> "as" { KW(AS) } - <*> "assert" { KW(ASSERT) } - <*> "async" { KW(ASYNC) } - <*> "await" { KW(AWAIT) } - <*> "break" { KW(BREAK) } - <*> "class" { KW(CLASS) } - <*> "continue" { KW(CONTINUE) } - <*> "def" { KW(DEF) } - <*> "del" { KW(DEL) } - <*> "elif" { KW(ELIF) } - <*> "else" { KW(ELSE) } - <*> "except" { KW(EXCEPT) } - <*> "finally" { KW(FINALLY) } - <*> "for" { KW(FOR) } - <*> "from" { KW(FROM) } - <*> "global" { KW(GLOBAL) } - <*> "if" { KW(IF) } - <*> "import" { KW(IMPORT) } - <*> "in" { KW(IN) } - <*> "is" { KW(IS) } - <*> "lambda" { KW(LAMBDA) } - <*> "None" { KW(NONE) } - <*> "nonlocal" { KW(NONLOCAL) } - <*> "pass" { KW(PASS) } - <*> "raise" { KW(RAISE) } - <*> "return" { KW(RETURN) } - <*> "try" { KW(TRY) } - <*> "while" { KW(WHILE) } - <*> "with" { KW(WITH) } - <*> "yield" { KW(YIELD) } - <*> "yield" whitespace "from" whitespace { KW(YIELD_FROM) } + "as" { KW(AS) } + "assert" { KW(ASSERT) } + "async" { KW(ASYNC) } + "await" { KW(AWAIT) } + "break" { KW(BREAK) } + "class" { KW(CLASS) } + "continue" { KW(CONTINUE) } + "def" { KW(DEF) } + "del" { KW(DEL) } + "elif" { KW(ELIF) } + "else" { KW(ELSE) } + "except" { KW(EXCEPT) } + "finally" { KW(FINALLY) } + "for" { KW(FOR) } + "from" { KW(FROM) } + "global" { KW(GLOBAL) } + "if" { KW(IF) } + "import" { KW(IMPORT) } + "in" { KW(IN) } + "is" { KW(IS) } + "lambda" { KW(LAMBDA) } + "None" { KW(NONE) } + "nonlocal" { KW(NONLOCAL) } + "pass" { KW(PASS) } + "raise" { KW(RAISE) } + "return" { KW(RETURN) } + "try" { KW(TRY) } + "while" { KW(WHILE) } + "with" { KW(WITH) } + "yield" { KW(YIELD) } + "yield" whitespace "from" whitespace { KW(YIELD_FROM) } // Soft Keywords - <*> "match" / [^:\n\x00] { + "match" / [^:\n\x00] { if ((last_token == -1 || last_token == yytokentype::TK_DEDENT || last_token == yytokentype::TK_INDENT @@ -484,7 +449,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(TK_NAME); } } - <*> "case" / [^:\n\x00] { + "case" / [^:\n\x00] { if ((last_token == yytokentype::TK_INDENT || last_token == yytokentype::TK_DEDENT) && parenlevel == 0) { @@ -502,7 +467,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } } - <*> [rR][bB] | [bB][rR] + [rR][bB] | [bB][rR] | [fF][rR] | [rR][fF] | [rR] | [fF] | [bB] | [uU] { @@ -516,7 +481,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } // Tokens - <*> newline { + newline { if(parenlevel) { continue; } if(cur[0] == '#') { RET(TK_NEWLINE); } if (last_token == yytokentype::TK_COLON @@ -531,100 +496,106 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost RET(TK_NEWLINE); } - <*> "\\" newline { continue; } + "\\" newline { continue; } // Single character symbols - <*> "(" { token_loc(loc); record_paren(loc, '('); 
RET(TK_LPAREN) } - <*> "[" { token_loc(loc); record_paren(loc, '['); RET(TK_LBRACKET) } - <*> "{" { token_loc(loc); record_paren(loc, '{'); RET(TK_LBRACE) } - <*> ")" { token_loc(loc); record_paren(loc, ')'); RET(TK_RPAREN) } - <*> "]" { token_loc(loc); record_paren(loc, ']'); RET(TK_RBRACKET) } - <*> "}" { token_loc(loc); record_paren(loc, '}'); RET(TK_RBRACE) } - <*> "+" { RET(TK_PLUS) } - <*> "-" { RET(TK_MINUS) } - <*> "=" { RET(TK_EQUAL) } - <*> ":" { + "(" { token_loc(loc); record_paren(loc, '('); RET(TK_LPAREN) } + "[" { token_loc(loc); record_paren(loc, '['); RET(TK_LBRACKET) } + "{" { token_loc(loc); record_paren(loc, '{'); RET(TK_LBRACE) } + ")" { token_loc(loc); record_paren(loc, ')'); RET(TK_RPAREN) } + "]" { token_loc(loc); record_paren(loc, ']'); RET(TK_RBRACKET) } + "}" { + if(fstring_flag >= 1){ + return lex_fstring(loc, cur, yylval); + }else{ + token_loc(loc); record_paren(loc, '}'); RET(TK_RBRACE) + } + } + "+" { RET(TK_PLUS) } + "-" { RET(TK_MINUS) } + "=" { RET(TK_EQUAL) } + ":" { if(cur[0] == '\n' && !parenlevel){ colon_actual_last_token = true; } RET(TK_COLON); } - <*> ";" { RET(TK_SEMICOLON) } - <*> "/" { RET(TK_SLASH) } - <*> "%" { RET(TK_PERCENT) } - <*> "," { RET(TK_COMMA) } - <*> "*" { RET(TK_STAR) } - <*> "|" { RET(TK_VBAR) } - <*> "&" { RET(TK_AMPERSAND) } - <*> "." { RET(TK_DOT) } - <*> "~" { RET(TK_TILDE) } - <*> "^" { RET(TK_CARET) } - <*> "@" { RET(TK_AT) } + ";" { RET(TK_SEMICOLON) } + "/" { RET(TK_SLASH) } + "%" { RET(TK_PERCENT) } + "," { RET(TK_COMMA) } + "*" { RET(TK_STAR) } + "|" { RET(TK_VBAR) } + "&" { RET(TK_AMPERSAND) } + "." { RET(TK_DOT) } + "~" { RET(TK_TILDE) } + "^" { RET(TK_CARET) } + "@" { RET(TK_AT) } // Multiple character symbols - <*> ">>" { RET(TK_RIGHTSHIFT) } - <*> "<<" { RET(TK_LEFTSHIFT) } - <*> "**" { RET(TK_POW) } - <*> "//" { RET(TK_FLOOR_DIV) } - <*> "+=" { RET(TK_PLUS_EQUAL) } - <*> "-=" { RET(TK_MIN_EQUAL) } - <*> "*=" { RET(TK_STAR_EQUAL) } - <*> "/=" { RET(TK_SLASH_EQUAL) } - <*> "%=" { RET(TK_PERCENT_EQUAL) } - <*> "&=" { RET(TK_AMPER_EQUAL) } - <*> "|=" { RET(TK_VBAR_EQUAL) } - <*> "^=" { RET(TK_CARET_EQUAL) } - <*> "@=" { RET(TK_ATEQUAL) } - <*> "->" { RET(TK_RARROW) } - <*> ":=" { RET(TK_COLONEQUAL) } - <*> "..." { RET(TK_ELLIPSIS) } - <*> "<<=" { RET(TK_LEFTSHIFT_EQUAL) } - <*> ">>=" { RET(TK_RIGHTSHIFT_EQUAL) } - <*> "**=" { RET(TK_POW_EQUAL) } - <*> "//=" { RET(TK_DOUBLESLASH_EQUAL) } + ">>" { RET(TK_RIGHTSHIFT) } + "<<" { RET(TK_LEFTSHIFT) } + "**" { RET(TK_POW) } + "//" { RET(TK_FLOOR_DIV) } + "+=" { RET(TK_PLUS_EQUAL) } + "-=" { RET(TK_MIN_EQUAL) } + "*=" { RET(TK_STAR_EQUAL) } + "/=" { RET(TK_SLASH_EQUAL) } + "%=" { RET(TK_PERCENT_EQUAL) } + "&=" { RET(TK_AMPER_EQUAL) } + "|=" { RET(TK_VBAR_EQUAL) } + "^=" { RET(TK_CARET_EQUAL) } + "@=" { RET(TK_ATEQUAL) } + "->" { RET(TK_RARROW) } + ":=" { RET(TK_COLONEQUAL) } + "..." { RET(TK_ELLIPSIS) } + "<<=" { RET(TK_LEFTSHIFT_EQUAL) } + ">>=" { RET(TK_RIGHTSHIFT_EQUAL) } + "**=" { RET(TK_POW_EQUAL) } + "//=" { RET(TK_DOUBLESLASH_EQUAL) } // Relational operators - <*> "==" { RET(TK_EQ) } - <*> "!=" { RET(TK_NE) } - <*> "<" { RET(TK_LT) } - <*> "<=" { RET(TK_LE) } - <*> ">" { RET(TK_GT) } - <*> ">=" { RET(TK_GE) } + "==" { RET(TK_EQ) } + "!=" { RET(TK_NE) } + "<" { RET(TK_LT) } + "<=" { RET(TK_LE) } + ">" { RET(TK_GT) } + ">=" { RET(TK_GE) } // Logical operators - <*> "not" { RET(TK_NOT) } - <*> "and" { RET(TK_AND) } - <*> "or" { RET(TK_OR) } - <*> "is" whitespace "not" whitespace { RET(TK_IS_NOT) } - <*> "is" whitespace? "\\" newline whitespace? 
"not" whitespace { RET(TK_IS_NOT) } - <*> "not" whitespace "in" "\\" newline { RET(TK_NOT_IN) } - <*> "not" whitespace "in" whitespace { RET(TK_NOT_IN) } - <*> "not" whitespace "in" newline { RET(TK_NOT_IN) } - <*> "not" whitespace? "\\" newline whitespace? "in" "\\" newline { RET(TK_NOT_IN) } - <*> "not" whitespace? "\\" newline whitespace? "in" whitespace { RET(TK_NOT_IN) } + "not" { RET(TK_NOT) } + "and" { RET(TK_AND) } + "or" { RET(TK_OR) } + "is" whitespace "not" whitespace { RET(TK_IS_NOT) } + "is" whitespace? "\\" newline whitespace? "not" whitespace { RET(TK_IS_NOT) } + "not" whitespace "in" "\\" newline { RET(TK_NOT_IN) } + "not" whitespace "in" whitespace { RET(TK_NOT_IN) } + "not" whitespace "in" newline { RET(TK_NOT_IN) } + "not" whitespace? "\\" newline whitespace? "in" "\\" newline { RET(TK_NOT_IN) } + "not" whitespace? "\\" newline whitespace? "in" whitespace { RET(TK_NOT_IN) } // True/False - <*> "True" { RET(TK_TRUE) } - <*> "False" { RET(TK_FALSE) } + "True" { RET(TK_TRUE) } + "False" { RET(TK_FALSE) } - <*> real { + real { yylval.f = std::strtod(remove_underscore(token()).c_str(), 0); RET(TK_REAL) } - <*> integer { + integer { BigInt::BigInt n; token_loc(loc); lex_int(al, tok, cur, n, loc); yylval.n = n.n; RET(TK_INTEGER) } - <*> imag_number { + imag_number { yylval.f = std::strtod(remove_underscore(token()).c_str(), 0); RET(TK_IMAG_NUM) } - <*> type_ignore { + type_ignore { if (last_token == yytokentype::TK_COLON && !parenlevel) { indent = true; } @@ -633,7 +604,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost return yytokentype::TK_TYPE_IGNORE; } - <*> type_comment { + type_comment { if (last_token == yytokentype::TK_COLON && !parenlevel) { indent = true; } @@ -642,7 +613,7 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost return yytokentype::TK_TYPE_COMMENT; } - <*> comment { + comment { if(last_token == -1) { RET(TK_COMMENT); } if(parenlevel) { continue; } line_num++; cur_line=cur; @@ -663,16 +634,105 @@ int Tokenizer::lex(Allocator &al, YYSTYPE &yylval, Location &loc, diag::Diagnost } //docstring { RET(TK_DOCSTRING) } - <*> string1 { token_str(yylval.string); RET(TK_STRING) } - <*> string2 { token_str(yylval.string); RET(TK_STRING) } - <*> string3 { token_str3(yylval.string); RET(TK_STRING) } - <*> string4 { token_str3(yylval.string); RET(TK_STRING) } + string1 { token_str(yylval.string); RET(TK_STRING) } + string2 { token_str(yylval.string); RET(TK_STRING) } + string3 { token_str3(yylval.string); RET(TK_STRING) } + string4 { token_str3(yylval.string); RET(TK_STRING) } - <*> name { token(yylval.string); RET(TK_NAME) } + name { token(yylval.string); RET(TK_NAME) } */ } } +int Tokenizer::lex_fstring(Location &loc, unsigned char * &cur, YYSTYPE &yylval){ + unsigned char *mar; + + /*!re2c + re2c:define:YYCURSOR = cur; + re2c:define:YYMARKER = mar; + re2c:yyfill:enable = 0; + re2c:define:YYCTYPE = "unsigned char"; + + //fstring_format1 = ('\\'[^\x00{}] | [^"\x00\n\\{}])*; + //fstring_format2 = ( "\\"[^\x00{}] + // | ("'" | "'" "\\"+ "'" | "'" "\\"+) [^'\x00\\{}] + // | ("''" | "''" "\\"+) [^'\x00\\{}] + // | [^'\x00\\{}] )*; + */ + switch(fstring_flag){ + case 1: goto fstring1; + case 2: goto fstring2; + case 3: goto fstring3; + case 4: goto fstring4; + default: return -1; + } +fstring1: + /*!re2c + fstring_middle1 = fstring_format1 "{"; + fstring_end1 = fstring_format1 '"'; + + fstring_middle1 { + token_loc(loc); + token_str(yylval.string); + RET(TK_FSTRING_MIDDLE) + } + fstring_end1 { 
token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) } + * { goto default_rule; } + */ +fstring2: + /*!re2c + fstring_middle2 = fstring_format2 "{"; + fstring_end2 = fstring_format2 "'"; + fstring_middle2 { + token_loc(loc); + token_str(yylval.string); + RET(TK_FSTRING_MIDDLE) + } + fstring_end2 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) } + * { goto default_rule; } + */ +fstring3: + /*!re2c + fstring_middle3 = fstring_format3 "{"; + fstring_end3 = fstring_format3 '"""'; + fstring_middle3 { + token_loc(loc); + token_str(yylval.string); + RET(TK_FSTRING_MIDDLE) + } + fstring_end3 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) } + * { goto default_rule; } + */ +fstring4: + /*!re2c + fstring_middle4 = fstring_format4 "{"; + fstring_end4 = fstring_format4 "'''"; + fstring_middle4 { + token_loc(loc); + token_str(yylval.string); + RET(TK_FSTRING_MIDDLE) + } + fstring_end4 { token_loc(loc); fstring_flag = 0; token(yylval.string); RET(TK_FSTRING_END) } + * { goto default_rule; } + */ +default_rule: + /*!re2c + * { + token_loc(loc); + std::string t = std::string((char *)tok, cur - tok); + throw parser_local::TokenizerError("Token '" + + t + "' is not recognized in `fstring` statement", loc); + } + end { + token_loc(loc); + std::string t = std::string((char *)tok, cur - tok); + throw parser_local::TokenizerError( + "End of file not expected within `fstring` statement: '" + t + + "'", loc); + } + */ +} + void Tokenizer::lex_match_or_case(Location &loc, unsigned char *cur, bool &is_match_or_case_keyword) { for (;;) { @@ -864,7 +924,6 @@ Result> tokens(Allocator &al, const std::string &input, t.set_string(input, 0); std::vector tst; int token = yytokentype::END_OF_FILE + 1; // Something different from EOF - t.cond = yycinit; while (token != yytokentype::END_OF_FILE) { YYSTYPE y; Location l; diff --git a/tests/fstring1.py b/tests/fstring1.py index 697e400ca8..d631cbca69 100644 --- a/tests/fstring1.py +++ b/tests/fstring1.py @@ -14,5 +14,6 @@ print(Fr''' Something fun! {c} ''') +print(F"""Hello World {b} {c}""") print(r"LEFT " f"RIGHT") print(f"THIS " r"THAT") \ No newline at end of file diff --git a/tests/reference/asr-fstring1-d96758f.json b/tests/reference/asr-fstring1-d96758f.json new file mode 100644 index 0000000000..6b8f60edae --- /dev/null +++ b/tests/reference/asr-fstring1-d96758f.json @@ -0,0 +1,13 @@ +{ + "basename": "asr-fstring1-d96758f", + "cmd": "lpython --show-asr --no-color {infile} -o {outfile}", + "infile": "tests/fstring1.py", + "infile_hash": "ddceb6a09b5bc556ae8d2db18908c8f60af3ee96cc971bed1f5f9ae9", + "outfile": null, + "outfile_hash": null, + "stdout": "asr-fstring1-d96758f.stdout", + "stdout_hash": "c19f804850e02097002f4c40c7a990738d19f28aed1486010de3ee2b", + "stderr": null, + "stderr_hash": null, + "returncode": 0 +} \ No newline at end of file diff --git a/tests/reference/asr-fstring1-d96758f.stdout b/tests/reference/asr-fstring1-d96758f.stdout new file mode 100644 index 0000000000..5ff7707e0a --- /dev/null +++ b/tests/reference/asr-fstring1-d96758f.stdout @@ -0,0 +1,522 @@ +(TranslationUnit + (SymbolTable + 1 + { + __main__: + (Module + (SymbolTable + 2 + { + __main__global_stmts: + (Function + (SymbolTable + 3 + { + + }) + __main__global_stmts + (FunctionType + [] + () + Source + Implementation + () + .false. + .false. + .false. + .false. + .false. + [] + .false. 
+ ) + [] + [] + [(Print + [(StringConcat + (StringConcat + (StringConstant + "" + (Character 1 0 ()) + ) + (StringConcat + (StringConcat + (Cast + (Var 2 b) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (StringConstant + " + 1 = " + (Character 1 7 ()) + ) + (Character 1 5 ()) + () + ) + (Cast + (Var 2 c) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 3 ()) + () + ) + (Character 1 3 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 3 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConstant + "Say something! " + (Character 1 15 ()) + ) + (Var 2 a) + (Character 1 13 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 13 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConstant + "do some calculation: " + (Character 1 21 ()) + ) + (Cast + (IntegerBinOp + (IntegerBinOp + (Var 2 b) + Mul + (IntegerConstant 7 (Integer 4)) + (Integer 4) + () + ) + Add + (Var 2 c) + (Integer 4) + () + ) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 19 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 19 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConcat + (StringConstant + "9..." + (Character 1 4 ()) + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 4 ()) + (StringConstant + "9..." + (Character 1 4 ()) + ) + ) + (StringConcat + (StringConcat + (Cast + (Var 2 b) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (StringConstant + "..." + (Character 1 3 ()) + ) + (Character 1 1 ()) + () + ) + (Cast + (Var 2 c) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 -1 ()) + () + ) + (Character 1 3 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 3 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConcat + (StringConstant + "" + (Character 1 0 ()) + ) + (StringConcat + (StringConcat + (Cast + (Var 2 b) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (StringConstant + "..." + (Character 1 3 ()) + ) + (Character 1 1 ()) + () + ) + (Cast + (Var 2 c) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 -1 ()) + () + ) + (Character 1 -1 ()) + () + ) + (StringConstant + "..." + (Character 1 3 ()) + ) + (Character 1 2 ()) + () + ) + (StringConstant + "12" + (Character 1 2 ()) + ) + (Character 1 4 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConcat + (StringConcat + (StringConcat + (StringConstant + "" + (Character 1 0 ()) + ) + (Cast + (Var 2 b) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 -2 ()) + () + ) + (StringConstant + " " + (Character 1 1 ()) + ) + (Character 1 -1 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 -1 ()) + () + ) + (Cast + (Var 2 c) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 -3 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 -3 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConstant + "Hello! " + (Character 1 7 ()) + ) + (Var 2 a) + (Character 1 5 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 5 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConstant + "\n Something fun! 
" + (Character 1 17 ()) + ) + (StringRepeat + (Var 2 a) + (IntegerConstant 2 (Integer 4)) + (Character 1 0 ()) + () + ) + (Character 1 17 ()) + () + ) + (StringConstant + "\n" + (Character 1 1 ()) + ) + (Character 1 18 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConstant + "\n Something fun! " + (Character 1 17 ()) + ) + (Cast + (Var 2 c) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 15 ()) + () + ) + (StringConstant + "\n" + (Character 1 1 ()) + ) + (Character 1 16 ()) + () + )] + () + () + ) + (Print + [(StringConcat + (StringConcat + (StringConstant + "Hello World " + (Character 1 12 ()) + ) + (StringConcat + (StringConcat + (Cast + (Var 2 b) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (StringConstant + " " + (Character 1 1 ()) + ) + (Character 1 -1 ()) + () + ) + (Cast + (Var 2 c) + IntegerToCharacter + (Character 1 -2 ()) + () + ) + (Character 1 -3 ()) + () + ) + (Character 1 9 ()) + () + ) + (StringConstant + "" + (Character 1 0 ()) + ) + (Character 1 9 ()) + () + )] + () + () + ) + (Print + [(StringConstant + "LEFT RIGHT" + (Character 1 10 ()) + )] + () + () + ) + (Print + [(StringConstant + "THIS THAT" + (Character 1 9 ()) + )] + () + () + )] + () + Public + .false. + .false. + () + ), + a: + (Variable + 2 + a + [] + Local + (StringConstant + "FooBar" + (Character 1 6 ()) + ) + (StringConstant + "FooBar" + (Character 1 6 ()) + ) + Default + (Character 1 -2 ()) + () + Source + Public + Required + .false. + ), + b: + (Variable + 2 + b + [] + Local + (IntegerConstant 10 (Integer 4)) + (IntegerConstant 10 (Integer 4)) + Default + (Integer 4) + () + Source + Public + Required + .false. + ), + c: + (Variable + 2 + c + [] + Local + (IntegerConstant 11 (Integer 4)) + (IntegerConstant 11 (Integer 4)) + Default + (Integer 4) + () + Source + Public + Required + .false. + ) + }) + __main__ + [] + .false. + .false. 
+ ), + main_program: + (Program + (SymbolTable + 4 + { + __main__global_stmts: + (ExternalSymbol + 4 + __main__global_stmts + 2 __main__global_stmts + __main__ + [] + __main__global_stmts + Public + ) + }) + main_program + [__main__] + [(SubroutineCall + 4 __main__global_stmts + 2 __main__global_stmts + [] + () + )] + ) + }) + [] +) diff --git a/tests/reference/ast-fstring1-8d426c9.json b/tests/reference/ast-fstring1-8d426c9.json new file mode 100644 index 0000000000..a21562e47c --- /dev/null +++ b/tests/reference/ast-fstring1-8d426c9.json @@ -0,0 +1,13 @@ +{ + "basename": "ast-fstring1-8d426c9", + "cmd": "lpython --show-ast --no-color {infile} -o {outfile}", + "infile": "tests/fstring1.py", + "infile_hash": "ddceb6a09b5bc556ae8d2db18908c8f60af3ee96cc971bed1f5f9ae9", + "outfile": null, + "outfile_hash": null, + "stdout": "ast-fstring1-8d426c9.stdout", + "stdout_hash": "c85f68a44e23e21912b4bd4d8e0e21d5542d76879ade0ae2dbd362d0", + "stderr": null, + "stderr_hash": null, + "returncode": 0 +} \ No newline at end of file diff --git a/tests/reference/ast-fstring1-8d426c9.stdout b/tests/reference/ast-fstring1-8d426c9.stdout new file mode 100644 index 0000000000..7ade8e6612 --- /dev/null +++ b/tests/reference/ast-fstring1-8d426c9.stdout @@ -0,0 +1,445 @@ +(Module + [(AnnAssign + (Name + a + Store + ) + (Name + str + Load + ) + (ConstantStr + "FooBar" + () + ) + 1 + ) + (AnnAssign + (Name + b + Store + ) + (Name + i32 + Load + ) + (ConstantInt + 10 + () + ) + 1 + ) + (AnnAssign + (Name + c + Store + ) + (Name + i32 + Load + ) + (ConstantInt + 11 + () + ) + 1 + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "" + () + ) + (JoinedStr + [(FormattedValue + (Name + b + Load + ) + -1 + () + ) + (ConstantStr + " + 1 = " + () + ) + (FormattedValue + (Name + c + Load + ) + -1 + () + )] + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "Say something! " + () + ) + (FormattedValue + (Name + a + Load + ) + -1 + () + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "do some calculation: " + () + ) + (FormattedValue + (BinOp + (BinOp + (Name + b + Load + ) + Mult + (ConstantInt + 7 + () + ) + ) + Add + (Name + c + Load + ) + ) + -1 + () + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "9..." + () + ) + (ConstantStr + "" + () + ) + (JoinedStr + [(FormattedValue + (Name + b + Load + ) + -1 + () + ) + (ConstantStr + "..." + () + ) + (FormattedValue + (Name + c + Load + ) + -1 + () + )] + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "" + () + ) + (JoinedStr + [(FormattedValue + (Name + b + Load + ) + -1 + () + ) + (ConstantStr + "..." + () + ) + (FormattedValue + (Name + c + Load + ) + -1 + () + )] + ) + (ConstantStr + "..." + () + ) + (ConstantStr + "12" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "" + () + ) + (FormattedValue + (Name + b + Load + ) + -1 + () + ) + (ConstantStr + " " + () + ) + (ConstantStr + "" + () + ) + (FormattedValue + (Name + c + Load + ) + -1 + () + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "Hello! 
" + () + ) + (FormattedValue + (Name + a + Load + ) + -1 + () + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "\n Something fun! " + () + ) + (FormattedValue + (BinOp + (Name + a + Load + ) + Mult + (ConstantInt + 2 + () + ) + ) + -1 + () + ) + (ConstantStr + "\n" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "\n Something fun! " + () + ) + (FormattedValue + (Name + c + Load + ) + -1 + () + ) + (ConstantStr + "\n" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(JoinedStr + [(ConstantStr + "Hello World " + () + ) + (JoinedStr + [(FormattedValue + (Name + b + Load + ) + -1 + () + ) + (ConstantStr + " " + () + ) + (FormattedValue + (Name + c + Load + ) + -1 + () + )] + ) + (ConstantStr + "" + () + )] + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(ConstantStr + "LEFT RIGHT" + () + )] + [] + ) + ) + (Expr + (Call + (Name + print + Load + ) + [(ConstantStr + "THIS THAT" + () + )] + [] + ) + )] + [] +) diff --git a/tests/reference/runtime-fstring1-20a9192.json b/tests/reference/runtime-fstring1-20a9192.json new file mode 100644 index 0000000000..d429811eab --- /dev/null +++ b/tests/reference/runtime-fstring1-20a9192.json @@ -0,0 +1,13 @@ +{ + "basename": "runtime-fstring1-20a9192", + "cmd": "lpython {infile}", + "infile": "tests/fstring1.py", + "infile_hash": "ddceb6a09b5bc556ae8d2db18908c8f60af3ee96cc971bed1f5f9ae9", + "outfile": null, + "outfile_hash": null, + "stdout": "runtime-fstring1-20a9192.stdout", + "stdout_hash": "2c65ae0b9a4e18369a1421c44c5269ddd5a2c57e86f598b1a35ef4b1", + "stderr": null, + "stderr_hash": null, + "returncode": 0 +} \ No newline at end of file diff --git a/tests/reference/runtime-fstring1-20a9192.stdout b/tests/reference/runtime-fstring1-20a9192.stdout new file mode 100644 index 0000000000..f5d29ef6fc --- /dev/null +++ b/tests/reference/runtime-fstring1-20a9192.stdout @@ -0,0 +1,16 @@ +10 + 1 = 11 +Say something! FooBar +do some calculation: 81 +9...10...11 +10...11...12 +10 11 +Hello! FooBar + + Something fun! FooBarFooBar + + + Something fun! 11 + +Hello World 10 11 +LEFT RIGHT +THIS THAT diff --git a/tests/tests.toml b/tests/tests.toml index 4f048e910b..0f15500c27 100644 --- a/tests/tests.toml +++ b/tests/tests.toml @@ -142,6 +142,12 @@ asr = true llvm = true llvm_dbg = true +[[test]] +filename = "fstring1.py" +ast = true +asr = true +run = true + [[test]] filename = "../integration_tests/array_01_decl.py" asr = true