diff --git a/src/main/java/net/starlark/java/syntax/Lexer.java b/src/main/java/net/starlark/java/syntax/Lexer.java index e957c469a00dfd..751c3b3efac63b 100644 --- a/src/main/java/net/starlark/java/syntax/Lexer.java +++ b/src/main/java/net/starlark/java/syntax/Lexer.java @@ -257,14 +257,14 @@ private void escapedStringLiteral(char quot, boolean isRaw) { literal.append(c); break; } else { - error("unterminated string literal at eol", literalStartPos); + error("unclosed string literal", literalStartPos); setToken(TokenKind.STRING, literalStartPos, pos); setValue(literal.toString()); return; } case '\\': if (pos == buffer.length) { - error("unterminated string literal at eof", literalStartPos); + error("unclosed string literal", literalStartPos); setToken(TokenKind.STRING, literalStartPos, pos); setValue(literal.toString()); return; @@ -389,7 +389,7 @@ private void escapedStringLiteral(char quot, boolean isRaw) { break; } } - error("unterminated string literal at eof", literalStartPos); + error("unclosed string literal", literalStartPos); setToken(TokenKind.STRING, literalStartPos, pos); setValue(literal.toString()); } @@ -420,7 +420,7 @@ private void stringLiteral(char quot, boolean isRaw) { char c = buffer[pos++]; switch (c) { case '\n': - error("unterminated string literal at eol", literalStartPos); + error("unclosed string literal", literalStartPos); setToken(TokenKind.STRING, literalStartPos, pos); setValue(bufferSlice(contentStartPos, pos - 1)); return; @@ -455,12 +455,12 @@ private void stringLiteral(char quot, boolean isRaw) { } // If the current position is beyond the end of the file, need to move it backwards - // Possible if the file ends with `r"\` (unterminated raw string literal with a backslash) + // Possible if the file ends with `r"\` (unclosed raw string literal with a backslash) if (pos > buffer.length) { pos = buffer.length; } - error("unterminated string literal at eof", literalStartPos); + error("unclosed string literal", literalStartPos); 
setToken(TokenKind.STRING, literalStartPos, pos); setValue(bufferSlice(contentStartPos, pos)); } diff --git a/src/main/java/net/starlark/java/syntax/StringLiteral.java b/src/main/java/net/starlark/java/syntax/StringLiteral.java index c30b9c718b2938..0facdd6abd1f42 100644 --- a/src/main/java/net/starlark/java/syntax/StringLiteral.java +++ b/src/main/java/net/starlark/java/syntax/StringLiteral.java @@ -13,6 +13,8 @@ // limitations under the License. package net.starlark.java.syntax; +import java.util.ArrayList; + /** Syntax node for a string literal. */ public final class StringLiteral extends Expression { @@ -69,6 +71,27 @@ public Object getFileLocations() { return locs; } + /** + * Returns the value denoted by the Starlark string literal within s. + * + * @throws IllegalArgumentException if s does not contain a valid string literal. + */ + public static String unquote(String s) { + // TODO(adonovan): once we have byte compilation, make this function + // independent of the Lexer, which should only validate string literals + // but not unquote them. Clients (e.g. the compiler) can unquote on demand. + ArrayList<SyntaxError> errors = new ArrayList<>(); + Lexer lexer = new Lexer(ParserInput.fromLines(s), FileOptions.DEFAULT, errors); + lexer.nextToken(); + if (!errors.isEmpty()) { + throw new IllegalArgumentException(errors.get(0).message()); + } + if (lexer.start != 0 || lexer.end != s.length() || lexer.kind != TokenKind.STRING) { + throw new IllegalArgumentException("invalid syntax"); + } + return (String) lexer.value; + } + /** Constructs a StringLiteral from its serialized components. 
*/ public static StringLiteral fromSerialization( Object fileLocations, int startOffset, String value, int endOffset) { diff --git a/src/test/java/net/starlark/java/syntax/LexerTest.java b/src/test/java/net/starlark/java/syntax/LexerTest.java index de6c909deacb50..7c71c0922417c1 100644 --- a/src/test/java/net/starlark/java/syntax/LexerTest.java +++ b/src/test/java/net/starlark/java/syntax/LexerTest.java @@ -14,6 +14,7 @@ package net.starlark.java.syntax; import static com.google.common.truth.Truth.assertThat; +import static org.junit.Assert.assertThrows; import com.google.common.base.Joiner; import java.util.ArrayList; @@ -328,10 +329,9 @@ public void testRawString() throws Exception { assertThat(values(tokens("r'a\\\\b'"))).isEqualTo("STRING(a\\\\b) NEWLINE EOF"); // r'a\\b' assertThat(values(tokens("r'ab'r"))).isEqualTo("STRING(ab) IDENTIFIER(r) NEWLINE EOF"); - // Unterminated raw string + // Unclosed raw string values(tokens("r'\\'")); // r'\' - assertThat(lastError.toString()) - .isEqualTo("/some/path.txt:1: unterminated string literal at eof"); + assertThat(lastError.toString()).isEqualTo("/some/path.txt:1: unclosed string literal"); } @Test @@ -342,10 +342,9 @@ public void testTripleRawString() throws Exception { // cd""" assertThat(values(tokens("\"\"\"ab\ncd\"\"\""))).isEqualTo("STRING(ab\ncd) NEWLINE EOF"); - // Unterminated raw string + // Unclosed raw string values(tokens("r'''\\'''")); // r'''\''' - assertThat(lastError.toString()) - .isEqualTo("/some/path.txt:1: unterminated string literal at eof"); + assertThat(lastError.toString()).isEqualTo("/some/path.txt:1: unclosed string literal"); } @Test @@ -512,17 +511,17 @@ public void testContainsErrors() throws Exception { allTokens(lexerFail); assertThat(errors).isNotEmpty(); - String s = "'unterminated"; + String s = "'unclosed"; lexerFail = createLexer(s); allTokens(lexerFail); assertThat(errors).isNotEmpty(); - assertThat(values(tokens(s))).isEqualTo("STRING(unterminated) NEWLINE EOF"); + 
assertThat(values(tokens(s))).isEqualTo("STRING(unclosed) NEWLINE EOF"); } @Test - public void testUnterminatedRawStringWithEscapingError() throws Exception { + public void testUnclosedRawStringWithEscapingError() throws Exception { assertThat(names(tokens("r'\\"))).isEqualTo("STRING NEWLINE EOF"); - assertThat(lastError).isEqualTo("/some/path.txt:1: unterminated string literal at eof"); + assertThat(lastError).isEqualTo("/some/path.txt:1: unclosed string literal"); } @Test @@ -553,4 +552,38 @@ static SyntaxError assertContainsError(List<SyntaxError> errors, String substr) "error '" + substr + "' not found, but got these:\n" + Joiner.on("\n").join(errors)); } } + + @Test + public void testStringLiteralUnquote() { + // Coverage here needn't be exhaustive, + // as the underlying logic is that of the Lexer. + assertUnquoteEquals("'hello'", "hello"); + assertUnquoteEquals("\"hello\"", "hello"); + assertUnquoteEquals("r'a\\b\"c'", "a\\b\"c"); + + assertUnquoteError("", "invalid syntax"); // empty + assertUnquoteError(" 'hello'", "invalid syntax"); // leading space + assertUnquoteError("'hello' ", "invalid syntax"); // trailing space + assertUnquoteError("x", "invalid syntax"); // identifier + assertUnquoteError("r", "invalid syntax"); // identifier (same prefix as r'...') + assertUnquoteError("r2", "invalid syntax"); // identifier + assertUnquoteError("1", "invalid syntax"); // number + assertUnquoteError("'", "unclosed string literal"); + assertUnquoteError("\"", "unclosed string literal"); + assertUnquoteError("'abc", "unclosed string literal"); + assertUnquoteError( + "'\\g'", + "invalid escape sequence: \\g. 
You can enable unknown escape sequences by passing the flag" + + " --incompatible_restrict_string_escapes=false"); // this temporary hint is a lie + } + + private static void assertUnquoteEquals(String literal, String value) { + assertThat(StringLiteral.unquote(literal)).isEqualTo(value); + } + + private static void assertUnquoteError(String badLiteral, String errorSubstring) { + IllegalArgumentException ex = + assertThrows(IllegalArgumentException.class, () -> StringLiteral.unquote(badLiteral)); + assertThat(ex).hasMessageThat().contains(errorSubstring); + } }