Skip to content

Commit

Permalink
starlark syntax: add syntax.StringLiteral.unquote
Browse files Browse the repository at this point in the history
This is the dual function to repr(str).

Also, improve "unclosed string literal" error message.

PiperOrigin-RevId: 338258854
  • Loading branch information
adonovan authored and copybara-github committed Oct 21, 2020
1 parent 31b9b9e commit f056dd1
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 16 deletions.
12 changes: 6 additions & 6 deletions src/main/java/net/starlark/java/syntax/Lexer.java
Original file line number Diff line number Diff line change
Expand Up @@ -257,14 +257,14 @@ private void escapedStringLiteral(char quot, boolean isRaw) {
literal.append(c);
break;
} else {
error("unterminated string literal at eol", literalStartPos);
error("unclosed string literal", literalStartPos);
setToken(TokenKind.STRING, literalStartPos, pos);
setValue(literal.toString());
return;
}
case '\\':
if (pos == buffer.length) {
error("unterminated string literal at eof", literalStartPos);
error("unclosed string literal", literalStartPos);
setToken(TokenKind.STRING, literalStartPos, pos);
setValue(literal.toString());
return;
Expand Down Expand Up @@ -389,7 +389,7 @@ private void escapedStringLiteral(char quot, boolean isRaw) {
break;
}
}
error("unterminated string literal at eof", literalStartPos);
error("unclosed string literal", literalStartPos);
setToken(TokenKind.STRING, literalStartPos, pos);
setValue(literal.toString());
}
Expand Down Expand Up @@ -420,7 +420,7 @@ private void stringLiteral(char quot, boolean isRaw) {
char c = buffer[pos++];
switch (c) {
case '\n':
error("unterminated string literal at eol", literalStartPos);
error("unclosed string literal", literalStartPos);
setToken(TokenKind.STRING, literalStartPos, pos);
setValue(bufferSlice(contentStartPos, pos - 1));
return;
Expand Down Expand Up @@ -455,12 +455,12 @@ private void stringLiteral(char quot, boolean isRaw) {
}

// If the current position is beyond the end of the file, need to move it backwards
// Possible if the file ends with `r"\` (unterminated raw string literal with a backslash)
// Possible if the file ends with `r"\` (unclosed raw string literal with a backslash)
if (pos > buffer.length) {
pos = buffer.length;
}

error("unterminated string literal at eof", literalStartPos);
error("unclosed string literal", literalStartPos);
setToken(TokenKind.STRING, literalStartPos, pos);
setValue(bufferSlice(contentStartPos, pos));
}
Expand Down
23 changes: 23 additions & 0 deletions src/main/java/net/starlark/java/syntax/StringLiteral.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.
package net.starlark.java.syntax;

import java.util.ArrayList;

/** Syntax node for a string literal. */
public final class StringLiteral extends Expression {

Expand Down Expand Up @@ -69,6 +71,27 @@ public Object getFileLocations() {
return locs;
}

/**
 * Decodes the Starlark string literal s and returns the string value it denotes.
 *
 * <p>The argument must consist of exactly one string literal token spanning the whole input;
 * anything before or after the literal is rejected.
 *
 * @throws IllegalArgumentException if s does not contain a valid string literal.
 */
public static String unquote(String s) {
  // TODO(adonovan): once we have byte compilation, make this function
  // independent of the Lexer, which should only validate string literals
  // but not unquote them. Clients (e.g. the compiler) can unquote on demand.
  ArrayList<SyntaxError> scanErrors = new ArrayList<>();
  Lexer scanner = new Lexer(ParserInput.fromLines(s), FileOptions.DEFAULT, scanErrors);
  scanner.nextToken();

  // A scan error takes precedence: surface the first message the lexer reported.
  if (!scanErrors.isEmpty()) {
    SyntaxError firstError = scanErrors.get(0);
    throw new IllegalArgumentException(firstError.message());
  }

  // The first token must be a STRING that covers the input from start to end.
  boolean coversWholeInput = scanner.start == 0 && scanner.end == s.length();
  boolean isStringToken = scanner.kind == TokenKind.STRING;
  if (!(coversWholeInput && isStringToken)) {
    throw new IllegalArgumentException("invalid syntax");
  }

  return (String) scanner.value;
}

/** Constructs a StringLiteral from its serialized components. */
public static StringLiteral fromSerialization(
Object fileLocations, int startOffset, String value, int endOffset) {
Expand Down
53 changes: 43 additions & 10 deletions src/test/java/net/starlark/java/syntax/LexerTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
package net.starlark.java.syntax;

import static com.google.common.truth.Truth.assertThat;
import static org.junit.Assert.assertThrows;

import com.google.common.base.Joiner;
import java.util.ArrayList;
Expand Down Expand Up @@ -328,10 +329,9 @@ public void testRawString() throws Exception {
assertThat(values(tokens("r'a\\\\b'"))).isEqualTo("STRING(a\\\\b) NEWLINE EOF"); // r'a\\b'
assertThat(values(tokens("r'ab'r"))).isEqualTo("STRING(ab) IDENTIFIER(r) NEWLINE EOF");

// Unterminated raw string
// Unclosed raw string
values(tokens("r'\\'")); // r'\'
assertThat(lastError.toString())
.isEqualTo("/some/path.txt:1: unterminated string literal at eof");
assertThat(lastError.toString()).isEqualTo("/some/path.txt:1: unclosed string literal");
}

@Test
Expand All @@ -342,10 +342,9 @@ public void testTripleRawString() throws Exception {
// cd"""
assertThat(values(tokens("\"\"\"ab\ncd\"\"\""))).isEqualTo("STRING(ab\ncd) NEWLINE EOF");

// Unterminated raw string
// Unclosed raw string
values(tokens("r'''\\'''")); // r'''\'''
assertThat(lastError.toString())
.isEqualTo("/some/path.txt:1: unterminated string literal at eof");
assertThat(lastError.toString()).isEqualTo("/some/path.txt:1: unclosed string literal");
}

@Test
Expand Down Expand Up @@ -512,17 +511,17 @@ public void testContainsErrors() throws Exception {
allTokens(lexerFail);
assertThat(errors).isNotEmpty();

String s = "'unterminated";
String s = "'unclosed";
lexerFail = createLexer(s);
allTokens(lexerFail);
assertThat(errors).isNotEmpty();
assertThat(values(tokens(s))).isEqualTo("STRING(unterminated) NEWLINE EOF");
assertThat(values(tokens(s))).isEqualTo("STRING(unclosed) NEWLINE EOF");
}

@Test
public void testUnterminatedRawStringWithEscapingError() throws Exception {
public void testUnclosedRawStringWithEscapingError() throws Exception {
assertThat(names(tokens("r'\\"))).isEqualTo("STRING NEWLINE EOF");
assertThat(lastError).isEqualTo("/some/path.txt:1: unterminated string literal at eof");
assertThat(lastError).isEqualTo("/some/path.txt:1: unclosed string literal");
}

@Test
Expand Down Expand Up @@ -553,4 +552,38 @@ static SyntaxError assertContainsError(List<SyntaxError> errors, String substr)
"error '" + substr + "' not found, but got these:\n" + Joiner.on("\n").join(errors));
}
}

@Test
public void testStringLiteralUnquote() {
  // The Lexer implements the underlying logic, so a few
  // representative cases are enough here.

  // Well-formed literals, as {literal, expected value} pairs.
  String[][] wellFormed = {
    {"'hello'", "hello"},
    {"\"hello\"", "hello"},
    {"r'a\\b\"c'", "a\\b\"c"},
  };
  for (String[] pair : wellFormed) {
    assertUnquoteEquals(pair[0], pair[1]);
  }

  // Inputs that are not exactly one string literal token.
  String[] notALiteral = {
    "", // empty
    " 'hello'", // leading space
    "'hello' ", // trailing space
    "x", // identifier
    "r", // identifier (same prefix as r'...')
    "r2", // identifier
    "1", // number
  };
  for (String input : notALiteral) {
    assertUnquoteError(input, "invalid syntax");
  }

  // Literals with no closing quote.
  String[] unclosed = {"'", "\"", "'abc"};
  for (String input : unclosed) {
    assertUnquoteError(input, "unclosed string literal");
  }

  // Unknown escape sequence.
  assertUnquoteError(
      "'\\g'",
      "invalid escape sequence: \\g. You can enable unknown escape sequences by passing the flag"
          + " --incompatible_restrict_string_escapes=false"); // this temporary hint is a lie
}

/** Asserts that unquoting {@code literal} yields exactly {@code value}. */
private static void assertUnquoteEquals(String literal, String value) {
  String unquoted = StringLiteral.unquote(literal);
  assertThat(unquoted).isEqualTo(value);
}

/**
 * Asserts that unquoting {@code badLiteral} throws an IllegalArgumentException whose message
 * contains {@code errorSubstring}.
 */
private static void assertUnquoteError(String badLiteral, String errorSubstring) {
  IllegalArgumentException thrown =
      assertThrows(IllegalArgumentException.class, () -> StringLiteral.unquote(badLiteral));
  assertThat(thrown).hasMessageThat().contains(errorSubstring);
}
}

0 comments on commit f056dd1

Please sign in to comment.