Skip to content

Commit

Permalink
Rewrite parser and fix escaped quotes in multiline values (#322)
Browse files Browse the repository at this point in the history
  • Loading branch information
GrahamCampbell authored Jan 26, 2019
1 parent 2ecc2da commit 21c540a
Show file tree
Hide file tree
Showing 9 changed files with 145 additions and 99 deletions.
3 changes: 2 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@
],
"require": {
"php": "^5.4 || ^7.0",
"phpoption/phpoption": "^1.5"
"phpoption/phpoption": "^1.5",
"symfony/polyfill-ctype": "^1.9"
},
"require-dev": {
"phpunit/phpunit": "^4.8.35 || ^5.0 || ^6.0"
Expand Down
36 changes: 32 additions & 4 deletions src/Lines.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static function process(array $lines)
foreach ($lines as $line) {
list($multiline, $line, $multilineBuffer) = self::multilineProcess($multiline, $line, $multilineBuffer);

if (!$multiline && !self::isComment($line) && self::looksLikeSetter($line)) {
if (!$multiline && !self::isComment($line) && self::isSetter($line)) {
$output[] = $line;
}
}
Expand Down Expand Up @@ -68,7 +68,11 @@ private static function multilineProcess($multiline, $line, array $buffer)
*/
private static function looksLikeMultilineStart($line)
{
return strpos($line, '="') !== false && substr_count($line, '"') === 1;
if (strpos($line, '="') === false) {
return false;
}

return self::looksLikeMultilineStop($line) === false;
}

/**
Expand All @@ -80,7 +84,31 @@ private static function looksLikeMultilineStart($line)
*/
private static function looksLikeMultilineStop($line)
{
return strpos($line, '"') !== false && substr_count($line, '="') === 0;
if ($line === '"') {
return true;
}

foreach (self::getCharPairs(str_replace('\\\\', '', $line)) as $pair) {
if ($pair[0] !== '\\' && $pair[0] !== '=' && $pair[1] === '"') {
return true;
}
}

return false;
}

/**
* Get all pairs of adjacent characters within the line.
*
* Each element of the result is a two-item array holding a character and
* the character that follows it. The last element pairs the final
* character with null, because the shifted copy of the character list is
* one item shorter and array_map() pads the shorter list with null.
*
* @param string $line
*
* @return array
*/
private static function getCharPairs($line)
{
$chars = str_split($line);

// A null callback makes array_map() zip the two lists element by element.
return array_map(null, $chars, array_slice($chars, 1));
}

/**
Expand All @@ -104,7 +132,7 @@ private static function isComment($line)
*
* @return bool
*/
private static function looksLikeSetter($line)
private static function isSetter($line)
{
return strpos($line, '=') !== false;
}
Expand Down
138 changes: 57 additions & 81 deletions src/Parser.php
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,19 @@
namespace Dotenv;

use Dotenv\Exception\InvalidFileException;
use Dotenv\Regex\Regex;

class Parser
{
const INITIAL_STATE = 0;
const UNQUOTED_STATE = 1;
const QUOTED_STATE = 2;
const ESCAPE_STATE = 3;
const WHITESPACE_STATE = 4;
const COMMENT_STATE = 5;

/**
* Parse the given environment variable entry into a name and value.
*
* Takes value as passed in by developer and:
* - breaks up the line into a name and value,
* - cleaning the value of quotes,
* - cleaning the name of quotes.
*
* @param string $entry
*
* @throws \Dotenv\Exception\InvalidFileException
Expand All @@ -25,14 +26,12 @@ public static function parse($entry)
{
list($name, $value) = self::splitStringIntoParts($entry);

return [self::sanitiseName($name), self::sanitiseValue($value)];
return [self::parseName($name), self::parseValue($value)];
}

/**
* Split the compound string into parts.
*
* If the `$line` contains an `=` sign, then we split it into 2 parts.
*
* @param string $line
*
* @throws \Dotenv\Exception\InvalidFileException
Expand Down Expand Up @@ -66,7 +65,7 @@ private static function splitStringIntoParts($line)
*
* @return string
*/
private static function sanitiseName($name)
private static function parseName($name)
{
$name = trim(str_replace(['export ', '\'', '"'], '', $name));

Expand Down Expand Up @@ -100,71 +99,60 @@ private static function isValidName($name)
*
* @return string|null
*/
private static function sanitiseValue($value)
private static function parseValue($value)
{
if ($value === null || trim($value) === '') {
return $value;
}

if (self::beginsWithAQuote($value)) {
return self::processQuotedValue($value);
}

// Strip comments from the left
$value = explode(' #', $value, 2)[0];

// Unquoted values cannot contain whitespace
if (preg_match('/\s+/', $value) > 0) {
// Check if value is a comment (usually triggered when empty value with comment)
if (preg_match('/^#/', $value) > 0) {
$value = '';
} else {
throw new InvalidFileException(
self::getErrorMessage('an unexpected space', $value)
);
return array_reduce(str_split($value), function ($data, $char) use ($value) {
switch ($data[1]) {
case self::INITIAL_STATE:
if ($char === '"') {
return [$data[0], self::QUOTED_STATE];
} elseif ($char === '#') {
return [$data[0], self::COMMENT_STATE];
} else {
return [$data[0].$char, self::UNQUOTED_STATE];
}
case self::UNQUOTED_STATE:
if ($char === '#') {
return [$data[0], self::COMMENT_STATE];
} elseif (ctype_space($char)) {
return [$data[0], self::WHITESPACE_STATE];
} else {
return [$data[0].$char, self::UNQUOTED_STATE];
}
case self::QUOTED_STATE:
if ($char === '"') {
return [$data[0], self::WHITESPACE_STATE];
} elseif ($char === '\\') {
return [$data[0], self::ESCAPE_STATE];
} else {
return [$data[0].$char, self::QUOTED_STATE];
}
case self::ESCAPE_STATE:
if ($char === '"' || $char === '\\') {
return [$data[0].$char, self::QUOTED_STATE];
} else {
throw new InvalidFileException(
self::getErrorMessage('an unexpected escape sequence', $value)
);
}
case self::WHITESPACE_STATE:
if ($char === '#') {
return [$data[0], self::COMMENT_STATE];
} elseif (!ctype_space($char)) {
throw new InvalidFileException(
self::getErrorMessage('unexpected whitespace', $value)
);
} else {
return [$data[0], self::WHITESPACE_STATE];
}
case self::COMMENT_STATE:
return [$data[0], self::COMMENT_STATE];
}
}

return $value;
}

/**
* Strips quotes from the environment variable value.
*
* @param string $value
*
* @return string
*/
private static function processQuotedValue($value)
{
$quote = $value[0];

$pattern = sprintf(
'/^
%1$s # match a quote at the start of the value
( # capturing sub-pattern used
(?: # we do not need to capture this
[^%1$s\\\\]+ # any character other than a quote or backslash
|\\\\\\\\ # or two backslashes together
|\\\\%1$s # or an escaped quote e.g \"
)* # as many characters that match the previous rules
) # end of the capturing sub-pattern
%1$s # and the closing quote
.*$ # and discard any string after the closing quote
/mx',
$quote
);

return Regex::replace($pattern, '$1', $value)
->mapSuccess(function ($str) use ($quote) {
return str_replace('\\\\', '\\', str_replace("\\$quote", $quote, $str));
})
->mapError(function ($err) use ($value) {
throw new InvalidFileException(
self::getErrorMessage(sprintf('a quote parsing error (%s)', $err), $value)
);
})
->getSuccess();
}, ['', self::INITIAL_STATE])[0];
}

/**
Expand All @@ -183,16 +171,4 @@ private static function getErrorMessage($cause, $subject)
strtok($subject, "\n")
);
}

/**
* Determine if the given string begins with a quote.
*
* @param string $value
*
* @return bool
*/
private static function beginsWithAQuote($value)
{
return isset($value[0]) && ($value[0] === '"' || $value[0] === '\'');
}
}
9 changes: 9 additions & 0 deletions tests/Dotenv/DotenvTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ public function testCommentedDotenvLoadsEnvironmentVars()
$this->assertSame('a value with a # character & a quote " character inside quotes', getenv('CQUOTESWITHQUOTE'));
$this->assertEmpty(getenv('CNULL'));
$this->assertEmpty(getenv('EMPTY'));
$this->assertEmpty(getenv('EMPTY2'));
$this->assertSame('foo', getenv('FOOO'));
}

public function testQuotedDotenvLoadsEnvironmentVars()
Expand Down Expand Up @@ -266,6 +268,13 @@ public function testDotenvAllowsSpecialCharacters()
$this->assertSame('test some escaped characters like a quote " or maybe a backslash \\', getenv('SPVAR5'));
}

/**
* Loads the multiline.env fixture and checks that the TEST variable comes
* back as one value containing newlines and unescaped double quotes.
*
* NOTE(review): the method name misspells "Multiline".
*/
public function testMutlilineLoading()
{
$dotenv = Dotenv::create($this->fixturesFolder, 'multiline.env');
$dotenv->load();
$this->assertSame("test\n test\"test\"\n test", getenv('TEST'));
}

public function testDotenvAssertions()
{
$dotenv = Dotenv::create($this->fixturesFolder, 'assertions.env');
Expand Down
30 changes: 27 additions & 3 deletions tests/Dotenv/LinesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class LinesTest extends TestCase
{
public function testProcess()
public function testProcessBasic()
{
$content = file_get_contents(dirname(__DIR__).'/fixtures/env/assertions.env');

Expand All @@ -14,13 +14,37 @@ public function testProcess()
'ASSERTVAR2=""',
'ASSERTVAR3="val3 "',
'ASSERTVAR4="0" # empty looking value',
'ASSERTVAR5=#foo',
'ASSERTVAR5="#foo"',
"ASSERTVAR6=\"val1\nval2\"",
"ASSERTVAR7=\"\nval3\" #",
"ASSERTVAR8=\"val3\n\"",
"ASSERTVAR9=\"\n\"",
"ASSERTVAR9=\"\n\n\"",
];

$this->assertSame($expected, Lines::process(preg_split("/(\r\n|\n|\r)/", $content)));
}

/**
* Checks that Lines::process() folds the physical lines of multiline.env
* into a single entry, keeping the embedded newlines and leaving the
* backslash-escaped quotes untouched.
*/
public function testProcessQuotes()
{
$content = file_get_contents(dirname(__DIR__).'/fixtures/env/multiline.env');

$expected = [
"TEST=\"test\n test\\\"test\\\"\n test\"",
];

// Split on any newline style so the fixture's line endings do not matter.
$this->assertSame($expected, Lines::process(preg_split("/(\r\n|\n|\r)/", $content)));
}

/**
* NOTE(review): this test never calls Lines::process(). The assertion
* compares $expected with $lines, two identical literals, so it passes
* regardless of how Lines behaves. Presumably the input was meant to go
* through Lines::process() as in the other tests of this class - confirm.
*
* Also note that inside a single-quoted PHP string `\\"` evaluates to a
* single backslash followed by a quote, so the fixture may not contain the
* escaped backslash its wording suggests - verify before wiring in the call.
*/
public function testProcessClosingSlash()
{
$lines = [
'SPVAR5="test some escaped characters like a quote \" or maybe a backslash \\" # not escaped',
];

$expected = [
'SPVAR5="test some escaped characters like a quote \" or maybe a backslash \\" # not escaped',
];

$this->assertSame($expected, $lines);
}
}
20 changes: 11 additions & 9 deletions tests/Dotenv/ParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,17 @@ public function testExportParse()
$this->assertSame(['FOO', 'bar baz'], Parser::parse('export FOO="bar baz"'));
}

/**
* Checks that an escaped backslash directly before the closing quote does
* not escape that quote: the parsed value ends with a single backslash and
* the text after the closing quote (the comment) is not part of the value.
*/
public function testClosingSlashParse()
{
// In this single-quoted literal `\\"` is backslash + quote and `\\\\"` is
// two backslashes + quote, i.e. an escaped quote and an escaped backslash.
$content = 'SPVAR5="test some escaped characters like a quote \\" or maybe a backslash \\\\" # not escaped';
$expected = ['SPVAR5', 'test some escaped characters like a quote " or maybe a backslash \\'];

$this->assertSame($expected, Parser::parse($content));
}

/**
* @expectedException \Dotenv\Exception\InvalidFileException
* @expectedExceptionMessage Failed to parse dotenv file due to an unexpected space. Failed at [bar baz].
* @expectedExceptionMessage Failed to parse dotenv file due to unexpected whitespace. Failed at [bar baz].
*/
public function testParseInvalidSpaces()
{
Expand All @@ -54,16 +62,10 @@ public function testParseInvalidName()

/**
* @expectedException \Dotenv\Exception\InvalidFileException
* @expectedExceptionMessage Failed to parse dotenv file due to a quote parsing error (PREG_
* @expectedExceptionMessage Failed to parse dotenv file due to an unexpected escape sequence. Failed at ["iiiiviiiixiiiiviiii\n"].
*/
public function testParserFailsWithException()
public function testParserEscaping()
{
$limit = (int) ini_get('pcre.backtrack_limit');

if ($limit > 1000000) {
$this->markTestSkipped('System pcre.backtrack_limit too large.');
}

Parser::parse('FOO_BAD="iiiiviiiixiiiiviiii\\n"');
}
}
3 changes: 2 additions & 1 deletion tests/fixtures/env/assertions.env
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ASSERTVAR2=""

ASSERTVAR3="val3 "
ASSERTVAR4="0" # empty looking value
ASSERTVAR5=#foo
ASSERTVAR5="#foo"
ASSERTVAR6="val1
val2"
ASSERTVAR7="
Expand All @@ -14,4 +14,5 @@ val3" #
ASSERTVAR8="val3
"
ASSERTVAR9="

"
2 changes: 2 additions & 0 deletions tests/fixtures/env/commented.env
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ CSPACED="with spaces" # this is a comment
CQUOTES="a value with a # character" # this is a comment
CQUOTESWITHQUOTE="a value with a # character & a quote \" character inside quotes" # " this is a comment
EMPTY= # comment with empty variable
EMPTY2=# comment with empty variable
FOOO=foo# comment with no space
BOOLEAN=yes # (yes, no)

CNULL=
Expand Down
3 changes: 3 additions & 0 deletions tests/fixtures/env/multiline.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
TEST="test
test\"test\"
test"

0 comments on commit 21c540a

Please sign in to comment.