Skip to content

Commit

Permalink
Fix Tokenizer::int64() parsing of "0" when guessing base (#1842)
Browse files Browse the repository at this point in the history
Known bug victims in current code were tcp_outgoing_mark,
mark_client_packet, clientside_mark, and mark_client_connection
directives as well as client_connection_mark and (deprecated)
clientside_mark ACLs if they were configured to match a zero mark using
"0" or "0/..." syntax:

    ERROR: configuration failure: NfMarkConfig: invalid value '0/10'...
    exception location: NfMarkConfig.cc(23) getNfmark

Probably broken since 2014 commit 01f2137.
  • Loading branch information
rousskov authored and squid-anubis committed Jul 7, 2024
1 parent 314e430 commit 8cc5e88
Show file tree
Hide file tree
Showing 2 changed files with 309 additions and 1 deletion.
1 change: 0 additions & 1 deletion src/parser/Tokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,6 @@ Parser::Tokenizer::int64(int64_t & result, int base, bool allowSign, const SBuf:
if (base == 0) {
if ( *s == '0') {
base = 8;
++s;
} else {
base = 10;
}
Expand Down
309 changes: 309 additions & 0 deletions src/tests/testTokenizer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,285 @@ TestTokenizer::testTokenizerInt64()
CPPUNIT_ASSERT(t.buf().isEmpty());
}

// When interpreting octal numbers, standard strtol() and Tokenizer::int64()
// treat leading zero as a part of sequence of digits rather than a
// character used _exclusively_ as base indicator. Thus, it is not possible
// to create an invalid octal number with an explicit octal base -- the
// first invalid character after the base will be successfully ignored. This
// treatment also makes it difficult to define "shortest valid octal input".
// Here, we are just enumerating interesting "short input" octal cases in
// four dimensions:
// 1. int64(base) argument: forced or auto-detected;
// 2. base character ("0") in input: absent or present;
// 3. post-base digits in input: absent, valid, or invalid;
// 4. input length limits via int64(length) argument: unlimited or limited.

// forced base; input: no base, no post-base digits, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf(""));
CPPUNIT_ASSERT(!t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// forced base; input: no base, no post-base digits, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("7"));
CPPUNIT_ASSERT(!t.int64(rv, 8, false, 0));
CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
}

// forced base; input: no base, one valid post-base digit, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("4"));
const int64_t benchmark = 04;
CPPUNIT_ASSERT(t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// forced base; input: no base, one valid post-base digit, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("46"));
const int64_t benchmark = 04;
CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
}

// forced base; input: no base, one invalid post-base digit, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("8"));
CPPUNIT_ASSERT(!t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
}

// forced base; input: no base, one invalid post-base digit, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("80"));
CPPUNIT_ASSERT(!t.int64(rv, 8, false, 1));
CPPUNIT_ASSERT_EQUAL(SBuf("80"), t.buf());
}

// repeat the above six octal cases, but now with base character in input

// forced base; input: base, no post-base digits, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("0"));
const int64_t benchmark = 0;
CPPUNIT_ASSERT(t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// forced base; input: base, no post-base digits, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("07"));
const int64_t benchmark = 0;
CPPUNIT_ASSERT(t.int64(rv, 8, false, 1));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
}

// forced base; input: base, one valid post-base digit, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("04"));
const int64_t benchmark = 04;
CPPUNIT_ASSERT(t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// forced base; input: base, one valid post-base digit, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("046"));
const int64_t benchmark = 04;
CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
}

// forced base; input: base, one invalid post-base digit, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("08"));
const int64_t benchmark = 00;
CPPUNIT_ASSERT(t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
}

// forced base; input: base, one invalid post-base digit, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("08"));
const int64_t benchmark = 00;
CPPUNIT_ASSERT(t.int64(rv, 8, false, 2));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
}

// And now repeat six "with base character in input" octal cases but with
// auto-detected base. When octal cases below say "auto-detected base", they
// describe int64() base=0 parameter value. Current int64() implementation
// does auto-detect base as octal in all of these cases, but that might
// change, and some of these cases (e.g., "0") can also be viewed as a
// non-octal input case as well. These cases do not attempt to test base
// detection. They focus on other potential problems.

// auto-detected base; input: base, no post-base digits, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("0"));
const int64_t benchmark = 00;
CPPUNIT_ASSERT(t.int64(rv, 0));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// auto-detected base; input: base, no post-base digits, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("07"));
const int64_t benchmark = 0;
CPPUNIT_ASSERT(t.int64(rv, 0, false, 1));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("7"), t.buf());
}

// auto-detected base; input: base, one valid post-base digit, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("04"));
const int64_t benchmark = 04;
CPPUNIT_ASSERT(t.int64(rv, 0));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// auto-detected base; input: base, one valid post-base digit, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("046"));
const int64_t benchmark = 04;
CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("6"), t.buf());
}

// auto-detected base; input: base, one invalid post-base digit, unlimited
{
int64_t rv;
Parser::Tokenizer t(SBuf("08"));
const int64_t benchmark = 00;
CPPUNIT_ASSERT(t.int64(rv, 0));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
}

// auto-detected base; input: base, one invalid post-base digit, limited
{
int64_t rv;
Parser::Tokenizer t(SBuf("08"));
const int64_t benchmark = 00;
CPPUNIT_ASSERT(t.int64(rv, 0, false, 2));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
}

// this ends four-dimensional enumeration of octal cases described earlier

// check octal base auto-detection
{
int64_t rv;
Parser::Tokenizer t(SBuf("0128"));
const int64_t benchmark = 012;
CPPUNIT_ASSERT(t.int64(rv, 0));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("8"), t.buf());
}

// check that octal base auto-detection is not confused by repeated zeros
{
int64_t rv;
Parser::Tokenizer t(SBuf("00000000071"));
const int64_t benchmark = 00000000071;
CPPUNIT_ASSERT(t.int64(rv));
CPPUNIT_ASSERT_EQUAL(benchmark,rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// check that forced octal base is not confused by hex prefix
{
int64_t rv;
Parser::Tokenizer t(SBuf("0x5"));
const int64_t benchmark = 0;
CPPUNIT_ASSERT(t.int64(rv, 8));
CPPUNIT_ASSERT_EQUAL(benchmark, rv);
CPPUNIT_ASSERT_EQUAL(SBuf("x5"), t.buf());
}

// autodetect decimal base in shortest valid input
{
int64_t rv;
Parser::Tokenizer t(SBuf("1"));
const int64_t benchmark = 1;
CPPUNIT_ASSERT(t.int64(rv));
CPPUNIT_ASSERT_EQUAL(benchmark,rv);
CPPUNIT_ASSERT(t.buf().isEmpty());
}

// autodetect hex base in shortest valid input
{
int64_t rv;
Parser::Tokenizer t(SBuf("0X1"));
const int64_t benchmark = 0X1;
CPPUNIT_ASSERT(t.int64(rv));
CPPUNIT_ASSERT_EQUAL(benchmark,rv);
CPPUNIT_ASSERT(t.buf().isEmpty());
}

// invalid (when autodetecting base) input matching hex base
{
int64_t rv;
Parser::Tokenizer t(SBuf("0x"));
CPPUNIT_ASSERT(!t.int64(rv));
CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());
}

// invalid (when forcing hex base) input matching hex base
{
int64_t rv;
Parser::Tokenizer t(SBuf("0x"));
CPPUNIT_ASSERT(!t.int64(rv, 16));
CPPUNIT_ASSERT_EQUAL(SBuf("0x"), t.buf());
}

// invalid (when autodetecting base and limiting) input matching hex base
{
int64_t rv;
Parser::Tokenizer t(SBuf("0x2"));
CPPUNIT_ASSERT(!t.int64(rv, 0, true, 2));
CPPUNIT_ASSERT_EQUAL(SBuf("0x2"), t.buf());
}

// invalid (when forcing hex base and limiting) input matching hex base
{
int64_t rv;
Parser::Tokenizer t(SBuf("0x3"));
CPPUNIT_ASSERT(!t.int64(rv, 16, false, 2));
CPPUNIT_ASSERT_EQUAL(SBuf("0x3"), t.buf());
}

// API mismatch: don't eat leading space
{
int64_t rv;
Expand All @@ -264,6 +543,36 @@ TestTokenizer::testTokenizerInt64()
CPPUNIT_ASSERT_EQUAL(SBuf(" 1234"), t.buf());
}

// zero corner case: repeated zeros
{
int64_t rv;
Parser::Tokenizer t(SBuf("00"));
const int64_t benchmark = 00;
CPPUNIT_ASSERT(t.int64(rv));
CPPUNIT_ASSERT_EQUAL(benchmark,rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// zero corner case: "positive" zero
{
int64_t rv;
Parser::Tokenizer t(SBuf("+0"));
const int64_t benchmark = +0;
CPPUNIT_ASSERT(t.int64(rv));
CPPUNIT_ASSERT_EQUAL(benchmark,rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// zero corner case: "negative" zero
{
int64_t rv;
Parser::Tokenizer t(SBuf("-0"));
const int64_t benchmark = -0;
CPPUNIT_ASSERT(t.int64(rv));
CPPUNIT_ASSERT_EQUAL(benchmark,rv);
CPPUNIT_ASSERT_EQUAL(SBuf(""), t.buf());
}

// trailing spaces
{
int64_t rv;
Expand Down

0 comments on commit 8cc5e88

Please sign in to comment.