From 1106d2c0132a2f1027073e7a3571be2eb95d1305 Mon Sep 17 00:00:00 2001
From: John Lee
Date: Thu, 18 Jul 2024 14:23:15 -0400
Subject: [PATCH] Get PCRE working

---
 Makefile              |   2 +-
 libstuff/libstuff.cpp | 139 +++++++++++++++++++++++++++++++++++---
 libstuff/libstuff.h   |   6 +-
 plugins/MySQL.cpp     |   6 +-
 4 files changed, 138 insertions(+), 15 deletions(-)

diff --git a/Makefile b/Makefile
index 9490de652..284061735 100644
--- a/Makefile
+++ b/Makefile
@@ -33,7 +33,7 @@ INTERMEDIATEDIR = .build
 
 # We use the same library paths and required libraries for all binaries.
 LIBPATHS =-L$(PROJECT) -Lmbedtls/library -L/usr/lib/llvm-18/lib
-LIBRARIES =-Wl,--start-group -lbedrock -lstuff -Wl,--end-group -ldl -lpcrecpp -lpthread -lmbedtls -lmbedx509 -lmbedcrypto -lz -lm -fuse-ld=lld
+LIBRARIES =-Wl,--start-group -lbedrock -lstuff -Wl,--end-group -ldl -lpcre2-8 -lpthread -lmbedtls -lmbedx509 -lmbedcrypto -lz -lm -fuse-ld=lld
 
 # These targets aren't actual files.
 .PHONY: all test clustertest clean testplugin
diff --git a/libstuff/libstuff.cpp b/libstuff/libstuff.cpp
index a25b1a084..28f0a8223 100644
--- a/libstuff/libstuff.cpp
+++ b/libstuff/libstuff.cpp
@@ -39,7 +39,8 @@
 #endif
 #endif
 
-#include <pcrecpp.h> // sudo apt-get install libpcre++-dev
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h> // sudo apt-get install libpcre2-dev
 
 // Common error definitions
 #define S_errno errno
@@ -2805,25 +2806,147 @@ bool SIsValidSQLiteDateModifier(const string& modifier) {
     return true;
 }
 
-bool SREMatch(const string& regExp, const string& s) {
-    return pcrecpp::RE(regExp, pcrecpp::RE_Options().set_match_limit_recursion(1000)).FullMatch(s);
+// Shared implementation of both SREMatch overloads. Compiles `regExp` and requires it to match ALL of `s`, which is
+// the pcrecpp::RE::FullMatch contract these functions had before the PCRE2 port. When `match` is non-null and the
+// match succeeds, it receives the first capture group (as FullMatch did), or the whole match if the pattern has no
+// capture groups. Returns false if the pattern does not compile or does not match.
+static bool SREMatchInternal(const string& regExp, const string& s, string* match, const bool caseSensitive) {
+    int errorNumber = 0;
+    PCRE2_SIZE errorOffset = 0;
+    pcre2_code* re = pcre2_compile(
+        reinterpret_cast<PCRE2_SPTR>(regExp.c_str()), // the pattern
+        regExp.size(),                                // explicit length, so no reliance on NUL termination
+        caseSensitive ? 0 : PCRE2_CASELESS,           // compile options
+        &errorNumber,                                 // for error number
+        &errorOffset,                                 // for error offset
+        nullptr);                                     // use default compile context
+    if (!re) {
+        // A pattern that fails to compile never matches.
+        return false;
+    }
+
+    // Bound backtracking depth; this is the PCRE2 counterpart of pcrecpp's set_match_limit_recursion(1000).
+    pcre2_match_context* matchContext = pcre2_match_context_create(nullptr);
+    if (matchContext) {
+        pcre2_set_depth_limit(matchContext, 1000);
+    }
+
+    // PCRE2_ANCHORED | PCRE2_ENDANCHORED turn the search into a full match. The subject length is passed explicitly
+    // so that a subject containing NUL bytes is not silently truncated.
+    pcre2_match_data* matchData = pcre2_match_data_create_from_pattern(re, nullptr);
+    const int rc = pcre2_match(
+        re,                                      // the compiled pattern
+        reinterpret_cast<PCRE2_SPTR>(s.data()),  // the subject string
+        s.size(),                                // the length of the subject string
+        0,                                       // start at offset 0 in the subject
+        PCRE2_ANCHORED | PCRE2_ENDANCHORED,      // must match the entire subject
+        matchData,                               // block for storing the result
+        matchContext);                           // match context carrying the depth limit
+
+    const bool isMatch = rc >= 0;
+    if (isMatch && match) {
+        // The ovector holds [start, end) offset pairs: pair 0 is the whole match, pair 1 the first capture group.
+        const PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(matchData);
+        const size_t group = pcre2_get_ovector_count(matchData) > 1 ? 1 : 0;
+        const PCRE2_SIZE start = ovector[2 * group];
+        const PCRE2_SIZE end = ovector[2 * group + 1];
+        if (start == PCRE2_UNSET || end < start) {
+            // The group took no part in the match, so there is nothing to extract.
+            match->clear();
+        } else {
+            match->assign(s, start, end - start);
+        }
+    }
+
+    // All three free functions accept NULL, so no extra guards are needed here.
+    pcre2_match_data_free(matchData);
+    pcre2_match_context_free(matchContext);
+    pcre2_code_free(re);
+    return isMatch;
+}
+
+bool SREMatch(const string& regExp, const string& s, const bool caseSensitive) {
+    return SREMatchInternal(regExp, s, nullptr, caseSensitive);
 }
 
-bool SREMatch(const string& regExp, const string& s, string& match) {
-    return pcrecpp::RE(regExp, pcrecpp::RE_Options().set_match_limit_recursion(1000)).FullMatch(s, &match);
+bool SREMatch(const string& regExp, const string& s, string& match, const bool caseSensitive) {
+    return SREMatchInternal(regExp, s, &match, caseSensitive);
 }
 
 void SRedactSensitiveValues(string& s) {
+    // Replaces every non-overlapping match of `pattern` in `subject` with the literal text `replacement`. A pattern
+    // that fails to compile leaves `subject` unchanged.
+    auto globalReplace = [](const std::string& pattern, const std::string& replacement, std::string& subject) {
+        int errorNumber = 0;
+        PCRE2_SIZE errorOffset = 0;
+        pcre2_code* re = pcre2_compile(
+            reinterpret_cast<PCRE2_SPTR>(pattern.c_str()), // the pattern
+            pattern.size(),                                // explicit pattern length
+            0,                                             // default options
+            &errorNumber,                                  // for error number
+            &errorOffset,                                  // for error offset
+            nullptr);                                      // use default compile context
+        if (!re) {
+            return; // Compilation failed
+        }
+
+        pcre2_match_data* matchData = pcre2_match_data_create_from_pattern(re, nullptr);
+        const PCRE2_SPTR input = reinterpret_cast<PCRE2_SPTR>(subject.data());
+        const size_t length = subject.size();
+
+        // Build the output in a single pass over the unmodified input instead of editing `subject` in place: the
+        // buffer given to pcre2_match stays valid and no text is shifted repeatedly.
+        std::string result;
+        bool replaced = false;
+        size_t offset = 0;
+        while (offset <= length) {
+            const int rc = pcre2_match(re, input, length, offset, 0, matchData, nullptr);
+            if (rc < 0) {
+                break; // No more matches
+            }
+
+            const PCRE2_SIZE* ovector = pcre2_get_ovector_pointer(matchData);
+            const size_t start = ovector[0];
+            const size_t end = ovector[1];
+
+            // Keep the text between the previous match and this one, then emit the replacement.
+            result.append(subject, offset, start - offset);
+            result += replacement;
+            replaced = true;
+
+            if (end > start) {
+                offset = end;
+            } else {
+                // Empty match: copy one character through and step past it, otherwise the loop never advances.
+                if (start < length) {
+                    result += subject[start];
+                }
+                offset = start + 1;
+            }
+        }
+
+        if (replaced) {
+            // Carry over whatever follows the last match.
+            if (offset < length) {
+                result.append(subject, offset, std::string::npos);
+            }
+            subject.swap(result);
+        }
+
+        pcre2_match_data_free(matchData);
+        pcre2_code_free(re);
+    };
+
     // This code removing authTokens is a quick fix and should be removed once https://github.com/Expensify/Expensify/issues/144185 is done.
     // The message may be truncated midway through the authToken, so there may not be a closing quote (") at the end of
     // the authToken, so we need to optionally match the closing quote with a question mark (?).
-    pcrecpp::RE("\"authToken\":\".*\"?").GlobalReplace("\"authToken\":", &s);
+    globalReplace("\"authToken\":\".*\"?", "\"authToken\":", s);
 
     // Redact queries that contain encrypted fields since there's no value in logging them.
-    pcrecpp::RE("v[0-9]+:[0-9A-F]{10,}").GlobalReplace("", &s);
+    globalReplace("v[0-9]+:[0-9A-F]{10,}", "", s);
 
     // Remove anything inside "html" because we intentionally don't log chats.
-    pcrecpp::RE("\"html\":\".*\"").GlobalReplace("\"html\":\"\"", &s);
+    globalReplace("\"html\":\".*\"", "\"html\":\"\"", s);
 }
 
 SStopwatch::SStopwatch() {
diff --git a/libstuff/libstuff.h b/libstuff/libstuff.h
index ad32c2a03..c7bea9df4 100644
--- a/libstuff/libstuff.h
+++ b/libstuff/libstuff.h
@@ -388,8 +388,10 @@ bool SConstantTimeEquals(const string& secret, const string& userInput);
 bool SConstantTimeIEquals(const string& secret, const string& userInput);
 
 // Perform a full regex match. The '^' and '$' symbols are implicit.
-bool SREMatch(const string& regExp, const string& s);
-bool SREMatch(const string& regExp, const string& s, string& match);
+// Matching is case-sensitive unless `caseSensitive` is false. On success, `match` receives the first capture group, or
+// the whole match if `regExp` has no capture groups.
+bool SREMatch(const string& regExp, const string& s, const bool caseSensitive = true);
+bool SREMatch(const string& regExp, const string& s, string& match, const bool caseSensitive = true);
 
 // Redact values that should not be logged.
 void SRedactSensitiveValues(string& s);
diff --git a/plugins/MySQL.cpp b/plugins/MySQL.cpp
index 1cce8871f..54c3ea468 100644
--- a/plugins/MySQL.cpp
+++ b/plugins/MySQL.cpp
@@ -1,7 +1,5 @@
 #include "MySQL.h"
 
-#include <pcrecpp.h>
-
 #include
 #include
 
@@ -112,7 +110,7 @@ string MySQLPacket::serializeHandshake() {
     SAppend(handshake.payload, &capability_flags_2, 2); // capability_flags_2 (high 2 bytes)
 
     // The first byte is the length of the auth_plugin_name string. Followed by 10 NULL
-    // characters for the "reserved" field. Since we don't support CLIENT_SECURE_CONNECTION
+    // characters for the "reserved" field. Since we don't support CLIENT_SECURE_CONNECTION
     // in our capabilities we can skip auth-plugin-data-part-2
     // https://dev.mysql.com/doc/internals/en/client-wants-native-server-wants-old.html
     // (Initial Handshake Packet)
@@ -271,7 +269,7 @@ void BedrockPlugin_MySQL::onPortRecv(STCPManager::Socket* s, SData& request) {
             // See if it's asking for a global variable
             string varName;
             string regExp = "^(?:(?:SELECT\\s+)?@@(?:\\w+\\.)?|SHOW VARIABLES LIKE ')(\\w+).*$";
-            if (pcrecpp::RE(regExp, pcrecpp::RE_Options().set_caseless(true)).FullMatch(query, &varName)) {
+            if (SREMatch(regExp, query, varName, false)) {
                 // Loop across and look for it
                 SQResult result;
                 result.headers.push_back(varName);