diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 28458b2..9c8eaaa 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -17,18 +17,32 @@ jobs: strategy: matrix: cmake_build_type: [Asan, Release] - runs-on: ubuntu-latest + runs-on: ubuntu-24.04 steps: - - uses: actions/checkout@v3 + - name: Checkout + uses: actions/checkout@v3 + - name: Install dependencies run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + sudo add-apt-repository "deb http://apt.llvm.org/noble/ llvm-toolchain-noble-18 main" sudo apt update sudo apt install build-essential + sudo apt install clang-format-18 clang-tidy-18 + - name: Setup ccache uses: hendrikmuhs/ccache-action@v1.2 with: create-symlink: true - key: ${{ github.job }}-${{ matrix.os }} + key: ${{ github.job }}-${{ matrix.cmake_build_type }} + + - name: Clang-Format + working-directory: ${{github.workspace}}/ports/cpp + run: | + find source test -iname '*.hpp' -o -iname '*.cpp' \ + | xargs clang-format-18 -Werror --dry-run \ + --fallback-style=Google --verbose + - name: Configure working-directory: ${{github.workspace}}/ports/cpp run: | @@ -38,11 +52,20 @@ jobs: -DANTLR4C3_DEVELOPER=ON \ -DCMAKE_BUILD_TYPE=${{ matrix.cmake_build_type }} \ .. 
+ - name: Build working-directory: ${{github.workspace}}/ports/cpp/build run: make + - name: Unit Test working-directory: ${{github.workspace}}/ports/cpp/build/test run: | ctest cat Testing/Temporary/LastTest.log + + - name: Clang-Tidy on sources + if: matrix.cmake_build_type == 'Release' + working-directory: ${{github.workspace}}/ports/cpp + run: | + find source -iname '*.hpp' -o -iname '*.cpp' \ + | xargs clang-tidy-18 -p build/compile_commands.json diff --git a/ports/cpp/.clang-format b/ports/cpp/.clang-format new file mode 100644 index 0000000..e4edb9b --- /dev/null +++ b/ports/cpp/.clang-format @@ -0,0 +1,17 @@ +--- +Language: Cpp +BasedOnStyle: Google + +ColumnLimit: 100 +AccessModifierOffset: -2 + +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false + +DerivePointerAlignment: false +BreakConstructorInitializers: BeforeComma +AlignAfterOpenBracket: BlockIndent + +BinPackArguments: false +BinPackParameters: false diff --git a/ports/cpp/.clang-tidy b/ports/cpp/.clang-tidy new file mode 100644 index 0000000..5db4aee --- /dev/null +++ b/ports/cpp/.clang-tidy @@ -0,0 +1,22 @@ +--- +Checks: '-*, + bugprone-*, + cert-*, + concurrency-*, + cppcoreguidelines-*, + google-*, + hicpp-*, + misc-*, + -misc-no-recursion, + modernize-*, + -modernize-use-trailing-return-type, + performance-*, + readability-*' +WarningsAsErrors: '*' +HeaderFileExtensions: + - hpp +ImplementationFileExtensions: + - cpp +HeaderFilterRegex: '' +FormatStyle: '.clang-format' +SystemHeaders: false diff --git a/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp b/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp index 98599b6..e381c4b 100644 --- a/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp +++ b/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp @@ -1,317 +1,329 @@ // // CodeCompletionCore.cpp // -// C++ port of antlr4-c3 (TypeScript) by Mike Lischke +// C++ port of antlr4-c3 (TypeScript) by Mike Lischke // Licensed under the 
MIT License. -// +// #include "CodeCompletionCore.hpp" -using namespace antlr4; -using namespace std; +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace c3 { -// ---------------------------------------------------------------------------- -// MARK: - Utilities -// ---------------------------------------------------------------------------- - -static std::vector longestCommonPrefix(std::vector a, std::vector b) -{ - size_t i = 0; - for (; i < std::min(a.size(), b.size()); i++) { - if (a[i] != b[i]) { - break; - } - } - - return std::vector(a.begin(), a.begin() + i); +namespace { + +std::vector longestCommonPrefix(std::vector lhs, std::vector rhs) { + size_t index = 0; + for (; index < std::min(lhs.size(), rhs.size()); index++) { + if (lhs[index] != rhs[index]) { + break; + } + } + return { + lhs.begin(), + std::next(lhs.begin(), static_cast(index)), + }; } +} // namespace - - -// ---------------------------------------------------------------------------- -// MARK: - Static -// ---------------------------------------------------------------------------- - -std::map c3::CodeCompletionCore::followSetsByATN = {}; +std::map // NOLINT + c3::CodeCompletionCore::followSetsByATN = {}; // Matches ATNStateType enum -std::vector c3::CodeCompletionCore::atnStateTypeMap = { - "invalid", - "basic", - "rule start", - "block start", - "plus block start", - "star block start", - "token start", - "rule stop", - "block end", - "star loop back", - "star loop entry", - "plus loop back", - "loop end", -}; - - - -// ---------------------------------------------------------------------------- -// MARK: - Construction -// ---------------------------------------------------------------------------- - -CodeCompletionCore::CodeCompletionCore(antlr4::Parser * parser) 
-: parser(parser), atn(parser->getATN()), vocabulary(parser->getVocabulary()), ruleNames(parser->getRuleNames()) -{ - +std::vector c3::CodeCompletionCore::atnStateTypeMap // NOLINT + { + "invalid", + "basic", + "rule start", + "block start", + "plus block start", + "star block start", + "token start", + "rule stop", + "block end", + "star loop back", + "star loop entry", + "plus loop back", + "loop end", + }; + +CodeCompletionCore::CodeCompletionCore(antlr4::Parser* parser) + : parser(parser) + , atn(parser->getATN()) + , vocabulary(parser->getVocabulary()) + , ruleNames(parser->getRuleNames()) + , timeoutMS(0) + , cancel(nullptr) { } - - - - -// ---------------------------------------------------------------------------- -// MARK: - Collecting -// ---------------------------------------------------------------------------- - -CandidatesCollection CodeCompletionCore::collectCandidates(size_t caretTokenIndex, antlr4::ParserRuleContext * context, size_t timeoutMS, std::atomic * cancel) -{ - shortcutMap.clear(); - candidates.rules.clear(); - candidates.tokens.clear(); - candidates.cancelled = false; - statesProcessed = 0; - precedenceStack = {}; - timeoutStart = std::chrono::steady_clock::now(); - this->cancel = cancel; - this->timeoutMS = timeoutMS; - - tokenStartIndex = context ? context->start->getTokenIndex() : 0; - const auto tokenStream = parser->getTokenStream(); - - tokens = {}; - size_t offset = tokenStartIndex; - while (true) { - antlr4::Token * token = tokenStream->get(offset++); - if (token->getChannel() == Token::DEFAULT_CHANNEL) { - tokens.push_back(token); - - if (token->getTokenIndex() >= caretTokenIndex || token->getType() == Token::EOF) { - break; - } - } - - // Do not check for the token index here, as we want to end with the first unhidden token on or after - // the caret. - if (token->getType() == Token::EOF) { - break; - } - } - - RuleWithStartTokenList callStack = {}; - size_t startRule = context ? 
context->getRuleIndex() : 0; - bool cancelled = false; - - processRule(atn.ruleToStartState[startRule], 0, callStack, 0, 0, cancelled); - candidates.cancelled = cancelled; - - if (showResult) { - if (cancelled) { - std::cout << "*** TIMED OUT ***\n"; - } - - std::cout << "States processed: " << std::to_string(statesProcessed) << "\n\n"; - - std::cout << "Collected rules:\n"; - for (const auto & [tokenIndex, rule] : candidates.rules) { - std::cout << ruleNames[tokenIndex]; - std::cout << ", path: "; - - for (size_t token: rule.ruleList) { - std::cout << ruleNames[token] + " "; - } - } - std::cout << "\n\n"; - - std::set sortedTokens; - for (auto entry: candidates.tokens) { - size_t token = entry.first; - std::vector tokenList = entry.second; - - std::string value = vocabulary.getDisplayName(token); - for (size_t following: tokenList) { - value += " " + vocabulary.getDisplayName(following); - } - - sortedTokens.insert(value); - } - - std::cout << "Collected tokens:\n"; - for (std::string symbol: sortedTokens) { - std::cout << symbol; - } - std::cout << "\n\n"; - } - - return candidates; +CandidatesCollection CodeCompletionCore::collectCandidates( // NOLINT + size_t caretTokenIndex, + antlr4::ParserRuleContext* context, + size_t timeoutMS, + std::atomic* cancel +) { + shortcutMap.clear(); + candidates.rules.clear(); + candidates.tokens.clear(); + candidates.cancelled = false; + statesProcessed = 0; + precedenceStack = {}; + timeoutStart = std::chrono::steady_clock::now(); + this->cancel = cancel; + this->timeoutMS = timeoutMS; + + tokenStartIndex = (context != nullptr) ? 
context->start->getTokenIndex() : 0; + auto* const tokenStream = parser->getTokenStream(); + + tokens = {}; + size_t offset = tokenStartIndex; + while (true) { + antlr4::Token* token = tokenStream->get(offset++); + if (token->getChannel() == antlr4::Token::DEFAULT_CHANNEL) { + tokens.push_back(token); + + if (token->getTokenIndex() >= caretTokenIndex || token->getType() == antlr4::Token::EOF) { + break; + } + } + + // Do not check for the token index here, as we want to end with the first + // unhidden token on or after the caret. + if (token->getType() == antlr4::Token::EOF) { + break; + } + } + + RuleWithStartTokenList callStack = {}; + const size_t startRule = (context != nullptr) ? context->getRuleIndex() : 0; + bool cancelled = false; + + processRule(atn.ruleToStartState[startRule], 0, callStack, 0, 0, cancelled); + candidates.cancelled = cancelled; + + if (showResult) { + if (cancelled) { + std::cout << "*** TIMED OUT ***\n"; + } + + std::cout << "States processed: " << std::to_string(statesProcessed) << "\n\n"; + + std::cout << "Collected rules:\n"; + for (const auto& [tokenIndex, rule] : candidates.rules) { + std::cout << ruleNames[tokenIndex]; + std::cout << ", path: "; + + for (const size_t token : rule.ruleList) { + std::cout << ruleNames[token] + " "; + } + } + std::cout << "\n\n"; + + std::set sortedTokens; + for (const auto& entry : candidates.tokens) { + const size_t token = entry.first; + const std::vector tokenList = entry.second; + + std::string value = vocabulary.getDisplayName(token); + for (const size_t following : tokenList) { + value += " " + vocabulary.getDisplayName(following); + } + + sortedTokens.insert(value); + } + + std::cout << "Collected tokens:\n"; + for (const std::string& symbol : sortedTokens) { + std::cout << symbol; + } + std::cout << "\n\n"; + } + + return candidates; } - - /** - * Checks if the predicate associated with the given transition evaluates to true. 
+ * Checks if the predicate associated with the given transition evaluates to + * true. * * @param transition The transition to check. * @returns the evaluation result of the predicate. */ -bool CodeCompletionCore::checkPredicate(const antlr4::atn::PredicateTransition * transition) -{ - return transition->getPredicate()->eval(parser, &ParserRuleContext::EMPTY); +bool CodeCompletionCore::checkPredicate(const antlr4::atn::PredicateTransition* transition) { + return transition->getPredicate()->eval(parser, &antlr4::ParserRuleContext::EMPTY); } - - /** - * Walks the rule chain upwards or downwards (depending on translateRulesTopDown) to see if that matches any of the - * preferred rules. If found, that rule is added to the collection candidates and true is returned. + * Walks the rule chain upwards or downwards (depending on + * translateRulesTopDown) to see if that matches any of the preferred rules. If + * found, that rule is added to the collection candidates and true is returned. * * @param ruleWithStartTokenList The list to convert. * @returns true if any of the stack entries was converted. */ -bool CodeCompletionCore::translateStackToRuleIndex(RuleWithStartTokenList const& ruleWithStartTokenList) -{ - if (preferredRules.size() == 0) { - return false; - } - - // Change the direction we iterate over the rule stack - if (translateRulesTopDown) { - // Loop over the rule stack from lowest to highest rule level. This will prioritize a lower preferred rule - // if it is a child of a higher one that is also a preferred rule. - for (int64_t i = ruleWithStartTokenList.size() - 1; i >= 0; i--) { - if (translateToRuleIndex(i, ruleWithStartTokenList)) { - return true; - } - } - } else { - // Loop over the rule stack from highest to lowest rule level. This will prioritize a higher preferred rule - // if it contains a lower one that is also a preferred rule. 
- for (size_t i = 0; i < ruleWithStartTokenList.size(); i++) { - if (translateToRuleIndex(i, ruleWithStartTokenList)) { - return true; - } - } - } - - return false; +bool CodeCompletionCore::translateStackToRuleIndex( + RuleWithStartTokenList const& ruleWithStartTokenList +) { + if (preferredRules.empty()) { + return false; + } + + // Change the direction we iterate over the rule stack + auto forward = std::views::iota(0U, ruleWithStartTokenList.size()); + auto backward = forward | std::views::reverse; + if (translateRulesTopDown) { + // Loop over the rule stack from lowest to highest rule level. This will + // prioritize a lower preferred rule if it is a child of a higher one that + // is also a preferred rule. + for (const auto index : backward) { + if (translateToRuleIndex(index, ruleWithStartTokenList)) { + return true; + } + } + } else { + // Loop over the rule stack from highest to lowest rule level. This will + // prioritize a higher preferred rule if it contains a lower one that is + // also a preferred rule. + for (const auto index : forward) { + if (translateToRuleIndex(index, ruleWithStartTokenList)) { + return true; + } + } + } + + return false; } - - /** - * Given the index of a rule from a rule chain, check if that matches any of the preferred rules. If it matches, - * that rule is added to the collection candidates and true is returned. + * Given the index of a rule from a rule chain, check if that matches any of the + * preferred rules. If it matches, that rule is added to the collection + * candidates and true is returned. * * @param i The rule index. * @param ruleWithStartTokenList The list to check. * @returns true if the specified rule is in the list of preferred rules. 
*/ -bool CodeCompletionCore::translateToRuleIndex(size_t i, RuleWithStartTokenList const& ruleWithStartTokenList) -{ - RuleWithStartToken rwst = ruleWithStartTokenList[i]; - - if (preferredRules.contains(rwst.ruleIndex)) { - - // Add the rule to our candidates list along with the current rule path, - // but only if there isn't already an entry like that. - std::vector path; - { - for (size_t subrangeIndex = 0; subrangeIndex < i; subrangeIndex++) { - path.push_back(ruleWithStartTokenList[subrangeIndex].ruleIndex); - } - } - - - bool addNew = true; - - for (auto const& [cRuleEntryRuleIndex, cRuleEntryCandidateRule]: candidates.rules) { - if (cRuleEntryRuleIndex != rwst.ruleIndex || cRuleEntryCandidateRule.ruleList.size() != path.size()) { - continue; - } - - // Found an entry for this rule. Same path? - bool samePath = true; - for (size_t pathI = 0; pathI < path.size(); pathI++) { - if (path[pathI] == cRuleEntryCandidateRule.ruleList[pathI]) { - samePath = false; - break; - } - } - - // If same path, then don't add a new (duplicate) entry. - if (samePath) { - addNew = false; - break; - } - } - - if (addNew) { - candidates.rules[rwst.ruleIndex] = { - .startTokenIndex = rwst.startTokenIndex, - .ruleList = path - }; - if (showDebugOutput) { - std::cout << "=====> collected: " << ruleNames[rwst.ruleIndex] << "\n"; - } - } - - return true; - } - - return false; +bool CodeCompletionCore::translateToRuleIndex( + size_t index, RuleWithStartTokenList const& ruleWithStartTokenList +) { + const RuleWithStartToken rwst = ruleWithStartTokenList[index]; + + if (preferredRules.contains(rwst.ruleIndex)) { + // Add the rule to our candidates list along with the current rule path, + // but only if there isn't already an entry like that. 
+ std::vector path; + path.reserve(index); + for (size_t subrangeIndex = 0; subrangeIndex < index; subrangeIndex++) { + path.push_back(ruleWithStartTokenList[subrangeIndex].ruleIndex); + } + + bool addNew = true; + + for (auto const& [cRuleEntryRuleIndex, cRuleEntryCandidateRule] : candidates.rules) { + if (cRuleEntryRuleIndex != rwst.ruleIndex || + cRuleEntryCandidateRule.ruleList.size() != path.size()) { + continue; + } + + // Found an entry for this rule. Same path? + bool samePath = true; + for (size_t pathI = 0; pathI < path.size(); pathI++) { + if (path[pathI] == cRuleEntryCandidateRule.ruleList[pathI]) { + samePath = false; + break; + } + } + + // If same path, then don't add a new (duplicate) entry. + if (samePath) { + addNew = false; + break; + } + } + + if (addNew) { + candidates.rules[rwst.ruleIndex] = { + .startTokenIndex = rwst.startTokenIndex, + .ruleList = path, + }; + if (showDebugOutput) { + std::cout << "=====> collected: " << ruleNames[rwst.ruleIndex] << "\n"; + } + } + + return true; + } + + return false; } - - /** - * This method follows the given transition and collects all symbols within the same rule that directly follow it - * without intermediate transitions to other rules and only if there is a single symbol for a transition. + * This method follows the given transition and collects all symbols within the + * same rule that directly follow it without intermediate transitions to other + * rules and only if there is a single symbol for a transition. * * @param transition The transition from which to start. * @returns A list of token types.
*/ -std::vector CodeCompletionCore::getFollowingTokens(const antlr4::atn::Transition * transition) -{ - std::vector result = {}; - - std::vector pipeline = { transition->target }; - - while (pipeline.size() > 0) { - antlr4::atn::ATNState * state = pipeline.back(); - pipeline.pop_back(); - - if (state) { - for (antlr4::atn::ConstTransitionPtr& outgoing: state->transitions) { - if (outgoing->getTransitionType() == antlr4::atn::TransitionType::ATOM) { - if (!outgoing->isEpsilon()) { - std::vector list = outgoing->label().toList(); - if (list.size() == 1 && !ignoredTokens.contains(list[0])) { - result.push_back(list[0]); - pipeline.push_back(outgoing->target); - } - } else { - pipeline.push_back(outgoing->target); - } - } - } - } - } - - return result; +std::vector CodeCompletionCore::getFollowingTokens(const antlr4::atn::Transition* transition +) const { + std::vector result = {}; + + std::vector pipeline = {transition->target}; + + while (!pipeline.empty()) { + antlr4::atn::ATNState* state = pipeline.back(); + pipeline.pop_back(); + + if (state != nullptr) { + for (const antlr4::atn::ConstTransitionPtr& outgoing : state->transitions) { + if (outgoing->getTransitionType() == antlr4::atn::TransitionType::ATOM) { + if (!outgoing->isEpsilon()) { + std::vector list = outgoing->label().toList(); + if (list.size() == 1 && !ignoredTokens.contains(list[0])) { + result.push_back(list[0]); + pipeline.push_back(outgoing->target); + } + } else { + pipeline.push_back(outgoing->target); + } + } + } + } + } + + return result; } - - /** * Entry point for the recursive follow set collection function. * @@ -319,504 +331,558 @@ std::vector CodeCompletionCore::getFollowingTokens(const antlr4::atn::Tr * @param stop Stop state. * @returns Follow sets. 
*/ -FollowSetsHolder CodeCompletionCore::determineFollowSets(antlr4::atn::ATNState * start, antlr4::atn::ATNState * stop) -{ - std::vector sets = {}; - std::vector stateStack = {}; - std::vector ruleStack = {}; - bool isExhaustive = collectFollowSets(start, stop, sets, stateStack, ruleStack); - - // Sets are split by path to allow translating them to preferred rules. But for quick hit tests - // it is also useful to have a set with all symbols combined. - antlr4::misc::IntervalSet combined; - for (auto set: sets) { - combined.addAll(set.intervals); - } - - return { - .sets = sets, - .combined = combined, - .isExhaustive = isExhaustive, - }; +FollowSetsHolder CodeCompletionCore::determineFollowSets( + antlr4::atn::ATNState* start, antlr4::atn::ATNState* stop +) { + std::vector sets = {}; + std::vector stateStack = {}; + std::vector ruleStack = {}; + const bool isExhaustive = collectFollowSets(start, stop, sets, stateStack, ruleStack); + + // Sets are split by path to allow translating them to preferred rules. But + // for quick hit tests it is also useful to have a set with all symbols + // combined. + antlr4::misc::IntervalSet combined; + for (const auto& set : sets) { + combined.addAll(set.intervals); + } + + return { + .sets = sets, + .combined = combined, + .isExhaustive = isExhaustive, + }; } - - /** - * Collects possible tokens which could be matched following the given ATN state. This is essentially the same - * algorithm as used in the LL1Analyzer class, but here we consider predicates also and use no parser rule context. + * Collects possible tokens which could be matched following the given ATN + * state. This is essentially the same algorithm as used in the LL1Analyzer + * class, but here we consider predicates also and use no parser rule context. * * @param s The state to continue from. * @param stopState The state which ends the collection routine. * @param followSets A pass through parameter to add found sets to. 
* @param stateStack A stack to avoid endless recursions. * @param ruleStack The current rule stack. - * @returns true if the follow sets is exhaustive, i.e. we terminated before the rule end was reached, so no - * subsequent rules could add tokens + * @returns true if the follow sets is exhaustive, i.e. we terminated before the + * rule end was reached, so no subsequent rules could add tokens */ -bool CodeCompletionCore::collectFollowSets(antlr4::atn::ATNState * s, antlr4::atn::ATNState * stopState, std::vector& followSets, std::vector& stateStack, std::vector& ruleStack) -{ - if (std::find(stateStack.begin(), stateStack.end(), s) != stateStack.end()) { - return true; - } - stateStack.push_back(s); - - if (s == stopState || s->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { - stateStack.pop_back(); - return false; - } - - bool isExhaustive = true; - for (antlr4::atn::ConstTransitionPtr& tp: s->transitions) { - const antlr4::atn::Transition * transition = tp.get(); - - if (transition->getTransitionType() == antlr4::atn::TransitionType::RULE) { - const antlr4::atn::RuleTransition * ruleTransition = static_cast(transition); - - if (std::find(ruleStack.begin(), ruleStack.end(), ruleTransition->target->ruleIndex) != ruleStack.end()) { - continue; - } - - ruleStack.push_back(ruleTransition->target->ruleIndex); - - bool ruleFollowSetsIsExhaustive = collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); - ruleStack.pop_back(); - - // If the subrule had an epsilon transition to the rule end, the tokens added to - // the follow set are non-exhaustive and we should continue processing subsequent transitions post-rule - if (!ruleFollowSetsIsExhaustive) { - bool nextStateFollowSetsIsExhaustive = collectFollowSets(ruleTransition->followState, stopState, followSets, stateStack, ruleStack); - isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; - } - - } else if (transition->getTransitionType() == 
antlr4::atn::TransitionType::PREDICATE) { - if (checkPredicate(static_cast(transition))) { - bool nextStateFollowSetsIsExhaustive = collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); - isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; - } - } else if (transition->isEpsilon()) { - bool nextStateFollowSetsIsExhaustive = collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); - isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; - } else if (transition->getTransitionType() == antlr4::atn::TransitionType::WILDCARD) { - FollowSetWithPath set; - set.intervals = antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType); - set.path = ruleStack; - followSets.push_back(set); - } else { - antlr4::misc::IntervalSet label = transition->label(); - if (label.size() > 0) { - if (transition->getTransitionType() == antlr4::atn::TransitionType::NOT_SET) { - label = label.complement(antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType)); - } - FollowSetWithPath set; - set.intervals = label; - set.path = ruleStack; - set.following = getFollowingTokens(transition); - followSets.push_back(set); - } - } - } - stateStack.pop_back(); - - return isExhaustive; +bool CodeCompletionCore::collectFollowSets( // NOLINT + antlr4::atn::ATNState* state, + antlr4::atn::ATNState* stopState, + std::vector& followSets, + std::vector& stateStack, + std::vector& ruleStack +) { + if (std::find(stateStack.begin(), stateStack.end(), state) != stateStack.end()) { + return true; + } + + stateStack.push_back(state); + + if (state == stopState || state->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { + stateStack.pop_back(); + return false; + } + + bool isExhaustive = true; + for (const antlr4::atn::ConstTransitionPtr& transitionPtr : state->transitions) { + const antlr4::atn::Transition* transition = transitionPtr.get(); + + if (transition->getTransitionType() 
== antlr4::atn::TransitionType::RULE) { + const auto* ruleTransition = dynamic_cast(transition); + + if (std::find(ruleStack.begin(), ruleStack.end(), ruleTransition->target->ruleIndex) != + ruleStack.end()) { + continue; + } + + ruleStack.push_back(ruleTransition->target->ruleIndex); + + const bool ruleFollowSetsIsExhaustive = + collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); + ruleStack.pop_back(); + + // If the subrule had an epsilon transition to the rule end, the tokens + // added to the follow set are non-exhaustive and we should continue + // processing subsequent transitions post-rule + if (!ruleFollowSetsIsExhaustive) { + const bool nextStateFollowSetsIsExhaustive = collectFollowSets( + ruleTransition->followState, stopState, followSets, stateStack, ruleStack + ); + isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; + } + + } else if (transition->getTransitionType() == antlr4::atn::TransitionType::PREDICATE) { + if (checkPredicate(dynamic_cast(transition))) { + const bool nextStateFollowSetsIsExhaustive = + collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); + isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; + } + } else if (transition->isEpsilon()) { + const bool nextStateFollowSetsIsExhaustive = + collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); + isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; + } else if (transition->getTransitionType() == antlr4::atn::TransitionType::WILDCARD) { + FollowSetWithPath set; + set.intervals = antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, static_cast(atn.maxTokenType) + ); + set.path = ruleStack; + followSets.push_back(set); + } else { + antlr4::misc::IntervalSet label = transition->label(); + if (label.size() > 0) { + if (transition->getTransitionType() == antlr4::atn::TransitionType::NOT_SET) { + label = label.complement(antlr4::misc::IntervalSet::of( 
+ antlr4::Token::MIN_USER_TOKEN_TYPE, static_cast(atn.maxTokenType) + )); + } + FollowSetWithPath set; + set.intervals = label; + set.path = ruleStack; + set.following = getFollowingTokens(transition); + followSets.push_back(set); + } + } + } + stateStack.pop_back(); + + return isExhaustive; } - - /** - * Walks the ATN for a single rule only. It returns the token stream position for each path that could be matched - * in this rule. - * The result can be empty in case we hit only non-epsilon transitions that didn't match the current input or if we - * hit the caret position. + * Walks the ATN for a single rule only. It returns the token stream position + * for each path that could be matched in this rule. The result can be empty in + * case we hit only non-epsilon transitions that didn't match the current input + * or if we hit the caret position. * * @param startState The start state. * @param tokenListIndex The token index we are currently at. * @param callStack The stack that indicates where in the ATN we are currently. * @param precedence The current precedence level. - * @param indentation A value to determine the current indentation when doing debug prints. - * @returns the set of token stream indexes (which depend on the ways that had to be taken). + * @param indentation A value to determine the current indentation when doing + * debug prints. + * @returns the set of token stream indexes (which depend on the ways that had + * to be taken). */ -RuleEndStatus CodeCompletionCore::processRule(antlr4::atn::RuleStartState * startState, size_t tokenListIndex, RuleWithStartTokenList& callStack, int precedence, size_t indentation, bool& cancelled) -{ - // Cancelled by external caller? 
- if (cancel && cancel->load()) { - cancelled = true; - return {}; - } - - // Check for timeout - cancelled = false; - if (timeoutMS > 0) { - std::chrono::duration timeout(timeoutMS); - if (std::chrono::steady_clock::now() - timeoutStart > timeout) { - cancelled = true; - return {}; - } - } - - - // Start with rule specific handling before going into the ATN walk. - - // Check first if we've taken this path with the same input before. - std::map positionMap; - if (!shortcutMap.contains(startState->ruleIndex)) { - shortcutMap[startState->ruleIndex] = positionMap; - } else { - positionMap = shortcutMap[startState->ruleIndex]; - if (positionMap.contains(tokenListIndex)) { - if (showDebugOutput) { - std::cout << "=====> shortcut" << "\n"; - } - - return positionMap[tokenListIndex]; - } - } - - RuleEndStatus result; - - // For rule start states we determine and cache the follow set, which gives us 3 advantages: - // 1) We can quickly check if a symbol would be matched when we follow that rule. We can so check in advance - // and can save us all the intermediate steps if there is no match. - // 2) We'll have all symbols that are collectable already together when we are at the caret on rule enter. - // 3) We get this lookup for free with any 2nd or further visit of the same rule, which often happens - // in non trivial grammars, especially with (recursive) expressions and of course when invoking code - // completion multiple times. 
- - if (!followSetsByATN.contains(typeid(parser))) { - followSetsByATN[typeid(parser)] = FollowSetsPerState(); - } - - FollowSetsPerState &setsPerState = followSetsByATN[typeid(parser)]; - if (!setsPerState.contains(startState->stateNumber)) { - auto stop = atn.ruleToStopState[startState->ruleIndex]; - auto followSets = determineFollowSets(startState, stop); - setsPerState[startState->stateNumber] = followSets; - } - FollowSetsHolder followSets = setsPerState[startState->stateNumber]; - - - // Get the token index where our rule starts from our (possibly filtered) token list - size_t startTokenIndex = tokens[tokenListIndex]->getTokenIndex(); - - callStack.push_back({ - .startTokenIndex = startTokenIndex, - .ruleIndex = startState->ruleIndex, - }); - - if (tokenListIndex >= tokens.size() - 1) { // At caret? - if (preferredRules.contains(startState->ruleIndex)) { - // No need to go deeper when collecting entries and we reach a rule that we want to collect anyway. - translateStackToRuleIndex(callStack); - } else { - // Convert all follow sets to either single symbols or their associated preferred rule and add - // the result to our candidates list. - for (FollowSetWithPath &set: followSets.sets) { - RuleWithStartTokenList fullPath = callStack; - - // Rules derived from our followSet will always start at the same token as our current rule. - RuleWithStartTokenList followSetPath; - for (size_t rule: set.path) { - followSetPath.push_back({ - .startTokenIndex = startTokenIndex, - .ruleIndex = rule, - }); - } - - fullPath.insert(fullPath.end(), followSetPath.begin(), followSetPath.end()); - - if (!translateStackToRuleIndex(fullPath)) { - for (ssize_t symbol: set.intervals.toList()) { - if (!ignoredTokens.contains((size_t)symbol)) { - - if (showDebugOutput) { - std::cout << "=====> collected: " << vocabulary.getDisplayName(symbol) << "\n"; - } - if (!candidates.tokens.contains(symbol)) { - // Following is empty if there is more than one entry in the set. 
- candidates.tokens[symbol] = set.following; - } else { - // More than one following list for the same symbol. - if (candidates.tokens[symbol] != set.following) { - candidates.tokens[symbol] = {}; - } - } - - } - } - } - } - } - - if (!followSets.isExhaustive) { - // If we're at the caret but the follow sets is non-exhaustive (empty or all tokens are optional), - // we should continue to collect tokens following this rule - result.insert(tokenListIndex); - } - - callStack.pop_back(); - - return result; - - } else { - // Process the rule if we either could pass it without consuming anything (epsilon transition) - // or if the current input symbol will be matched somewhere after this entry point. - // Otherwise stop here. - size_t currentSymbol = tokens[tokenListIndex]->getType(); - if (followSets.isExhaustive && !followSets.combined.contains(currentSymbol)) { - callStack.pop_back(); - - return result; - } - } - - if (startState->isLeftRecursiveRule) { - precedenceStack.push_back(precedence); - } - - // The current state execution pipeline contains all yet-to-be-processed ATN states in this rule. - // For each such state we store the token index + a list of rules that lead to it. - std::vector statePipeline; - - // Bootstrap the pipeline. 
- statePipeline.push_back({ - .state = startState, - .tokenListIndex = tokenListIndex - }); - - while (statePipeline.size() > 0) { - if (cancel && cancel->load()) { - cancelled = true; - return {}; - } - - - PipelineEntry currentEntry = statePipeline.back(); - statePipeline.pop_back(); - ++statesProcessed; - - size_t currentSymbol = tokens[currentEntry.tokenListIndex]->getType(); - - bool atCaret = currentEntry.tokenListIndex >= tokens.size() - 1; - if (showDebugOutput) { - printDescription(indentation, currentEntry.state, generateBaseDescription(currentEntry.state), currentEntry.tokenListIndex); - if (showRuleStack) { - printRuleState(callStack); - } - } - - if (currentEntry.state->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { - // Record the token index we are at, to report it to the caller. - result.insert(currentEntry.tokenListIndex); - continue; - } - - - // We simulate here the same precedence handling as the parser does, which uses hard coded values. - // For rules that are not left recursive this value is ignored (since there is no precedence transition). 
- for (antlr4::atn::ConstTransitionPtr& transition: currentEntry.state->transitions) { - - switch (transition->getTransitionType()) { - case antlr4::atn::TransitionType::RULE: { - const atn::RuleTransition * ruleTransition = static_cast(transition.get()); - atn::RuleStartState * ruleStartState = static_cast(ruleTransition->target); - bool innerCancelled = false; - RuleEndStatus endStatus = processRule(ruleStartState, currentEntry.tokenListIndex, callStack, ruleTransition->precedence, indentation + 1, innerCancelled); - if (innerCancelled) { - cancelled = true; - return {}; - } - - for (size_t position: endStatus) { - statePipeline.push_back({ - .state = ruleTransition->followState, - .tokenListIndex = position, - }); - } - break; - } - - case antlr4::atn::TransitionType::PREDICATE: { - const atn::PredicateTransition * predTransition = static_cast(transition.get()); - if (checkPredicate(predTransition)) { - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex, - }); - } - break; - } - - case antlr4::atn::TransitionType::PRECEDENCE: { - const atn::PrecedencePredicateTransition * predTransition = static_cast(transition.get()); - if (predTransition->getPrecedence() >= precedenceStack[precedenceStack.size() - 1]) { - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex, - }); - } - - break; - } - - case antlr4::atn::TransitionType::WILDCARD: { - if (atCaret) { - if (!translateStackToRuleIndex(callStack)) { - for (auto token: antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType).toList()) { - if (!ignoredTokens.contains(token)) { - candidates.tokens[token] = {}; - } - } - } - } else { - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex + 1, - }); - } - break; - } - - default: { - if (transition->isEpsilon()) { - // Jump over simple states with a single outgoing epsilon transition. 
- statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex, - }); - continue; - } - - antlr4::misc::IntervalSet set = transition->label(); - if (set.size() > 0) { - if (transition->getTransitionType() == antlr4::atn::TransitionType::NOT_SET) { - set = set.complement(antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType)); - } - if (atCaret) { - if (!translateStackToRuleIndex(callStack)) { - std::vector list = set.toList(); - bool hasTokenSequence = list.size() == 1; - for (size_t symbol: list) { - if (!ignoredTokens.contains(symbol)) { - if (showDebugOutput) { - std::cout << "=====> collected: " << vocabulary.getDisplayName(symbol) << "\n"; - } - - std::vector followingTokens; - if (hasTokenSequence) { - followingTokens = getFollowingTokens(transition.get()); - } - if (!candidates.tokens.contains(symbol)) { - candidates.tokens[symbol] = followingTokens; - } else { - candidates.tokens[symbol] = longestCommonPrefix(followingTokens, candidates.tokens[symbol]); - } - } - } - } - } else { - if (set.contains(currentSymbol)) { - if (showDebugOutput) { - std::cout << "=====> consumed: " << vocabulary.getDisplayName(currentSymbol) << "\n"; - } - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex + 1, - }); - } - } - } - } - } - } - } - - callStack.pop_back(); - - if (startState->isLeftRecursiveRule) { - precedenceStack.pop_back(); - } - - // Cache the result, for later lookup to avoid duplicate walks. - positionMap[tokenListIndex] = result; - - return result; +RuleEndStatus CodeCompletionCore::processRule( // NOLINT + antlr4::atn::RuleStartState* startState, + size_t tokenListIndex, + RuleWithStartTokenList& callStack, + int precedence, // NOLINT + size_t indentation, // NOLINT + bool& timedOut +) { + // Cancelled by external caller? 
+ if (cancel != nullptr && cancel->load()) { + timedOut = true; + return {}; + } + + // Check for timeout + timedOut = false; + if (timeoutMS > 0) { + const std::chrono::duration timeout(timeoutMS); + if (std::chrono::steady_clock::now() - timeoutStart > timeout) { + timedOut = true; + return {}; + } + } + + // Start with rule specific handling before going into the ATN walk. + + // Check first if we've taken this path with the same input before. + std::map positionMap; + if (!shortcutMap.contains(startState->ruleIndex)) { + shortcutMap[startState->ruleIndex] = positionMap; + } else { + positionMap = shortcutMap[startState->ruleIndex]; + if (positionMap.contains(tokenListIndex)) { + if (showDebugOutput) { + std::cout << "=====> shortcut" << "\n"; + } + + return positionMap[tokenListIndex]; + } + } + + RuleEndStatus result; + + // For rule start states we determine and cache the follow set, which gives us + // 3 advantages: 1) We can quickly check if a symbol would be matched when we + // follow that rule. We can so check in advance + // and can save us all the intermediate steps if there is no match. + // 2) We'll have all symbols that are collectable already together when we are + // at the caret on rule enter. 3) We get this lookup for free with any 2nd or + // further visit of the same rule, which often happens + // in non trivial grammars, especially with (recursive) expressions and of + // course when invoking code completion multiple times. 
+ + if (!followSetsByATN.contains(typeid(parser))) { + followSetsByATN[typeid(parser)] = FollowSetsPerState(); + } + + FollowSetsPerState& setsPerState = followSetsByATN[typeid(parser)]; + if (!setsPerState.contains(startState->stateNumber)) { + antlr4::atn::RuleStopState* stop = atn.ruleToStopState[startState->ruleIndex]; + auto followSets = determineFollowSets(startState, stop); + setsPerState[startState->stateNumber] = followSets; + } + const FollowSetsHolder followSets = setsPerState[startState->stateNumber]; + + // Get the token index where our rule starts from our (possibly filtered) + // token list + const size_t startTokenIndex = tokens[tokenListIndex]->getTokenIndex(); + + callStack.push_back({ + .startTokenIndex = startTokenIndex, + .ruleIndex = startState->ruleIndex, + }); + + if (tokenListIndex >= tokens.size() - 1) { // At caret? + if (preferredRules.contains(startState->ruleIndex)) { + // No need to go deeper when collecting entries and we reach a rule that + // we want to collect anyway. + translateStackToRuleIndex(callStack); + } else { + // Convert all follow sets to either single symbols or their associated + // preferred rule and add the result to our candidates list. + for (const FollowSetWithPath& set : followSets.sets) { + RuleWithStartTokenList fullPath = callStack; + + // Rules derived from our followSet will always start at the same token + // as our current rule. 
+ RuleWithStartTokenList followSetPath; + for (const size_t rule : set.path) { + followSetPath.push_back({ + .startTokenIndex = startTokenIndex, + .ruleIndex = rule, + }); + } + + fullPath.insert(fullPath.end(), followSetPath.begin(), followSetPath.end()); + + if (!translateStackToRuleIndex(fullPath)) { + for (ptrdiff_t symbol : set.intervals.toList()) { + if (!ignoredTokens.contains(static_cast(symbol))) { + if (showDebugOutput) { + std::cout << "=====> collected: " << vocabulary.getDisplayName(symbol) << "\n"; + } + if (!candidates.tokens.contains(symbol)) { + // Following is empty if there is more than one entry in the + // set. + candidates.tokens[symbol] = set.following; + } else { + // More than one following list for the same symbol. + if (candidates.tokens[symbol] != set.following) { + candidates.tokens[symbol] = {}; + } + } + } + } + } + } + } + + if (!followSets.isExhaustive) { + // If we're at the caret but the follow sets is non-exhaustive (empty or + // all tokens are optional), we should continue to collect tokens + // following this rule + result.insert(tokenListIndex); + } + + callStack.pop_back(); + + return result; + } + + // Process the rule if we either could pass it without consuming anything + // (epsilon transition) or if the current input symbol will be matched + // somewhere after this entry point. Otherwise stop here. + const size_t currentSymbol = tokens[tokenListIndex]->getType(); + if (followSets.isExhaustive && !followSets.combined.contains(currentSymbol)) { + callStack.pop_back(); + + return result; + } + + if (startState->isLeftRecursiveRule) { + precedenceStack.push_back(precedence); + } + + // The current state execution pipeline contains all yet-to-be-processed ATN + // states in this rule. For each such state we store the token index + a list + // of rules that lead to it. + std::vector statePipeline; + + // Bootstrap the pipeline. 
+ statePipeline.push_back({.state = startState, .tokenListIndex = tokenListIndex}); + + while (!statePipeline.empty()) { + if (cancel != nullptr && cancel->load()) { + timedOut = true; + return {}; + } + + const PipelineEntry currentEntry = statePipeline.back(); + statePipeline.pop_back(); + ++statesProcessed; + + const size_t currentSymbol = tokens[currentEntry.tokenListIndex]->getType(); + + const bool atCaret = currentEntry.tokenListIndex >= tokens.size() - 1; + if (showDebugOutput) { + printDescription( + indentation, + currentEntry.state, + generateBaseDescription(currentEntry.state), + currentEntry.tokenListIndex + ); + if (showRuleStack) { + printRuleState(callStack); + } + } + + if (currentEntry.state->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { + // Record the token index we are at, to report it to the caller. + result.insert(currentEntry.tokenListIndex); + continue; + } + + // We simulate here the same precedence handling as the parser does, which + // uses hard coded values. For rules that are not left recursive this value + // is ignored (since there is no precedence transition). 
+ for (const antlr4::atn::ConstTransitionPtr& transition : currentEntry.state->transitions) { + switch (transition->getTransitionType()) { + case antlr4::atn::TransitionType::RULE: { + const auto* ruleTransition = + dynamic_cast(transition.get()); + auto* ruleStartState = dynamic_cast(ruleTransition->target); + bool innerCancelled = false; + const RuleEndStatus endStatus = processRule( + ruleStartState, + currentEntry.tokenListIndex, + callStack, + ruleTransition->precedence, + indentation + 1, + innerCancelled + ); + if (innerCancelled) { + timedOut = true; + return {}; + } + + for (const size_t position : endStatus) { + statePipeline.push_back({ + .state = ruleTransition->followState, + .tokenListIndex = position, + }); + } + break; + } + + case antlr4::atn::TransitionType::PREDICATE: { + const auto* predTransition = + dynamic_cast(transition.get()); + if (checkPredicate(predTransition)) { + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex, + }); + } + break; + } + + case antlr4::atn::TransitionType::PRECEDENCE: { + const auto* predTransition = + dynamic_cast(transition.get()); + if (predTransition->getPrecedence() >= precedenceStack[precedenceStack.size() - 1]) { + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex, + }); + } + + break; + } + + case antlr4::atn::TransitionType::WILDCARD: { + if (atCaret) { + if (!translateStackToRuleIndex(callStack)) { + const auto tokens = antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, static_cast(atn.maxTokenType) + ); + for (auto token : tokens.toList()) { + if (!ignoredTokens.contains(token)) { + candidates.tokens[token] = {}; + } + } + } + } else { + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex + 1, + }); + } + break; + } + + default: { + if (transition->isEpsilon()) { + // Jump over simple states with a single outgoing epsilon + // 
transition. + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex, + }); + continue; + } + + antlr4::misc::IntervalSet set = transition->label(); + if (set.size() > 0) { + if (transition->getTransitionType() == antlr4::atn::TransitionType::NOT_SET) { + set = set.complement(antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, static_cast(atn.maxTokenType) + )); + } + if (atCaret) { + if (!translateStackToRuleIndex(callStack)) { + const std::vector list = set.toList(); + const bool hasTokenSequence = list.size() == 1; + for (const size_t symbol : list) { + if (!ignoredTokens.contains(symbol)) { + if (showDebugOutput) { + std::cout << "=====> collected: " << vocabulary.getDisplayName(symbol) + << "\n"; + } + + std::vector followingTokens; + if (hasTokenSequence) { + followingTokens = getFollowingTokens(transition.get()); + } + if (!candidates.tokens.contains(symbol)) { + candidates.tokens[symbol] = followingTokens; + } else { + candidates.tokens[symbol] = + longestCommonPrefix(followingTokens, candidates.tokens[symbol]); + } + } + } + } + } else { + if (set.contains(currentSymbol)) { + if (showDebugOutput) { + std::cout << "=====> consumed: " << vocabulary.getDisplayName(currentSymbol) + << "\n"; + } + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex + 1, + }); + } + } + } + } + } + } + } + + callStack.pop_back(); + + if (startState->isLeftRecursiveRule) { + precedenceStack.pop_back(); + } + + // Cache the result, for later lookup to avoid duplicate walks. 
+ positionMap[tokenListIndex] = result; + + return result; } - - // ---------------------------------------------------------------------------- // MARK: - Debug // ---------------------------------------------------------------------------- -std::string CodeCompletionCore::generateBaseDescription(antlr4::atn::ATNState * state) -{ - std::string stateValue = (state->stateNumber == atn::ATNState::INVALID_STATE_NUMBER) ? "Invalid" : std::to_string(state->stateNumber); - std::stringstream output; - - output << "[" << stateValue << " " << atnStateTypeMap[(size_t)state->getStateType()] << "]"; - output << " in "; - output << ruleNames[state->ruleIndex]; - return output.str(); +std::string CodeCompletionCore::generateBaseDescription(antlr4::atn::ATNState* state) { + const std::string stateValue = (state->stateNumber == antlr4::atn::ATNState::INVALID_STATE_NUMBER) + ? "Invalid" + : std::to_string(state->stateNumber); + std::stringstream output; + + output << "[" << stateValue << " " << atnStateTypeMap[static_cast(state->getStateType())] + << "]"; + output << " in "; + output << ruleNames[state->ruleIndex]; + return output.str(); } - -void CodeCompletionCore::printDescription(size_t indentation, antlr4::atn::ATNState * state, std::string const& baseDescription, size_t tokenIndex) -{ - std::string indent = std::string(indentation * 2, ' '); - std::string output = ""; - std::string transitionDescription = ""; - - if (debugOutputWithTransitions) { - for (antlr4::atn::ConstTransitionPtr& transition: state->transitions) { - - std::string labels = ""; - std::vector symbols = transition->label().toList(); - - if (symbols.size() > 2) { - // Only print start and end symbols to avoid large lists in debug output. - labels = vocabulary.getDisplayName((size_t)symbols[0]) + " .. 
" + vocabulary.getDisplayName((size_t)symbols[symbols.size() - 1]); - } else { - for (size_t symbol: symbols) { - if (labels.size() > 0) { - labels += ", "; - } - labels += vocabulary.getDisplayName(symbol); - } - } - if (labels.size() == 0) { - labels = "ε"; - } - - transitionDescription += "\n" + indent + "\t(" + labels + ") " + - "[" + std::to_string(transition->target->stateNumber) + " " + atnStateTypeMap[(size_t)transition->target->getStateType()] + "]" - " in " + ruleNames[transition->target->ruleIndex]; - } - } - - if (tokenIndex >= tokens.size() - 1) { - output = "<<" + std::to_string(tokenStartIndex + tokenIndex) + ">> "; - } else { - output = "<" + std::to_string(tokenStartIndex + tokenIndex) + "> "; - } - - std::cout << indent + output + "Current state: " + baseDescription + transitionDescription << "\n"; +void CodeCompletionCore::printDescription( + size_t indentation, + antlr4::atn::ATNState* state, + std::string const& baseDescription, + size_t tokenIndex +) { + const std::string indent = std::string(indentation * 2, ' '); + std::string output; + std::string transitionDescription; + + if (debugOutputWithTransitions) { + for (const antlr4::atn::ConstTransitionPtr& transition : state->transitions) { + std::string labels; + std::vector symbols = transition->label().toList(); + + if (symbols.size() > 2) { + // Only print start and end symbols to avoid large lists in debug + // output. + labels = vocabulary.getDisplayName(static_cast(symbols[0])) + " .. 
" + + vocabulary.getDisplayName(static_cast(symbols[symbols.size() - 1])); + } else { + for (const size_t symbol : symbols) { + if (!labels.empty()) { + labels += ", "; + } + labels += vocabulary.getDisplayName(symbol); + } + } + if (labels.empty()) { + labels = "ε"; + } + + transitionDescription += "\n"; + transitionDescription += indent; + transitionDescription += "\t("; + transitionDescription += labels; + transitionDescription += ") ["; + transitionDescription += std::to_string(transition->target->stateNumber); + transitionDescription += " "; + transitionDescription += + atnStateTypeMap[static_cast(transition->target->getStateType())]; + transitionDescription += "] in "; + transitionDescription += ruleNames[transition->target->ruleIndex]; + } + } + + if (tokenIndex >= tokens.size() - 1) { + output = "<<" + std::to_string(tokenStartIndex + tokenIndex) + ">> "; + } else { + output = "<" + std::to_string(tokenStartIndex + tokenIndex) + "> "; + } + + std::cout << indent + output + "Current state: " + baseDescription + transitionDescription + << "\n"; } +void CodeCompletionCore::printRuleState(RuleWithStartTokenList const& stack) { + if (stack.empty()) { + std::cout << "\n"; + return; + } -void CodeCompletionCore::printRuleState(RuleWithStartTokenList const& stack) -{ - if (stack.size() == 0) { - std::cout << "\n"; - return; - } - - if (stack.size() > 0) { - for (RuleWithStartToken rule: stack) { - std::cout << ruleNames[rule.ruleIndex]; - } - std::cout << "\n"; - } + for (const RuleWithStartToken rule : stack) { + std::cout << ruleNames[rule.ruleIndex]; + } + std::cout << "\n"; } - -} // namespace c3; +} // namespace c3 diff --git a/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp b/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp index 0792085..c7da799 100644 --- a/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp +++ b/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp @@ -1,234 +1,262 @@ // // CodeCompletionCore.hpp // -// C++ port of antlr4-c3 (TypeScript) by 
Mike Lischke +// C++ port of antlr4-c3 (TypeScript) by Mike Lischke // Licensed under the MIT License. -// +// #ifndef CodeCompletionCore_hpp #define CodeCompletionCore_hpp -#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include #include -#include #include -#include #include -#include -#include "antlr4-runtime.h" - +#include +#include // ---------------------------------------------------------------------------- // Supporting Types // ---------------------------------------------------------------------------- namespace c3 { + using TokenList = std::vector; using RuleList = std::vector; - struct RuleWithStartToken { - size_t startTokenIndex; - size_t ruleIndex; + size_t startTokenIndex; + size_t ruleIndex; }; using RuleWithStartTokenList = std::vector; - /** * A record for a follow set along with the path at which this set was found. - * If there is only a single symbol in the interval set then we also collect and store tokens which follow - * this symbol directly in its rule (i.e. there is no intermediate rule transition). Only single label transitions - * are considered. This is useful if you have a chain of tokens which can be suggested as a whole, because there is - * a fixed sequence in the grammar. + * If there is only a single symbol in the interval set then we also collect and + * store tokens which follow this symbol directly in its rule (i.e. there is no + * intermediate rule transition). Only single label transitions are considered. + * This is useful if you have a chain of tokens which can be suggested as a + * whole, because there is a fixed sequence in the grammar. 
*/ struct FollowSetWithPath { - antlr4::misc::IntervalSet intervals; - RuleList path; - TokenList following; + antlr4::misc::IntervalSet intervals; + RuleList path; + TokenList following; }; - - /** - * A list of follow sets (for a given state number) + all of them combined for quick hit tests + whether they are - * exhaustive (false if subsequent yet-unprocessed rules could add further tokens to the follow set, true otherwise). - * This data is static in nature (because the used ATN states are part of a static struct: the ATN). - * Hence it can be shared between all C3 instances, however it depends on the actual parser class (type). + * A list of follow sets (for a given state number) + all of them combined for + * quick hit tests + whether they are exhaustive (false if subsequent + * yet-unprocessed rules could add further tokens to the follow set, true + * otherwise). This data is static in nature (because the used ATN states are + * part of a static struct: the ATN). Hence it can be shared between all C3 + * instances, however it depends on the actual parser class (type). */ struct FollowSetsHolder { - std::vector sets; - antlr4::misc::IntervalSet combined; - bool isExhaustive; + std::vector sets; + antlr4::misc::IntervalSet combined; + bool isExhaustive; }; - using FollowSetsPerState = std::map; - /** Token stream position info after a rule was processed. */ using RuleEndStatus = std::unordered_set; struct PipelineEntry { - antlr4::atn::ATNState * state; - size_t tokenListIndex; + antlr4::atn::ATNState* state; + size_t tokenListIndex; }; - - - // ---------------------------------------------------------------------------- // CandidatesCollection // ---------------------------------------------------------------------------- struct CandidateRule { - size_t startTokenIndex; - RuleList ruleList; + size_t startTokenIndex; + RuleList ruleList; }; - /** * All the candidates which have been found. Tokens and rules are separated. 
- * – Token entries include a list of tokens that directly follow them (see also the "following" member in the - * FollowSetWithPath class). - * – Rule entries include the index of the starting token within the evaluated rule, along with a call stack of rules - * found during evaluation. - * – cancelled will be true if the collectCandidates() was cancelled or timed out. + * – Token entries include a list of tokens that directly follow them (see also + * the "following" member in the FollowSetWithPath class). – Rule entries + * include the index of the starting token within the evaluated rule, along with + * a call stack of rules found during evaluation. – cancelled will be true if + * the collectCandidates() was cancelled or timed out. */ struct CandidatesCollection { - std::map tokens; - std::map rules; - bool cancelled; + std::map tokens; + std::map rules; + bool cancelled; }; - - // ---------------------------------------------------------------------------- // Code Completion Core // ---------------------------------------------------------------------------- -class CodeCompletionCore -{ +class CodeCompletionCore { public: - - // -------------------------------------------------------- - // Construction - // -------------------------------------------------------- - - CodeCompletionCore(antlr4::Parser * parser); - - - // -------------------------------------------------------- - // Configuration - // -------------------------------------------------------- - - /** - * Tailoring of the result: - * Tokens which should not appear in the candidates set. - */ - std::unordered_set ignoredTokens; - - /** - * Rules which replace any candidate token they contain. - * This allows to return descriptive rules (e.g. className, instead of ID/identifier). - */ - std::unordered_set preferredRules; - - /** - * Specify if preferred rules should translated top-down (higher index rule returns first) or - * bottom-up (lower index rule returns first). 
- */ - bool translateRulesTopDown = false; - - - - - // -------------------------------------------------------- - // Debugging Options - // -------------------------------------------------------- - // Print human readable ATN state and other info. - - /** Not dependent on showDebugOutput. Prints the collected rules + tokens to terminal. */ - bool showResult = false; - - /** Enables printing ATN state info to terminal. */ - bool showDebugOutput = false; - - /** Only relevant when showDebugOutput is true. Enables transition printing for a state. */ - bool debugOutputWithTransitions = false; - - /** Also depends on showDebugOutput. Enables call stack printing for each rule recursion. */ - bool showRuleStack = false; - - - - // -------------------------------------------------------- - // Usage - // -------------------------------------------------------- - - /** - * This is the main entry point. The caret token index specifies the token stream index for the token which - * currently covers the caret (or any other position you want to get code completion candidates for). - * Optionally you can pass in a parser rule context which limits the ATN walk to only that or called rules. - * This can significantly speed up the retrieval process but might miss some candidates (if they are outside of - * the given context). - * - * @param caretTokenIndex The index of the token at the caret position. - * @param context An option parser rule context to limit the search space. - * @param timeoutMS If non-zero, the number of milliseconds until collecting times out. - * @param cancel If set to a non-NULL atomic boolean, and that boolean value is set to true while the function is executing, then - * collecting candidates will abort as soon as possible. - * @returns The collection of completion candidates. If cancelled or timed out, the returned collection will have its 'cancelled' - * value set to true and the collected candidates may be incomplete. 
- */ - CandidatesCollection collectCandidates(size_t caretTokenIndex, antlr4::ParserRuleContext * context = nullptr, size_t timeoutMS = 0, std::atomic * cancel = nullptr); - - - - - // -------------------------------------------------------- - // Private - // -------------------------------------------------------- + // -------------------------------------------------------- + // Construction + // -------------------------------------------------------- + + explicit CodeCompletionCore(antlr4::Parser* parser); + + // -------------------------------------------------------- + // Configuration + // -------------------------------------------------------- + + /** + * Tailoring of the result: + * Tokens which should not appear in the candidates set. + */ + std::unordered_set ignoredTokens; // NOLINT: public field + + /** + * Rules which replace any candidate token they contain. + * This allows to return descriptive rules (e.g. className, instead of + * ID/identifier). + */ + std::unordered_set preferredRules; // NOLINT: public field + + /** + * Specify if preferred rules should translated top-down (higher index rule + * returns first) or bottom-up (lower index rule returns first). + */ + bool translateRulesTopDown = false; // NOLINT: public field + + // -------------------------------------------------------- + // Debugging Options + // -------------------------------------------------------- + // Print human readable ATN state and other info. + + /** Not dependent on showDebugOutput. Prints the collected rules + tokens to + * terminal. */ + bool showResult = false; // NOLINT: public field + + /** Enables printing ATN state info to terminal. */ + bool showDebugOutput = false; // NOLINT: public field + + /** Only relevant when showDebugOutput is true. Enables transition printing + * for a state. */ + bool debugOutputWithTransitions = false; // NOLINT: public field + + /** Also depends on showDebugOutput. Enables call stack printing for each rule + * recursion. 
*/ + bool showRuleStack = false; // NOLINT: public field + + // -------------------------------------------------------- + // Usage + // -------------------------------------------------------- + + /** + * This is the main entry point. The caret token index specifies the token + * stream index for the token which currently covers the caret (or any other + * position you want to get code completion candidates for). Optionally you + * can pass in a parser rule context which limits the ATN walk to only that or + * called rules. This can significantly speed up the retrieval process but + * might miss some candidates (if they are outside of the given context). + * + * @param caretTokenIndex The index of the token at the caret position. + * @param context An option parser rule context to limit the search space. + * @param timeoutMS If non-zero, the number of milliseconds until collecting + * times out. + * @param cancel If set to a non-NULL atomic boolean, and that boolean value + * is set to true while the function is executing, then collecting candidates + * will abort as soon as possible. + * @returns The collection of completion candidates. If cancelled or timed + * out, the returned collection will have its 'cancelled' value set to true + * and the collected candidates may be incomplete. 
+ */ + CandidatesCollection collectCandidates( + size_t caretTokenIndex, + antlr4::ParserRuleContext* context = nullptr, + size_t timeoutMS = 0, + std::atomic* cancel = nullptr + ); + + // -------------------------------------------------------- + // Private + // -------------------------------------------------------- private: - - static std::map followSetsByATN; - static std::vector atnStateTypeMap; - - antlr4::Parser * parser; - antlr4::atn::ATN const& atn; - antlr4::dfa::Vocabulary const& vocabulary; - std::vector const& ruleNames; - std::vector tokens; - std::vector precedenceStack; - - size_t tokenStartIndex = 0; - size_t statesProcessed = 0; - - /** - * A mapping of rule index + token stream position to end token positions. - * A rule which has been visited before with the same input position will always produce the same output positions. - */ - std::map> shortcutMap; - - /** The collected candidates (rules and tokens). */ - c3::CandidatesCollection candidates; - size_t timeoutMS; - std::atomic * cancel; - std::chrono::steady_clock::time_point timeoutStart; - - bool checkPredicate(const antlr4::atn::PredicateTransition * transition); - bool translateStackToRuleIndex(RuleWithStartTokenList const& ruleWithStartTokenList); - bool translateToRuleIndex(size_t i, RuleWithStartTokenList const& ruleWithStartTokenList); - std::vector getFollowingTokens(const antlr4::atn::Transition * transition); - FollowSetsHolder determineFollowSets(antlr4::atn::ATNState * start, antlr4::atn::ATNState * stop); - bool collectFollowSets(antlr4::atn::ATNState * s, antlr4::atn::ATNState * stopState, std::vector& followSets, std::vector& stateStack, std::vector& ruleStack); - RuleEndStatus processRule(antlr4::atn::RuleStartState * startState, size_t tokenListIndex, RuleWithStartTokenList& callStack, int precedence, size_t indentation, bool& timedOut); - - std::string generateBaseDescription(antlr4::atn::ATNState * state); - void printDescription(size_t indentation, antlr4::atn::ATNState 
* state, std::string const& baseDescription, size_t tokenIndex); - void printRuleState(RuleWithStartTokenList const& stack); + static std::map followSetsByATN; + static std::vector atnStateTypeMap; + + antlr4::Parser* parser; + antlr4::atn::ATN const& atn; // NOLINT: reference field + antlr4::dfa::Vocabulary const& vocabulary; // NOLINT: reference field + std::vector const& ruleNames; // NOLINT: reference field + std::vector tokens; + std::vector precedenceStack; + + size_t tokenStartIndex = 0; + size_t statesProcessed = 0; + + /** + * A mapping of rule index + token stream position to end token positions. + * A rule which has been visited before with the same input position will + * always produce the same output positions. + */ + std::map> shortcutMap; + + /** The collected candidates (rules and tokens). */ + c3::CandidatesCollection candidates; + size_t timeoutMS; + std::atomic* cancel; + std::chrono::steady_clock::time_point timeoutStart; + + bool checkPredicate(const antlr4::atn::PredicateTransition* transition); + + bool translateStackToRuleIndex(RuleWithStartTokenList const& ruleWithStartTokenList); + + bool translateToRuleIndex(size_t index, RuleWithStartTokenList const& ruleWithStartTokenList); + + std::vector getFollowingTokens(const antlr4::atn::Transition* transition) const; + + FollowSetsHolder determineFollowSets(antlr4::atn::ATNState* start, antlr4::atn::ATNState* stop); + + bool collectFollowSets( + antlr4::atn::ATNState* state, + antlr4::atn::ATNState* stopState, + std::vector& followSets, + std::vector& stateStack, + std::vector& ruleStack + ); + + RuleEndStatus processRule( + antlr4::atn::RuleStartState* startState, + size_t tokenListIndex, + RuleWithStartTokenList& callStack, + int precedence, + size_t indentation, + bool& timedOut + ); + + std::string generateBaseDescription(antlr4::atn::ATNState* state); + + void printDescription( + size_t indentation, + antlr4::atn::ATNState* state, + std::string const& baseDescription, + size_t tokenIndex + 
); + + void printRuleState(RuleWithStartTokenList const& stack); }; -} // namespace c3 +} // namespace c3 #endif /* CodeCompletionCore_hpp */ diff --git a/ports/cpp/test/cpp14/Cpp14Test.cpp b/ports/cpp/test/cpp14/Cpp14Test.cpp index 8af94bf..835ad40 100644 --- a/ports/cpp/test/cpp14/Cpp14Test.cpp +++ b/ports/cpp/test/cpp14/Cpp14Test.cpp @@ -1,11 +1,17 @@ -#include - #include #include +#include +#include -#include - +#include #include +#include +#include +#include +#include +#include +#include +#include namespace c3::test { @@ -14,16 +20,17 @@ struct Cpp14Grammar { using Parser = CPP14Parser; }; -TEST(CPP14Parser, SimpleExample) { +TEST(CPP14Parser, SimpleExample) { // NOLINT: complexity // We are trying here to get useful code completion candidates without // adjusting the grammar in any way. We use the grammar as downloaded from the // ANTLR grammar directory and set up the c3 engine instead in a way that // still returns useful info. This limits us somewhat. - const auto *source = "class A {\n" - "public:\n" - " void test() {\n" - " }\n" - "};\n"; + const auto* source = + "class A {\n" + "public:\n" + " void test() {\n" + " }\n" + "};\n"; AntlrPipeline pipeline(source); pipeline.parser.translationunit(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 0); @@ -32,10 +39,17 @@ TEST(CPP14Parser, SimpleExample) { // Ignore operators and the generic ID token. completion.ignoredTokens = { - CPP14Lexer::Identifier, CPP14Lexer::LeftParen, CPP14Lexer::RightParen, - CPP14Lexer::Operator, CPP14Lexer::Star, CPP14Lexer::And, - CPP14Lexer::AndAnd, CPP14Lexer::LeftBracket, CPP14Lexer::Ellipsis, - CPP14Lexer::Doublecolon, CPP14Lexer::Semi, + CPP14Lexer::Identifier, + CPP14Lexer::LeftParen, + CPP14Lexer::RightParen, + CPP14Lexer::Operator, + CPP14Lexer::Star, + CPP14Lexer::And, + CPP14Lexer::AndAnd, + CPP14Lexer::LeftBracket, + CPP14Lexer::Ellipsis, + CPP14Lexer::Doublecolon, + CPP14Lexer::Semi, }; // For a C++ grammar you can of course get many candidates of all kind. 
For @@ -55,57 +69,100 @@ TEST(CPP14Parser, SimpleExample) { EXPECT_THAT( Keys(candidates.tokens), UnorderedElementsAre( - CPP14Lexer::Extern, CPP14Lexer::Mutable, CPP14Lexer::Register, - CPP14Lexer::Static, CPP14Lexer::Thread_local, CPP14Lexer::Decltype, - CPP14Lexer::Char, CPP14Lexer::Char16, CPP14Lexer::Char32, - CPP14Lexer::Wchar, CPP14Lexer::Bool, CPP14Lexer::Short, - CPP14Lexer::Int, CPP14Lexer::Long, CPP14Lexer::Signed, - CPP14Lexer::Unsigned, CPP14Lexer::Float, CPP14Lexer::Double, - CPP14Lexer::Void, CPP14Lexer::Auto, CPP14Lexer::Class, - CPP14Lexer::Struct, CPP14Lexer::Union, CPP14Lexer::Enum, - CPP14Lexer::Typename, CPP14Lexer::Const, CPP14Lexer::Volatile, - CPP14Lexer::Explicit, CPP14Lexer::Inline, CPP14Lexer::Virtual, - CPP14Lexer::Friend, CPP14Lexer::Typedef, CPP14Lexer::Constexpr, - CPP14Lexer::Alignas, CPP14Lexer::Asm, CPP14Lexer::Namespace, - CPP14Lexer::Using, CPP14Lexer::Static_assert, CPP14Lexer::Template, - CPP14Lexer::EOF)); - - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(CPP14Parser::RuleClassname, - CPP14Parser::RuleNamespacename, - CPP14Parser::RuleIdexpression)); + CPP14Lexer::Extern, + CPP14Lexer::Mutable, + CPP14Lexer::Register, + CPP14Lexer::Static, + CPP14Lexer::Thread_local, + CPP14Lexer::Decltype, + CPP14Lexer::Char, + CPP14Lexer::Char16, + CPP14Lexer::Char32, + CPP14Lexer::Wchar, + CPP14Lexer::Bool, + CPP14Lexer::Short, + CPP14Lexer::Int, + CPP14Lexer::Long, + CPP14Lexer::Signed, + CPP14Lexer::Unsigned, + CPP14Lexer::Float, + CPP14Lexer::Double, + CPP14Lexer::Void, + CPP14Lexer::Auto, + CPP14Lexer::Class, + CPP14Lexer::Struct, + CPP14Lexer::Union, + CPP14Lexer::Enum, + CPP14Lexer::Typename, + CPP14Lexer::Const, + CPP14Lexer::Volatile, + CPP14Lexer::Explicit, + CPP14Lexer::Inline, + CPP14Lexer::Virtual, + CPP14Lexer::Friend, + CPP14Lexer::Typedef, + CPP14Lexer::Constexpr, + CPP14Lexer::Alignas, + CPP14Lexer::Asm, + CPP14Lexer::Namespace, + CPP14Lexer::Using, + CPP14Lexer::Static_assert, + CPP14Lexer::Template, + 
CPP14Lexer::EOF + ) + ); + + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + CPP14Parser::RuleClassname, + CPP14Parser::RuleNamespacename, + CPP14Parser::RuleIdexpression + ) + ); EXPECT_THAT( candidates.rules[CPP14Parser::RuleNamespacename].ruleList, ElementsAre( - CPP14Parser::RuleTranslationunit, CPP14Parser::RuleDeclarationseq, - CPP14Parser::RuleDeclaration, CPP14Parser::RuleFunctiondefinition, - CPP14Parser::RuleDeclarator, CPP14Parser::RulePtrdeclarator, + CPP14Parser::RuleTranslationunit, + CPP14Parser::RuleDeclarationseq, + CPP14Parser::RuleDeclaration, + CPP14Parser::RuleFunctiondefinition, + CPP14Parser::RuleDeclarator, + CPP14Parser::RulePtrdeclarator, CPP14Parser::RulePtroperator, - CPP14Parser::RuleNestednamespecifier)); + CPP14Parser::RuleNestednamespecifier + ) + ); EXPECT_THAT( candidates.rules[CPP14Parser::RuleClassname].ruleList, ElementsAre( - CPP14Parser::RuleTranslationunit, CPP14Parser::RuleDeclarationseq, - CPP14Parser::RuleDeclaration, CPP14Parser::RuleFunctiondefinition, - CPP14Parser::RuleDeclarator, CPP14Parser::RulePtrdeclarator, - CPP14Parser::RulePtroperator, CPP14Parser::RuleNestednamespecifier, - CPP14Parser::RuleTypename)); + CPP14Parser::RuleTranslationunit, + CPP14Parser::RuleDeclarationseq, + CPP14Parser::RuleDeclaration, + CPP14Parser::RuleFunctiondefinition, + CPP14Parser::RuleDeclarator, + CPP14Parser::RulePtrdeclarator, + CPP14Parser::RulePtroperator, + CPP14Parser::RuleNestednamespecifier, + CPP14Parser::RuleTypename + ) + ); } for (auto translateRulesTopDown : {false, true}) { // 2) Within the method body. // Note when counting token indexes: the C++14 grammar skips all // whitespaces, hence there are no tokens for them. 
completion.translateRulesTopDown = translateRulesTopDown; - auto candidates = completion.collectCandidates(10); + auto candidates = completion.collectCandidates(10); // NOLINT: magic const std::vector idexpressionStack = { CPP14Parser::RuleTranslationunit, CPP14Parser::RuleDeclarationseq, CPP14Parser::RuleDeclaration, - CPP14Parser::RuleBlockdeclaration, // TS: +- `RuleFunctiondefinition` - CPP14Parser::RuleSimpledeclaration, // TS: -- + CPP14Parser::RuleBlockdeclaration, // TS: +- `RuleFunctiondefinition` + CPP14Parser::RuleSimpledeclaration, // TS: -- CPP14Parser::RuleDeclspecifierseq, CPP14Parser::RuleDeclspecifier, CPP14Parser::RuleTypespecifier, @@ -139,62 +196,69 @@ TEST(CPP14Parser, SimpleExample) { CPP14Parser::RulePrimaryexpression, }; - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(CPP14Parser::RuleClassname, - CPP14Parser::RuleNamespacename, - CPP14Parser::RuleIdexpression)); - - EXPECT_THAT(candidates.rules[CPP14Parser::RuleIdexpression].ruleList, - ElementsAreArray(idexpressionStack)); - - EXPECT_THAT(candidates.rules[CPP14Parser::RuleClassname].ruleList, - ElementsAreArray([&] { - auto stack = idexpressionStack; - stack.pop_back(); - for (auto rule : { - CPP14Parser::RuleSimpletypespecifier, - CPP14Parser::RuleNestednamespecifier, - CPP14Parser::RuleTypename, - }) { - stack.emplace_back(rule); - } - return stack; - }())); - - EXPECT_THAT(candidates.rules[CPP14Parser::RuleNamespacename].ruleList, - ElementsAreArray([&] { - auto stack = idexpressionStack; - stack.pop_back(); - for (auto rule : { - CPP14Parser::RuleSimpletypespecifier, - CPP14Parser::RuleNestednamespecifier, - }) { - stack.emplace_back(rule); - } - return stack; - }())); + const auto classnameStack = [&] { + auto stack = idexpressionStack; + stack.pop_back(); + stack.emplace_back(CPP14Parser::RuleSimpletypespecifier); + stack.emplace_back(CPP14Parser::RuleNestednamespecifier); + stack.emplace_back(CPP14Parser::RuleTypename); + return stack; + }(); + + const auto 
namespacenameStack = [&] { + auto stack = idexpressionStack; + stack.pop_back(); + stack.emplace_back(CPP14Parser::RuleSimpletypespecifier); + stack.emplace_back(CPP14Parser::RuleNestednamespecifier); + return stack; + }(); + + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + CPP14Parser::RuleClassname, + CPP14Parser::RuleNamespacename, + CPP14Parser::RuleIdexpression + ) + ); + + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleIdexpression].ruleList, + ElementsAreArray(idexpressionStack) + ); + + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleClassname].ruleList, ElementsAreArray(classnameStack) + ); + + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleNamespacename].ruleList, + ElementsAreArray(namespacenameStack) + ); } { // 2) Within the method body. // Note when counting token indexes: the C++14 grammar skips all // whitespaces, hence there are no tokens for them. completion.translateRulesTopDown = true; - auto candidates = completion.collectCandidates(10); + auto candidates = completion.collectCandidates(10); // NOLINT: magic EXPECT_EQ(candidates.tokens.size(), 82); - EXPECT_THAT(Keys(candidates.tokens), - IsSupersetOf({ - CPP14Lexer::If, - CPP14Lexer::This, - CPP14Lexer::New, - CPP14Lexer::Case, - CPP14Lexer::While, - CPP14Lexer::Throw, - // Fixing issue #12 causes this to be included that was - // previously not returned - CPP14Lexer::Decltype, - })); + EXPECT_THAT( + Keys(candidates.tokens), + IsSupersetOf({ + CPP14Lexer::If, + CPP14Lexer::This, + CPP14Lexer::New, + CPP14Lexer::Case, + CPP14Lexer::While, + CPP14Lexer::Throw, + // Fixing issue #12 causes this to be included that was + // previously not returned + CPP14Lexer::Decltype, + }) + ); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::Override)); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::Export)); @@ -204,12 +268,13 @@ TEST(CPP14Parser, SimpleExample) { } TEST(CPP14Parser, SimpleCppExampleWithErrorsInInput) { - const auto *source = "class A {\n" - "public:\n" - " void 
test() {\n" - " if ()" - " }\n" - "};\n"; + const auto* source = + "class A {\n" + "public:\n" + " void test() {\n" + " if ()" + " }\n" + "};\n"; AntlrPipeline pipeline(source); pipeline.parser.translationunit(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 3); @@ -240,15 +305,14 @@ TEST(CPP14Parser, SimpleCppExampleWithErrorsInInput) { { // At the opening parenthesis. - auto candidates = completion.collectCandidates(11); + auto candidates = completion.collectCandidates(11); // NOLINT: magic - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(CPP14Lexer::LeftParen)); + EXPECT_THAT(Keys(candidates.tokens), UnorderedElementsAre(CPP14Lexer::LeftParen)); } { // At the closing parenthesis -> again everything in an expression allowed // (no control flow this time, though). - auto candidates = completion.collectCandidates(12); + auto candidates = completion.collectCandidates(12); // NOLINT: magic EXPECT_EQ(candidates.tokens.size(), 65); @@ -270,14 +334,14 @@ TEST(CPP14Parser, SimpleCppExampleWithErrorsInInput) { } { // After the error position -> no suggestions. - auto candidates = completion.collectCandidates(13); + auto candidates = completion.collectCandidates(13); // NOLINT: magic EXPECT_EQ(candidates.tokens.size(), 0); EXPECT_EQ(candidates.rules.size(), 0); } } -TEST(CPP14Parser, RealCppFile) { +TEST(CPP14Parser, RealCppFile) { // NOLINT: complexity { const auto path = std::filesystem::current_path().string(); EXPECT_TRUE(path.ends_with("ports/cpp/build/test/cpp14")); @@ -286,8 +350,7 @@ TEST(CPP14Parser, RealCppFile) { const auto source = [] { // Assume we are at antlr4-c3/ports/cpp/build std::ifstream file("../../../tests/Parser.cpp"); - std::string content((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); + std::string content((std::istreambuf_iterator(file)), std::istreambuf_iterator()); return content; }(); @@ -299,10 +362,17 @@ TEST(CPP14Parser, RealCppFile) { // Ignore operators and the generic ID token. 
completion.ignoredTokens = { - CPP14Lexer::Identifier, CPP14Lexer::LeftParen, CPP14Lexer::RightParen, - CPP14Lexer::Operator, CPP14Lexer::Star, CPP14Lexer::And, - CPP14Lexer::AndAnd, CPP14Lexer::LeftBracket, CPP14Lexer::Ellipsis, - CPP14Lexer::Doublecolon, CPP14Lexer::Semi, + CPP14Lexer::Identifier, + CPP14Lexer::LeftParen, + CPP14Lexer::RightParen, + CPP14Lexer::Operator, + CPP14Lexer::Star, + CPP14Lexer::And, + CPP14Lexer::AndAnd, + CPP14Lexer::LeftBracket, + CPP14Lexer::Ellipsis, + CPP14Lexer::Doublecolon, + CPP14Lexer::Semi, }; completion.preferredRules = { @@ -311,7 +381,7 @@ TEST(CPP14Parser, RealCppFile) { CPP14Parser::RuleIdexpression, }; - std::vector idexpressionStack = { + const std::vector idexpressionStack = { CPP14Parser::RuleTranslationunit, CPP14Parser::RuleDeclarationseq, CPP14Parser::RuleDeclaration, @@ -331,50 +401,63 @@ TEST(CPP14Parser, RealCppFile) { CPP14Parser::RuleDeclaratorid, }; - std::vector classnameStack = - Concat(idexpressionStack, { - CPP14Parser::RuleIdexpression, - CPP14Parser::RuleQualifiedid, - CPP14Parser::RuleNestednamespecifier, - CPP14Parser::RuleTypename, - }); - - std::vector namespacenameStack = - Concat(idexpressionStack, { - CPP14Parser::RuleIdexpression, - CPP14Parser::RuleQualifiedid, - CPP14Parser::RuleNestednamespecifier, - }); + const std::vector classnameStack = Concat( + idexpressionStack, + { + CPP14Parser::RuleIdexpression, + CPP14Parser::RuleQualifiedid, + CPP14Parser::RuleNestednamespecifier, + CPP14Parser::RuleTypename, + } + ); + + const std::vector namespacenameStack = Concat( + idexpressionStack, + { + CPP14Parser::RuleIdexpression, + CPP14Parser::RuleQualifiedid, + CPP14Parser::RuleNestednamespecifier, + } + ); { - auto candidates = completion.collectCandidates(3469); + auto candidates = completion.collectCandidates(3469); // NOLINT: magic - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(CPP14Parser::RuleClassname, - CPP14Parser::RuleNamespacename, - CPP14Parser::RuleIdexpression)); + 
EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + CPP14Parser::RuleClassname, + CPP14Parser::RuleNamespacename, + CPP14Parser::RuleIdexpression + ) + ); - EXPECT_THAT(candidates.rules[CPP14Parser::RuleIdexpression].ruleList, - ElementsAreArray(idexpressionStack)); + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleIdexpression].ruleList, + ElementsAreArray(idexpressionStack) + ); } { // We should receive more specific rules when translating top down. completion.translateRulesTopDown = true; - auto candidates = completion.collectCandidates(3469); + auto candidates = completion.collectCandidates(3469); // NOLINT: magic - EXPECT_THAT(candidates.rules[CPP14Parser::RuleClassname].ruleList, - ElementsAreArray(classnameStack)); + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleClassname].ruleList, ElementsAreArray(classnameStack) + ); - EXPECT_THAT(candidates.rules[CPP14Parser::RuleNamespacename].ruleList, - ElementsAreArray(namespacenameStack)); + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleNamespacename].ruleList, + ElementsAreArray(namespacenameStack) + ); // We are starting a primary expression in a function body, so everything // related to expressions and control flow is allowed here. We only check // for a few possible keywords. 
- EXPECT_EQ(candidates.tokens.size(), 40); // TS: 82 + EXPECT_EQ(candidates.tokens.size(), 40); // TS: 82 - { // TS: at each statement in this block must be EXPECT_TRUE + { // TS: at each statement in this block must be EXPECT_TRUE EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::If)); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::This)); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::New)); @@ -394,4 +477,4 @@ TEST(CPP14Parser, RealCppFile) { } } -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/expr/ExprTest.cpp b/ports/cpp/test/expr/ExprTest.cpp index 01edd87..c3ff7ec 100644 --- a/ports/cpp/test/expr/ExprTest.cpp +++ b/ports/cpp/test/expr/ExprTest.cpp @@ -1,8 +1,11 @@ -#include - #include #include +#include +#include +#include +#include +#include #include namespace c3::test { @@ -23,12 +26,10 @@ TEST(SimpleExpressionParser, MostSimpleSetup) { // 1) At the input start. auto candidates = completion.collectCandidates(0); EXPECT_THAT( - Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET, ExprLexer::ID)); - EXPECT_THAT(candidates.tokens[ExprLexer::VAR], - ElementsAre(ExprLexer::ID, ExprLexer::EQUAL)); - EXPECT_THAT(candidates.tokens[ExprLexer::LET], - ElementsAre(ExprLexer::ID, ExprLexer::EQUAL)); + Keys(candidates.tokens), UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET, ExprLexer::ID) + ); + EXPECT_THAT(candidates.tokens[ExprLexer::VAR], ElementsAre(ExprLexer::ID, ExprLexer::EQUAL)); + EXPECT_THAT(candidates.tokens[ExprLexer::LET], ElementsAre(ExprLexer::ID, ExprLexer::EQUAL)); EXPECT_THAT(candidates.tokens[ExprLexer::ID], ElementsAre()); } { @@ -46,24 +47,29 @@ TEST(SimpleExpressionParser, MostSimpleSetup) { { // 4) On the equal sign (ignoring whitespace positions from now on). 
auto candidates = completion.collectCandidates(4); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::EQUAL)); + EXPECT_THAT(Keys(candidates.tokens), UnorderedElementsAre(ExprLexer::EQUAL)); } { // 5) On the variable reference 'a'. But since we have not configure the c3 // engine to return us var refs (or function refs for that matter) we only // get an ID here. - auto candidates = completion.collectCandidates(6); + auto candidates = completion.collectCandidates(6); // NOLINT: magic EXPECT_THAT(Keys(candidates.tokens), UnorderedElementsAre(ExprLexer::ID)); } { // 6) On the '+' operator. Usually you would not show operators as // candidates, but we have not set up the c3 engine yet to not return them. - auto candidates = completion.collectCandidates(8); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::PLUS, ExprLexer::MINUS, - ExprLexer::MULTIPLY, ExprLexer::DIVIDE, - ExprLexer::OPEN_PAR)); + auto candidates = completion.collectCandidates(8); // NOLINT: magic + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre( + ExprLexer::PLUS, + ExprLexer::MINUS, + ExprLexer::MULTIPLY, + ExprLexer::DIVIDE, + ExprLexer::OPEN_PAR + ) + ); } } @@ -74,8 +80,12 @@ TEST(SimpleExpressionParser, TypicalSetup) { c3::CodeCompletionCore completion(&pipeline.parser); completion.ignoredTokens = { - ExprLexer::ID, ExprLexer::PLUS, ExprLexer::MINUS, - ExprLexer::MULTIPLY, ExprLexer::DIVIDE, ExprLexer::EQUAL, + ExprLexer::ID, + ExprLexer::PLUS, + ExprLexer::MINUS, + ExprLexer::MULTIPLY, + ExprLexer::DIVIDE, + ExprLexer::EQUAL, }; completion.preferredRules = { ExprParser::RuleFunctionRef, @@ -85,8 +95,7 @@ TEST(SimpleExpressionParser, TypicalSetup) { { // 1) At the input start. 
auto candidates = completion.collectCandidates(0); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET)); + EXPECT_THAT(Keys(candidates.tokens), UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET)); // NOTE: Behaviour differs from TypeScript version EXPECT_THAT(candidates.tokens[ExprLexer::VAR], UnorderedElementsAre()); @@ -104,24 +113,24 @@ TEST(SimpleExpressionParser, TypicalSetup) { } { // 5) On the variable reference 'a'. - auto candidates = completion.collectCandidates(6); + auto candidates = completion.collectCandidates(6); // NOLINT: magic EXPECT_EQ(candidates.tokens.size(), 0); // Here we get 2 rule indexes, derived from 2 different IDs possible at this // caret position. These are what we told the engine above to be preferred // rules for us. - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleFunctionRef, - ExprParser::RuleVariableRef)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleFunctionRef, ExprParser::RuleVariableRef) + ); EXPECT_EQ(candidates.rules[ExprParser::RuleFunctionRef].startTokenIndex, 6); EXPECT_EQ(candidates.rules[ExprParser::RuleVariableRef].startTokenIndex, 6); } { // 6) On the whitespace just after the variable reference 'a' (but it could // still be a function reference!). - auto candidates = completion.collectCandidates(7); + auto candidates = completion.collectCandidates(7); // NOLINT: magic EXPECT_EQ(candidates.tokens.size(), 0); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleFunctionRef)); + EXPECT_THAT(Keys(candidates.rules), UnorderedElementsAre(ExprParser::RuleFunctionRef)); EXPECT_EQ(candidates.rules[ExprParser::RuleFunctionRef].startTokenIndex, 6); } } @@ -136,36 +145,30 @@ TEST(SimpleExpressionParser, RecursivePreferredRule) { { // 1) On the variable reference 'a'. 
- auto candidates = completion.collectCandidates(6); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleSimpleExpression)); + auto candidates = completion.collectCandidates(6); // NOLINT: magic + EXPECT_THAT(Keys(candidates.rules), UnorderedElementsAre(ExprParser::RuleSimpleExpression)); // The start token of the simpleExpression rule begins at token 'a'. - EXPECT_EQ( - candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6); + EXPECT_EQ(candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6); } { // 2) On the variable reference 'b'. completion.translateRulesTopDown = false; - auto candidates = completion.collectCandidates(10); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleSimpleExpression)); + auto candidates = completion.collectCandidates(10); // NOLINT: magic + EXPECT_THAT(Keys(candidates.rules), UnorderedElementsAre(ExprParser::RuleSimpleExpression)); // When translateRulesTopDown is false, startTokenIndex should match the // start token for the lower index (less specific) rule in the expression, // which is 'a'. - EXPECT_EQ( - candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6); + EXPECT_EQ(candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6); } { // 3) On the variable reference 'b' topDown preferred rules. completion.translateRulesTopDown = true; - auto candidates = completion.collectCandidates(10); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleSimpleExpression)); + auto candidates = completion.collectCandidates(10); // NOLINT: magic + EXPECT_THAT(Keys(candidates.rules), UnorderedElementsAre(ExprParser::RuleSimpleExpression)); // When translateRulesTopDown is true, startTokenIndex should match the // start token for the higher index (more specific) rule in the expression, // which is 'b'. 
- EXPECT_EQ( - candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 10); + EXPECT_EQ(candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 10); } } @@ -184,9 +187,10 @@ TEST(SimpleExpressionParser, CandidateRulesWithDifferentStartTokens) { { // 1) On the token 'var'. auto candidates = completion.collectCandidates(0); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleAssignment, - ExprParser::RuleVariableRef)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleAssignment, ExprParser::RuleVariableRef) + ); // The start token of the assignment and variableRef rules begin at token // 'var'. EXPECT_EQ(candidates.rules[ExprParser::RuleAssignment].startTokenIndex, 0); @@ -194,10 +198,11 @@ TEST(SimpleExpressionParser, CandidateRulesWithDifferentStartTokens) { } { // 2) On the variable reference 'a'. - auto candidates = completion.collectCandidates(6); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleAssignment, - ExprParser::RuleVariableRef)); + auto candidates = completion.collectCandidates(6); // NOLINT: magic + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleAssignment, ExprParser::RuleVariableRef) + ); // The start token of the assignment rule begins at token 'var'. EXPECT_EQ(candidates.rules[ExprParser::RuleAssignment].startTokenIndex, 0); // The start token of the variableRef rule begins at token 'a'. 
@@ -205,4 +210,4 @@ TEST(SimpleExpressionParser, CandidateRulesWithDifferentStartTokens) { } } -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/utility/AntlrPipeline.hpp b/ports/cpp/test/utility/AntlrPipeline.hpp index 2e423d5..aa7d205 100644 --- a/ports/cpp/test/utility/AntlrPipeline.hpp +++ b/ports/cpp/test/utility/AntlrPipeline.hpp @@ -1,39 +1,52 @@ #pragma once -#include +#include +#include +#include +#include +#include #include +#include +#include #include namespace c3::test { class CountingErrorListener final : public antlr4::BaseErrorListener { public: - void syntaxError(antlr4::Recognizer *recognizer, - antlr4::Token *offendingSymbol, std::size_t line, - std::size_t charPositionInLine, const std::string &msg, - std::exception_ptr e) override { + void syntaxError( + antlr4::Recognizer* /*recognizer*/, + antlr4::Token* /*offendingSymbol*/, + std::size_t /*line*/, + std::size_t /*charPositionInLine*/, + const std::string& /*msg*/, + std::exception_ptr /*e*/ + ) override { errorCount += 1; } - std::size_t GetErrorCount() const { return errorCount; } + [[nodiscard]] std::size_t GetErrorCount() const { + return errorCount; + } private: std::size_t errorCount = 0; }; -template struct AntlrPipeline { - AntlrPipeline(std::string_view text) +template +struct AntlrPipeline { + explicit AntlrPipeline(std::string_view text) : chars(text), lexer(&chars), tokens(&lexer), parser(&tokens) { parser.removeErrorListeners(); parser.addErrorListener(&listener); } - antlr4::ANTLRInputStream chars; - Grammar::Lexer lexer; - antlr4::CommonTokenStream tokens; - Grammar::Parser parser; - CountingErrorListener listener; + antlr4::ANTLRInputStream chars; // NOLINT: public + Grammar::Lexer lexer; // NOLINT: public + antlr4::CommonTokenStream tokens; // NOLINT: public + Grammar::Parser parser; // NOLINT: public + CountingErrorListener listener; // NOLINT: public }; -} // namespace c3::test +} // namespace c3::test diff --git 
a/ports/cpp/test/utility/Collections.hpp b/ports/cpp/test/utility/Collections.hpp index 64cfdbd..df88cd3 100644 --- a/ports/cpp/test/utility/Collections.hpp +++ b/ports/cpp/test/utility/Collections.hpp @@ -5,20 +5,21 @@ namespace c3::test { -template std::vector Keys(const std::map &map) { +template +std::vector Keys(const std::map& map) { std::vector keys; - for (const auto &[key, value] : map) { + for (const auto& [key, value] : map) { keys.emplace_back(key); } return keys; } template -std::vector Concat(std::vector lhs, const std::vector &rhs) { - for (const auto &element : rhs) { +std::vector Concat(std::vector lhs, const std::vector& rhs) { + for (const auto& element : rhs) { lhs.emplace_back(element); } return lhs; } -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/utility/Testing.hpp b/ports/cpp/test/utility/Testing.hpp index 81b1b20..bef9d1b 100644 --- a/ports/cpp/test/utility/Testing.hpp +++ b/ports/cpp/test/utility/Testing.hpp @@ -1,10 +1,6 @@ #pragma once #include -#include - -#include "AntlrPipeline.hpp" -#include "Collections.hpp" namespace c3::test { @@ -14,4 +10,4 @@ using testing::IsSupersetOf; using testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/whitebox/WhiteboxTest.cpp b/ports/cpp/test/whitebox/WhiteboxTest.cpp index e43f6a8..42c7d42 100644 --- a/ports/cpp/test/whitebox/WhiteboxTest.cpp +++ b/ports/cpp/test/whitebox/WhiteboxTest.cpp @@ -1,8 +1,13 @@ -#include - +#include #include #include +#include +#include +#include +#include +#include +#include #include namespace c3::test { @@ -15,75 +20,82 @@ struct WhiteboxGrammar { /// (optional tokens) TEST(WhiteboxGrammarTests, CaretAtTransitionToRuleWithNonExhaustiveFollowSet) { AntlrPipeline pipeline("LOREM "); - auto *ctx = pipeline.parser.test1(); + auto* ctx = pipeline.parser.test1(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 1); c3::CodeCompletionCore 
completion(&pipeline.parser); auto candidates = completion.collectCandidates(1, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::IPSUM, WhiteboxLexer::DOLOR, - WhiteboxLexer::SIT, WhiteboxLexer::AMET, - WhiteboxLexer::CONSECTETUR)); + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre( + WhiteboxLexer::IPSUM, + WhiteboxLexer::DOLOR, + WhiteboxLexer::SIT, + WhiteboxLexer::AMET, + WhiteboxLexer::CONSECTETUR + ) + ); } /// (epsilon-only transition to rule end) TEST(WhiteboxGrammarTests, CaretAtTransitionToRuleWithEmptyFollowSet) { AntlrPipeline pipeline("LOREM "); - auto *ctx = pipeline.parser.test2(); + auto* ctx = pipeline.parser.test2(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 1); c3::CodeCompletionCore completion(&pipeline.parser); auto candidates = completion.collectCandidates(1, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::IPSUM, WhiteboxLexer::DOLOR, - WhiteboxLexer::SIT, WhiteboxLexer::AMET, - WhiteboxLexer::CONSECTETUR)); + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre( + WhiteboxLexer::IPSUM, + WhiteboxLexer::DOLOR, + WhiteboxLexer::SIT, + WhiteboxLexer::AMET, + WhiteboxLexer::CONSECTETUR + ) + ); } TEST(WhiteboxGrammarTests, CaretAtOneOfMultiplePossibleStates) { for (const auto index : {4, 5, 6, 7}) { AntlrPipeline pipeline("LOREM IPSUM "); - auto *ctx = [&]() -> antlr4::ParserRuleContext * { + auto* ctx = [&]() -> antlr4::ParserRuleContext* { switch (index) { - case 4: - return pipeline.parser.test4(); - case 5: - return pipeline.parser.test5(); - case 6: - return pipeline.parser.test6(); - case 7: - return pipeline.parser.test7(); - default: - std::abort(); + case 4: + return pipeline.parser.test4(); + case 5: // NOLINT: magic + return pipeline.parser.test5(); + case 6: // NOLINT: magic + return pipeline.parser.test6(); + case 7: // NOLINT: magic + return pipeline.parser.test7(); + default: + std::abort(); } }(); c3::CodeCompletionCore 
completion(&pipeline.parser); auto candidates = completion.collectCandidates(2, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::DOLOR)); - EXPECT_THAT(candidates.tokens[WhiteboxLexer::DOLOR], - UnorderedElementsAre()); + EXPECT_THAT(Keys(candidates.tokens), UnorderedElementsAre(WhiteboxLexer::DOLOR)); + EXPECT_THAT(candidates.tokens[WhiteboxLexer::DOLOR], UnorderedElementsAre()); } } -TEST(WhiteboxGrammarTests, - CaretAtOneOfMultiplePossibleStatesWithCommonFollowList) { +TEST(WhiteboxGrammarTests, CaretAtOneOfMultiplePossibleStatesWithCommonFollowList) { AntlrPipeline pipeline("LOREM IPSUM "); - auto *ctx = pipeline.parser.test8(); + auto* ctx = pipeline.parser.test8(); c3::CodeCompletionCore completion(&pipeline.parser); auto candidates = completion.collectCandidates(2, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::DOLOR)); - EXPECT_THAT(candidates.tokens[WhiteboxLexer::DOLOR], - UnorderedElementsAre(WhiteboxLexer::SIT)); + EXPECT_THAT(Keys(candidates.tokens), UnorderedElementsAre(WhiteboxLexer::DOLOR)); + EXPECT_THAT(candidates.tokens[WhiteboxLexer::DOLOR], UnorderedElementsAre(WhiteboxLexer::SIT)); } -} // namespace c3::test +} // namespace c3::test