From f54cbc4e51c884bb8cdca2575da0be9f2ec6323e Mon Sep 17 00:00:00 2001 From: vityaman Date: Sun, 28 Jul 2024 15:31:50 +0300 Subject: [PATCH] #132 Format using Google style Signed-off-by: vityaman --- .github/workflows/cmake.yml | 8 + ports/cpp/.clang-format | 13 + .../source/antlr4-c3/CodeCompletionCore.cpp | 1499 +++++++++-------- .../source/antlr4-c3/CodeCompletionCore.hpp | 343 ++-- ports/cpp/test/cpp14/Cpp14Test.cpp | 238 +-- ports/cpp/test/expr/ExprTest.cpp | 102 +- ports/cpp/test/utility/AntlrPipeline.hpp | 18 +- ports/cpp/test/utility/Collections.hpp | 11 +- ports/cpp/test/utility/Testing.hpp | 2 +- ports/cpp/test/whitebox/WhiteboxTest.cpp | 81 +- 10 files changed, 1246 insertions(+), 1069 deletions(-) create mode 100644 ports/cpp/.clang-format diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 28458b2..d62f3f2 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -29,6 +29,14 @@ jobs: with: create-symlink: true key: ${{ github.job }}-${{ matrix.os }} + - name: Clang-Format + working-directory: ${{github.workspace}}/ports/cpp + run: | + ( \ + find source test -iname '*.hpp' -o -iname '*.cpp' \ + | xargs clang-format -Werror --dry-run \ + --fallback-style=Google --verbose \ + ) - name: Configure working-directory: ${{github.workspace}}/ports/cpp run: | diff --git a/ports/cpp/.clang-format b/ports/cpp/.clang-format new file mode 100644 index 0000000..375a5a1 --- /dev/null +++ b/ports/cpp/.clang-format @@ -0,0 +1,13 @@ +--- +Language: Cpp +BasedOnStyle: Google + +AccessModifierOffset: -2 + +AllowShortFunctionsOnASingleLine: None +AllowShortIfStatementsOnASingleLine: false +AllowShortLoopsOnASingleLine: false + +DerivePointerAlignment: false +BreakConstructorInitializers: BeforeComma +AlignAfterOpenBracket: BlockIndent diff --git a/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp b/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp index 98599b6..3da04f0 100644 --- a/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp +++ b/ports/cpp/source/antlr4-c3/CodeCompletionCore.cpp @@ -1,317 +1,308 @@ // // CodeCompletionCore.cpp // -// C++ port of antlr4-c3 (TypeScript) by Mike Lischke +// C++ port of antlr4-c3 (TypeScript) by Mike Lischke // Licensed under the MIT License. 
-// +// #include "CodeCompletionCore.hpp" using namespace antlr4; using namespace std; - namespace c3 { // ---------------------------------------------------------------------------- // MARK: - Utilities // ---------------------------------------------------------------------------- -static std::vector longestCommonPrefix(std::vector a, std::vector b) -{ - size_t i = 0; - for (; i < std::min(a.size(), b.size()); i++) { - if (a[i] != b[i]) { - break; - } - } - - return std::vector(a.begin(), a.begin() + i); +static std::vector longestCommonPrefix( + std::vector a, std::vector b +) { + size_t i = 0; + for (; i < std::min(a.size(), b.size()); i++) { + if (a[i] != b[i]) { + break; + } + } + + return std::vector(a.begin(), a.begin() + i); } - - - // ---------------------------------------------------------------------------- // MARK: - Static // ---------------------------------------------------------------------------- -std::map c3::CodeCompletionCore::followSetsByATN = {}; +std::map + c3::CodeCompletionCore::followSetsByATN = {}; // Matches ATNStateType enum std::vector c3::CodeCompletionCore::atnStateTypeMap = { - "invalid", - "basic", - "rule start", - "block start", - "plus block start", - "star block start", - "token start", - "rule stop", - "block end", - "star loop back", - "star loop entry", - "plus loop back", - "loop end", + "invalid", "basic", + "rule start", "block start", + "plus block start", "star block start", + "token start", "rule stop", + "block end", "star loop back", + "star loop entry", "plus loop back", + "loop end", }; - - // ---------------------------------------------------------------------------- // MARK: - Construction // ---------------------------------------------------------------------------- -CodeCompletionCore::CodeCompletionCore(antlr4::Parser * parser) -: parser(parser), atn(parser->getATN()), vocabulary(parser->getVocabulary()), ruleNames(parser->getRuleNames()) -{ - +CodeCompletionCore::CodeCompletionCore(antlr4::Parser* parser) + : parser(parser) + , atn(parser->getATN()) + , vocabulary(parser->getVocabulary()) + , ruleNames(parser->getRuleNames()) { } - - - - // ---------------------------------------------------------------------------- // MARK: - Collecting // ---------------------------------------------------------------------------- -CandidatesCollection CodeCompletionCore::collectCandidates(size_t caretTokenIndex, antlr4::ParserRuleContext * context, size_t timeoutMS, std::atomic * cancel) -{ - shortcutMap.clear(); - candidates.rules.clear(); - candidates.tokens.clear(); - candidates.cancelled = false; - statesProcessed = 0; - precedenceStack = {}; - timeoutStart = std::chrono::steady_clock::now(); - this->cancel = cancel; - this->timeoutMS = timeoutMS; - - tokenStartIndex = context ? context->start->getTokenIndex() : 0; - const auto tokenStream = parser->getTokenStream(); - - tokens = {}; - size_t offset = tokenStartIndex; - while (true) { - antlr4::Token * token = tokenStream->get(offset++); - if (token->getChannel() == Token::DEFAULT_CHANNEL) { - tokens.push_back(token); - - if (token->getTokenIndex() >= caretTokenIndex || token->getType() == Token::EOF) { - break; - } - } - - // Do not check for the token index here, as we want to end with the first unhidden token on or after - // the caret. - if (token->getType() == Token::EOF) { - break; - } - } - - RuleWithStartTokenList callStack = {}; - size_t startRule = context ? 
context->getRuleIndex() : 0; - bool cancelled = false; - - processRule(atn.ruleToStartState[startRule], 0, callStack, 0, 0, cancelled); - candidates.cancelled = cancelled; - - if (showResult) { - if (cancelled) { - std::cout << "*** TIMED OUT ***\n"; - } - - std::cout << "States processed: " << std::to_string(statesProcessed) << "\n\n"; - - std::cout << "Collected rules:\n"; - for (const auto & [tokenIndex, rule] : candidates.rules) { - std::cout << ruleNames[tokenIndex]; - std::cout << ", path: "; - - for (size_t token: rule.ruleList) { - std::cout << ruleNames[token] + " "; - } - } - std::cout << "\n\n"; - - std::set sortedTokens; - for (auto entry: candidates.tokens) { - size_t token = entry.first; - std::vector tokenList = entry.second; - - std::string value = vocabulary.getDisplayName(token); - for (size_t following: tokenList) { - value += " " + vocabulary.getDisplayName(following); - } - - sortedTokens.insert(value); - } - - std::cout << "Collected tokens:\n"; - for (std::string symbol: sortedTokens) { - std::cout << symbol; - } - std::cout << "\n\n"; - } - - return candidates; +CandidatesCollection CodeCompletionCore::collectCandidates( + size_t caretTokenIndex, antlr4::ParserRuleContext* context, + size_t timeoutMS, std::atomic* cancel +) { + shortcutMap.clear(); + candidates.rules.clear(); + candidates.tokens.clear(); + candidates.cancelled = false; + statesProcessed = 0; + precedenceStack = {}; + timeoutStart = std::chrono::steady_clock::now(); + this->cancel = cancel; + this->timeoutMS = timeoutMS; + + tokenStartIndex = context ? context->start->getTokenIndex() : 0; + const auto tokenStream = parser->getTokenStream(); + + tokens = {}; + size_t offset = tokenStartIndex; + while (true) { + antlr4::Token* token = tokenStream->get(offset++); + if (token->getChannel() == Token::DEFAULT_CHANNEL) { + tokens.push_back(token); + + if (token->getTokenIndex() >= caretTokenIndex || + token->getType() == Token::EOF) { + break; + } + } + + // Do not check for the token index here, as we want to end with the first + // unhidden token on or after the caret. + if (token->getType() == Token::EOF) { + break; + } + } + + RuleWithStartTokenList callStack = {}; + size_t startRule = context ? context->getRuleIndex() : 0; + bool cancelled = false; + + processRule(atn.ruleToStartState[startRule], 0, callStack, 0, 0, cancelled); + candidates.cancelled = cancelled; + + if (showResult) { + if (cancelled) { + std::cout << "*** TIMED OUT ***\n"; + } + + std::cout << "States processed: " << std::to_string(statesProcessed) + << "\n\n"; + + std::cout << "Collected rules:\n"; + for (const auto& [tokenIndex, rule] : candidates.rules) { + std::cout << ruleNames[tokenIndex]; + std::cout << ", path: "; + + for (size_t token : rule.ruleList) { + std::cout << ruleNames[token] + " "; + } + } + std::cout << "\n\n"; + + std::set sortedTokens; + for (auto entry : candidates.tokens) { + size_t token = entry.first; + std::vector tokenList = entry.second; + + std::string value = vocabulary.getDisplayName(token); + for (size_t following : tokenList) { + value += " " + vocabulary.getDisplayName(following); + } + + sortedTokens.insert(value); + } + + std::cout << "Collected tokens:\n"; + for (std::string symbol : sortedTokens) { + std::cout << symbol; + } + std::cout << "\n\n"; + } + + return candidates; } - - /** - * Checks if the predicate associated with the given transition evaluates to true. + * Checks if the predicate associated with the given transition evaluates to + * true. 
* * @param transition The transition to check. * @returns the evaluation result of the predicate. */ -bool CodeCompletionCore::checkPredicate(const antlr4::atn::PredicateTransition * transition) -{ - return transition->getPredicate()->eval(parser, &ParserRuleContext::EMPTY); +bool CodeCompletionCore::checkPredicate( + const antlr4::atn::PredicateTransition* transition +) { + return transition->getPredicate()->eval(parser, &ParserRuleContext::EMPTY); } - - /** - * Walks the rule chain upwards or downwards (depending on translateRulesTopDown) to see if that matches any of the - * preferred rules. If found, that rule is added to the collection candidates and true is returned. + * Walks the rule chain upwards or downwards (depending on + * translateRulesTopDown) to see if that matches any of the preferred rules. If + * found, that rule is added to the collection candidates and true is returned. * * @param ruleWithStartTokenList The list to convert. * @returns true if any of the stack entries was converted. */ -bool CodeCompletionCore::translateStackToRuleIndex(RuleWithStartTokenList const& ruleWithStartTokenList) -{ - if (preferredRules.size() == 0) { - return false; - } - - // Change the direction we iterate over the rule stack - if (translateRulesTopDown) { - // Loop over the rule stack from lowest to highest rule level. This will prioritize a lower preferred rule - // if it is a child of a higher one that is also a preferred rule. - for (int64_t i = ruleWithStartTokenList.size() - 1; i >= 0; i--) { - if (translateToRuleIndex(i, ruleWithStartTokenList)) { - return true; - } - } - } else { - // Loop over the rule stack from highest to lowest rule level. This will prioritize a higher preferred rule - // if it contains a lower one that is also a preferred rule. - for (size_t i = 0; i < ruleWithStartTokenList.size(); i++) { - if (translateToRuleIndex(i, ruleWithStartTokenList)) { - return true; - } - } - } - - return false; +bool CodeCompletionCore::translateStackToRuleIndex( + RuleWithStartTokenList const& ruleWithStartTokenList +) { + if (preferredRules.size() == 0) { + return false; + } + + // Change the direction we iterate over the rule stack + if (translateRulesTopDown) { + // Loop over the rule stack from lowest to highest rule level. This will + // prioritize a lower preferred rule if it is a child of a higher one that + // is also a preferred rule. + for (int64_t i = ruleWithStartTokenList.size() - 1; i >= 0; i--) { + if (translateToRuleIndex(i, ruleWithStartTokenList)) { + return true; + } + } + } else { + // Loop over the rule stack from highest to lowest rule level. This will + // prioritize a higher preferred rule if it contains a lower one that is + // also a preferred rule. + for (size_t i = 0; i < ruleWithStartTokenList.size(); i++) { + if (translateToRuleIndex(i, ruleWithStartTokenList)) { + return true; + } + } + } + + return false; } - - /** - * Given the index of a rule from a rule chain, check if that matches any of the preferred rules. If it matches, - * that rule is added to the collection candidates and true is returned. + * Given the index of a rule from a rule chain, check if that matches any of the + * preferred rules. If it matches, that rule is added to the collection + * candidates and true is returned. * * @param i The rule index. * @param ruleWithStartTokenList The list to check. * @returns true if the specified rule is in the list of preferred rules. 
*/ -bool CodeCompletionCore::translateToRuleIndex(size_t i, RuleWithStartTokenList const& ruleWithStartTokenList) -{ - RuleWithStartToken rwst = ruleWithStartTokenList[i]; - - if (preferredRules.contains(rwst.ruleIndex)) { - - // Add the rule to our candidates list along with the current rule path, - // but only if there isn't already an entry like that. - std::vector path; - { - for (size_t subrangeIndex = 0; subrangeIndex < i; subrangeIndex++) { - path.push_back(ruleWithStartTokenList[subrangeIndex].ruleIndex); - } - } - - - bool addNew = true; - - for (auto const& [cRuleEntryRuleIndex, cRuleEntryCandidateRule]: candidates.rules) { - if (cRuleEntryRuleIndex != rwst.ruleIndex || cRuleEntryCandidateRule.ruleList.size() != path.size()) { - continue; - } - - // Found an entry for this rule. Same path? - bool samePath = true; - for (size_t pathI = 0; pathI < path.size(); pathI++) { - if (path[pathI] == cRuleEntryCandidateRule.ruleList[pathI]) { - samePath = false; - break; - } - } - - // If same path, then don't add a new (duplicate) entry. - if (samePath) { - addNew = false; - break; - } - } - - if (addNew) { - candidates.rules[rwst.ruleIndex] = { - .startTokenIndex = rwst.startTokenIndex, - .ruleList = path - }; - if (showDebugOutput) { - std::cout << "=====> collected: " << ruleNames[rwst.ruleIndex] << "\n"; - } - } - - return true; - } - - return false; +bool CodeCompletionCore::translateToRuleIndex( + size_t i, RuleWithStartTokenList const& ruleWithStartTokenList +) { + RuleWithStartToken rwst = ruleWithStartTokenList[i]; + + if (preferredRules.contains(rwst.ruleIndex)) { + // Add the rule to our candidates list along with the current rule path, + // but only if there isn't already an entry like that. + std::vector path; + { + for (size_t subrangeIndex = 0; subrangeIndex < i; subrangeIndex++) { + path.push_back(ruleWithStartTokenList[subrangeIndex].ruleIndex); + } + } + + bool addNew = true; + + for (auto const& [cRuleEntryRuleIndex, cRuleEntryCandidateRule] : + candidates.rules) { + if (cRuleEntryRuleIndex != rwst.ruleIndex || + cRuleEntryCandidateRule.ruleList.size() != path.size()) { + continue; + } + + // Found an entry for this rule. Same path? + bool samePath = true; + for (size_t pathI = 0; pathI < path.size(); pathI++) { + if (path[pathI] == cRuleEntryCandidateRule.ruleList[pathI]) { + samePath = false; + break; + } + } + + // If same path, then don't add a new (duplicate) entry. + if (samePath) { + addNew = false; + break; + } + } + + if (addNew) { + candidates.rules[rwst.ruleIndex] = { + .startTokenIndex = rwst.startTokenIndex, .ruleList = path + }; + if (showDebugOutput) { + std::cout << "=====> collected: " << ruleNames[rwst.ruleIndex] << "\n"; + } + } + + return true; + } + + return false; } - - /** - * This method follows the given transition and collects all symbols within the same rule that directly follow it - * without intermediate transitions to other rules and only if there is a single symbol for a transition. + * This method follows the given transition and collects all symbols within the + * same rule that directly follow it without intermediate transitions to other + * rules and only if there is a single symbol for a transition. * * @param transition The transition from which to start. * @returns A list of toke types. 
*/ -std::vector CodeCompletionCore::getFollowingTokens(const antlr4::atn::Transition * transition) -{ - std::vector result = {}; - - std::vector pipeline = { transition->target }; - - while (pipeline.size() > 0) { - antlr4::atn::ATNState * state = pipeline.back(); - pipeline.pop_back(); - - if (state) { - for (antlr4::atn::ConstTransitionPtr& outgoing: state->transitions) { - if (outgoing->getTransitionType() == antlr4::atn::TransitionType::ATOM) { - if (!outgoing->isEpsilon()) { - std::vector list = outgoing->label().toList(); - if (list.size() == 1 && !ignoredTokens.contains(list[0])) { - result.push_back(list[0]); - pipeline.push_back(outgoing->target); - } - } else { - pipeline.push_back(outgoing->target); - } - } - } - } - } - - return result; +std::vector CodeCompletionCore::getFollowingTokens( + const antlr4::atn::Transition* transition +) { + std::vector result = {}; + + std::vector pipeline = {transition->target}; + + while (pipeline.size() > 0) { + antlr4::atn::ATNState* state = pipeline.back(); + pipeline.pop_back(); + + if (state) { + for (antlr4::atn::ConstTransitionPtr& outgoing : state->transitions) { + if (outgoing->getTransitionType() == + antlr4::atn::TransitionType::ATOM) { + if (!outgoing->isEpsilon()) { + std::vector list = outgoing->label().toList(); + if (list.size() == 1 && !ignoredTokens.contains(list[0])) { + result.push_back(list[0]); + pipeline.push_back(outgoing->target); + } + } else { + pipeline.push_back(outgoing->target); + } + } + } + } + } + + return result; } - - /** * Entry point for the recursive follow set collection function. * @@ -319,504 +310,578 @@ std::vector CodeCompletionCore::getFollowingTokens(const antlr4::atn::Tr * @param stop Stop state. * @returns Follow sets. */ -FollowSetsHolder CodeCompletionCore::determineFollowSets(antlr4::atn::ATNState * start, antlr4::atn::ATNState * stop) -{ - std::vector sets = {}; - std::vector stateStack = {}; - std::vector ruleStack = {}; - bool isExhaustive = collectFollowSets(start, stop, sets, stateStack, ruleStack); - - // Sets are split by path to allow translating them to preferred rules. But for quick hit tests - // it is also useful to have a set with all symbols combined. - antlr4::misc::IntervalSet combined; - for (auto set: sets) { - combined.addAll(set.intervals); - } - - return { - .sets = sets, - .combined = combined, - .isExhaustive = isExhaustive, - }; +FollowSetsHolder CodeCompletionCore::determineFollowSets( + antlr4::atn::ATNState* start, antlr4::atn::ATNState* stop +) { + std::vector sets = {}; + std::vector stateStack = {}; + std::vector ruleStack = {}; + bool isExhaustive = + collectFollowSets(start, stop, sets, stateStack, ruleStack); + + // Sets are split by path to allow translating them to preferred rules. But + // for quick hit tests it is also useful to have a set with all symbols + // combined. + antlr4::misc::IntervalSet combined; + for (auto set : sets) { + combined.addAll(set.intervals); + } + + return { + .sets = sets, + .combined = combined, + .isExhaustive = isExhaustive, + }; } - - /** - * Collects possible tokens which could be matched following the given ATN state. This is essentially the same - * algorithm as used in the LL1Analyzer class, but here we consider predicates also and use no parser rule context. + * Collects possible tokens which could be matched following the given ATN + * state. This is essentially the same algorithm as used in the LL1Analyzer + * class, but here we consider predicates also and use no parser rule context. 
* * @param s The state to continue from. * @param stopState The state which ends the collection routine. * @param followSets A pass through parameter to add found sets to. * @param stateStack A stack to avoid endless recursions. * @param ruleStack The current rule stack. - * @returns true if the follow sets is exhaustive, i.e. we terminated before the rule end was reached, so no - * subsequent rules could add tokens + * @returns true if the follow sets is exhaustive, i.e. we terminated before the + * rule end was reached, so no subsequent rules could add tokens */ -bool CodeCompletionCore::collectFollowSets(antlr4::atn::ATNState * s, antlr4::atn::ATNState * stopState, std::vector& followSets, std::vector& stateStack, std::vector& ruleStack) -{ - if (std::find(stateStack.begin(), stateStack.end(), s) != stateStack.end()) { - return true; - } - stateStack.push_back(s); - - if (s == stopState || s->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { - stateStack.pop_back(); - return false; - } - - bool isExhaustive = true; - for (antlr4::atn::ConstTransitionPtr& tp: s->transitions) { - const antlr4::atn::Transition * transition = tp.get(); - - if (transition->getTransitionType() == antlr4::atn::TransitionType::RULE) { - const antlr4::atn::RuleTransition * ruleTransition = static_cast(transition); - - if (std::find(ruleStack.begin(), ruleStack.end(), ruleTransition->target->ruleIndex) != ruleStack.end()) { - continue; - } - - ruleStack.push_back(ruleTransition->target->ruleIndex); - - bool ruleFollowSetsIsExhaustive = collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); - ruleStack.pop_back(); - - // If the subrule had an epsilon transition to the rule end, the tokens added to - // the follow set are non-exhaustive and we should continue processing subsequent transitions post-rule - if (!ruleFollowSetsIsExhaustive) { - bool nextStateFollowSetsIsExhaustive = collectFollowSets(ruleTransition->followState, stopState, followSets, stateStack, ruleStack); - isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; - } - - } else if (transition->getTransitionType() == antlr4::atn::TransitionType::PREDICATE) { - if (checkPredicate(static_cast(transition))) { - bool nextStateFollowSetsIsExhaustive = collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); - isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; - } - } else if (transition->isEpsilon()) { - bool nextStateFollowSetsIsExhaustive = collectFollowSets(transition->target, stopState, followSets, stateStack, ruleStack); - isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; - } else if (transition->getTransitionType() == antlr4::atn::TransitionType::WILDCARD) { - FollowSetWithPath set; - set.intervals = antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType); - set.path = ruleStack; - followSets.push_back(set); - } else { - antlr4::misc::IntervalSet label = transition->label(); - if (label.size() > 0) { - if (transition->getTransitionType() == antlr4::atn::TransitionType::NOT_SET) { - label = label.complement(antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType)); - } - FollowSetWithPath set; - set.intervals = label; - set.path = ruleStack; - set.following = getFollowingTokens(transition); - followSets.push_back(set); - } - } - } - stateStack.pop_back(); - - return isExhaustive; +bool CodeCompletionCore::collectFollowSets( + antlr4::atn::ATNState* s, antlr4::atn::ATNState* stopState, + 
std::vector& followSets, + std::vector& stateStack, + std::vector& ruleStack +) { + if (std::find(stateStack.begin(), stateStack.end(), s) != stateStack.end()) { + return true; + } + stateStack.push_back(s); + + if (s == stopState || + s->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { + stateStack.pop_back(); + return false; + } + + bool isExhaustive = true; + for (antlr4::atn::ConstTransitionPtr& tp : s->transitions) { + const antlr4::atn::Transition* transition = tp.get(); + + if (transition->getTransitionType() == antlr4::atn::TransitionType::RULE) { + const antlr4::atn::RuleTransition* ruleTransition = + static_cast(transition); + + if (std::find( + ruleStack.begin(), ruleStack.end(), + ruleTransition->target->ruleIndex + ) != ruleStack.end()) { + continue; + } + + ruleStack.push_back(ruleTransition->target->ruleIndex); + + bool ruleFollowSetsIsExhaustive = collectFollowSets( + transition->target, stopState, followSets, stateStack, ruleStack + ); + ruleStack.pop_back(); + + // If the subrule had an epsilon transition to the rule end, the tokens + // added to the follow set are non-exhaustive and we should continue + // processing subsequent transitions post-rule + if (!ruleFollowSetsIsExhaustive) { + bool nextStateFollowSetsIsExhaustive = collectFollowSets( + ruleTransition->followState, stopState, followSets, stateStack, + ruleStack + ); + isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; + } + + } else if (transition->getTransitionType() == + antlr4::atn::TransitionType::PREDICATE) { + if (checkPredicate( + static_cast(transition) + )) { + bool nextStateFollowSetsIsExhaustive = collectFollowSets( + transition->target, stopState, followSets, stateStack, ruleStack + ); + isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; + } + } else if (transition->isEpsilon()) { + bool nextStateFollowSetsIsExhaustive = collectFollowSets( + transition->target, stopState, followSets, stateStack, ruleStack + ); + isExhaustive = isExhaustive && nextStateFollowSetsIsExhaustive; + } else if (transition->getTransitionType() == + antlr4::atn::TransitionType::WILDCARD) { + FollowSetWithPath set; + set.intervals = antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType + ); + set.path = ruleStack; + followSets.push_back(set); + } else { + antlr4::misc::IntervalSet label = transition->label(); + if (label.size() > 0) { + if (transition->getTransitionType() == + antlr4::atn::TransitionType::NOT_SET) { + label = label.complement(antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType + )); + } + FollowSetWithPath set; + set.intervals = label; + set.path = ruleStack; + set.following = getFollowingTokens(transition); + followSets.push_back(set); + } + } + } + stateStack.pop_back(); + + return isExhaustive; } - - /** - * Walks the ATN for a single rule only. It returns the token stream position for each path that could be matched - * in this rule. - * The result can be empty in case we hit only non-epsilon transitions that didn't match the current input or if we - * hit the caret position. + * Walks the ATN for a single rule only. It returns the token stream position + * for each path that could be matched in this rule. The result can be empty in + * case we hit only non-epsilon transitions that didn't match the current input + * or if we hit the caret position. * * @param startState The start state. * @param tokenListIndex The token index we are currently at. 
* @param callStack The stack that indicates where in the ATN we are currently. * @param precedence The current precedence level. - * @param indentation A value to determine the current indentation when doing debug prints. - * @returns the set of token stream indexes (which depend on the ways that had to be taken). + * @param indentation A value to determine the current indentation when doing + * debug prints. + * @returns the set of token stream indexes (which depend on the ways that had + * to be taken). */ -RuleEndStatus CodeCompletionCore::processRule(antlr4::atn::RuleStartState * startState, size_t tokenListIndex, RuleWithStartTokenList& callStack, int precedence, size_t indentation, bool& cancelled) -{ - // Cancelled by external caller? - if (cancel && cancel->load()) { - cancelled = true; - return {}; - } - - // Check for timeout - cancelled = false; - if (timeoutMS > 0) { - std::chrono::duration timeout(timeoutMS); - if (std::chrono::steady_clock::now() - timeoutStart > timeout) { - cancelled = true; - return {}; - } - } - - - // Start with rule specific handling before going into the ATN walk. - - // Check first if we've taken this path with the same input before. - std::map positionMap; - if (!shortcutMap.contains(startState->ruleIndex)) { - shortcutMap[startState->ruleIndex] = positionMap; - } else { - positionMap = shortcutMap[startState->ruleIndex]; - if (positionMap.contains(tokenListIndex)) { - if (showDebugOutput) { - std::cout << "=====> shortcut" << "\n"; - } - - return positionMap[tokenListIndex]; - } - } - - RuleEndStatus result; - - // For rule start states we determine and cache the follow set, which gives us 3 advantages: - // 1) We can quickly check if a symbol would be matched when we follow that rule. We can so check in advance - // and can save us all the intermediate steps if there is no match. - // 2) We'll have all symbols that are collectable already together when we are at the caret on rule enter. - // 3) We get this lookup for free with any 2nd or further visit of the same rule, which often happens - // in non trivial grammars, especially with (recursive) expressions and of course when invoking code - // completion multiple times. - - if (!followSetsByATN.contains(typeid(parser))) { - followSetsByATN[typeid(parser)] = FollowSetsPerState(); - } - - FollowSetsPerState &setsPerState = followSetsByATN[typeid(parser)]; - if (!setsPerState.contains(startState->stateNumber)) { - auto stop = atn.ruleToStopState[startState->ruleIndex]; - auto followSets = determineFollowSets(startState, stop); - setsPerState[startState->stateNumber] = followSets; - } - FollowSetsHolder followSets = setsPerState[startState->stateNumber]; - - - // Get the token index where our rule starts from our (possibly filtered) token list - size_t startTokenIndex = tokens[tokenListIndex]->getTokenIndex(); - - callStack.push_back({ - .startTokenIndex = startTokenIndex, - .ruleIndex = startState->ruleIndex, - }); - - if (tokenListIndex >= tokens.size() - 1) { // At caret? - if (preferredRules.contains(startState->ruleIndex)) { - // No need to go deeper when collecting entries and we reach a rule that we want to collect anyway. - translateStackToRuleIndex(callStack); - } else { - // Convert all follow sets to either single symbols or their associated preferred rule and add - // the result to our candidates list. 
- for (FollowSetWithPath &set: followSets.sets) { - RuleWithStartTokenList fullPath = callStack; - - // Rules derived from our followSet will always start at the same token as our current rule. - RuleWithStartTokenList followSetPath; - for (size_t rule: set.path) { - followSetPath.push_back({ - .startTokenIndex = startTokenIndex, - .ruleIndex = rule, - }); - } - - fullPath.insert(fullPath.end(), followSetPath.begin(), followSetPath.end()); - - if (!translateStackToRuleIndex(fullPath)) { - for (ssize_t symbol: set.intervals.toList()) { - if (!ignoredTokens.contains((size_t)symbol)) { - - if (showDebugOutput) { - std::cout << "=====> collected: " << vocabulary.getDisplayName(symbol) << "\n"; - } - if (!candidates.tokens.contains(symbol)) { - // Following is empty if there is more than one entry in the set. - candidates.tokens[symbol] = set.following; - } else { - // More than one following list for the same symbol. - if (candidates.tokens[symbol] != set.following) { - candidates.tokens[symbol] = {}; - } - } - - } - } - } - } - } - - if (!followSets.isExhaustive) { - // If we're at the caret but the follow sets is non-exhaustive (empty or all tokens are optional), - // we should continue to collect tokens following this rule - result.insert(tokenListIndex); - } - - callStack.pop_back(); - - return result; - - } else { - // Process the rule if we either could pass it without consuming anything (epsilon transition) - // or if the current input symbol will be matched somewhere after this entry point. - // Otherwise stop here. - size_t currentSymbol = tokens[tokenListIndex]->getType(); - if (followSets.isExhaustive && !followSets.combined.contains(currentSymbol)) { - callStack.pop_back(); - - return result; - } - } - - if (startState->isLeftRecursiveRule) { - precedenceStack.push_back(precedence); - } - - // The current state execution pipeline contains all yet-to-be-processed ATN states in this rule. - // For each such state we store the token index + a list of rules that lead to it. - std::vector statePipeline; - - // Bootstrap the pipeline. - statePipeline.push_back({ - .state = startState, - .tokenListIndex = tokenListIndex - }); - - while (statePipeline.size() > 0) { - if (cancel && cancel->load()) { - cancelled = true; - return {}; - } - - - PipelineEntry currentEntry = statePipeline.back(); - statePipeline.pop_back(); - ++statesProcessed; - - size_t currentSymbol = tokens[currentEntry.tokenListIndex]->getType(); - - bool atCaret = currentEntry.tokenListIndex >= tokens.size() - 1; - if (showDebugOutput) { - printDescription(indentation, currentEntry.state, generateBaseDescription(currentEntry.state), currentEntry.tokenListIndex); - if (showRuleStack) { - printRuleState(callStack); - } - } - - if (currentEntry.state->getStateType() == antlr4::atn::ATNStateType::RULE_STOP) { - // Record the token index we are at, to report it to the caller. - result.insert(currentEntry.tokenListIndex); - continue; - } - - - // We simulate here the same precedence handling as the parser does, which uses hard coded values. - // For rules that are not left recursive this value is ignored (since there is no precedence transition). 
- for (antlr4::atn::ConstTransitionPtr& transition: currentEntry.state->transitions) { - - switch (transition->getTransitionType()) { - case antlr4::atn::TransitionType::RULE: { - const atn::RuleTransition * ruleTransition = static_cast(transition.get()); - atn::RuleStartState * ruleStartState = static_cast(ruleTransition->target); - bool innerCancelled = false; - RuleEndStatus endStatus = processRule(ruleStartState, currentEntry.tokenListIndex, callStack, ruleTransition->precedence, indentation + 1, innerCancelled); - if (innerCancelled) { - cancelled = true; - return {}; - } - - for (size_t position: endStatus) { - statePipeline.push_back({ - .state = ruleTransition->followState, - .tokenListIndex = position, - }); - } - break; - } - - case antlr4::atn::TransitionType::PREDICATE: { - const atn::PredicateTransition * predTransition = static_cast(transition.get()); - if (checkPredicate(predTransition)) { - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex, - }); - } - break; - } - - case antlr4::atn::TransitionType::PRECEDENCE: { - const atn::PrecedencePredicateTransition * predTransition = static_cast(transition.get()); - if (predTransition->getPrecedence() >= precedenceStack[precedenceStack.size() - 1]) { - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex, - }); - } - - break; - } - - case antlr4::atn::TransitionType::WILDCARD: { - if (atCaret) { - if (!translateStackToRuleIndex(callStack)) { - for (auto token: antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType).toList()) { - if (!ignoredTokens.contains(token)) { - candidates.tokens[token] = {}; - } - } - } - } else { - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex + 1, - }); - } - break; - } - - default: { - if (transition->isEpsilon()) { - // Jump over simple states with a single outgoing epsilon transition. - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex, - }); - continue; - } - - antlr4::misc::IntervalSet set = transition->label(); - if (set.size() > 0) { - if (transition->getTransitionType() == antlr4::atn::TransitionType::NOT_SET) { - set = set.complement(antlr4::misc::IntervalSet::of(antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType)); - } - if (atCaret) { - if (!translateStackToRuleIndex(callStack)) { - std::vector list = set.toList(); - bool hasTokenSequence = list.size() == 1; - for (size_t symbol: list) { - if (!ignoredTokens.contains(symbol)) { - if (showDebugOutput) { - std::cout << "=====> collected: " << vocabulary.getDisplayName(symbol) << "\n"; - } - - std::vector followingTokens; - if (hasTokenSequence) { - followingTokens = getFollowingTokens(transition.get()); - } - if (!candidates.tokens.contains(symbol)) { - candidates.tokens[symbol] = followingTokens; - } else { - candidates.tokens[symbol] = longestCommonPrefix(followingTokens, candidates.tokens[symbol]); - } - } - } - } - } else { - if (set.contains(currentSymbol)) { - if (showDebugOutput) { - std::cout << "=====> consumed: " << vocabulary.getDisplayName(currentSymbol) << "\n"; - } - statePipeline.push_back({ - .state = transition->target, - .tokenListIndex = currentEntry.tokenListIndex + 1, - }); - } - } - } - } - } - } - } - - callStack.pop_back(); - - if (startState->isLeftRecursiveRule) { - precedenceStack.pop_back(); - } - - // Cache the result, for later lookup to avoid duplicate walks. 
- positionMap[tokenListIndex] = result; - - return result; +RuleEndStatus CodeCompletionCore::processRule( + antlr4::atn::RuleStartState* startState, size_t tokenListIndex, + RuleWithStartTokenList& callStack, int precedence, size_t indentation, + bool& cancelled +) { + // Cancelled by external caller? + if (cancel && cancel->load()) { + cancelled = true; + return {}; + } + + // Check for timeout + cancelled = false; + if (timeoutMS > 0) { + std::chrono::duration timeout(timeoutMS); + if (std::chrono::steady_clock::now() - timeoutStart > timeout) { + cancelled = true; + return {}; + } + } + + // Start with rule specific handling before going into the ATN walk. + + // Check first if we've taken this path with the same input before. + std::map positionMap; + if (!shortcutMap.contains(startState->ruleIndex)) { + shortcutMap[startState->ruleIndex] = positionMap; + } else { + positionMap = shortcutMap[startState->ruleIndex]; + if (positionMap.contains(tokenListIndex)) { + if (showDebugOutput) { + std::cout << "=====> shortcut" << "\n"; + } + + return positionMap[tokenListIndex]; + } + } + + RuleEndStatus result; + + // For rule start states we determine and cache the follow set, which gives us + // 3 advantages: 1) We can quickly check if a symbol would be matched when we + // follow that rule. We can so check in advance + // and can save us all the intermediate steps if there is no match. + // 2) We'll have all symbols that are collectable already together when we are + // at the caret on rule enter. 3) We get this lookup for free with any 2nd or + // further visit of the same rule, which often happens + // in non trivial grammars, especially with (recursive) expressions and of + // course when invoking code completion multiple times. + + if (!followSetsByATN.contains(typeid(parser))) { + followSetsByATN[typeid(parser)] = FollowSetsPerState(); + } + + FollowSetsPerState& setsPerState = followSetsByATN[typeid(parser)]; + if (!setsPerState.contains(startState->stateNumber)) { + auto stop = atn.ruleToStopState[startState->ruleIndex]; + auto followSets = determineFollowSets(startState, stop); + setsPerState[startState->stateNumber] = followSets; + } + FollowSetsHolder followSets = setsPerState[startState->stateNumber]; + + // Get the token index where our rule starts from our (possibly filtered) + // token list + size_t startTokenIndex = tokens[tokenListIndex]->getTokenIndex(); + + callStack.push_back({ + .startTokenIndex = startTokenIndex, + .ruleIndex = startState->ruleIndex, + }); + + if (tokenListIndex >= tokens.size() - 1) { // At caret? + if (preferredRules.contains(startState->ruleIndex)) { + // No need to go deeper when collecting entries and we reach a rule that + // we want to collect anyway. + translateStackToRuleIndex(callStack); + } else { + // Convert all follow sets to either single symbols or their associated + // preferred rule and add the result to our candidates list. + for (FollowSetWithPath& set : followSets.sets) { + RuleWithStartTokenList fullPath = callStack; + + // Rules derived from our followSet will always start at the same token + // as our current rule. 
+ RuleWithStartTokenList followSetPath; + for (size_t rule : set.path) { + followSetPath.push_back({ + .startTokenIndex = startTokenIndex, + .ruleIndex = rule, + }); + } + + fullPath.insert( + fullPath.end(), followSetPath.begin(), followSetPath.end() + ); + + if (!translateStackToRuleIndex(fullPath)) { + for (ssize_t symbol : set.intervals.toList()) { + if (!ignoredTokens.contains((size_t)symbol)) { + if (showDebugOutput) { + std::cout << "=====> collected: " + << vocabulary.getDisplayName(symbol) << "\n"; + } + if (!candidates.tokens.contains(symbol)) { + // Following is empty if there is more than one entry in the + // set. + candidates.tokens[symbol] = set.following; + } else { + // More than one following list for the same symbol. + if (candidates.tokens[symbol] != set.following) { + candidates.tokens[symbol] = {}; + } + } + } + } + } + } + } + + if (!followSets.isExhaustive) { + // If we're at the caret but the follow sets is non-exhaustive (empty or + // all tokens are optional), we should continue to collect tokens + // following this rule + result.insert(tokenListIndex); + } + + callStack.pop_back(); + + return result; + + } else { + // Process the rule if we either could pass it without consuming anything + // (epsilon transition) or if the current input symbol will be matched + // somewhere after this entry point. Otherwise stop here. + size_t currentSymbol = tokens[tokenListIndex]->getType(); + if (followSets.isExhaustive && + !followSets.combined.contains(currentSymbol)) { + callStack.pop_back(); + + return result; + } + } + + if (startState->isLeftRecursiveRule) { + precedenceStack.push_back(precedence); + } + + // The current state execution pipeline contains all yet-to-be-processed ATN + // states in this rule. For each such state we store the token index + a list + // of rules that lead to it. + std::vector statePipeline; + + // Bootstrap the pipeline. + statePipeline.push_back( + {.state = startState, .tokenListIndex = tokenListIndex} + ); + + while (statePipeline.size() > 0) { + if (cancel && cancel->load()) { + cancelled = true; + return {}; + } + + PipelineEntry currentEntry = statePipeline.back(); + statePipeline.pop_back(); + ++statesProcessed; + + size_t currentSymbol = tokens[currentEntry.tokenListIndex]->getType(); + + bool atCaret = currentEntry.tokenListIndex >= tokens.size() - 1; + if (showDebugOutput) { + printDescription( + indentation, currentEntry.state, + generateBaseDescription(currentEntry.state), + currentEntry.tokenListIndex + ); + if (showRuleStack) { + printRuleState(callStack); + } + } + + if (currentEntry.state->getStateType() == + antlr4::atn::ATNStateType::RULE_STOP) { + // Record the token index we are at, to report it to the caller. + result.insert(currentEntry.tokenListIndex); + continue; + } + + // We simulate here the same precedence handling as the parser does, which + // uses hard coded values. For rules that are not left recursive this value + // is ignored (since there is no precedence transition). 
+ for (antlr4::atn::ConstTransitionPtr& transition : + currentEntry.state->transitions) { + switch (transition->getTransitionType()) { + case antlr4::atn::TransitionType::RULE: { + const atn::RuleTransition* ruleTransition = + static_cast(transition.get()); + atn::RuleStartState* ruleStartState = + static_cast(ruleTransition->target); + bool innerCancelled = false; + RuleEndStatus endStatus = processRule( + ruleStartState, currentEntry.tokenListIndex, callStack, + ruleTransition->precedence, indentation + 1, innerCancelled + ); + if (innerCancelled) { + cancelled = true; + return {}; + } + + for (size_t position : endStatus) { + statePipeline.push_back({ + .state = ruleTransition->followState, + .tokenListIndex = position, + }); + } + break; + } + + case antlr4::atn::TransitionType::PREDICATE: { + const atn::PredicateTransition* predTransition = + static_cast(transition.get()); + if (checkPredicate(predTransition)) { + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex, + }); + } + break; + } + + case antlr4::atn::TransitionType::PRECEDENCE: { + const atn::PrecedencePredicateTransition* predTransition = + static_cast( + transition.get() + ); + if (predTransition->getPrecedence() >= + precedenceStack[precedenceStack.size() - 1]) { + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex, + }); + } + + break; + } + + case antlr4::atn::TransitionType::WILDCARD: { + if (atCaret) { + if (!translateStackToRuleIndex(callStack)) { + for (auto token : + antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType + ) + .toList()) { + if (!ignoredTokens.contains(token)) { + candidates.tokens[token] = {}; + } + } + } + } else { + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex + 1, + }); + } + break; + } + + default: { + if (transition->isEpsilon()) { + // Jump over simple states with a single outgoing epsilon + // transition. 
+ statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex, + }); + continue; + } + + antlr4::misc::IntervalSet set = transition->label(); + if (set.size() > 0) { + if (transition->getTransitionType() == + antlr4::atn::TransitionType::NOT_SET) { + set = set.complement(antlr4::misc::IntervalSet::of( + antlr4::Token::MIN_USER_TOKEN_TYPE, atn.maxTokenType + )); + } + if (atCaret) { + if (!translateStackToRuleIndex(callStack)) { + std::vector list = set.toList(); + bool hasTokenSequence = list.size() == 1; + for (size_t symbol : list) { + if (!ignoredTokens.contains(symbol)) { + if (showDebugOutput) { + std::cout << "=====> collected: " + << vocabulary.getDisplayName(symbol) << "\n"; + } + + std::vector followingTokens; + if (hasTokenSequence) { + followingTokens = getFollowingTokens(transition.get()); + } + if (!candidates.tokens.contains(symbol)) { + candidates.tokens[symbol] = followingTokens; + } else { + candidates.tokens[symbol] = longestCommonPrefix( + followingTokens, candidates.tokens[symbol] + ); + } + } + } + } + } else { + if (set.contains(currentSymbol)) { + if (showDebugOutput) { + std::cout << "=====> consumed: " + << vocabulary.getDisplayName(currentSymbol) << "\n"; + } + statePipeline.push_back({ + .state = transition->target, + .tokenListIndex = currentEntry.tokenListIndex + 1, + }); + } + } + } + } + } + } + } + + callStack.pop_back(); + + if (startState->isLeftRecursiveRule) { + precedenceStack.pop_back(); + } + + // Cache the result, for later lookup to avoid duplicate walks. + positionMap[tokenListIndex] = result; + + return result; } - - // ---------------------------------------------------------------------------- // MARK: - Debug // ---------------------------------------------------------------------------- -std::string CodeCompletionCore::generateBaseDescription(antlr4::atn::ATNState * state) -{ - std::string stateValue = (state->stateNumber == atn::ATNState::INVALID_STATE_NUMBER) ? "Invalid" : std::to_string(state->stateNumber); - std::stringstream output; - - output << "[" << stateValue << " " << atnStateTypeMap[(size_t)state->getStateType()] << "]"; - output << " in "; - output << ruleNames[state->ruleIndex]; - return output.str(); +std::string CodeCompletionCore::generateBaseDescription( + antlr4::atn::ATNState* state +) { + std::string stateValue = + (state->stateNumber == atn::ATNState::INVALID_STATE_NUMBER) + ? "Invalid" + : std::to_string(state->stateNumber); + std::stringstream output; + + output << "[" << stateValue << " " + << atnStateTypeMap[(size_t)state->getStateType()] << "]"; + output << " in "; + output << ruleNames[state->ruleIndex]; + return output.str(); } - -void CodeCompletionCore::printDescription(size_t indentation, antlr4::atn::ATNState * state, std::string const& baseDescription, size_t tokenIndex) -{ - std::string indent = std::string(indentation * 2, ' '); - std::string output = ""; - std::string transitionDescription = ""; - - if (debugOutputWithTransitions) { - for (antlr4::atn::ConstTransitionPtr& transition: state->transitions) { - - std::string labels = ""; - std::vector symbols = transition->label().toList(); - - if (symbols.size() > 2) { - // Only print start and end symbols to avoid large lists in debug output. - labels = vocabulary.getDisplayName((size_t)symbols[0]) + " .. 
" + vocabulary.getDisplayName((size_t)symbols[symbols.size() - 1]); - } else { - for (size_t symbol: symbols) { - if (labels.size() > 0) { - labels += ", "; - } - labels += vocabulary.getDisplayName(symbol); - } - } - if (labels.size() == 0) { - labels = "ε"; - } - - transitionDescription += "\n" + indent + "\t(" + labels + ") " + - "[" + std::to_string(transition->target->stateNumber) + " " + atnStateTypeMap[(size_t)transition->target->getStateType()] + "]" - " in " + ruleNames[transition->target->ruleIndex]; - } - } - - if (tokenIndex >= tokens.size() - 1) { - output = "<<" + std::to_string(tokenStartIndex + tokenIndex) + ">> "; - } else { - output = "<" + std::to_string(tokenStartIndex + tokenIndex) + "> "; - } - - std::cout << indent + output + "Current state: " + baseDescription + transitionDescription << "\n"; +void CodeCompletionCore::printDescription( + size_t indentation, antlr4::atn::ATNState* state, + std::string const& baseDescription, size_t tokenIndex +) { + std::string indent = std::string(indentation * 2, ' '); + std::string output = ""; + std::string transitionDescription = ""; + + if (debugOutputWithTransitions) { + for (antlr4::atn::ConstTransitionPtr& transition : state->transitions) { + std::string labels = ""; + std::vector symbols = transition->label().toList(); + + if (symbols.size() > 2) { + // Only print start and end symbols to avoid large lists in debug + // output. + labels = vocabulary.getDisplayName((size_t)symbols[0]) + " .. " + + vocabulary.getDisplayName((size_t)symbols[symbols.size() - 1]); + } else { + for (size_t symbol : symbols) { + if (labels.size() > 0) { + labels += ", "; + } + labels += vocabulary.getDisplayName(symbol); + } + } + if (labels.size() == 0) { + labels = "ε"; + } + + transitionDescription += + "\n" + indent + "\t(" + labels + ") " + "[" + + std::to_string(transition->target->stateNumber) + " " + + atnStateTypeMap[(size_t)transition->target->getStateType()] + + "]" + " in " + + ruleNames[transition->target->ruleIndex]; + } + } + + if (tokenIndex >= tokens.size() - 1) { + output = "<<" + std::to_string(tokenStartIndex + tokenIndex) + ">> "; + } else { + output = "<" + std::to_string(tokenStartIndex + tokenIndex) + "> "; + } + + std::cout << indent + output + "Current state: " + baseDescription + + transitionDescription + << "\n"; } - -void CodeCompletionCore::printRuleState(RuleWithStartTokenList const& stack) -{ - if (stack.size() == 0) { - std::cout << "\n"; - return; - } - - if (stack.size() > 0) { - for (RuleWithStartToken rule: stack) { - std::cout << ruleNames[rule.ruleIndex]; - } - std::cout << "\n"; - } +void CodeCompletionCore::printRuleState(RuleWithStartTokenList const& stack) { + if (stack.size() == 0) { + std::cout << "\n"; + return; + } + + if (stack.size() > 0) { + for (RuleWithStartToken rule : stack) { + std::cout << ruleNames[rule.ruleIndex]; + } + std::cout << "\n"; + } } - -} // namespace c3; +} // namespace c3 diff --git a/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp b/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp index 0792085..5e88065 100644 --- a/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp +++ b/ports/cpp/source/antlr4-c3/CodeCompletionCore.hpp @@ -1,22 +1,22 @@ // // CodeCompletionCore.hpp // -// C++ port of antlr4-c3 (TypeScript) by Mike Lischke +// C++ port of antlr4-c3 (TypeScript) by Mike Lischke // Licensed under the MIT License. 
-// +// #ifndef CodeCompletionCore_hpp #define CodeCompletionCore_hpp -#include +#include +#include #include -#include #include -#include #include -#include -#include "antlr4-runtime.h" +#include +#include +#include "antlr4-runtime.h" // ---------------------------------------------------------------------------- // Supporting Types @@ -26,209 +26,220 @@ namespace c3 { using TokenList = std::vector; using RuleList = std::vector; - struct RuleWithStartToken { - size_t startTokenIndex; - size_t ruleIndex; + size_t startTokenIndex; + size_t ruleIndex; }; using RuleWithStartTokenList = std::vector; - /** * A record for a follow set along with the path at which this set was found. - * If there is only a single symbol in the interval set then we also collect and store tokens which follow - * this symbol directly in its rule (i.e. there is no intermediate rule transition). Only single label transitions - * are considered. This is useful if you have a chain of tokens which can be suggested as a whole, because there is - * a fixed sequence in the grammar. + * If there is only a single symbol in the interval set then we also collect and + * store tokens which follow this symbol directly in its rule (i.e. there is no + * intermediate rule transition). Only single label transitions are considered. + * This is useful if you have a chain of tokens which can be suggested as a + * whole, because there is a fixed sequence in the grammar. */ struct FollowSetWithPath { - antlr4::misc::IntervalSet intervals; - RuleList path; - TokenList following; + antlr4::misc::IntervalSet intervals; + RuleList path; + TokenList following; }; - - /** - * A list of follow sets (for a given state number) + all of them combined for quick hit tests + whether they are - * exhaustive (false if subsequent yet-unprocessed rules could add further tokens to the follow set, true otherwise). - * This data is static in nature (because the used ATN states are part of a static struct: the ATN). - * Hence it can be shared between all C3 instances, however it depends on the actual parser class (type). + * A list of follow sets (for a given state number) + all of them combined for + * quick hit tests + whether they are exhaustive (false if subsequent + * yet-unprocessed rules could add further tokens to the follow set, true + * otherwise). This data is static in nature (because the used ATN states are + * part of a static struct: the ATN). Hence it can be shared between all C3 + * instances, however it depends on the actual parser class (type). */ struct FollowSetsHolder { - std::vector sets; - antlr4::misc::IntervalSet combined; - bool isExhaustive; + std::vector sets; + antlr4::misc::IntervalSet combined; + bool isExhaustive; }; - using FollowSetsPerState = std::map; - /** Token stream position info after a rule was processed. */ using RuleEndStatus = std::unordered_set; struct PipelineEntry { - antlr4::atn::ATNState * state; - size_t tokenListIndex; + antlr4::atn::ATNState* state; + size_t tokenListIndex; }; - - - // ---------------------------------------------------------------------------- // CandidatesCollection // ---------------------------------------------------------------------------- struct CandidateRule { - size_t startTokenIndex; - RuleList ruleList; + size_t startTokenIndex; + RuleList ruleList; }; - /** * All the candidates which have been found. Tokens and rules are separated. - * – Token entries include a list of tokens that directly follow them (see also the "following" member in the - * FollowSetWithPath class). 
- * – Rule entries include the index of the starting token within the evaluated rule, along with a call stack of rules - * found during evaluation. - * – cancelled will be true if the collectCandidates() was cancelled or timed out. + * – Token entries include a list of tokens that directly follow them (see also + * the "following" member in the FollowSetWithPath class). – Rule entries + * include the index of the starting token within the evaluated rule, along with + * a call stack of rules found during evaluation. – cancelled will be true if + * the collectCandidates() was cancelled or timed out. */ struct CandidatesCollection { - std::map tokens; - std::map rules; - bool cancelled; + std::map tokens; + std::map rules; + bool cancelled; }; - - // ---------------------------------------------------------------------------- // Code Completion Core // ---------------------------------------------------------------------------- -class CodeCompletionCore -{ +class CodeCompletionCore { public: - - // -------------------------------------------------------- - // Construction - // -------------------------------------------------------- - - CodeCompletionCore(antlr4::Parser * parser); - - - // -------------------------------------------------------- - // Configuration - // -------------------------------------------------------- - - /** - * Tailoring of the result: - * Tokens which should not appear in the candidates set. - */ - std::unordered_set ignoredTokens; - - /** - * Rules which replace any candidate token they contain. - * This allows to return descriptive rules (e.g. className, instead of ID/identifier). - */ - std::unordered_set preferredRules; - - /** - * Specify if preferred rules should translated top-down (higher index rule returns first) or - * bottom-up (lower index rule returns first). - */ - bool translateRulesTopDown = false; - - - - - // -------------------------------------------------------- - // Debugging Options - // -------------------------------------------------------- - // Print human readable ATN state and other info. - - /** Not dependent on showDebugOutput. Prints the collected rules + tokens to terminal. */ - bool showResult = false; - - /** Enables printing ATN state info to terminal. */ - bool showDebugOutput = false; - - /** Only relevant when showDebugOutput is true. Enables transition printing for a state. */ - bool debugOutputWithTransitions = false; - - /** Also depends on showDebugOutput. Enables call stack printing for each rule recursion. */ - bool showRuleStack = false; - - - - // -------------------------------------------------------- - // Usage - // -------------------------------------------------------- - - /** - * This is the main entry point. The caret token index specifies the token stream index for the token which - * currently covers the caret (or any other position you want to get code completion candidates for). - * Optionally you can pass in a parser rule context which limits the ATN walk to only that or called rules. - * This can significantly speed up the retrieval process but might miss some candidates (if they are outside of - * the given context). - * - * @param caretTokenIndex The index of the token at the caret position. - * @param context An option parser rule context to limit the search space. - * @param timeoutMS If non-zero, the number of milliseconds until collecting times out. 
-     * @param cancel If set to a non-NULL atomic boolean, and that boolean value is set to true while the function is executing, then
-     *        collecting candidates will abort as soon as possible.
-     * @returns The collection of completion candidates. If cancelled or timed out, the returned collection will have its 'cancelled'
-     *          value set to true and the collected candidates may be incomplete.
-     */
-    CandidatesCollection collectCandidates(size_t caretTokenIndex, antlr4::ParserRuleContext * context = nullptr, size_t timeoutMS = 0, std::atomic * cancel = nullptr);
-
-
-
-
-    // --------------------------------------------------------
-    // Private
-    // --------------------------------------------------------
+  // --------------------------------------------------------
+  // Construction
+  // --------------------------------------------------------
+
+  CodeCompletionCore(antlr4::Parser* parser);
+
+  // --------------------------------------------------------
+  // Configuration
+  // --------------------------------------------------------
+
+  /**
+   * Tailoring of the result:
+   * Tokens which should not appear in the candidates set.
+   */
+  std::unordered_set ignoredTokens;
+
+  /**
+   * Rules which replace any candidate token they contain.
+   * This allows to return descriptive rules (e.g. className, instead of
+   * ID/identifier).
+   */
+  std::unordered_set preferredRules;
+
+  /**
+   * Specify if preferred rules should translated top-down (higher index rule
+   * returns first) or bottom-up (lower index rule returns first).
+   */
+  bool translateRulesTopDown = false;
+
+  // --------------------------------------------------------
+  // Debugging Options
+  // --------------------------------------------------------
+  // Print human readable ATN state and other info.
+
+  /** Not dependent on showDebugOutput. Prints the collected rules + tokens to
+   * terminal. */
+  bool showResult = false;
+
+  /** Enables printing ATN state info to terminal. */
+  bool showDebugOutput = false;
+
+  /** Only relevant when showDebugOutput is true. Enables transition printing
+   * for a state. */
+  bool debugOutputWithTransitions = false;
+
+  /** Also depends on showDebugOutput. Enables call stack printing for each rule
+   * recursion. */
+  bool showRuleStack = false;
+
+  // --------------------------------------------------------
+  // Usage
+  // --------------------------------------------------------
+
+  /**
+   * This is the main entry point. The caret token index specifies the token
+   * stream index for the token which currently covers the caret (or any other
+   * position you want to get code completion candidates for). Optionally you
+   * can pass in a parser rule context which limits the ATN walk to only that or
+   * called rules. This can significantly speed up the retrieval process but
+   * might miss some candidates (if they are outside of the given context).
+   *
+   * @param caretTokenIndex The index of the token at the caret position.
+   * @param context An option parser rule context to limit the search space.
+   * @param timeoutMS If non-zero, the number of milliseconds until collecting
+   * times out.
+   * @param cancel If set to a non-NULL atomic boolean, and that boolean value
+   * is set to true while the function is executing, then collecting candidates
+   * will abort as soon as possible.
+   * @returns The collection of completion candidates. If cancelled or timed
+   * out, the returned collection will have its 'cancelled' value set to true
+   * and the collected candidates may be incomplete.
+   */
+  CandidatesCollection collectCandidates(
+      size_t caretTokenIndex, antlr4::ParserRuleContext* context = nullptr,
+      size_t timeoutMS = 0, std::atomic* cancel = nullptr
+  );
+
+  // --------------------------------------------------------
+  // Private
+  // --------------------------------------------------------
 private:
-
-    static std::map followSetsByATN;
-    static std::vector atnStateTypeMap;
-
-    antlr4::Parser * parser;
-    antlr4::atn::ATN const& atn;
-    antlr4::dfa::Vocabulary const& vocabulary;
-    std::vector const& ruleNames;
-    std::vector tokens;
-    std::vector precedenceStack;
-
-    size_t tokenStartIndex = 0;
-    size_t statesProcessed = 0;
-
-    /**
-     * A mapping of rule index + token stream position to end token positions.
-     * A rule which has been visited before with the same input position will always produce the same output positions.
-     */
-    std::map> shortcutMap;
-
-    /** The collected candidates (rules and tokens). */
-    c3::CandidatesCollection candidates;
-    size_t timeoutMS;
-    std::atomic * cancel;
-    std::chrono::steady_clock::time_point timeoutStart;
-
-    bool checkPredicate(const antlr4::atn::PredicateTransition * transition);
-    bool translateStackToRuleIndex(RuleWithStartTokenList const& ruleWithStartTokenList);
-    bool translateToRuleIndex(size_t i, RuleWithStartTokenList const& ruleWithStartTokenList);
-    std::vector getFollowingTokens(const antlr4::atn::Transition * transition);
-    FollowSetsHolder determineFollowSets(antlr4::atn::ATNState * start, antlr4::atn::ATNState * stop);
-    bool collectFollowSets(antlr4::atn::ATNState * s, antlr4::atn::ATNState * stopState, std::vector& followSets, std::vector& stateStack, std::vector& ruleStack);
-    RuleEndStatus processRule(antlr4::atn::RuleStartState * startState, size_t tokenListIndex, RuleWithStartTokenList& callStack, int precedence, size_t indentation, bool& timedOut);
-
-    std::string generateBaseDescription(antlr4::atn::ATNState * state);
-    void printDescription(size_t indentation, antlr4::atn::ATNState * state, std::string const& baseDescription, size_t tokenIndex);
-    void printRuleState(RuleWithStartTokenList const& stack);
+  static std::map followSetsByATN;
+  static std::vector atnStateTypeMap;
+
+  antlr4::Parser* parser;
+  antlr4::atn::ATN const& atn;
+  antlr4::dfa::Vocabulary const& vocabulary;
+  std::vector const& ruleNames;
+  std::vector tokens;
+  std::vector precedenceStack;
+
+  size_t tokenStartIndex = 0;
+  size_t statesProcessed = 0;
+
+  /**
+   * A mapping of rule index + token stream position to end token positions.
+   * A rule which has been visited before with the same input position will
+   * always produce the same output positions.
+   */
+  std::map> shortcutMap;
+
+  /** The collected candidates (rules and tokens). */
+  c3::CandidatesCollection candidates;
+  size_t timeoutMS;
+  std::atomic* cancel;
+  std::chrono::steady_clock::time_point timeoutStart;
+
+  bool checkPredicate(const antlr4::atn::PredicateTransition* transition);
+  bool translateStackToRuleIndex(
+      RuleWithStartTokenList const& ruleWithStartTokenList
+  );
+  bool translateToRuleIndex(
+      size_t i, RuleWithStartTokenList const& ruleWithStartTokenList
+  );
+  std::vector getFollowingTokens(
+      const antlr4::atn::Transition* transition
+  );
+  FollowSetsHolder determineFollowSets(
+      antlr4::atn::ATNState* start, antlr4::atn::ATNState* stop
+  );
+  bool collectFollowSets(
+      antlr4::atn::ATNState* s, antlr4::atn::ATNState* stopState,
+      std::vector& followSets,
+      std::vector& stateStack,
+      std::vector& ruleStack
+  );
+  RuleEndStatus processRule(
+      antlr4::atn::RuleStartState* startState, size_t tokenListIndex,
+      RuleWithStartTokenList& callStack, int precedence, size_t indentation,
+      bool& timedOut
+  );
+
+  std::string generateBaseDescription(antlr4::atn::ATNState* state);
+  void printDescription(
+      size_t indentation, antlr4::atn::ATNState* state,
+      std::string const& baseDescription, size_t tokenIndex
+  );
+  void printRuleState(RuleWithStartTokenList const& stack);
 };

-} // namespace c3
+}  // namespace c3

 #endif /* CodeCompletionCore_hpp */
diff --git a/ports/cpp/test/cpp14/Cpp14Test.cpp b/ports/cpp/test/cpp14/Cpp14Test.cpp
index 8af94bf..9b202ed 100644
--- a/ports/cpp/test/cpp14/Cpp14Test.cpp
+++ b/ports/cpp/test/cpp14/Cpp14Test.cpp
@@ -1,11 +1,9 @@
-#include
-
 #include
 #include
-#include
-
+#include
 #include
+#include

 namespace c3::test {

@@ -19,11 +17,12 @@ TEST(CPP14Parser, SimpleExample) {
   // adjusting the grammar in any way. We use the grammar as downloaded from the
   // ANTLR grammar directory and set up the c3 engine instead in a way that
   // still returns useful info. This limits us somewhat.
- const auto *source = "class A {\n" - "public:\n" - " void test() {\n" - " }\n" - "};\n"; + const auto* source = + "class A {\n" + "public:\n" + " void test() {\n" + " }\n" + "};\n"; AntlrPipeline pipeline(source); pipeline.parser.translationunit(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 0); @@ -68,12 +67,17 @@ TEST(CPP14Parser, SimpleExample) { CPP14Lexer::Friend, CPP14Lexer::Typedef, CPP14Lexer::Constexpr, CPP14Lexer::Alignas, CPP14Lexer::Asm, CPP14Lexer::Namespace, CPP14Lexer::Using, CPP14Lexer::Static_assert, CPP14Lexer::Template, - CPP14Lexer::EOF)); + CPP14Lexer::EOF + ) + ); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(CPP14Parser::RuleClassname, - CPP14Parser::RuleNamespacename, - CPP14Parser::RuleIdexpression)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + CPP14Parser::RuleClassname, CPP14Parser::RuleNamespacename, + CPP14Parser::RuleIdexpression + ) + ); EXPECT_THAT( candidates.rules[CPP14Parser::RuleNamespacename].ruleList, @@ -81,8 +85,9 @@ TEST(CPP14Parser, SimpleExample) { CPP14Parser::RuleTranslationunit, CPP14Parser::RuleDeclarationseq, CPP14Parser::RuleDeclaration, CPP14Parser::RuleFunctiondefinition, CPP14Parser::RuleDeclarator, CPP14Parser::RulePtrdeclarator, - CPP14Parser::RulePtroperator, - CPP14Parser::RuleNestednamespecifier)); + CPP14Parser::RulePtroperator, CPP14Parser::RuleNestednamespecifier + ) + ); EXPECT_THAT( candidates.rules[CPP14Parser::RuleClassname].ruleList, @@ -91,7 +96,9 @@ TEST(CPP14Parser, SimpleExample) { CPP14Parser::RuleDeclaration, CPP14Parser::RuleFunctiondefinition, CPP14Parser::RuleDeclarator, CPP14Parser::RulePtrdeclarator, CPP14Parser::RulePtroperator, CPP14Parser::RuleNestednamespecifier, - CPP14Parser::RuleTypename)); + CPP14Parser::RuleTypename + ) + ); } for (auto translateRulesTopDown : {false, true}) { // 2) Within the method body. 
@@ -104,8 +111,8 @@ TEST(CPP14Parser, SimpleExample) { CPP14Parser::RuleTranslationunit, CPP14Parser::RuleDeclarationseq, CPP14Parser::RuleDeclaration, - CPP14Parser::RuleBlockdeclaration, // TS: +- `RuleFunctiondefinition` - CPP14Parser::RuleSimpledeclaration, // TS: -- + CPP14Parser::RuleBlockdeclaration, // TS: +- `RuleFunctiondefinition` + CPP14Parser::RuleSimpledeclaration, // TS: -- CPP14Parser::RuleDeclspecifierseq, CPP14Parser::RuleDeclspecifier, CPP14Parser::RuleTypespecifier, @@ -139,40 +146,49 @@ TEST(CPP14Parser, SimpleExample) { CPP14Parser::RulePrimaryexpression, }; - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(CPP14Parser::RuleClassname, - CPP14Parser::RuleNamespacename, - CPP14Parser::RuleIdexpression)); - - EXPECT_THAT(candidates.rules[CPP14Parser::RuleIdexpression].ruleList, - ElementsAreArray(idexpressionStack)); - - EXPECT_THAT(candidates.rules[CPP14Parser::RuleClassname].ruleList, - ElementsAreArray([&] { - auto stack = idexpressionStack; - stack.pop_back(); - for (auto rule : { - CPP14Parser::RuleSimpletypespecifier, - CPP14Parser::RuleNestednamespecifier, - CPP14Parser::RuleTypename, - }) { - stack.emplace_back(rule); - } - return stack; - }())); - - EXPECT_THAT(candidates.rules[CPP14Parser::RuleNamespacename].ruleList, - ElementsAreArray([&] { - auto stack = idexpressionStack; - stack.pop_back(); - for (auto rule : { - CPP14Parser::RuleSimpletypespecifier, - CPP14Parser::RuleNestednamespecifier, - }) { - stack.emplace_back(rule); - } - return stack; - }())); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + CPP14Parser::RuleClassname, CPP14Parser::RuleNamespacename, + CPP14Parser::RuleIdexpression + ) + ); + + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleIdexpression].ruleList, + ElementsAreArray(idexpressionStack) + ); + + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleClassname].ruleList, + ElementsAreArray([&] { + auto stack = idexpressionStack; + stack.pop_back(); + for (auto rule : { + CPP14Parser::RuleSimpletypespecifier, + CPP14Parser::RuleNestednamespecifier, + CPP14Parser::RuleTypename, + }) { + stack.emplace_back(rule); + } + return stack; + }()) + ); + + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleNamespacename].ruleList, + ElementsAreArray([&] { + auto stack = idexpressionStack; + stack.pop_back(); + for (auto rule : { + CPP14Parser::RuleSimpletypespecifier, + CPP14Parser::RuleNestednamespecifier, + }) { + stack.emplace_back(rule); + } + return stack; + }()) + ); } { // 2) Within the method body. 
@@ -183,18 +199,20 @@ TEST(CPP14Parser, SimpleExample) { EXPECT_EQ(candidates.tokens.size(), 82); - EXPECT_THAT(Keys(candidates.tokens), - IsSupersetOf({ - CPP14Lexer::If, - CPP14Lexer::This, - CPP14Lexer::New, - CPP14Lexer::Case, - CPP14Lexer::While, - CPP14Lexer::Throw, - // Fixing issue #12 causes this to be included that was - // previously not returned - CPP14Lexer::Decltype, - })); + EXPECT_THAT( + Keys(candidates.tokens), + IsSupersetOf({ + CPP14Lexer::If, + CPP14Lexer::This, + CPP14Lexer::New, + CPP14Lexer::Case, + CPP14Lexer::While, + CPP14Lexer::Throw, + // Fixing issue #12 causes this to be included that was + // previously not returned + CPP14Lexer::Decltype, + }) + ); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::Override)); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::Export)); @@ -204,12 +222,13 @@ TEST(CPP14Parser, SimpleExample) { } TEST(CPP14Parser, SimpleCppExampleWithErrorsInInput) { - const auto *source = "class A {\n" - "public:\n" - " void test() {\n" - " if ()" - " }\n" - "};\n"; + const auto* source = + "class A {\n" + "public:\n" + " void test() {\n" + " if ()" + " }\n" + "};\n"; AntlrPipeline pipeline(source); pipeline.parser.translationunit(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 3); @@ -242,8 +261,9 @@ TEST(CPP14Parser, SimpleCppExampleWithErrorsInInput) { // At the opening parenthesis. auto candidates = completion.collectCandidates(11); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(CPP14Lexer::LeftParen)); + EXPECT_THAT( + Keys(candidates.tokens), UnorderedElementsAre(CPP14Lexer::LeftParen) + ); } { // At the closing parenthesis -> again everything in an expression allowed @@ -286,8 +306,9 @@ TEST(CPP14Parser, RealCppFile) { const auto source = [] { // Assume we are at antlr4-c3/ports/cpp/build std::ifstream file("../../../tests/Parser.cpp"); - std::string content((std::istreambuf_iterator(file)), - std::istreambuf_iterator()); + std::string content( + (std::istreambuf_iterator(file)), std::istreambuf_iterator() + ); return content; }(); @@ -331,31 +352,40 @@ TEST(CPP14Parser, RealCppFile) { CPP14Parser::RuleDeclaratorid, }; - std::vector classnameStack = - Concat(idexpressionStack, { - CPP14Parser::RuleIdexpression, - CPP14Parser::RuleQualifiedid, - CPP14Parser::RuleNestednamespecifier, - CPP14Parser::RuleTypename, - }); - - std::vector namespacenameStack = - Concat(idexpressionStack, { - CPP14Parser::RuleIdexpression, - CPP14Parser::RuleQualifiedid, - CPP14Parser::RuleNestednamespecifier, - }); + std::vector classnameStack = Concat( + idexpressionStack, + { + CPP14Parser::RuleIdexpression, + CPP14Parser::RuleQualifiedid, + CPP14Parser::RuleNestednamespecifier, + CPP14Parser::RuleTypename, + } + ); + + std::vector namespacenameStack = Concat( + idexpressionStack, + { + CPP14Parser::RuleIdexpression, + CPP14Parser::RuleQualifiedid, + CPP14Parser::RuleNestednamespecifier, + } + ); { auto candidates = completion.collectCandidates(3469); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(CPP14Parser::RuleClassname, - CPP14Parser::RuleNamespacename, - CPP14Parser::RuleIdexpression)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + CPP14Parser::RuleClassname, CPP14Parser::RuleNamespacename, + CPP14Parser::RuleIdexpression + ) + ); - EXPECT_THAT(candidates.rules[CPP14Parser::RuleIdexpression].ruleList, - ElementsAreArray(idexpressionStack)); + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleIdexpression].ruleList, + ElementsAreArray(idexpressionStack) + ); } { // We should receive more specific rules 
when translating top down. @@ -363,18 +393,22 @@ TEST(CPP14Parser, RealCppFile) { completion.translateRulesTopDown = true; auto candidates = completion.collectCandidates(3469); - EXPECT_THAT(candidates.rules[CPP14Parser::RuleClassname].ruleList, - ElementsAreArray(classnameStack)); + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleClassname].ruleList, + ElementsAreArray(classnameStack) + ); - EXPECT_THAT(candidates.rules[CPP14Parser::RuleNamespacename].ruleList, - ElementsAreArray(namespacenameStack)); + EXPECT_THAT( + candidates.rules[CPP14Parser::RuleNamespacename].ruleList, + ElementsAreArray(namespacenameStack) + ); // We are starting a primary expression in a function body, so everything // related to expressions and control flow is allowed here. We only check // for a few possible keywords. - EXPECT_EQ(candidates.tokens.size(), 40); // TS: 82 + EXPECT_EQ(candidates.tokens.size(), 40); // TS: 82 - { // TS: at each statement in this block must be EXPECT_TRUE + { // TS: at each statement in this block must be EXPECT_TRUE EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::If)); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::This)); EXPECT_FALSE(candidates.tokens.contains(CPP14Lexer::New)); @@ -394,4 +428,4 @@ TEST(CPP14Parser, RealCppFile) { } } -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/expr/ExprTest.cpp b/ports/cpp/test/expr/ExprTest.cpp index 01edd87..b3f1a65 100644 --- a/ports/cpp/test/expr/ExprTest.cpp +++ b/ports/cpp/test/expr/ExprTest.cpp @@ -1,8 +1,7 @@ -#include - #include #include +#include #include namespace c3::test { @@ -24,11 +23,16 @@ TEST(SimpleExpressionParser, MostSimpleSetup) { auto candidates = completion.collectCandidates(0); EXPECT_THAT( Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET, ExprLexer::ID)); - EXPECT_THAT(candidates.tokens[ExprLexer::VAR], - ElementsAre(ExprLexer::ID, ExprLexer::EQUAL)); - EXPECT_THAT(candidates.tokens[ExprLexer::LET], - ElementsAre(ExprLexer::ID, ExprLexer::EQUAL)); + UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET, ExprLexer::ID) + ); + EXPECT_THAT( + candidates.tokens[ExprLexer::VAR], + ElementsAre(ExprLexer::ID, ExprLexer::EQUAL) + ); + EXPECT_THAT( + candidates.tokens[ExprLexer::LET], + ElementsAre(ExprLexer::ID, ExprLexer::EQUAL) + ); EXPECT_THAT(candidates.tokens[ExprLexer::ID], ElementsAre()); } { @@ -46,8 +50,9 @@ TEST(SimpleExpressionParser, MostSimpleSetup) { { // 4) On the equal sign (ignoring whitespace positions from now on). auto candidates = completion.collectCandidates(4); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::EQUAL)); + EXPECT_THAT( + Keys(candidates.tokens), UnorderedElementsAre(ExprLexer::EQUAL) + ); } { // 5) On the variable reference 'a'. But since we have not configure the c3 @@ -60,10 +65,13 @@ TEST(SimpleExpressionParser, MostSimpleSetup) { // 6) On the '+' operator. Usually you would not show operators as // candidates, but we have not set up the c3 engine yet to not return them. auto candidates = completion.collectCandidates(8); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::PLUS, ExprLexer::MINUS, - ExprLexer::MULTIPLY, ExprLexer::DIVIDE, - ExprLexer::OPEN_PAR)); + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre( + ExprLexer::PLUS, ExprLexer::MINUS, ExprLexer::MULTIPLY, + ExprLexer::DIVIDE, ExprLexer::OPEN_PAR + ) + ); } } @@ -85,8 +93,10 @@ TEST(SimpleExpressionParser, TypicalSetup) { { // 1) At the input start. 
auto candidates = completion.collectCandidates(0); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET)); + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre(ExprLexer::VAR, ExprLexer::LET) + ); // NOTE: Behaviour differs from TypeScript version EXPECT_THAT(candidates.tokens[ExprLexer::VAR], UnorderedElementsAre()); @@ -109,9 +119,12 @@ TEST(SimpleExpressionParser, TypicalSetup) { // Here we get 2 rule indexes, derived from 2 different IDs possible at this // caret position. These are what we told the engine above to be preferred // rules for us. - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleFunctionRef, - ExprParser::RuleVariableRef)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + ExprParser::RuleFunctionRef, ExprParser::RuleVariableRef + ) + ); EXPECT_EQ(candidates.rules[ExprParser::RuleFunctionRef].startTokenIndex, 6); EXPECT_EQ(candidates.rules[ExprParser::RuleVariableRef].startTokenIndex, 6); } @@ -120,8 +133,10 @@ TEST(SimpleExpressionParser, TypicalSetup) { // still be a function reference!). auto candidates = completion.collectCandidates(7); EXPECT_EQ(candidates.tokens.size(), 0); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleFunctionRef)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleFunctionRef) + ); EXPECT_EQ(candidates.rules[ExprParser::RuleFunctionRef].startTokenIndex, 6); } } @@ -137,35 +152,44 @@ TEST(SimpleExpressionParser, RecursivePreferredRule) { { // 1) On the variable reference 'a'. auto candidates = completion.collectCandidates(6); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleSimpleExpression)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleSimpleExpression) + ); // The start token of the simpleExpression rule begins at token 'a'. EXPECT_EQ( - candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6); + candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6 + ); } { // 2) On the variable reference 'b'. completion.translateRulesTopDown = false; auto candidates = completion.collectCandidates(10); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleSimpleExpression)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleSimpleExpression) + ); // When translateRulesTopDown is false, startTokenIndex should match the // start token for the lower index (less specific) rule in the expression, // which is 'a'. EXPECT_EQ( - candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6); + candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 6 + ); } { // 3) On the variable reference 'b' topDown preferred rules. completion.translateRulesTopDown = true; auto candidates = completion.collectCandidates(10); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleSimpleExpression)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre(ExprParser::RuleSimpleExpression) + ); // When translateRulesTopDown is true, startTokenIndex should match the // start token for the higher index (more specific) rule in the expression, // which is 'b'. EXPECT_EQ( - candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 10); + candidates.rules[ExprParser::RuleSimpleExpression].startTokenIndex, 10 + ); } } @@ -184,9 +208,12 @@ TEST(SimpleExpressionParser, CandidateRulesWithDifferentStartTokens) { { // 1) On the token 'var'. 
auto candidates = completion.collectCandidates(0); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleAssignment, - ExprParser::RuleVariableRef)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + ExprParser::RuleAssignment, ExprParser::RuleVariableRef + ) + ); // The start token of the assignment and variableRef rules begin at token // 'var'. EXPECT_EQ(candidates.rules[ExprParser::RuleAssignment].startTokenIndex, 0); @@ -195,9 +222,12 @@ TEST(SimpleExpressionParser, CandidateRulesWithDifferentStartTokens) { { // 2) On the variable reference 'a'. auto candidates = completion.collectCandidates(6); - EXPECT_THAT(Keys(candidates.rules), - UnorderedElementsAre(ExprParser::RuleAssignment, - ExprParser::RuleVariableRef)); + EXPECT_THAT( + Keys(candidates.rules), + UnorderedElementsAre( + ExprParser::RuleAssignment, ExprParser::RuleVariableRef + ) + ); // The start token of the assignment rule begins at token 'var'. EXPECT_EQ(candidates.rules[ExprParser::RuleAssignment].startTokenIndex, 0); // The start token of the variableRef rule begins at token 'a'. @@ -205,4 +235,4 @@ TEST(SimpleExpressionParser, CandidateRulesWithDifferentStartTokens) { } } -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/utility/AntlrPipeline.hpp b/ports/cpp/test/utility/AntlrPipeline.hpp index 2e423d5..406317e 100644 --- a/ports/cpp/test/utility/AntlrPipeline.hpp +++ b/ports/cpp/test/utility/AntlrPipeline.hpp @@ -9,20 +9,24 @@ namespace c3::test { class CountingErrorListener final : public antlr4::BaseErrorListener { public: - void syntaxError(antlr4::Recognizer *recognizer, - antlr4::Token *offendingSymbol, std::size_t line, - std::size_t charPositionInLine, const std::string &msg, - std::exception_ptr e) override { + void syntaxError( + antlr4::Recognizer* recognizer, antlr4::Token* offendingSymbol, + std::size_t line, std::size_t charPositionInLine, const std::string& msg, + std::exception_ptr e + ) override { errorCount += 1; } - std::size_t GetErrorCount() const { return errorCount; } + std::size_t GetErrorCount() const { + return errorCount; + } private: std::size_t errorCount = 0; }; -template struct AntlrPipeline { +template +struct AntlrPipeline { AntlrPipeline(std::string_view text) : chars(text), lexer(&chars), tokens(&lexer), parser(&tokens) { parser.removeErrorListeners(); @@ -36,4 +40,4 @@ template struct AntlrPipeline { CountingErrorListener listener; }; -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/utility/Collections.hpp b/ports/cpp/test/utility/Collections.hpp index 64cfdbd..df88cd3 100644 --- a/ports/cpp/test/utility/Collections.hpp +++ b/ports/cpp/test/utility/Collections.hpp @@ -5,20 +5,21 @@ namespace c3::test { -template std::vector Keys(const std::map &map) { +template +std::vector Keys(const std::map& map) { std::vector keys; - for (const auto &[key, value] : map) { + for (const auto& [key, value] : map) { keys.emplace_back(key); } return keys; } template -std::vector Concat(std::vector lhs, const std::vector &rhs) { - for (const auto &element : rhs) { +std::vector Concat(std::vector lhs, const std::vector& rhs) { + for (const auto& element : rhs) { lhs.emplace_back(element); } return lhs; } -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/utility/Testing.hpp b/ports/cpp/test/utility/Testing.hpp index 81b1b20..bccd226 100644 --- a/ports/cpp/test/utility/Testing.hpp +++ b/ports/cpp/test/utility/Testing.hpp @@ -14,4 +14,4 @@ using testing::IsSupersetOf; using 
testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; -} // namespace c3::test +} // namespace c3::test diff --git a/ports/cpp/test/whitebox/WhiteboxTest.cpp b/ports/cpp/test/whitebox/WhiteboxTest.cpp index e43f6a8..bf328a8 100644 --- a/ports/cpp/test/whitebox/WhiteboxTest.cpp +++ b/ports/cpp/test/whitebox/WhiteboxTest.cpp @@ -1,8 +1,7 @@ -#include - #include #include +#include #include namespace c3::test { @@ -15,75 +14,87 @@ struct WhiteboxGrammar { /// (optional tokens) TEST(WhiteboxGrammarTests, CaretAtTransitionToRuleWithNonExhaustiveFollowSet) { AntlrPipeline pipeline("LOREM "); - auto *ctx = pipeline.parser.test1(); + auto* ctx = pipeline.parser.test1(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 1); c3::CodeCompletionCore completion(&pipeline.parser); auto candidates = completion.collectCandidates(1, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::IPSUM, WhiteboxLexer::DOLOR, - WhiteboxLexer::SIT, WhiteboxLexer::AMET, - WhiteboxLexer::CONSECTETUR)); + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre( + WhiteboxLexer::IPSUM, WhiteboxLexer::DOLOR, WhiteboxLexer::SIT, + WhiteboxLexer::AMET, WhiteboxLexer::CONSECTETUR + ) + ); } /// (epsilon-only transition to rule end) TEST(WhiteboxGrammarTests, CaretAtTransitionToRuleWithEmptyFollowSet) { AntlrPipeline pipeline("LOREM "); - auto *ctx = pipeline.parser.test2(); + auto* ctx = pipeline.parser.test2(); EXPECT_EQ(pipeline.listener.GetErrorCount(), 1); c3::CodeCompletionCore completion(&pipeline.parser); auto candidates = completion.collectCandidates(1, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::IPSUM, WhiteboxLexer::DOLOR, - WhiteboxLexer::SIT, WhiteboxLexer::AMET, - WhiteboxLexer::CONSECTETUR)); + EXPECT_THAT( + Keys(candidates.tokens), + UnorderedElementsAre( + WhiteboxLexer::IPSUM, WhiteboxLexer::DOLOR, WhiteboxLexer::SIT, + WhiteboxLexer::AMET, WhiteboxLexer::CONSECTETUR + ) + ); } TEST(WhiteboxGrammarTests, CaretAtOneOfMultiplePossibleStates) { for (const auto index : {4, 5, 6, 7}) { AntlrPipeline pipeline("LOREM IPSUM "); - auto *ctx = [&]() -> antlr4::ParserRuleContext * { + auto* ctx = [&]() -> antlr4::ParserRuleContext* { switch (index) { - case 4: - return pipeline.parser.test4(); - case 5: - return pipeline.parser.test5(); - case 6: - return pipeline.parser.test6(); - case 7: - return pipeline.parser.test7(); - default: - std::abort(); + case 4: + return pipeline.parser.test4(); + case 5: + return pipeline.parser.test5(); + case 6: + return pipeline.parser.test6(); + case 7: + return pipeline.parser.test7(); + default: + std::abort(); } }(); c3::CodeCompletionCore completion(&pipeline.parser); auto candidates = completion.collectCandidates(2, ctx); - EXPECT_THAT(Keys(candidates.tokens), - UnorderedElementsAre(WhiteboxLexer::DOLOR)); - EXPECT_THAT(candidates.tokens[WhiteboxLexer::DOLOR], - UnorderedElementsAre()); + EXPECT_THAT( + Keys(candidates.tokens), UnorderedElementsAre(WhiteboxLexer::DOLOR) + ); + EXPECT_THAT( + candidates.tokens[WhiteboxLexer::DOLOR], UnorderedElementsAre() + ); } } -TEST(WhiteboxGrammarTests, - CaretAtOneOfMultiplePossibleStatesWithCommonFollowList) { +TEST( + WhiteboxGrammarTests, CaretAtOneOfMultiplePossibleStatesWithCommonFollowList +) { AntlrPipeline pipeline("LOREM IPSUM "); - auto *ctx = pipeline.parser.test8(); + auto* ctx = pipeline.parser.test8(); c3::CodeCompletionCore completion(&pipeline.parser); auto candidates = completion.collectCandidates(2, ctx); - 
EXPECT_THAT(Keys(candidates.tokens),
-              UnorderedElementsAre(WhiteboxLexer::DOLOR));
-  EXPECT_THAT(candidates.tokens[WhiteboxLexer::DOLOR],
-              UnorderedElementsAre(WhiteboxLexer::SIT));
+  EXPECT_THAT(
+      Keys(candidates.tokens), UnorderedElementsAre(WhiteboxLexer::DOLOR)
+  );
+  EXPECT_THAT(
+      candidates.tokens[WhiteboxLexer::DOLOR],
+      UnorderedElementsAre(WhiteboxLexer::SIT)
+  );
 }

-} // namespace c3::test
+}  // namespace c3::test
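Note (not part of the patch): for readers new to the reformatted header, the
snippet below is a minimal sketch of one way to drive CodeCompletionCore, in
the spirit of the tests above. It assumes the Expr grammar used by
ExprTest.cpp; the start-rule name `expression`, the `WS` token name, and the
input string are illustrative assumptions, not taken from this patch.

    #include "antlr4-runtime.h"
    #include "CodeCompletionCore.hpp"
    #include "ExprLexer.h"   // generated by ANTLR from the Expr grammar
    #include "ExprParser.h"

    int main() {
      antlr4::ANTLRInputStream chars("var c = a + b()");
      ExprLexer lexer(&chars);
      antlr4::CommonTokenStream tokens(&lexer);
      ExprParser parser(&tokens);
      parser.expression();  // assumed start rule; use your grammar's entry point

      c3::CodeCompletionCore completion(&parser);
      completion.ignoredTokens = {ExprLexer::WS};  // assumed whitespace token name
      completion.preferredRules = {ExprParser::RuleVariableRef,
                                   ExprParser::RuleFunctionRef};
      completion.translateRulesTopDown = true;

      // Candidates at the token that covers the caret (token index 6 here).
      auto candidates = completion.collectCandidates(6);

      for (const auto& [tokenType, following] : candidates.tokens) {
        // Map the token type back to a display name via the parser vocabulary.
        auto name = parser.getVocabulary().getDisplayName(tokenType);
      }
      for (const auto& [ruleIndex, rule] : candidates.rules) {
        // rule.startTokenIndex and rule.ruleList describe where and how the
        // preferred rule was reached.
      }
      return 0;
    }

Timeout and cancellation follow the same call, e.g.
completion.collectCandidates(6, nullptr, 50, &cancelFlag) with a
std::atomic<bool> cancelFlag{false}; if collection is aborted, the returned
collection has cancelled set to true and may be incomplete.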