Skip to content

Commit

Permalink
VM: Fix interpretation of literals with common prefixes
Browse files Browse the repository at this point in the history
This fixes fully-qualified emojis in RGI eating up more characters than
needed.

Closes #8.

Previously, reaching an end-of-word (EOW) transition did not reset the word length to 0.
  • Loading branch information
alimpfard committed Aug 18, 2020
1 parent 57dfe03 commit 04425cf
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 81 deletions.
7 changes: 7 additions & 0 deletions src/vm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,9 @@ class Builder {
builder.CreateStore(llvm::ConstantInt::getTrue(
llvm::Type::getInt1Ty(module.TheContext)),
module.anything_matched);
builder.CreateStore(llvm::ConstantInt::getTrue(
llvm::Type::getInt1Ty(module.TheContext)),
module.anything_matched_after_backtrack);
// increment nlex_fed
builder.CreateCall(
module.nlex_restore,
Expand Down Expand Up @@ -1544,6 +1547,10 @@ class Builder {
swinst->addCase(llvm::ConstantInt::get(
llvm::Type::getInt8Ty(module.TheContext), c),
cbb);
// This node will increment the fed ptr, so there's no need to continue
// tracking from the beginning of the word.
if (node->has_transition(0))
offsetidx = 0;
queue.push({ node, cbb, offsetidx + 1 });
}
// swap empty switch with jump
Expand Down
174 changes: 93 additions & 81 deletions src/wordtree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,101 +7,113 @@
#include <utility>

namespace WordTreeActions {
/// Tag type selecting the WordTree constructor that stores every word as its
/// own metadata value.
struct store_value_tag {
};
} // namespace WordTreeActions

/// Prefix tree over the characters of StringT.
///
/// Each inserted word is spelled out as a chain of child nodes hanging off
/// `root_node`; the end of a word is marked by an extra child keyed with the
/// value-constructed character (`EOW`). That end-of-word node carries the
/// MetadataT value associated with the word.
///
/// NOTE: because `EOW` is stored in the same map as ordinary characters, a
/// word that itself contains a value-constructed character cannot be told
/// apart from a shorter word ending at that position.
///
/// @tparam StringT          Iterable sequence of characters (the word type).
/// @tparam MetadataT        Value attached to every stored word.
/// @tparam CollectionT      Collection of words accepted by the constructors.
/// @tparam PairCollectionT  Collection of (word, metadata) pairs.
/// @tparam CharParamT       Key type of the per-node transition map.
template<typename StringT, typename MetadataT = int,
    typename CollectionT = std::set<StringT>,
    typename PairCollectionT = std::set<std::pair<StringT, MetadataT>>,
    typename CharParamT = typename StringT::value_type>
class WordTree {
    using CharT = CharParamT;
    using value_type = CharT;

    // A value-constructed character marks the end of a word.
    constexpr static CharT EOW {};

    /// One node of the tree: outgoing transitions plus the metadata slot
    /// (only meaningful on end-of-word nodes).
    struct WordTreeNode {
    public:
        std::map<CharT, std::shared_ptr<WordTreeNode>> elements;
        MetadataT metadata = {};

        WordTreeNode() = default;

        auto begin() const { return elements.cbegin(); }
        auto end() const { return elements.cend(); }
        /// True when this node has an outgoing transition labelled `t`.
        auto has_transition(CharT t) const { return elements.count(t) > 0; }
    };

public:
    std::shared_ptr<WordTreeNode> root_node;

    /// Builds a tree containing every word of `words` (metadata is left
    /// value-initialized). Kept implicit so existing conversions still work.
    WordTree(CollectionT words)
        : WordTree()
    {
        for (const auto& word : words)
            insert(word);
    }

    /// Builds a tree in which every word of `words` is stored with the word
    /// itself (converted to MetadataT) as its metadata.
    WordTree(CollectionT words, [[maybe_unused]] WordTreeActions::store_value_tag tag)
        : WordTree()
    {
        for (const auto& word : words)
            insert(word, word);
    }

    /// Builds a tree from (word, metadata) pairs.
    WordTree(const PairCollectionT& words)
        : WordTree()
    {
        for (const auto& [word, tag] : words)
            insert(word, tag);
    }

    /// Builds an empty tree consisting of just the root node.
    WordTree()
        : root_node(std::make_shared<WordTreeNode>())
    {
    }

    ~WordTree() = default;

    /// Adds `value` to the tree. Metadata already stored for the word is
    /// left untouched.
    void insert(StringT value)
    {
        terminal_node(value);
    }

    /// Adds `value` to the tree and sets (or overwrites) its metadata.
    void insert(StringT value, MetadataT val)
    {
        terminal_node(value).metadata = std::move(val);
    }

    /// @return true when exactly `value` was inserted; a mere prefix of a
    ///         stored word does not count.
    bool contains(StringT value) const
    {
        return find_terminal(value) != nullptr;
    }

    /// Looks up the metadata stored for `value`.
    ///
    /// @param value Word to look up.
    /// @param val   Receives a copy of the metadata on success; must not be
    ///              null. Left unmodified when the word is absent.
    /// @return true when the word is stored in the tree.
    bool get(StringT value, MetadataT* val) const
    {
        const WordTreeNode* terminal = find_terminal(value);
        if (!terminal)
            return false;
        *val = terminal->metadata;
        return true;
    }

private:
    /// Returns the child of `parent` reached through `key`, creating it
    /// first when the transition does not exist yet.
    static WordTreeNode& child_of(WordTreeNode& parent, const CharT& key)
    {
        auto& slot = parent.elements[key];
        if (!slot)
            slot = std::make_shared<WordTreeNode>();
        return *slot;
    }

    /// Walks the path spelled by `value`, creating missing nodes, and
    /// returns the end-of-word node that terminates it.
    WordTreeNode& terminal_node(const StringT& value)
    {
        WordTreeNode* node = root_node.get();
        for (const auto& c : value)
            node = &child_of(*node, c);
        return child_of(*node, EOW);
    }

    /// Read-only counterpart of terminal_node(): follows the path spelled by
    /// `value` one node per character and returns its end-of-word node, or
    /// nullptr when the path or the end-of-word marker is missing.
    const WordTreeNode* find_terminal(const StringT& value) const
    {
        const WordTreeNode* node = root_node.get();
        for (const auto& c : value) {
            const auto next = node->elements.find(c);
            if (next == node->elements.end())
                return nullptr;
            // Descend: the next character must be matched below this child,
            // not at the root again.
            node = next->second.get();
        }
        const auto terminal = node->elements.find(EOW);
        return terminal == node->elements.end() ? nullptr : terminal->second.get();
    }
};

0 comments on commit 04425cf

Please sign in to comment.