Skip to content

Commit

Permalink
remove useless code
Browse files Browse the repository at this point in the history
  • Loading branch information
ZHEQIUSHUI committed Apr 26, 2024
1 parent 6716f9f commit d1b4aba
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 957 deletions.
3 changes: 1 addition & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,7 @@ function(build_exec name main_source)
src/runner/utils/memory_utils.cpp
src/runner/utils/cqdm.cpp
src/runner/Tokenizer/Tokenizer.cpp
src/runner/Tokenizer/QwenTokenizer.cpp
src/runner/Tokenizer/chatglm.cpp)
src/runner/Tokenizer/QwenTokenizer.cpp)

target_link_libraries(${name} ax_engine ax_interpreter ax_sys ax_ivps)
target_link_libraries(${name} sentencepiece re2::re2)
Expand Down
128 changes: 64 additions & 64 deletions src/runner/Tokenizer/Tokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include "QwenTokenizer.hpp"

#include "chatglm.h"
// #include "chatglm.h"

#include "httplib.h"
#include "json.hpp"
Expand Down Expand Up @@ -231,69 +231,69 @@ class TokenizerQwen : public BaseTokenizer
}
};

class TokenizerGLM3 : public BaseTokenizer
{
std::shared_ptr<chatglm::ChatGLM3Tokenizer> sp;
bool _b_bos, _b_eos;

private:
/* data */
public:
bool Init(std::string model_path, bool b_bos = true, bool b_eos = false) override
{
if (!file_exist(model_path))
{
ALOGE("tokenizer model file(%s) not exist", model_path.c_str());
return false;
}
// std::vector<char> sp_model_data;
// read_file(model_path, sp_model_data);
// std::string_view serialized_model_proto(sp_model_data.data(), sp_model_data.size());

sp.reset(new chatglm::ChatGLM3Tokenizer(model_path));

this->_b_bos = b_bos;
this->_b_eos = b_eos;
return true;
}

bool Encode(std::string input, std::vector<int> &output) override
{
if (_b_bos)
{
// input += "<|im_start|>";
}
if (_b_eos)
{
// input += "<|endoftext|>";
}
output = sp->encode(input, 1024);

return true;
}

std::vector<int> Encode(std::string input) override
{
std::vector<int> output;
Encode(input, output);
return output;
}

std::string Decode(const std::vector<int> input) override
{
return sp->decode(input);
}

int GetBosID() override
{
return sp->sp.bos_id();
}

int GetEosID() override
{
return sp->sp.eos_id();
}
};
// class TokenizerGLM3 : public BaseTokenizer
// {
// std::shared_ptr<chatglm::ChatGLM3Tokenizer> sp;
// bool _b_bos, _b_eos;

// private:
// /* data */
// public:
// bool Init(std::string model_path, bool b_bos = true, bool b_eos = false) override
// {
// if (!file_exist(model_path))
// {
// ALOGE("tokenizer model file(%s) not exist", model_path.c_str());
// return false;
// }
// // std::vector<char> sp_model_data;
// // read_file(model_path, sp_model_data);
// // std::string_view serialized_model_proto(sp_model_data.data(), sp_model_data.size());

// sp.reset(new chatglm::ChatGLM3Tokenizer(model_path));

// this->_b_bos = b_bos;
// this->_b_eos = b_eos;
// return true;
// }

// bool Encode(std::string input, std::vector<int> &output) override
// {
// if (_b_bos)
// {
// // input += "<|im_start|>";
// }
// if (_b_eos)
// {
// // input += "<|endoftext|>";
// }
// output = sp->encode(input, 1024);

// return true;
// }

// std::vector<int> Encode(std::string input) override
// {
// std::vector<int> output;
// Encode(input, output);
// return output;
// }

// std::string Decode(const std::vector<int> input) override
// {
// return sp->decode(input);
// }

// int GetBosID() override
// {
// return sp->sp.bos_id();
// }

// int GetEosID() override
// {
// return sp->sp.eos_id();
// }
// };

class Tokenizer_Http : public BaseTokenizer
{
Expand Down
Loading

0 comments on commit d1b4aba

Please sign in to comment.