From 8f31c3b81f59346bcc50d600e18a9a0f820d2f95 Mon Sep 17 00:00:00 2001 From: DronCode Date: Wed, 4 Dec 2024 21:29:00 +0300 Subject: [PATCH] [0.0.24] Protobuf initial support (CI, small tests, base analyzer, base env) --- .github/workflows/build.yml | 33 +++- CMakeLists.txt | 7 +- Protobuf/CMakeLists.txt | 43 ++++++ Protobuf/include/RG3/Protobuf/Compiler.h | 44 ++++++ Protobuf/include/RG3/Protobuf/Protobuf.h | 4 + .../include/RG3/Protobuf/ProtobufAnalyzer.h | 41 +++++ Protobuf/source/Protobuf.cpp | 1 + Protobuf/source/ProtobufAnalyzer.cpp | 143 ++++++++++++++++++ PyBind/CMakeLists.txt | 2 +- Tests/Unit/CMakeLists.txt | 3 +- Tests/Unit/source/Tests_Protobuf.cpp | 66 ++++++++ 11 files changed, 379 insertions(+), 8 deletions(-) create mode 100644 Protobuf/CMakeLists.txt create mode 100644 Protobuf/include/RG3/Protobuf/Compiler.h create mode 100644 Protobuf/include/RG3/Protobuf/Protobuf.h create mode 100644 Protobuf/include/RG3/Protobuf/ProtobufAnalyzer.h create mode 100644 Protobuf/source/Protobuf.cpp create mode 100644 Protobuf/source/ProtobufAnalyzer.cpp create mode 100644 Tests/Unit/source/Tests_Protobuf.cpp diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index fdae71f..05253d8 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -25,7 +25,8 @@ jobs: llvm_tag: "llvmorg-19.1.4", python_version: "3.10", os_version: "2022", - artifact_id: "RG3_Windows" + artifact_id: "RG3_Windows", + protobuf_tag: "v29.1" } - { name: "Ubuntu Linux", @@ -38,7 +39,8 @@ jobs: llvm_tag: "llvmorg-19.1.4", python_version: "3.10", os_version: "24.04", - artifact_id: "RG3_Linux" + artifact_id: "RG3_Linux", + protobuf_tag: "v29.1" } - { name: "macOS", @@ -51,7 +53,8 @@ jobs: llvm_tag: "llvmorg-19.1.4", python_version: "3.10", os_version: "13", - artifact_id: "RG3_macOS" + artifact_id: "RG3_macOS", + protobuf_tag: "v29.1" } steps: @@ -186,12 +189,36 @@ jobs: llvm_repo/llvm llvm_repo/clang + - name: Checkout Protobuf + uses: actions/checkout@v3 + 
# Build script of the RG3 Protobuf module (static library RG3_Protobuf, alias RG3::Protobuf).
project(RG3_Protobuf)

# ------- Boost
set(Boost_USE_STATIC_LIBS ON)
find_package(Boost COMPONENTS filesystem REQUIRED HINTS $ENV{Boost_ROOT})

# ------- Protobuf
# The environment variables Protobuf_DIR, absl_DIR and utf8_range_DIR are used as
# search hints for the corresponding CMake package config files.
find_package(absl REQUIRED CONFIG HINTS $ENV{absl_DIR})
find_package(utf8_range REQUIRED CONFIG HINTS $ENV{utf8_range_DIR})
find_package(Protobuf REQUIRED CONFIG HINTS $ENV{Protobuf_DIR})

message(STATUS "Found Protobuf ${Protobuf_VERSION} (${Protobuf_DIR})")

# ------- RG3 Protobuf
file(GLOB_RECURSE RG3_PROTOBUF_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp")
add_library(RG3_Protobuf STATIC ${RG3_PROTOBUF_SOURCES})
add_library(RG3::Protobuf ALIAS RG3_Protobuf)
target_include_directories(RG3_Protobuf PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include")
target_include_directories(RG3_Protobuf PUBLIC ${Protobuf_INCLUDE_DIRS})
target_include_directories(RG3_Protobuf PUBLIC ${absl_INCLUDE_DIRS})
target_include_directories(RG3_Protobuf PUBLIC ${utf8_range_INCLUDE_DIRS})

if (MSVC)
    # /bigobj support. The option must be attached to the target defined in this
    # file; it was previously attached to RG3_LLVM and never reached RG3_Protobuf.
    target_compile_options(RG3_Protobuf PUBLIC /bigobj)
endif()

target_link_libraries(RG3_Protobuf PUBLIC
        # Protobuf
        protobuf::libprotobuf
        protobuf::libprotoc
        protobuf::libupb
        utf8_range::utf8_range
        absl::base
        absl::strings
        # Boost
        ${Boost_LIBRARIES}
        # RG3
        RG3::Cpp
        # FMT
        fmt::fmt
)
Unlike LLVM CompilerConfig, that options useful only for protoc + */ + struct CompilerConfig { + ProtobufSyntax eSyntax { ProtobufSyntax::PB_SYNTAX_3 }; + std::vector vIncludeDirs {}; + bool bUseStrictMode { true }; + bool bEnableGRPC { false }; + bool bGenerateClientStubs { false }; + bool bUseLiteGenerator { false }; + }; + + /** + * @brief Describe issue in protoc + */ + struct CompilerIssue + { + IssueKind eKind { IssueKind::IK_WARNING }; + int iLine { 0 }; + int iColumn { 0 }; + std::string sMessage {}; + }; + + using CompilerIssuesVector = std::vector; +} \ No newline at end of file diff --git a/Protobuf/include/RG3/Protobuf/Protobuf.h b/Protobuf/include/RG3/Protobuf/Protobuf.h new file mode 100644 index 0000000..260812f --- /dev/null +++ b/Protobuf/include/RG3/Protobuf/Protobuf.h @@ -0,0 +1,4 @@ +#pragma once + +namespace rg3::pb +{} \ No newline at end of file diff --git a/Protobuf/include/RG3/Protobuf/ProtobufAnalyzer.h b/Protobuf/include/RG3/Protobuf/ProtobufAnalyzer.h new file mode 100644 index 0000000..d004772 --- /dev/null +++ b/Protobuf/include/RG3/Protobuf/ProtobufAnalyzer.h @@ -0,0 +1,41 @@ +#pragma once + +#include + +#include + +#include +#include +#include + + +namespace rg3::pb +{ + /** + * @brief This class implements a single threaded analyzer of protobuf code. 
+ */ + class ProtobufAnalyzer final : public boost::noncopyable + { + public: + using CodeSource = std::variant; // string is a code repr in memory (id0.proto), filesystem::path for FS path + + ProtobufAnalyzer(); + + void setCode(const std::string& sCode); + void setFile(const std::filesystem::path& sPath); + void setSource(const CodeSource& src); + + void setCompilerConfig(const CompilerConfig& sConfig); + const CompilerConfig& getCompilerConfig() const; + CompilerConfig& getCompilerConfig(); + + [[nodiscard]] const CompilerIssuesVector& getIssues() const; + + bool analyze(); + + private: + CodeSource m_sSource {}; + CompilerConfig m_sConfig {}; + CompilerIssuesVector m_aIssues {}; + }; +} \ No newline at end of file diff --git a/Protobuf/source/Protobuf.cpp b/Protobuf/source/Protobuf.cpp new file mode 100644 index 0000000..8b73bce --- /dev/null +++ b/Protobuf/source/Protobuf.cpp @@ -0,0 +1 @@ +void stub() {} \ No newline at end of file diff --git a/Protobuf/source/ProtobufAnalyzer.cpp b/Protobuf/source/ProtobufAnalyzer.cpp new file mode 100644 index 0000000..9e02082 --- /dev/null +++ b/Protobuf/source/ProtobufAnalyzer.cpp @@ -0,0 +1,143 @@ +#include + +#include +#include + +#include +#include +#include +#include + + +namespace rg3::pb +{ + ProtobufAnalyzer::ProtobufAnalyzer() = default; + + void ProtobufAnalyzer::setCode(const std::string& sCode) + { + m_sSource = sCode; + } + + void ProtobufAnalyzer::setFile(const std::filesystem::path& sPath) + { + m_sSource = sPath; + } + + void ProtobufAnalyzer::setSource(const CodeSource& src) + { + m_sSource = src; + } + + void ProtobufAnalyzer::setCompilerConfig(const CompilerConfig& sConfig) + { + m_sConfig = sConfig; + } + + const CompilerConfig& ProtobufAnalyzer::getCompilerConfig() const + { + return m_sConfig; + } + + CompilerConfig& ProtobufAnalyzer::getCompilerConfig() + { + return m_sConfig; + } + + const CompilerIssuesVector& ProtobufAnalyzer::getIssues() const + { + return m_aIssues; + } + + struct 
InMemoryErrorCollector final : google::protobuf::io::ErrorCollector + { + std::vector* paIssues {}; + + explicit InMemoryErrorCollector(std::vector* pOut) : google::protobuf::io::ErrorCollector(), paIssues(pOut) {} + + void RecordError(int line, google::protobuf::io::ColumnNumber column, absl::string_view message) override + { + CompilerIssue& sIssue = paIssues->emplace_back(); + sIssue.eKind = IssueKind::IK_ERROR; + sIssue.iLine = line; + sIssue.iColumn = column; + sIssue.sMessage = message.data(); + } + + void RecordWarning(int line, google::protobuf::io::ColumnNumber column, absl::string_view message) override + { + CompilerIssue& sIssue = paIssues->emplace_back(); + sIssue.eKind = IssueKind::IK_WARNING; + sIssue.iLine = line; + sIssue.iColumn = column; + sIssue.sMessage = message.data(); + } + }; + + template constexpr bool always_false_v = false; + + std::pair, std::string> getStream(const ProtobufAnalyzer::CodeSource& source) { + return std::visit([](const auto& value) -> std::pair, std::string> { + using T = std::decay_t; + + if constexpr (std::is_same_v) { + return { std::make_unique(value), "id0.proto" }; + } else if constexpr (std::is_same_v) { + auto stream = std::make_unique(value); + if (!stream->is_open()) + { + return { nullptr, "" }; + } + + return { std::move(stream), value.string() }; + } else { + static_assert(always_false_v, "Unhandled variant type"); + } + }, source); + } + + bool ProtobufAnalyzer::analyze() + { + m_aIssues.clear(); + + // 1. 
Parse + auto [pStreamMem, sStreamId] = getStream(m_sSource); + if (!pStreamMem) + { + CompilerIssue& sIssue = m_aIssues.emplace_back(); + sIssue.sMessage = "Failed to handle I/O (unable to open file IO)"; + sIssue.iColumn = sIssue.iLine = 0; + return false; + } + + InMemoryErrorCollector sErrorCollector { &m_aIssues }; + google::protobuf::compiler::Parser sProtobufParser {}; + google::protobuf::io::IstreamInputStream sStream { pStreamMem.get() }; + google::protobuf::io::Tokenizer sTokenizer { &sStream, &sErrorCollector }; + google::protobuf::FileDescriptorProto sDescriptor {}; + + sProtobufParser.RecordErrorsTo(&sErrorCollector); + sDescriptor.set_name(sStreamId); + + if (!sProtobufParser.Parse(&sTokenizer, &sDescriptor)) + { + return false; + } + + // Semantic analysis + google::protobuf::DescriptorPool sDescriptorPool; + + google::protobuf::DescriptorPool sBuiltinPool(google::protobuf::DescriptorPool::generated_pool()); + const google::protobuf::FileDescriptor* pFileDescriptor = sBuiltinPool.BuildFile(sDescriptor); + + if (!pFileDescriptor) + { + CompilerIssue& sIssue = m_aIssues.emplace_back(); + sIssue.sMessage = "Semantic error: Failed to resolve types in the file."; + sIssue.iColumn = sIssue.iLine = 0; + return false; + } + + // Done + return std::count_if(m_aIssues.begin(), m_aIssues.end(), [](const CompilerIssue& sIssue) -> bool { return sIssue.eKind == IssueKind::IK_ERROR; }) == 0; + } +} \ No newline at end of file diff --git a/PyBind/CMakeLists.txt b/PyBind/CMakeLists.txt index 5129338..037aeae 100644 --- a/PyBind/CMakeLists.txt +++ b/PyBind/CMakeLists.txt @@ -13,7 +13,7 @@ file(GLOB_RECURSE RG3_PYBIND_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/source/*.cpp") add_library(RG3_PyBind SHARED ${RG3_PYBIND_SOURCES}) add_library(RG3::PyBind ALIAS RG3_PyBind) target_include_directories(RG3_PyBind PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include") -target_link_libraries(RG3_PyBind PUBLIC RG3::Cpp RG3::LLVM) +target_link_libraries(RG3_PyBind PUBLIC RG3::Cpp RG3::LLVM 
RG3::Protobuf) target_link_libraries(RG3_PyBind PUBLIC ${Boost_LIBRARIES} ${Python3_LIBRARIES} Python3::Module) target_link_libraries(RG3_PyBind PRIVATE fmt::fmt) diff --git a/Tests/Unit/CMakeLists.txt b/Tests/Unit/CMakeLists.txt index 5104d6b..7cd2b22 100644 --- a/Tests/Unit/CMakeLists.txt +++ b/Tests/Unit/CMakeLists.txt @@ -9,4 +9,5 @@ add_executable(RG3_Unit ${RG3_UNIT_SOURCES}) target_link_libraries(RG3_Unit gtest RG3::LLVM - RG3::Cpp) \ No newline at end of file + RG3::Cpp + RG3::Protobuf) \ No newline at end of file diff --git a/Tests/Unit/source/Tests_Protobuf.cpp b/Tests/Unit/source/Tests_Protobuf.cpp new file mode 100644 index 0000000..36f781f --- /dev/null +++ b/Tests/Unit/source/Tests_Protobuf.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include + + +class Tests_Protobuf : public ::testing::Test +{ + protected: + void SetUp() override + { + g_Analyzer = std::make_unique(); + } + + void TearDown() override + { + g_Analyzer = nullptr; + } + + protected: + std::unique_ptr g_Analyzer { nullptr }; +}; + +TEST_F(Tests_Protobuf, CheckErrorHandler) +{ + g_Analyzer->setCode(R"( +syntax = "proto3"; + +message WhatTheFuck { + string name = 1; + int32 id = 1; + mdma_pzdc field = 40; +} +)"); + + g_Analyzer->getCompilerConfig().eSyntax = rg3::pb::ProtobufSyntax::PB_SYNTAX_3; + g_Analyzer->getCompilerConfig().vIncludeDirs = {}; + ASSERT_FALSE(g_Analyzer->analyze()); +} + +TEST_F(Tests_Protobuf, SimpleUsage) +{ + g_Analyzer->setCode(R"( +syntax = "proto3"; + +import "google/protobuf/descriptor.proto"; + +// Custom entries (should be declared somehow) +extend google.protobuf.MessageOptions { + bool runtime = 707228; +} + +message User { + option (runtime) = true; + + string id = 1; + string first_name = 2; + string last_name = 3; + string avatar = 4; +} +)"); + + g_Analyzer->getCompilerConfig().eSyntax = rg3::pb::ProtobufSyntax::PB_SYNTAX_3; + g_Analyzer->getCompilerConfig().vIncludeDirs = {}; + ASSERT_TRUE(g_Analyzer->analyze()); +} \ No newline at end of file