From 32b803609a61c2dae3d3bb4a6eebe04b92ec6b49 Mon Sep 17 00:00:00 2001 From: Francesco Valla Date: Wed, 18 Dec 2024 17:39:54 +0100 Subject: [PATCH] Move to libarchive Use libarchive for all the decompression operations, in order to reduce the complexity while supporting more input formats. Signed-off-by: Francesco Valla --- .github/workflows/cmake-single-platform.yml | 2 +- CMakeLists.txt | 29 +- README.md | 2 +- bmap-writer-test.sh | 47 +++- bmap-writer.cpp | 295 ++++---------------- 5 files changed, 113 insertions(+), 262 deletions(-) diff --git a/.github/workflows/cmake-single-platform.yml b/.github/workflows/cmake-single-platform.yml index 4b62276..2256b73 100644 --- a/.github/workflows/cmake-single-platform.yml +++ b/.github/workflows/cmake-single-platform.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/checkout@v4 - name: Install dependencies - run: sudo apt-get update && sudo apt-get --no-install-recommends install -y bmap-tools libssl-dev libxml2-dev zlib1g-dev liblzma-dev cppcheck + run: sudo apt-get update && sudo apt-get --no-install-recommends install -y bmap-tools libssl-dev libxml2-dev libarchive-dev cppcheck - name: Cppcheck run: cppcheck --enable=all --suppress=missingIncludeSystem *.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a99efb..c623fb5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -23,29 +23,12 @@ else() message(FATAL_ERROR "OpenSSL not found") endif() -# Find zlib -find_package(ZLIB REQUIRED) -if (ZLIB_FOUND) - include_directories(${ZLIB_INCLUDE_DIRS}) +# Find libarchive +find_package(LibArchive REQUIRED) +if (LibArchive_FOUND) + include_directories(${LIBARCHIVE_INCLUDE_DIR}) else() - message(FATAL_ERROR "zlib not found") -endif() - -# Find liblzma -find_package(LibLZMA REQUIRED) -if (LIBLZMA_FOUND) - include_directories(${LIBLZMA_INCLUDE_DIRS}) -else() - message(FATAL_ERROR "liblzma not found") -endif() - -# Find libzstd -find_package(PkgConfig REQUIRED) -pkg_check_modules(ZSTD REQUIRED IMPORTED_TARGET libzstd) -if (ZSTD_FOUND) - include_directories(${ZSTD_INCLUDE_DIRS}) -else() - message(FATAL_ERROR "libzstd not found") + message(FATAL_ERROR "libarchive not found") endif() # Add the executable @@ -53,7 +36,7 @@ add_executable(bmap-writer bmap-writer.cpp) target_compile_options(bmap-writer PUBLIC -Wformat -Wformat-security -Wconversion -Wsign-conversion -pedantic -Werror) # Link the libraries -target_link_libraries(bmap-writer ${LIBXML2_LIBRARIES} ${OPENSSL_LIBRARIES} ${ZLIB_LIBRARIES} ${LIBLZMA_LIBRARIES} ${ZSTD_LIBRARIES}) +target_link_libraries(bmap-writer ${LIBXML2_LIBRARIES} ${OPENSSL_LIBRARIES} ${LibArchive_LIBRARIES}) # Specify the install rules install(TARGETS bmap-writer DESTINATION bin) diff --git a/README.md b/README.md index ba0f958..513ca97 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ Unlike the Yocto BMAP tool, `bmap-writer` is C++ based does not require Python a - **Alternative to Yocto BMAP Tool**: Provides a lightweight alternative specifically for embedded systems. - **No Python Required**: Does not require Python, making it easier to integrate into various environments. -- **Support for Compressed Images**: Handles gzip, xz anz zstd compressed images, decompressing them on-the-fly during the writing process. +- **Support for Compressed Images**: Handles all compression filters that are supported by `libarchive`, decompressing the data on-the-fly during the writing process. - **Checksum Verification**: Ensures data integrity by verifying checksums for each block. - **Efficient Writing**: Writes only the necessary blocks, reducing the overall write time and wear on storage devices. diff --git a/bmap-writer-test.sh b/bmap-writer-test.sh index 85b29b2..e0fab3d 100755 --- a/bmap-writer-test.sh +++ b/bmap-writer-test.sh @@ -10,11 +10,36 @@ if [ ! -f test.img ]; then dd if=/dev/urandom of=test.img bs=4k count=1 seek=131072 conv=notrunc > /dev/null 2>&1 fi +if [ ! -f test.img.tar ]; then + echo "## Enclose the file inside tar" + tar -cf test.img.tar test.img +fi + +if [ ! -f test.img.tar.gz ]; then + echo "## Enclose the file inside tar.gz" + tar -czf test.img.tar.gz test.img +fi + +if [ ! -f test.img.bz2 ]; then + echo "## Compress the file with bzip2" + bzip2 -f -k -c test.img > test.img.bz2 +fi + if [ ! -f test.img.gz ]; then echo "## Compress the file with gzip" gzip -9 test.img -c > test.img.gz fi +if [ ! -f test.img.lz4 ]; then + echo "## Compress the file with lz4" + lz4 -f -k -c test.img > test.img.lz4 +fi + +if [ ! -f test.img.lzo ]; then + echo "## Compress the file with lzo" + lzop -f -k -c test.img > test.img.lzo +fi + if [ ! -f test.img.xz ]; then echo "## Compress the file with xz" xz -z test.img -c > test.img.xz @@ -22,7 +47,7 @@ fi if [ ! -f test.img.zst ]; then echo "## Compress the file with zstd" - zstd -f -k -c -3 --threads=8 test.img > test.img.zst + zstd -f -k -c test.img > test.img.zst fi if [ ! -f test.img.bmap ] ; then @@ -37,10 +62,30 @@ echo "## Write the file with bmap-writer" ./bmap-writer test.img test.img.bmap test.none.img.out cmp test.img.out test.none.img.out +echo "## Write the file with bmap-writer and tar" +./bmap-writer test.img.tar test.img.bmap test.tar.img.out +cmp test.img.out test.tar.img.out + +echo "## Write the file with bmap-writer and tar+gzip" +./bmap-writer test.img.tar.gz test.img.bmap test.tar.gz.img.out +cmp test.img.out test.tar.gz.img.out + +echo "## Write the file with bmap-writer and bzip2" +./bmap-writer test.img.bz2 test.img.bmap test.bz2.img.out +cmp test.img.out test.bz2.img.out + echo "## Write the file with bmap-writer and gzip" ./bmap-writer test.img.gz test.img.bmap test.gz.img.out cmp test.img.out test.gz.img.out +echo "## Write the file with bmap-writer and lz4" +./bmap-writer test.img.lz4 test.img.bmap test.lz4.img.out +cmp test.img.out test.lz4.img.out + +echo "## Write the file with bmap-writer and lzo" +./bmap-writer test.img.lzo test.img.bmap test.lzo.img.out +cmp test.img.out test.lzo.img.out + echo "## Write the file with bmap-writer and xz" ./bmap-writer test.img.xz test.img.bmap test.xz.img.out cmp test.img.out test.xz.img.out diff --git a/bmap-writer.cpp b/bmap-writer.cpp index 7ce73b4..39f7c92 100644 --- a/bmap-writer.cpp +++ b/bmap-writer.cpp @@ -34,28 +34,11 @@ #include #include #include -#include -#include -#include +#include #define CHECKSUM_LENGTH 64 #define RANGE_LENGTH 19 -#define GZIP_MAGIC_0 0x1f -#define GZIP_MAGIC_1 0x8b -#define XZ_MAGIC_0 0xfd -#define XZ_MAGIC_1 '7' -#define XZ_MAGIC_2 'z' -#define XZ_MAGIC_3 'X' -#define XZ_MAGIC_4 'Z' -#define XZ_MAGIC_5 0x00 -#define ZSTD_MAGIC_0 0x28 -#define ZSTD_MAGIC_1 0xb5 -#define ZSTD_MAGIC_2 0x2f -#define ZSTD_MAGIC_3 0xfd - -#define DEC_BUFFER_SIZE (1024 * 16) - struct range_t { std::string checksum; std::string range; @@ -130,33 +113,6 @@ std::string computeSHA256(const std::vector& buffer, size_t size) { return output.str(); } -int getCompressionType(const std::string &imageFile, std::string &compressionType) { - - std::ifstream file(imageFile, std::ios::binary); - if (!file) { - std::cerr << "Failed to open image file" << std::endl; - return -1; - } - - unsigned char buffer[6]; - file.read(reinterpret_cast(buffer), 6); - file.close(); - - if (buffer[0] == GZIP_MAGIC_0 && buffer[1] == GZIP_MAGIC_1) { - compressionType = "gzip"; - } else if (buffer[0] == XZ_MAGIC_0 && buffer[1] == XZ_MAGIC_1 && buffer[2] == XZ_MAGIC_2 && - buffer[3] == XZ_MAGIC_3 && buffer[4] == XZ_MAGIC_4 && buffer[5] == XZ_MAGIC_5) { - compressionType = "xz"; - } else if (buffer[0] == ZSTD_MAGIC_0 && buffer[1] == ZSTD_MAGIC_1 && - buffer[2] == ZSTD_MAGIC_2 && buffer[3] == ZSTD_MAGIC_3) { - compressionType = "zstd"; - } else { - compressionType = "none"; - } - - return 0; -} - bool isDeviceMounted(const std::string &device) { std::ifstream mounts("/proc/mounts"); std::string line; @@ -180,61 +136,47 @@ void printBufferHex(const char *buffer, size_t size) { std::cout << std::endl; } -int BmapWriteImage(const std::string &imageFile, const bmap_t &bmap, const std::string &device, const std::string &compressionType) { - gzFile gzImg = nullptr; - lzma_stream lzmaStream = LZMA_STREAM_INIT; - std::vector decBufferIn(DEC_BUFFER_SIZE); - ZSTD_DStream* zstdStream = nullptr; - ZSTD_inBuffer zstdIn = { nullptr, 0, 0 }; - ZSTD_outBuffer zstdOut = { nullptr, 0, 0 }; - size_t decHead = 0; - std::ifstream imgFile; +int BmapWriteImage(const std::string &imageFile, const bmap_t &bmap, const std::string &device) { + static const size_t read_block_size = 16384; + struct archive *a = nullptr; int dev_fd = -1; int ret = 0; try { + size_t decHead = 0; + dev_fd = open(device.c_str(), O_WRONLY | O_CREAT | O_SYNC, S_IRUSR | S_IWUSR); if (dev_fd < 0) { throw std::string("Unable to open or create target device"); } - if (compressionType == "gzip") { - gzImg = gzopen(imageFile.c_str(), "rb"); - if (!gzImg) { - throw std::string("Unable to open gzip image file"); - } - } else if (compressionType == "xz") { - imgFile.open(imageFile, std::ios::binary); - if (!imgFile) { - throw std::string("Unable to open xz image file"); - } - lzma_ret ret = lzma_stream_decoder(&lzmaStream, UINT64_MAX, 0); - if (ret != LZMA_OK) { - throw std::string("Failed to initialize lzma decoder: ") + std::to_string(static_cast(ret)); - } + a = archive_read_new(); - lzmaStream.avail_in = 0; - } else if (compressionType == "zstd") { - imgFile.open(imageFile, std::ios::binary); - if (!imgFile) { - throw std::string("Unable to open image file"); - } + /* Support all compression types */ + archive_read_support_filter_all(a); + + /* Support a single compressed file or tar archive */ + archive_read_support_format_raw(a); + archive_read_support_format_tar(a); - zstdStream = ZSTD_createDStream(); - if (zstdStream == nullptr) { - throw std::string("Failed to initialize zstd decoder"); + int r = archive_read_open_filename(a, imageFile.c_str(), read_block_size); + if (r != ARCHIVE_OK) { + throw std::string("Failed to open archive: ") + std::string(archive_error_string(a)); + } else { + if (archive_format_name(a) != nullptr) { + std::cout << "Detected format: " << std::string(archive_format_name(a)) << std::endl; } - zstdIn.src = decBufferIn.data(); - zstdIn.size = decBufferIn.size(); - zstdIn.pos = zstdIn.size; - } else if (compressionType == "none") { - imgFile.open(imageFile, std::ios::binary); - if (!imgFile) { - throw std::string("Unable to open image file"); + /* Last filter is always the wrapper and would be printed as "none" */ + for (int i = 0; i < archive_filter_count(a) - 1; i++) { + std::cout << "Detected compression: " << std::string(archive_filter_name(a, i)) << std::endl; } - } else { - throw std::string("Unsupported compression type ") + compressionType; + } + + struct archive_entry *ae; + r = archive_read_next_header(a, &ae); + if (r != ARCHIVE_OK) { + throw std::string("Failed to read archive header: ") + std::string(archive_error_string(a)); } for (const auto &range : bmap.ranges) { @@ -248,137 +190,35 @@ int BmapWriteImage(const std::string &imageFile, const bmap_t &bmap, const std:: std::vector buffer(bufferSize); size_t outBytes = 0; - if (compressionType == "gzip") { - gzseek(gzImg, static_cast(startBlock * bmap.blockSize), SEEK_SET); - int readBytes = gzread(gzImg, buffer.data(), static_cast(bufferSize)); - if (readBytes < 0) { - throw std::string("Failed to read from gzip image file"); + const size_t outStart = startBlock * bmap.blockSize; + const size_t outEnd = ((endBlock + 1) * bmap.blockSize); + + while (outBytes < bufferSize) { + ssize_t readData = archive_read_data(a, buffer.data() + outBytes, bufferSize - outBytes); + + // If no more data is available in the input buffer and the input file has been + // read completely, stop this decompression loop + if (readData <= 0) + break; + + size_t chunkSize = static_cast(readData); + + if (decHead >= outStart && (decHead + chunkSize) <= outEnd) { + // Case 1: all decoded data can be used + outBytes += chunkSize; + } else if (decHead < outStart && (decHead + chunkSize) <= outStart) { + // Case 2: all decoded data shall be discarded + } else if (decHead < outStart && (decHead + chunkSize) > outStart) { + // Case 3: only the last portion of the decoded data can be used + std::move(buffer.begin() + static_cast(outStart - decHead), + buffer.begin() + static_cast(chunkSize), + buffer.begin()); + size_t validData = chunkSize - (outStart - decHead); + outBytes += validData; } - outBytes = static_cast(readBytes); - } else if (compressionType == "xz") { - const size_t outStart = startBlock * bmap.blockSize; - const size_t outEnd = ((endBlock + 1) * bmap.blockSize); - - // Initialize the output buffer for the decompressor - lzmaStream.next_out = reinterpret_cast(buffer.data()); - lzmaStream.avail_out = static_cast(buffer.size()); - - while (outBytes < bufferSize) { - size_t chunkSize = 0; - - // Whenever no more input data is available, read some from the compressed file - // and reset the input parameters for the decompressor - if (lzmaStream.avail_in == 0) { - imgFile.read(decBufferIn.data(), static_cast(decBufferIn.size())); - if (imgFile.gcount() == 0 && imgFile.fail()) { - throw std::string("Failed to read from xz image file"); - } else { - lzmaStream.next_in = reinterpret_cast(decBufferIn.data()); - lzmaStream.avail_in = static_cast(imgFile.gcount()); - } - } - - // Save the current status of the output buffer... - chunkSize = lzmaStream.avail_out; - - lzma_ret ret = lzma_code(&lzmaStream, LZMA_RUN); - if (ret != LZMA_OK && ret != LZMA_STREAM_END) { - throw std::string("Failed to decompress xz image file: ") + std::to_string(static_cast(ret)); - } - // ...and then extract the size of the decompressed chunk - chunkSize -= lzmaStream.avail_out; - - if (decHead >= outStart && (decHead + chunkSize) <= outEnd) { - // Case 1: all decoded data can be used - outBytes += chunkSize; - } else if (decHead < outStart && (decHead + chunkSize) <= outStart) { - // Case 2: all decoded data shall be discarded - lzmaStream.next_out = reinterpret_cast(buffer.data()); - lzmaStream.avail_out = static_cast(buffer.size()); - } else if (decHead < outStart && (decHead + chunkSize) > outStart) { - // Case 3: only the last portion of the decoded data can be used - std::move(buffer.begin() + static_cast(outStart - decHead), - buffer.begin() + static_cast(chunkSize), - buffer.begin()); - size_t validData = chunkSize - (outStart - decHead); - outBytes += validData; - lzmaStream.next_out = reinterpret_cast(buffer.data()) + validData; - lzmaStream.avail_out = buffer.size() - validData; - } - - // Advance the head of the decompressed data - decHead += chunkSize; - - // In case all the required data has been decompressed OR the XZ stream is ended - // OR the input file has been read completely, stop this decompression loop - if ((lzmaStream.avail_out == 0) || (ret == LZMA_STREAM_END) || - (lzmaStream.avail_in == 0 && imgFile.eof())) { - break; - } - } - } else if (compressionType == "zstd") { - const size_t outStart = startBlock * bmap.blockSize; - const size_t outEnd = ((endBlock + 1) * bmap.blockSize); - - // Init output buffer - zstdOut.dst = buffer.data(); - zstdOut.size = buffer.size(); - zstdOut.pos = 0; - - while (outBytes < bufferSize) { - size_t chunkSize = 0; - size_t zrc; - - if (zstdIn.pos == zstdIn.size) { - imgFile.read(decBufferIn.data(), static_cast(decBufferIn.size())); - if (imgFile.gcount() == 0 && imgFile.fail()) { - throw std::string("Failed to read from zstd image file"); - } else { - zstdIn.size = static_cast(imgFile.gcount()); - zstdIn.pos = 0; - } - } - - zrc = ZSTD_decompressStream(zstdStream, &zstdOut, &zstdIn); - if (ZSTD_isError(zrc)) { - throw std::string("Failed to decompress zstd image file: ") + std::string(ZSTD_getErrorName(zrc)); - } - - chunkSize = zstdOut.pos - outBytes; - - if (decHead >= outStart && (decHead + chunkSize) <= outEnd) { - // Case 1: all decoded data can be used - outBytes += chunkSize; - } else if (decHead < outStart && (decHead + chunkSize) <= outStart) { - // Case 2: all decoded data shall be discarded - zstdOut.pos = 0; - } else if (decHead < outStart && (decHead + chunkSize) > outStart) { - // Case 3: only the last portion of the decoded data can be used - std::move(buffer.begin() + static_cast(outStart - decHead), - buffer.begin() + static_cast(chunkSize), - buffer.begin()); - size_t validData = chunkSize - (outStart - decHead); - outBytes += validData; - zstdOut.pos = validData; - } - - // Advance the head of the decompressed data - decHead += chunkSize; - - // If no more data is available in the input buffer and the input file has been - // read completely, stop this decompression loop - if ((zstdIn.pos == zstdIn.size) && imgFile.eof()) { - break; - } - } - } else if (compressionType == "none") { - imgFile.seekg(static_cast(startBlock * bmap.blockSize), std::ios::beg); - imgFile.read(buffer.data(), static_cast(bufferSize)); - outBytes = static_cast(imgFile.gcount()); - if (outBytes == 0 && imgFile.fail()) { - throw std::string("Failed to read from image file"); - } + // Advance the head of the decompressed data + decHead += chunkSize; } // Compute and verify the checksum @@ -404,7 +244,7 @@ int BmapWriteImage(const std::string &imageFile, const bmap_t &bmap, const std:: std::cout << "Finished writing image to device." << std::endl; } - catch (std::string& err) { + catch (const std::string& err) { std::cerr << err << std::endl; ret = -1; } @@ -413,16 +253,8 @@ int BmapWriteImage(const std::string &imageFile, const bmap_t &bmap, const std:: close(dev_fd); } - if (imgFile.is_open()) { - imgFile.close(); - } - - if (compressionType == "gzip") { - gzclose(gzImg); - } else if (compressionType == "xz") { - lzma_end(&lzmaStream); - } else if (compressionType == "zstd") { - ZSTD_freeDStream(zstdStream); + if (a != nullptr) { + archive_read_free(a); } return ret; @@ -449,16 +281,7 @@ int main(int argc, char *argv[]) { std::cerr << "BlockSize not found in BMAP file" << std::endl; return 1; } - int ret=0; - std::string compressionType; - - ret = getCompressionType(imageFile, compressionType); - if (ret != 0) { - std::cerr << "Failed to detect compression type" << std::endl; - return ret; - } - - ret = BmapWriteImage(imageFile, bmap, device, compressionType); + int ret = BmapWriteImage(imageFile, bmap, device); if (ret != 0) { std::cerr << "Failed to write image to device" << std::endl; return ret;