-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
2b451af
commit 45bc211
Showing
7 changed files
with
615 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# Deploy workflow: when a release is created, build t2sz in Release mode,
# package it as a .deb with CPack and attach the package to that release.
name: Deploy

on:
  release:
    types: [created]

env:
  BUILD_TYPE: Release

jobs:
  build:
    # The ubuntu-20.04 hosted runner image has been retired; pin a supported one.
    runs-on: ubuntu-22.04

    # Uploading a release asset requires write access to repository contents.
    permissions:
      contents: write

    steps:
      - uses: actions/checkout@v4

      - name: Install Dependencies
        shell: bash
        # apt-get is the script-stable front end (apt warns when used non-interactively).
        run: |
          sudo apt-get update
          sudo apt-get install -y libzstd-dev

      - name: Create Build Environment
        run: cmake -E make_directory "${{ runner.workspace }}/build"

      - name: Configure CMake
        shell: bash
        working-directory: ${{ runner.workspace }}/build
        run: cmake "$GITHUB_WORKSPACE" -DCMAKE_BUILD_TYPE="$BUILD_TYPE"

      - name: Build
        working-directory: ${{ runner.workspace }}/build
        shell: bash
        run: cmake --build . --config "$BUILD_TYPE"

      - name: Create DEB Package
        id: create-deb-package
        working-directory: ${{ runner.workspace }}/build
        shell: bash
        run: cpack

      - name: Release DEB
        shell: bash
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Take the tag from the release event instead of slicing GITHUB_REF,
          # which would truncate tag names that contain a slash.
          TAG_NAME: ${{ github.event.release.tag_name }}
        run: |
          # Expand the glob into an array so that zero or several packages are
          # handled explicitly instead of relying on unquoted `ls` output.
          shopt -s nullglob
          debs=("${{ runner.workspace }}/build/"*.deb)
          if [ "${#debs[@]}" -eq 0 ]; then
            echo "No .deb package found in the build directory" >&2
            exit 1
          fi
          # `hub` is no longer shipped on hosted runners; `gh` is preinstalled.
          gh release upload "$TAG_NAME" "${debs[@]}" --clobber --repo "$GITHUB_REPOSITORY"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
# Test Build workflow: on every push, configure and build t2sz in Release
# mode and verify that the .deb package can be produced with CPack.
name: Test Build

on: [push]

env:
  BUILD_TYPE: Release

jobs:
  build:
    # The ubuntu-20.04 hosted runner image has been retired; pin a supported one.
    runs-on: ubuntu-22.04

    steps:
      - uses: actions/checkout@v4

      - name: Install Dependencies
        shell: bash
        # apt-get is the script-stable front end (apt warns when used non-interactively).
        run: |
          sudo apt-get update
          sudo apt-get install -y libzstd-dev

      - name: Create Build Environment
        run: cmake -E make_directory "${{ runner.workspace }}/build"

      - name: Configure CMake
        shell: bash
        working-directory: ${{ runner.workspace }}/build
        run: cmake "$GITHUB_WORKSPACE" -DCMAKE_BUILD_TYPE="$BUILD_TYPE"

      - name: Build
        working-directory: ${{ runner.workspace }}/build
        shell: bash
        run: cmake --build . --config "$BUILD_TYPE"

      - name: Create DEB Package
        id: create-deb-package
        working-directory: ${{ runner.workspace }}/build
        shell: bash
        run: cpack
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
cmake_minimum_required(VERSION 3.16)
project(t2sz VERSION 1.0.0 LANGUAGES C)

# The binary has always been installed to /usr/bin. Keep /usr as the default
# prefix so that default behaviour is unchanged, while still honouring an
# explicit -DCMAKE_INSTALL_PREFIX=... from the user.
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
  set(CMAKE_INSTALL_PREFIX "/usr" CACHE PATH "Installation prefix" FORCE)
endif()

include(GNUInstallDirs)

add_executable(t2sz src/t2sz.c)

# Build as C99 (compiler extensions left at CMake's default, as before).
set_target_properties(t2sz PROPERTIES C_STANDARD 99)

# Expose the project version to the sources as the VERSION string macro.
target_compile_definitions(t2sz PRIVATE VERSION="${PROJECT_VERSION}")

# zstd: compression library; m: C math library.
target_link_libraries(t2sz PRIVATE zstd m)

# Strip the executable after a Release build. The guard on CMAKE_STRIP avoids
# running the freshly built binary itself when no strip tool was detected, and
# $<TARGET_FILE:...> makes the command independent of the working directory.
if(CMAKE_STRIP AND "${CMAKE_BUILD_TYPE}" STREQUAL "Release")
  add_custom_command(
    TARGET t2sz
    POST_BUILD
    COMMAND "${CMAKE_STRIP}" "$<TARGET_FILE:t2sz>"
    VERBATIM
  )
endif()

install(TARGETS t2sz RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")

# --- Packaging (CPack) -------------------------------------------------------

# Only the DEB generator is enabled; the TGZ/RPM settings below are kept
# commented out for reference.
#set(CPACK_SET_DESTDIR ON)
#set(CPACK_GENERATOR "DEB;TGZ;RPM")
set(CPACK_GENERATOR "DEB")

# DEB-DEFAULT makes CPack use the standard Debian file naming scheme.
set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT)
set(CPACK_DEBIAN_PACKAGE_MAINTAINER "Marco Martinelli <[email protected]>")
# Let CPack compute the shared-library dependencies of the package.
set(CPACK_DEBIAN_PACKAGE_SHLIBDEPS ON)
set(CPACK_DEBIAN_PACKAGE_DESCRIPTION "tar 2 seekable zstd.\nIt will compress a tar archive with Zstandard keeping each file in a different frame, for fast seeking.\nThe compressed archive can be uncompressed with any Zstandard tool, including zstd.")
set(CPACK_DEBIAN_PACKAGE_HOMEPAGE "https://github.com/martinellimarco/t2sz")

#set(CPACK_RPM_PACKAGE_AUTOREQ ON)
#set(CPACK_RPM_PACKAGE_DESCRIPTION "tar 2 seekable zstd.\nIt will compress a tar archive with Zstandard keeping each file in a different frame, for fast seeking.\nThe compressed archive can be uncompressed with any Zstandard tool, including zstd.")
#set(CPACK_RPM_PACKAGE_URL "https://github.com/martinellimarco/t2sz")
#set(CPACK_RPM_PACKAGE_LICENSE "GPLv3")

set(CPACK_PACKAGE_NAME "${PROJECT_NAME}")
set(CPACK_PACKAGE_CONTACT "Marco Martinelli <[email protected]>")
set(CPACK_PACKAGE_VERSION_MAJOR "${PROJECT_VERSION_MAJOR}")
set(CPACK_PACKAGE_VERSION_MINOR "${PROJECT_VERSION_MINOR}")
set(CPACK_PACKAGE_VERSION_PATCH "${PROJECT_VERSION_PATCH}")

# Must come after all CPACK_* variables have been set.
include(CPack)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,118 @@ | ||
# t2sz | ||
Compress .tar archives to seekable .tar.zst | ||
|
||
It will compress a tar archive with [Zstandard](https://github.com/facebook/zstd) keeping each file in a different frame, unless `-s` is used. | ||
|
||
This allows fast seeking and extraction of a single file without decompressing the whole archive. | ||
|
||
When `-s SIZE` is used and a file is added, if the size of the file is less than `SIZE` then another one will be added in the same block, and so on until the sum of the sizes of all files packed together is at least `SIZE`. | ||
|
||
A file will never be truncated. `SIZE` is only a minimum quantity. | ||
|
||
A single block of one or more files is compressed into a single Zstandard frame. If the files in the same block are correlated, the compression ratio will be higher. | ||
|
||
The compressed archive can be uncompressed with any Zstandard tool, including `zstd`. | ||
|
||
To take advantage of seeking see the following projects: | ||
- C/C++ library: [libzstd-seek](https://github.com/martinellimarco/libzstd-seek) | ||
- Python library: [indexed_zstd](https://github.com/martinellimarco/indexed_zstd) | ||
- FUSE mount: [ratarmount](https://github.com/mxmlnkn/ratarmount) | ||
|
||
# Build | ||
|
||
You'll need `libzstd-dev` | ||
|
||
```bash | ||
sudo apt install libzstd-dev | ||
``` | ||
|
||
```bash | ||
git clone https://github.com/martinellimarco/t2sz | ||
mkdir t2sz/build | ||
cd t2sz/build | ||
cmake .. -DCMAKE_BUILD_TYPE="Release" | ||
make | ||
``` | ||
|
||
Install with | ||
|
||
```bash | ||
sudo make install | ||
``` | ||
|
||
Or, if you want a Debian package, you can run | ||
|
||
```bash | ||
cpack | ||
``` | ||
|
||
then install it with | ||
|
||
```bash | ||
sudo dpkg -i t2sz*.deb | ||
``` | ||
|
||
# Usage | ||
|
||
```commandline | ||
Usage: t2sz [OPTIONS...] [TAR ARCHIVE] | ||
Examples: | ||
t2sz archive.tar Compress archive.tar to archive.tar.zst | ||
t2sz archive.tar -o output.tar.zst Compress archive.tar to output.tar.zst | ||
t2sz archive.tar -o /dev/stdout Compress archive.tar to standard output | ||
Options: | ||
-l [1..22] Set compression level, from 1 (lower) to 22 (highest). Default is 22. | ||
-o FILENAME Output file name. | ||
-s SIZE Minimum size of an input block, in bytes. | ||
A block is composed by one or more whole files. A file is never truncated. | ||
If not specified one block will contain exactly one file, no matter the file size. | ||
Each block is compressed to a zstd frame but if the archive has a lot of small files | ||
having a file per block doesn't compress very well. With this you can set a trade off. | ||
The greater is SIZE the smaller will be the archive at the expense of the seek speed. | ||
SIZE may be followed by the following multiplicative suffixes: | ||
k/K/KiB = 1024 | ||
M/MiB = 1024*1024 | ||
kB/KB = 1000 | ||
MB = 1000*1000 | ||
-v Verbose. List the elements in the tar archive and their size. | ||
-f Overwrite output without prompting. | ||
-h Print this help. | ||
-V Print the version. | ||
``` | ||
|
||
# About -s and -l | ||
|
||
One may wonder what the best choices are for minimum block size `-s` and compression level `-l`. | ||
|
||
The real answer is that it depends on the kind of data you are working with. In short, do your own math and feel free to report your results. | ||
|
||
If you are working with big files (hundreds of MiB) then you will not have many benefits in terms of seeking time if you use `-s`, but you should increase your compression level to get smaller archives. | ||
|
||
On the other hand, if you have thousands of small files (a few MiB or less) that usually compress well and are correlated, you may take advantage of both `-s` and `-l`. | ||
|
||
What follows is a test I made with a dataset of ~100.000 binary files less than 4MiB in size. The exact numbers are not important. | ||
|
||
The first table shows the compression ratio of each combination of `-s` (min block size) and `-l` (level). | ||
|
||
Intuitively at `-s 1K -l 1` the resulting archive is 57.38% of the size of the uncompressed .tar archive. | ||
|
||
At the same time, `-s 256M -l 22` gives the best results in terms of compression ratio, with a generated archive that is only 33.47% of the original. | ||
|
||
Of course, seeking in a block of 256M is not very fast. A safer choice in this particular case is something around `-s 32M`. | ||
|
||
The second table shows the time it took to compress each archive, divided by the minimum time. | ||
|
||
The fastest choice is `-s 512K -l 1`, while `-s 256M -l 22` is the slowest, taking 69.48 times longer. | ||
|
||
|
||
![compression ratio](doc/compression-ratio.png) | ||
|
||
![speed ratio](doc/speed-ratio.png) | ||
|
||
|
||
# License | ||
|
||
See LICENSE |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Oops, something went wrong.