diff --git a/.github/scripts/command/load_dump_bench.sh b/.github/scripts/command/load_dump_bench.sh
index 5f0c7bcdd0ae..7bf5e4df0fdc 100755
--- a/.github/scripts/command/load_dump_bench.sh
+++ b/.github/scripts/command/load_dump_bench.sh
@@ -1,114 +1,83 @@
-#!/bin/bash -e
+#!/bin/bash -ex
 source .github/scripts/common/common.sh
 [[ -z "$META" ]] && META=sqlite3
 [[ -z "$START_META" ]] && START_META=true
+[[ -z "$BIGDIR" ]] && BIGDIR=true
 source .github/scripts/start_meta_engine.sh
 META_URL=$(get_meta_url $META)
-if [ "$START_META" = true ]; then
-    start_meta_engine $META
-fi
+META_URL2=$(get_meta_url2 $META)
+FILE_COUNT_IN_BIGDIR=100000

-test_load_dump_with_small_dir(){
-    prepare_test
+prepare_test_data(){
+    umount_jfs /tmp/jfs $META_URL
+    python3 .github/scripts/flush_meta.py $META_URL
+    rm -rf /var/jfs/myjfs || true
     create_database $META_URL
-    echo meta_url is: $META_URL
-    wget -q https://s.juicefs.com/static/bench/2M_emtpy_files.dump.gz
-    gzip -dfk 2M_emtpy_files.dump.gz
-    load_file=2M_emtpy_files.dump
-    start=`date +%s`
-    ./juicefs load $META_URL $load_file
-    end=`date +%s`
-    runtime=$((end-start))
-    version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
-    .github/scripts/save_benchmark.sh --name load_small_dir --result $runtime --meta $META --storage file
-    echo "load cost $runtime seconds"
-    start=`date +%s`
-    ./juicefs dump $META_URL dump.json --fast
-    end=`date +%s`
-    runtime=$((end-start))
-    echo "dump cost $runtime seconds"
-    .github/scripts/save_benchmark.sh --name dump_small_dir --result $runtime --meta $META --storage file
-    ./juicefs mount $META_URL /jfs -d --no-usage-report
-    inode=$(df -i /jfs | grep JuiceFS |awk -F" " '{print $3}')
-    if [ "$inode" -ne "2233313" ]; then
-        echo "<FATAL>: inode error: $inode"
-        exit 1
-    fi
+    ./juicefs format $META_URL myjfs
+    ./juicefs mount -d $META_URL /tmp/jfs
+    threads=10
+    ./juicefs mdtest $META_URL /bigdir --depth=1 --dirs=0 --files=$((FILE_COUNT_IN_BIGDIR/threads)) --threads=$threads --write=8192
+    ./juicefs mdtest $META_URL /smalldir --depth=3 --dirs=10 --files=10 --threads=10 --write=8192
 }

-test_load_dump_with_big_dir_subdir(){
-    do_load_dump_with_big_dir true
-}
+if [[ "$START_META" == "true" ]]; then
+    start_meta_engine $META
+    prepare_test_data
+fi

-test_load_dump_with_big_dir(){
-    do_load_dump_with_big_dir false
+test_dump_load(){
+    do_dump_load dump.json
 }

-do_load_dump_with_big_dir(){
-    with_subdir=$1
-    prepare_test
-    create_database $META_URL
-    echo meta_url is: $META_URL
-    wget -q https://s.juicefs.com/static/bench/1M_files_in_one_dir.dump.gz
-    gzip -dfk 1M_files_in_one_dir.dump.gz
-    load_file=1M_files_in_one_dir.dump
-    start=`date +%s`
-    ./juicefs load $META_URL $load_file
-    end=`date +%s`
-    runtime=$((end-start))
-    echo "load cost $runtime seconds"
-    version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
-    .github/scripts/save_benchmark.sh --name load_big_dir --result $runtime --meta $META --storage file
-    start=`date +%s`
-    if [ "$with_subdir" = true ] ; then
-        ./juicefs dump $META_URL dump.json --subdir test --fast
-    else
-        ./juicefs dump $META_URL dump.json --fast
-    fi
-    end=`date +%s`
-    runtime=$((end-start))
-    echo "dump cost $runtime seconds"
-    .github/scripts/save_benchmark.sh --name dump_big_dir --result $runtime --meta $META --storage file
-    ./juicefs mount $META_URL /jfs -d --no-usage-report
-    df -i /jfs
-    inode=$(df -i /jfs | grep JuiceFS |awk -F" " '{print $3}')
-    echo "inode: $inode"
-    if [ "$inode" -ne "1000003" ]; then
-        echo "<FATAL>: inode error: $inode"
-        exit 1
-    fi
+test_dump_load_fast(){
+    do_dump_load dump.json.gz --fast
 }

-test_list_with_big_dir(){
-    start=`date +%s`
-    file_count=$(ls -l /jfs/test/test-dir.0-0/mdtest_tree.0/ | wc -l)
-    echo "file_count: $file_count"
-    end=`date +%s`
-    runtime=$((end-start))
-    echo "list cost $runtime seconds"
-    version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
-    .github/scripts/save_benchmark.sh --name list_big_dir --result $runtime --meta $META --storage file
-    if [ "$file_count" -ne "1000001" ]; then
-        echo "<FATAL>: file_count error: $file_count"
-        exit 1
-    fi
+test_dump_load_in_binary(){
+    do_dump_load dump.bin --binary
 }

-prepare_test()
-{
-    umount_jfs /jfs $META_URL
-    ls -l /jfs/.config && exit 1 || true
-    ./juicefs status $META_URL && UUID=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4) || echo "meta not exist"
-    if [ -n "$UUID" ];then
-        ./juicefs destroy --yes $META_URL $UUID
+do_dump_load(){
+    dump_file=$1
+    shift
+    options=$@
+    ./juicefs dump $META_URL $dump_file $options --threads=50
+    # python3 .github/scripts/flush_meta.py $META_URL2
+    create_database $META_URL2
+    if [[ "$options" == *"--binary"* ]]; then
+        ./juicefs load $META_URL2 $dump_file $options
+    else
+        ./juicefs load $META_URL2 $dump_file
+    fi
+    ./juicefs mount $META_URL2 /tmp/jfs2 -d
+    df -i /tmp/jfs /tmp/jfs2
+    iused1=$(df -i /tmp/jfs | tail -1 | awk '{print $3}')
+    iused2=$(df -i /tmp/jfs2 | tail -1 | awk '{print $3}')
+    [[ "$iused1" == "$iused2" ]] || (echo "<FATAL>: iused error: $iused1 $iused2" && exit 1)
+    ./juicefs summary /tmp/jfs/ --csv
+    ./juicefs summary /tmp/jfs2/ --csv
+    summary1=$(./juicefs summary /tmp/jfs/ --csv | head -n +2 | tail -n 1)
+    summary2=$(./juicefs summary /tmp/jfs2/ --csv | head -n +2 | tail -n 1)
+    [[ "$summary1" == "$summary2" ]] || (echo "<FATAL>: summary error: $summary1 $summary2" && exit 1)
+
+    if [[ "$BIGDIR" == "true" ]]; then
+        file_count=$(ls -l /tmp/jfs2/bigdir/test-dir.0-0/mdtest_tree.0/ | wc -l)
+        file_count=$((file_count-1))
+        if [[ "$file_count" -ne "$FILE_COUNT_IN_BIGDIR" ]]; then
+            echo "<FATAL>: file_count error: $file_count"
+            exit 1
+        fi
     fi
-    # python3 .github/scripts/flush_meta.py $META_URL
-    # rm -rf /var/jfs/myjfs || true
-    # rm -rf /var/jfsCache/myjfs || true
+    ./juicefs rmr /tmp/jfs2/smalldir
+    ls /tmp/jfs2/smalldir && echo "<FATAL>: ls should fail" && exit 1 || true
+    umount_jfs /tmp/jfs2 $META_URL2
+    ./juicefs status $META_URL2 && UUID=$(./juicefs status $META_URL2 | grep UUID | cut -d '"' -f 4)
+    ./juicefs destroy --yes $META_URL2 $UUID
 }
+
 source .github/scripts/common/run_test.sh && run_test $@
\ No newline at end of file
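Note: run_test, sourced on the last line above, runs the test function named on the command line, which is how the workflow below invokes each case. A minimal local sketch (assuming the juicefs binary sits in the working directory and the chosen meta engine can be started on the host, as in CI):

    # seed /tmp/jfs once, then exercise each dump/load variant against it
    sudo META=redis START_META=true  .github/scripts/command/load_dump_bench.sh test_dump_load
    sudo META=redis START_META=false .github/scripts/command/load_dump_bench.sh test_dump_load_fast
    sudo META=redis START_META=false .github/scripts/command/load_dump_bench.sh test_dump_load_in_binary
    # BIGDIR=false skips the 100000-file big-directory count check
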
diff --git a/.github/scripts/start_meta_engine.sh b/.github/scripts/start_meta_engine.sh
index ac6a6fc7855d..65d3dbb44e71 100755
--- a/.github/scripts/start_meta_engine.sh
+++ b/.github/scripts/start_meta_engine.sh
@@ -203,7 +203,7 @@ get_meta_url(){
     elif [ "$meta" == "tidb" ]; then
         meta_url="mysql://root:@(127.0.0.1:4000)/test"
     elif [ "$meta" == "etcd" ]; then
-        meta_url="etcd://localhost:2379/jfs"
+        meta_url="etcd://localhost:2379/test"
     elif [ "$meta" == "fdb" ]; then
         meta_url="fdb:///home/runner/fdb.cluster?prefix=jfs"
     elif [ "$meta" == "ob" ]; then
@@ -217,6 +217,39 @@ get_meta_url(){
     return 0
 }

+get_meta_url2(){
+    meta=$1
+    if [ "$meta" == "postgres" ]; then
+        meta_url="postgres://postgres:postgres@127.0.0.1:5432/test2?sslmode=disable"
+    elif [ "$meta" == "mysql" ]; then
+        meta_url="mysql://root:root@(127.0.0.1)/test2"
+    elif [ "$meta" == "redis" ]; then
+        meta_url="redis://127.0.0.1:6379/2"
+    elif [ "$meta" == "sqlite3" ]; then
+        meta_url="sqlite3://test2.db"
+    elif [ "$meta" == "tikv" ]; then
+        meta_url="tikv://127.0.0.1:2379/jfs2"
+    elif [ "$meta" == "badger" ]; then
+        meta_url="badger:///tmp/test2"
+    elif [ "$meta" == "mariadb" ]; then
+        meta_url="mysql://root:root@(127.0.0.1)/test2"
+    elif [ "$meta" == "tidb" ]; then
+        meta_url="mysql://root:@(127.0.0.1:4000)/test2"
+    elif [ "$meta" == "etcd" ]; then
+        meta_url="etcd://localhost:2379/test2"
+    elif [ "$meta" == "fdb" ]; then
+        meta_url="fdb:///home/runner/fdb.cluster?prefix=kfs"
+    elif [ "$meta" == "ob" ]; then
+        meta_url="mysql://root:@\\(127.0.0.1:2881\\)/test2"
+    else
+        echo >&2 "<FATAL>: meta $meta is not supported"
+        meta_url=""
+        return 1
+    fi
+    echo $meta_url
+    return 0
+}
+
 create_database(){
     meta_url=$1
     db_name=$(basename $meta_url | awk -F? '{print $1}')
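get_meta_url2 mirrors get_meta_url but resolves every engine to a second, disjoint namespace on the same server (database test2, Redis DB 2, FDB prefix kfs, and so on), giving do_dump_load a clean target to load into. For example:

    source .github/scripts/start_meta_engine.sh
    META_URL=$(get_meta_url etcd)     # etcd://localhost:2379/test
    META_URL2=$(get_meta_url2 etcd)   # etcd://localhost:2379/test2
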
diff --git a/.github/scripts/utils.py b/.github/scripts/utils.py
index 5c8da75ce094..01a559c3aa62 100644
--- a/.github/scripts/utils.py
+++ b/.github/scripts/utils.py
@@ -12,7 +12,7 @@ import time
 from minio import Minio

-def flush_meta(meta_url):
+def flush_meta(meta_url:str):
     print(f'start flush meta: {meta_url}')
     if meta_url.startswith('sqlite3://'):
         path = meta_url[len('sqlite3://'):]
@@ -24,25 +24,38 @@ def flush_meta(meta_url):
         if os.path.isdir(path):
             shutil.rmtree(path)
             print(f'remove badger dir {path} succeed')
-    elif meta_url.startswith('redis://'):
-        host_port= meta_url[8:].split('/')[0]
+    elif meta_url.startswith('redis://') or meta_url.startswith('tikv://'):
+        default_port = {"redis": 6379, "tikv": 2379}
+        protocol = meta_url.split("://")[0]
+        host_port= meta_url.split("://")[1].split('/')[0]
         if ':' in host_port:
             host = host_port.split(':')[0]
             port = host_port.split(':')[1]
         else:
             host = host_port
-            port = 6379
-        print(f'flush redis: {host}:{port}')
-        run_cmd(f'redis-cli -h {host} -p {port} flushall')
-        print(f'flush redis succeed')
+            port = default_port[protocol]
+        db = meta_url.split("://")[1].split('/')[1]
+        assert db
+        print(f'flushing {protocol}://{host}:{port}/{db}')
+        if protocol == 'redis':
+            run_cmd(f'redis-cli -h {host} -p {port} -n {db} flushdb')
+        elif protocol == 'tikv':
+            # TODO: should only flush the specified db
+            run_cmd(f'echo "delall --yes" |tcli -pd {host}:{port}')
+        else:
+            raise Exception(f'{protocol} not supported')
+        print(f'flush {protocol}://{host}:{port}/{db} succeed')
     elif meta_url.startswith('mysql://'):
         create_mysql_db(meta_url)
     elif meta_url.startswith('postgres://'):
         create_postgres_db(meta_url)
-    elif meta_url.startswith('tikv://'):
-        run_cmd('echo "delall --yes" |tcli -pd localhost:2379')
     elif meta_url.startswith('fdb://'):
-        run_cmd('''fdbcli -C /home/runner/fdb.cluster --exec "writemode on ; clearrange '' \xFF"''')
+        # fdb:///home/runner/fdb.cluster?prefix=jfs2
+        prefix = meta_url.split('?prefix=')[1] if '?prefix=' in meta_url else ""
+        cluster_file = meta_url.split('fdb://')[1].split('?')[0]
+        print(f'flushing fdb: cluster_file: {cluster_file}, prefix: {prefix}')
+        run_cmd(f'echo "writemode on; clearrange {prefix} {prefix}\\xff" | fdbcli -C {cluster_file}')
+        print(f'flush fdb succeed')
     else:
         raise Exception(f'{meta_url} not supported')
     print('flush meta succeed')
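flush_meta dispatches on the URL scheme, so one entry point resets any engine between runs; note the tikv branch still clears the whole cluster (see the TODO above). The flush_meta.py wrapper used by the shell scripts takes the meta URL as its only argument, e.g. with URLs from get_meta_url2:

    python3 .github/scripts/flush_meta.py "redis://127.0.0.1:6379/2"                   # flushdb on DB 2 only
    python3 .github/scripts/flush_meta.py "fdb:///home/runner/fdb.cluster?prefix=kfs"  # clearrange kfs..kfs\xff
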
diff --git a/.github/workflows/load.yml b/.github/workflows/load.yml
index 828dbc46b53e..d4d77b2de3c4 100644
--- a/.github/workflows/load.yml
+++ b/.github/workflows/load.yml
@@ -1,23 +1,19 @@
-name: "load-test"
+name: "load"
 on:
   push:
     branches:
       - 'main'
       - 'release-**'
     paths:
-      - '**.go'
-      - 'Makefile'
       - '**/load.yml'
-      - '.github/scripts/command/load_dump_bench.sh'
+      - '**/load_dump_bench.sh'
   pull_request:
     branches:
       - 'main'
       - 'release-**'
     paths:
-      - '**.go'
-      - 'Makefile'
       - '**/load.yml'
-      - '.github/scripts/command/load_dump_bench.sh'
+      - '**/load_dump_bench.sh'
   schedule:
     - cron: '0 19 * * *'
   workflow_dispatch:
@@ -39,13 +35,13 @@ jobs:
         if [ "${{github.event_name}}" == "schedule" ] || [ "${{github.event_name}}" == "workflow_dispatch" ]; then
           echo 'meta_matrix=["sqlite3", "redis", "mysql", "tikv", "tidb", "postgres", "mariadb", "fdb"]' >> $GITHUB_OUTPUT
         else
-          echo 'meta_matrix=["redis"]' >> $GITHUB_OUTPUT
+          echo 'meta_matrix=["redis", "tikv", "mysql"]' >> $GITHUB_OUTPUT
         fi
     outputs:
       meta_matrix: ${{ steps.set-matrix.outputs.meta_matrix }}

   load:
-    timeout-minutes: 30
+    timeout-minutes: 90
     needs: [build-matrix]
     strategy:
       fail-fast: false
@@ -55,18 +51,6 @@
     runs-on: ubuntu-20.04

     steps:
-      - name: Remove unused software
-        if: false
-        shell: bash
-        run: |
-          echo "before remove unused software"
-          sudo df -h
-          sudo rm -rf /usr/share/dotnet
-          sudo rm -rf /usr/local/lib/android
-          sudo rm -rf /opt/ghc
-          echo "after remove unused software"
-          sudo df -h
-
       - name: Checkout
         uses: actions/checkout@v3
         with:
@@ -96,8 +80,8 @@
         with:
           target: ${{steps.vars.outputs.target}}

-      - name: Load and dump with small directory
-        timeout-minutes: 30
+      - name: Test dump load in binary format
+        timeout-minutes: 60
         env:
           AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
           AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
           AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
           META: ${{matrix.meta}}
           START_META: true
         run: |
-          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_load_dump_with_small_dir
-
-      - name: Load and dump with big directory
-        timeout-minutes: 30
+          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_dump_load_in_binary
+
+      - name: Test dump load fast
+        timeout-minutes: 60
         env:
           AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
           AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
           AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
           META: ${{matrix.meta}}
           START_META: false
         run: |
-          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_load_dump_with_big_dir
-
-      - name: Load and dump subdir with big directory
-        if: false
-        timeout-minutes: 30
+          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_dump_load_fast
+
+      - name: Test dump load
+        timeout-minutes: 60
         env:
           AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
           AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
           AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
           META: ${{matrix.meta}}
-          START_META: false 
+          START_META: false
         run: |
-          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_load_dump_with_big_dir_subdir
-
-      - name: List big directory
-        timeout-minutes: 30
-        env:
-          AWS_ACCESS_KEY_ID: ${{secrets.CI_COVERAGE_AWS_AK}}
-          AWS_SECRET_ACCESS_KEY: ${{secrets.CI_COVERAGE_AWS_SK}}
-          AWS_ACCESS_TOKEN: ${{secrets.CI_COVERAGE_AWS_TOKEN}}
-          META: ${{matrix.meta}}
-          START_META: false
-        run: |
-          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_list_with_big_dir
-
+          sudo -E GOCOVERDIR=$(pwd)/cover .github/scripts/command/load_dump_bench.sh test_dump_load
+
       - name: log
         if: always()
-        shell: bash
         run: |
-          if [ -f ~/.juicefs/juicefs.log ]; then
-            tail -300 ~/.juicefs/juicefs.log
-            grep "<FATAL>:" ~/.juicefs/juicefs.log && exit 1 || true
-          fi
-
+          tail -500 /var/log/juicefs.log
+          grep "<FATAL>:" /var/log/juicefs.log && exit 1 || true
+
       - name: upload coverage report
         timeout-minutes: 5
         uses: ./.github/actions/upload-coverage
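Since schedule and workflow_dispatch both expand the matrix to all eight engines, while pushes and pull requests run only the reduced ["redis", "tikv", "mysql"] matrix, a full-matrix run can be triggered manually, e.g. with the GitHub CLI:

    gh workflow run load.yml --ref main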