Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CI: refactor dump/load stress test. #5511

Merged
merged 27 commits into from
Jan 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 59 additions & 90 deletions .github/scripts/command/load_dump_bench.sh
Original file line number Diff line number Diff line change
@@ -1,114 +1,83 @@
#!/bin/bash -e
#!/bin/bash -ex

source .github/scripts/common/common.sh

[[ -z "$META" ]] && META=sqlite3
[[ -z "$START_META" ]] && START_META=true
[[ -z "$BIGDIR" ]] && BIGDIR=true
source .github/scripts/start_meta_engine.sh
META_URL=$(get_meta_url $META)
if [ "$START_META" = true ]; then
start_meta_engine $META
fi
META_URL2=$(get_meta_url2 $META)
FILE_COUNT_IN_BIGDIR=100000

test_load_dump_with_small_dir(){
prepare_test
prepare_test_data(){
umount_jfs /tmp/jfs $META_URL
python3 .github/scripts/flush_meta.py $META_URL
rm -rf /var/jfs/myjfs || true
create_database $META_URL
echo meta_url is: $META_URL
wget -q https://s.juicefs.com/static/bench/2M_emtpy_files.dump.gz
gzip -dfk 2M_emtpy_files.dump.gz
load_file=2M_emtpy_files.dump
start=`date +%s`
./juicefs load $META_URL $load_file
end=`date +%s`
runtime=$((end-start))
version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
.github/scripts/save_benchmark.sh --name load_small_dir --result $runtime --meta $META --storage file
echo "load cost $runtime seconds"
start=`date +%s`
./juicefs dump $META_URL dump.json --fast
end=`date +%s`
runtime=$((end-start))
echo "dump cost $runtime seconds"
.github/scripts/save_benchmark.sh --name dump_small_dir --result $runtime --meta $META --storage file
./juicefs mount $META_URL /jfs -d --no-usage-report
inode=$(df -i /jfs | grep JuiceFS |awk -F" " '{print $3}')
if [ "$inode" -ne "2233313" ]; then
echo "<FATAL>: inode error: $inode"
exit 1
fi
./juicefs format $META_URL myjfs
./juicefs mount -d $META_URL /tmp/jfs
threads=10
./juicefs mdtest $META_URL /bigdir --depth=1 --dirs=0 --files=$((FILE_COUNT_IN_BIGDIR/threads)) --threads=$threads --write=8192
./juicefs mdtest $META_URL /smalldir --depth=3 --dirs=10 --files=10 --threads=10 --write=8192
}

test_load_dump_with_big_dir_subdir(){
do_load_dump_with_big_dir true
}
if [[ "$START_META" == "true" ]]; then
start_meta_engine $META
prepare_test_data
fi

test_load_dump_with_big_dir(){
do_load_dump_with_big_dir false
# Round-trip test using a plain JSON dump file and no extra dump options.
test_dump_load() {
  local -r dump_file='dump.json'
  do_dump_load "$dump_file"
}

do_load_dump_with_big_dir(){
with_subdir=$1
prepare_test
create_database $META_URL
echo meta_url is: $META_URL
wget -q https://s.juicefs.com/static/bench/1M_files_in_one_dir.dump.gz
gzip -dfk 1M_files_in_one_dir.dump.gz
load_file=1M_files_in_one_dir.dump
start=`date +%s`
./juicefs load $META_URL $load_file
end=`date +%s`
runtime=$((end-start))
echo "load cost $runtime seconds"
version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
.github/scripts/save_benchmark.sh --name load_big_dir --result $runtime --meta $META --storage file
start=`date +%s`
if [ "$with_subdir" = true ] ; then
./juicefs dump $META_URL dump.json --subdir test --fast
else
./juicefs dump $META_URL dump.json --fast
fi
end=`date +%s`
runtime=$((end-start))
echo "dump cost $runtime seconds"
.github/scripts/save_benchmark.sh --name dump_big_dir --result $runtime --meta $META --storage file
./juicefs mount $META_URL /jfs -d --no-usage-report
df -i /jfs
inode=$(df -i /jfs | grep JuiceFS |awk -F" " '{print $3}')
echo "inode: $inode"
if [ "$inode" -ne "1000003" ]; then
echo "<FATAL>: inode error: $inode"
exit 1
fi
# Round-trip test using a gzip-named JSON dump file and the --fast dump option.
test_dump_load_fast() {
  local -r dump_file='dump.json.gz'
  local -r dump_option='--fast'
  do_dump_load "$dump_file" "$dump_option"
}

test_list_with_big_dir(){
start=`date +%s`
file_count=$(ls -l /jfs/test/test-dir.0-0/mdtest_tree.0/ | wc -l)
echo "file_count: $file_count"
end=`date +%s`
runtime=$((end-start))
echo "list cost $runtime seconds"
version=$(./juicefs -V|cut -b 17- | sed 's/:/-/g')
.github/scripts/save_benchmark.sh --name list_big_dir --result $runtime --meta $META --storage file
if [ "$file_count" -ne "1000001" ]; then
echo "<FATAL>: file_count error: $file_count"
exit 1
fi
# Round-trip test using the binary dump format; do_dump_load forwards
# --binary to the load step as well.
test_dump_load_in_binary() {
  local -r dump_file='dump.bin'
  local -r dump_option='--binary'
  do_dump_load "$dump_file" "$dump_option"
}

prepare_test()
{
umount_jfs /jfs $META_URL
ls -l /jfs/.config && exit 1 || true
./juicefs status $META_URL && UUID=$(./juicefs status $META_URL | grep UUID | cut -d '"' -f 4) || echo "meta not exist"
if [ -n "$UUID" ];then
./juicefs destroy --yes $META_URL $UUID
#######################################
# Dump the metadata of the source volume, load it into a second metadata
# engine, mount that copy and check that it matches the source. The copy is
# then partially deleted, unmounted and destroyed.
# Globals:
#   META_URL              (read) metadata URL of the source volume (/tmp/jfs)
#   META_URL2             (read) metadata URL that receives the loaded dump
#   BIGDIR                (read) "true" enables the big-directory file count
#   FILE_COUNT_IN_BIGDIR  (read) expected number of files in the big directory
# Arguments:
#   $1    dump file name
#   $2..  extra options for `juicefs dump`; they are forwarded to
#         `juicefs load` only when they contain --binary
# Returns:
#   Exits the script with status 1 when a verification step fails.
#######################################
do_dump_load(){
  local dump_file=$1
  shift
  # Keep the options as an array so each one stays a separate word.
  local -a options=("$@")

  ./juicefs dump "$META_URL" "$dump_file" "${options[@]}" --threads=50
  # python3 .github/scripts/flush_meta.py $META_URL2
  create_database "$META_URL2"
  if [[ "${options[*]}" == *"--binary"* ]]; then
    ./juicefs load "$META_URL2" "$dump_file" "${options[@]}"
  else
    ./juicefs load "$META_URL2" "$dump_file"
  fi
  ./juicefs mount "$META_URL2" /tmp/jfs2 -d

  # Both mounts must report the same number of used inodes (df column 3).
  df -i /tmp/jfs /tmp/jfs2
  local iused1 iused2
  iused1=$(df -i /tmp/jfs | tail -1 | awk '{print $3}')
  iused2=$(df -i /tmp/jfs2 | tail -1 | awk '{print $3}')
  # Exit from the current shell, not from a subshell, so the failure does not
  # depend on errexit being active in the calling context.
  if [[ "$iused1" != "$iused2" ]]; then
    echo "<FATAL>: iused error: $iused1 $iused2"
    exit 1
  fi

  # The plain calls print the full summaries and let a failing command stop
  # the script; the captured pipelines would hide that failure.
  ./juicefs summary /tmp/jfs/ --csv
  ./juicefs summary /tmp/jfs2/ --csv
  local summary1 summary2
  summary1=$(./juicefs summary /tmp/jfs/ --csv | head -n 2 | tail -n 1)
  summary2=$(./juicefs summary /tmp/jfs2/ --csv | head -n 2 | tail -n 1)
  if [[ "$summary1" != "$summary2" ]]; then
    echo "<FATAL>: summary error: $summary1 $summary2"
    exit 1
  fi

  if [[ "$BIGDIR" == "true" ]]; then
    local file_count
    file_count=$(ls -l /tmp/jfs2/bigdir/test-dir.0-0/mdtest_tree.0/ | wc -l)
    # `ls -l` prints a leading "total" line that is not a file.
    file_count=$((file_count-1))
    if [[ "$file_count" -ne "$FILE_COUNT_IN_BIGDIR" ]]; then
      echo "<FATAL>: file_count error: $file_count"
      exit 1
    fi
  fi
  # python3 .github/scripts/flush_meta.py $META_URL
  # rm -rf /var/jfs/myjfs || true
  # rm -rf /var/jfsCache/myjfs || true

  # The loaded copy must be writable: remove a tree and confirm it is gone.
  ./juicefs rmr /tmp/jfs2/smalldir
  if ls /tmp/jfs2/smalldir; then
    echo "<FATAL>: ls should fail"
    exit 1
  fi
  umount_jfs /tmp/jfs2 "$META_URL2"

  # Resolve the UUID of the copy into a local variable so a failed status
  # call can never hand a stale or empty UUID to `juicefs destroy`.
  local status_output uuid
  if ! status_output=$(./juicefs status "$META_URL2"); then
    echo "<FATAL>: cannot read status of $META_URL2"
    exit 1
  fi
  printf '%s\n' "$status_output"
  uuid=$(printf '%s\n' "$status_output" | grep UUID | cut -d '"' -f 4)
  if [[ -z "$uuid" ]]; then
    echo "<FATAL>: cannot determine UUID of $META_URL2"
    exit 1
  fi
  ./juicefs destroy --yes "$META_URL2" "$uuid"
}


# Load the shared test runner and pass it this script's arguments unchanged.
# "$@" is quoted so that an argument containing whitespace or glob characters
# reaches run_test as a single word.
source .github/scripts/common/run_test.sh && run_test "$@"


35 changes: 34 additions & 1 deletion .github/scripts/start_meta_engine.sh
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ get_meta_url(){
elif [ "$meta" == "tidb" ]; then
meta_url="mysql://root:@(127.0.0.1:4000)/test"
elif [ "$meta" == "etcd" ]; then
meta_url="etcd://localhost:2379/jfs"
meta_url="etcd://localhost:2379/test"
elif [ "$meta" == "fdb" ]; then
meta_url="fdb:///home/runner/fdb.cluster?prefix=jfs"
elif [ "$meta" == "ob" ]; then
Expand All @@ -217,6 +217,39 @@ get_meta_url(){
return 0
}

#######################################
# Print the metadata URL of the second database for a metadata engine.
# Globals:
#   meta      (written) set to $1
#   meta_url  (written) set to the selected URL, or "" when unsupported;
#             deliberately not local, in case a caller reads it
# Arguments:
#   $1  engine name: postgres, mysql, redis, sqlite3, tikv, badger, mariadb,
#       tidb, etcd, fdb or ob
# Outputs:
#   The URL on stdout; an error message on stderr for an unknown engine.
# Returns:
#   0 on success, 1 when the engine is not supported.
#######################################
get_meta_url2(){
  meta=$1
  case "$meta" in
    postgres)
      meta_url="postgres://postgres:postgres@127.0.0.1:5432/test2?sslmode=disable"
      ;;
    mysql|mariadb)
      meta_url="mysql://root:root@(127.0.0.1)/test2"
      ;;
    redis)
      meta_url="redis://127.0.0.1:6379/2"
      ;;
    sqlite3)
      meta_url="sqlite3://test2.db"
      ;;
    tikv)
      meta_url="tikv://127.0.0.1:2379/jfs"
      ;;
    badger)
      meta_url="badger:///tmp/test2"
      ;;
    tidb)
      meta_url="mysql://root:@(127.0.0.1:4000)/test2"
      ;;
    etcd)
      meta_url="etcd://localhost:2379/test2"
      ;;
    fdb)
      meta_url="fdb:///home/runner/fdb.cluster?prefix=kfs"
      ;;
    ob)
      meta_url="mysql://root:@\\(127.0.0.1:2881\\)/test2"
      ;;
    *)
      echo >&2 "<FATAL>: meta $meta is not supported"
      meta_url=""
      return 1
      ;;
  esac
  # Quoted output: several URLs contain "?", which an unquoted expansion
  # would treat as a glob pattern and could replace with a matching path.
  printf '%s\n' "$meta_url"
  return 0
}

create_database(){
meta_url=$1
db_name=$(basename $meta_url | awk -F? '{print $1}')
Expand Down
33 changes: 23 additions & 10 deletions .github/scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import time
from minio import Minio

def flush_meta(meta_url):
def flush_meta(meta_url:str):
print(f'start flush meta: {meta_url}')
if meta_url.startswith('sqlite3://'):
path = meta_url[len('sqlite3://'):]
Expand All @@ -24,25 +24,38 @@ def flush_meta(meta_url):
if os.path.isdir(path):
shutil.rmtree(path)
print(f'remove badger dir {path} succeed')
elif meta_url.startswith('redis://'):
host_port= meta_url[8:].split('/')[0]
elif meta_url.startswith('redis://') or meta_url.startswith('tikv://'):
default_port = {"redis": 6379, "tikv": 2379}
protocol = meta_url.split("://")[0]
host_port= meta_url.split("://")[1].split('/')[0]
if ':' in host_port:
host = host_port.split(':')[0]
port = host_port.split(':')[1]
else:
host = host_port
port = 6379
print(f'flush redis: {host}:{port}')
run_cmd(f'redis-cli -h {host} -p {port} flushall')
print(f'flush redis succeed')
port = default_port[protocol]
db = meta_url.split("://")[1].split('/')[1]
assert db
print(f'flushing {protocol}://{host}:{port}/{db}')
if protocol == 'redis':
run_cmd(f'redis-cli -h {host} -p {port} -n {db} flushdb')
elif protocol == 'tikv':
# TODO: should only flush the specified db
run_cmd(f'echo "delall --yes" |tcli -pd {host}:{port}')
else:
raise Exception(f'{protocol} not supported')
print(f'flush {protocol}://{host}:{port}/{db} succeed')
elif meta_url.startswith('mysql://'):
create_mysql_db(meta_url)
elif meta_url.startswith('postgres://'):
create_postgres_db(meta_url)
elif meta_url.startswith('tikv://'):
run_cmd('echo "delall --yes" |tcli -pd localhost:2379')
elif meta_url.startswith('fdb://'):
run_cmd('''fdbcli -C /home/runner/fdb.cluster --exec "writemode on ; clearrange '' \xFF"''')
# fdb:///home/runner/fdb.cluster?prefix=jfs2
prefix = meta_url.split('?prefix=')[1] if '?prefix=' in meta_url else ""
cluster_file = meta_url.split('fdb://')[1].split('?')[0]
print(f'flushing fdb: cluster_file: {cluster_file}, prefix: {prefix}')
run_cmd(f'echo "writemode on; clearrange {prefix} {prefix}\\xff" | fdbcli -C {cluster_file}')
print(f'flush fdb succeed')
else:
raise Exception(f'{meta_url} not supported')
print('flush meta succeed')
Expand Down
Loading
Loading