diff --git a/apps/io_tester/test_cases/one_cpu_starved_shard_can_still_saturate_io.sh b/apps/io_tester/test_cases/one_cpu_starved_shard_can_still_saturate_io.sh
new file mode 100644
index 0000000000..1854f4dd0b
--- /dev/null
+++ b/apps/io_tester/test_cases/one_cpu_starved_shard_can_still_saturate_io.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Test scenario:
+# A single CPU-starved shard has a batch IO job.
+# Goal: it should be able to utilize the entire bandwidth of the disk,
+# despite the rare polls.
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 IO_TESTER_EXECUTABLE" >&2
+    exit 1
+fi
+
+"$1" --smp=7 --storage=/dev/null --conf=<(cat <<'EOF'
+- name: tablet-streaming
+  data_size: 1GB
+  shards: [0]
+  type: seqread
+  shard_info:
+    parallelism: 50
+    reqsize: 128kB
+    shares: 200
+- name: cpuhog
+  type: cpu
+  shards: [0]
+  shard_info:
+    parallelism: 1
+    execution_time: 550us
+
+EOF
+) --io-properties-file=<(cat <<'EOF'
+# i4i.2xlarge
+disks:
+- mountpoint: /dev
+  read_bandwidth: 1542559872
+  read_iops: 218786
+  write_bandwidth: 1130867072
+  write_iops: 121499
+EOF
+)
diff --git a/apps/io_tester/test_cases/one_cpu_starved_shard_has_reasonable_fairness.sh b/apps/io_tester/test_cases/one_cpu_starved_shard_has_reasonable_fairness.sh
new file mode 100644
index 0000000000..27dd05378b
--- /dev/null
+++ b/apps/io_tester/test_cases/one_cpu_starved_shard_has_reasonable_fairness.sh
@@ -0,0 +1,39 @@
+#!/usr/bin/env bash
+
+# Test scenario:
+# all shards contend for IO, but one shard is additionally CPU-starved
+# and polls rarely.
+# Goal: it should still get a reasonably fair share of disk bandwidth.
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 IO_TESTER_EXECUTABLE" >&2
+    exit 1
+fi
+
+"$1" --smp=7 --storage=/dev/null --conf=<(cat <<'EOF'
+- name: tablet-streaming
+  data_size: 1GB
+  shards: all
+  type: seqread
+  shard_info:
+    parallelism: 50
+    reqsize: 128kB
+    shares: 200
+- name: cpuhog
+  type: cpu
+  shards: [0]
+  shard_info:
+    parallelism: 1
+    execution_time: 550us
+
+EOF
+) --io-properties-file=<(cat <<'EOF'
+# i4i.2xlarge
+disks:
+- mountpoint: /dev
+  read_bandwidth: 1542559872
+  read_iops: 218786
+  write_bandwidth: 1130867072
+  write_iops: 121499
+EOF
+) --duration=2
diff --git a/apps/io_tester/test_cases/scylla_tablet_migration.sh b/apps/io_tester/test_cases/scylla_tablet_migration.sh
new file mode 100644
index 0000000000..1caffe558d
--- /dev/null
+++ b/apps/io_tester/test_cases/scylla_tablet_migration.sh
@@ -0,0 +1,77 @@
+#!/usr/bin/env bash
+
+# Test scenario:
+# Simulation of a ScyllaDB workload which prompted some changes to the IO scheduler:
+# database queries concurrent with tablet streaming.
+#
+# All 7 shards are running a low-priority (200 shares) batch IO workload
+# and a high-priority (1000 shares), moderate-bandwidth, interactive workload.
+#
+# The interactive workload requires about 30% of the node's
+# total bandwidth (as measured in tokens), in small random reads.
+# The batch workload does large sequential reads and wants to utilize all
+# spare bandwidth.
+#
+# This workload is almost symmetric across shards, but is slightly skewed
+# and shard 0 is slightly more loaded. But even on that shard, the workload
+# doesn't need more than 35% of the shard's fair share of the disk bandwidth.
+#
+# Due to the distribution of shares across IO classes, the user expects that
+# the interactive workload should be guaranteed (1000 / (1000 + 200)) == ~83% of
+# the disk bandwidth on each shard. So if it's only asking for less than 35%,
+# the lower-priority job shouldn't disturb it.
+#
+# But before the relevant IO scheduler changes, this goal wasn't met,
+# and the interactive workload on shard 0 was instead starved for IO
+# by the low-priority workloads on other shards.
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 IO_TESTER_EXECUTABLE" >&2
+    exit 1
+fi
+
+"$1" --smp=7 --storage=/dev/null --conf=<(cat <<'EOF'
+- name: tablet-streaming
+  data_size: 1GB
+  shards: all
+  type: seqread
+  shard_info:
+    parallelism: 50
+    reqsize: 128kB
+    shares: 200
+- name: cassandra-stress
+  shards: all
+  type: randread
+  data_size: 1GB
+  shard_info:
+    parallelism: 100
+    reqsize: 1536
+    shares: 1000
+    rps: 75
+  options:
+    pause_distribution: poisson
+    sleep_type: steady
+- name: cassandra-stress-slight-imbalance
+  shards: [0]
+  type: randread
+  data_size: 1GB
+  shard_info:
+    parallelism: 100
+    reqsize: 1536
+    class: cassandra-stress
+    rps: 10
+  options:
+    pause_distribution: poisson
+    sleep_type: steady
+
+EOF
+) --io-properties-file=<(cat <<'EOF'
+# i4i.2xlarge
+disks:
+- mountpoint: /dev
+  read_bandwidth: 1542559872
+  read_iops: 218786
+  write_bandwidth: 1130867072
+  write_iops: 121499
+EOF
+)
diff --git a/apps/io_tester/test_cases/tau_nemesis.sh b/apps/io_tester/test_cases/tau_nemesis.sh
new file mode 100644
index 0000000000..92031e38d4
--- /dev/null
+++ b/apps/io_tester/test_cases/tau_nemesis.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+
+# There is a `tau` mechanism in `fair_queue` which lets newly-activated
+# IO classes monopolize the shard's IO queue for a while.
+#
+# This isn't very useful and can result in major performance problems,
+# as this test illustrates. The `highprio` workload could have tail latency
+# of about 2 milliseconds, but `bursty_lowprio` is allowed by `tau` to butt in
+# periodically and preempt `highprio` for ~30ms, bringing its tail latency
+# up to that ~30ms mark.
+
+if [ $# -ne 1 ]; then
+    echo "Usage: $0 IO_TESTER_EXECUTABLE" >&2
+    exit 1
+fi
+
+"$1" --smp=7 --storage=/dev/null --conf=<(cat <<'EOF'
+- name: filler
+  data_size: 1GB
+  shards: all
+  type: seqread
+  shard_info:
+    parallelism: 10
+    reqsize: 128kB
+    shares: 10
+- name: bursty_lowprio
+  data_size: 1GB
+  shards: all
+  type: seqread
+  shard_info:
+    parallelism: 1
+    reqsize: 128kB
+    shares: 100
+    batch: 50
+    rps: 8
+- name: highprio
+  shards: all
+  type: randread
+  data_size: 1GB
+  shard_info:
+    parallelism: 100
+    reqsize: 1536
+    shares: 1000
+    rps: 50
+  options:
+    pause_distribution: poisson
+    sleep_type: steady
+EOF
+) --io-properties-file=<(cat <<'EOF'
+# i4i.2xlarge
+disks:
+- mountpoint: /dev
+  read_bandwidth: 1542559872
+  read_iops: 218786
+  write_bandwidth: 1130867072
+  write_iops: 121499
+EOF
+)
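Usage note: each of the scripts added above takes the path to the io_tester executable as its single argument and runs it with --smp=7 against a null storage backend, so they can be invoked directly once io_tester is built. A minimal invocation sketch (the binary path below is an assumption about the local build layout, not something the scripts require):

    ./apps/io_tester/test_cases/tau_nemesis.sh ./build/release/apps/io_tester/io_tester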