From 1cf7fc071130775d112b98a4785c07c372173601 Mon Sep 17 00:00:00 2001 From: Nick Tehrany Date: Mon, 14 Nov 2022 13:39:51 +0000 Subject: [PATCH 1/5] Fix tracing secnum --- zns.trace/trace.bt | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/zns.trace/trace.bt b/zns.trace/trace.bt index ec921f3..961dc46 100644 --- a/zns.trace/trace.bt +++ b/zns.trace/trace.bt @@ -2,6 +2,12 @@ #include #include +/* NOTE, the values are defined as 512B sector size + * Change the below define to 12 for 4K sector size + */ + +#define SECTOR_SHIFT 9 + BEGIN { if($# != 2) { printf("Invalid args. Requires [dev name] [Zone Size]."); @@ -16,7 +22,7 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { $nvme_cmd = (struct nvme_command *)*(arg1+sizeof(struct request)); $opcode = (uint8)$nvme_cmd->rw.opcode; - $secnum = $nvme_cmd->rw.slba; + $secnum = ((struct request *)arg1)->__sector; // Bitwise And to get zone starting LBA with zone MASK $zlbas = ($secnum & @ZONE_MASK); @@ -26,11 +32,11 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { @z_rw_ctr_map[$zlbas, nvme_cmd_write]++; // Convert data_len to 512B sectors - $data_len = (((struct request *)arg1)->__data_len >> 9); + $data_len = (((struct request *)arg1)->__data_len >> SECTOR_SHIFT); @z_data_map[$zlbas, nvme_cmd_write] = @z_data_map[$zlbas, nvme_cmd_write] + $data_len; if(@logging == 1) { - printf("w_cmd at ZLBAS: %ld size: %d\n", $zlbas, $data_len); + printf("w_cmd at : <%ld, %d, %d>\n", $zlbas, ($zlbas / $2) + 1, $data_len); } } @@ -40,11 +46,11 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { @z_rw_ctr_map[$zlbas, nvme_cmd_read]++; // Convert data_len to 512B sectors - $data_len = (((struct request *)arg1)->__data_len >> 9); + $data_len = (((struct request *)arg1)->__data_len >> SECTOR_SHIFT); @z_data_map[$zlbas, nvme_cmd_read] = @z_data_map[$zlbas, nvme_cmd_read] + $data_len; if(@logging == 1) { - 
printf("r_cmd at ZLBAS: %ld size: %d\n", $zlbas, $data_len); + printf("r_cmd at : <%ld, %d, %d>\n", $zlbas, ($zlbas / $2) + 1, $data_len); } } @@ -59,7 +65,7 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { } if(@logging == 1 && $cmd != REQ_OP_DRV_OUT) { - printf("reset_cmd zlbas: %ld\n", $zlbas); + printf("reset_cmd : <%ld, %d>\n", $zlbas, ($zlbas / $2) + 1); } @z_reset_ctr_map[$zlbas]++; From bcb5fa67ceb6c68b377d31a609098460df530166 Mon Sep 17 00:00:00 2001 From: Nick Tehrany Date: Mon, 14 Nov 2022 13:42:44 +0000 Subject: [PATCH 2/5] Readme update --- zns.trace/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zns.trace/README.md b/zns.trace/README.md index ad8a63a..40d1f65 100644 --- a/zns.trace/README.md +++ b/zns.trace/README.md @@ -12,6 +12,8 @@ To run the tracing, simply provide the script with a ZNS device to trace, and pr The python plotting script will directly be called, however if for some reason you have data that has not been plotted you can run the python script itself with `python3 plot.py`. **Note** however, that it takes the zone size and number of zones as arguments, and therefore attempts to create figures for all data with these values. If a figure for a particular data file already exists, this data will be skipped an no new figure is generated. Therefore, in the case there are multiple data files without figures, and with different ZNS devices, simply move the files from different devices to a temporary directory and plot only data for one device at a time. Since it does not regenerate existing figures, this way you can iteratively generate figures for all data files. Or move generated data and files to different directories, we do not have an effective way to integrate this for everyone, therefore this part involves individual configuration. +**NOTE:** the tracing script (`trace.bt`) has the sector size hardcoded to 512B. For a 4K sector size, change the define in the script to `#define SECTOR_SHIFT 12`.
+ ## Requirements The main requirements is for the Kernel to be built with `BPF` enabled, and [`bpftrace`](https://github.com/iovisor/bpftrace) to be installed. See their [install manual](https://github.com/iovisor/bpftrace/blob/master/INSTALL.md) for an installation guide. For plotting we provide a `requirements.txt` file with libs to install. Run `pip install -r requirements.txt` to install them. If there are version errors for `numpy` during installing, using an older `numpy` version is typically fine, as utilize only the very basics of it. From 9d7adba6134bea9e55b9197a0e64173090462a98 Mon Sep 17 00:00:00 2001 From: Nick Tehrany Date: Mon, 14 Nov 2022 14:06:11 +0000 Subject: [PATCH 3/5] Fix zone reset secnum calc For zone resets, the sector number is only present in the NVMe command and not in the request, hence it must be read from the command in that case --- zns.trace/trace.bt | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/zns.trace/trace.bt b/zns.trace/trace.bt index 961dc46..3878483 100644 --- a/zns.trace/trace.bt +++ b/zns.trace/trace.bt @@ -36,7 +36,7 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { @z_data_map[$zlbas, nvme_cmd_write] = @z_data_map[$zlbas, nvme_cmd_write] + $data_len; if(@logging == 1) { - printf("w_cmd at : <%ld, %d, %d>\n", $zlbas, ($zlbas / $2) + 1, $data_len); + printf("w_cmd at : <%lld, %d, %d>\n", $secnum, $zlbas / $2, $data_len); } } @@ -50,7 +50,7 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { @z_data_map[$zlbas, nvme_cmd_read] = @z_data_map[$zlbas, nvme_cmd_read] + $data_len; if(@logging == 1) { - printf("r_cmd at : <%ld, %d, %d>\n", $zlbas, ($zlbas / $2) + 1, $data_len); + printf("r_cmd at : <%ld, %d, %d>\n", $secnum, $zlbas / $2, $data_len); } } @@ -60,12 +60,15 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { // If nvme device is in passthrough (e.g., qemu passthrough) Zone reset has flag REQ_OP_DRV_OUT // therefore include more 
checks on nvme_zone_mgnt_action if($cmd == REQ_OP_ZONE_RESET || (($cmd == REQ_OP_DRV_OUT && $opcode == nvme_cmd_zone_mgmt_send) && $nvme_cmd->zms.zsa == NVME_ZONE_RESET)) { + $secnum = $nvme_cmd->rw.slba; + $zlbas = ($secnum & @ZONE_MASK); + if(@logging == 1 && $cmd == REQ_OP_DRV_OUT) { - printf("reset_cmd (passthrough mode) zlbas: %ld\n", $zlbas); + printf("reset_cmd (passthrough mode) : <%ld, %d>\n", $secnum, $zlbas / $2); } if(@logging == 1 && $cmd != REQ_OP_DRV_OUT) { - printf("reset_cmd : <%ld, %d>\n", $zlbas, ($zlbas / $2) + 1); + printf("reset_cmd : <%ld, %d>\n", $secnum, $zlbas / $2); } @z_reset_ctr_map[$zlbas]++; @@ -96,11 +99,11 @@ k:nvme_complete_rq / ((struct request *)arg0)->q->disk->disk_name == str($1) / { // If nvme device is in passthrough (e.g., qemu passthrough) Zone reset has flag REQ_OP_DRV_OUT if(@logging == 1 && $cmd == REQ_OP_DRV_OUT) { - printf("completed reset_cmd (passthrough mode) zlbas %ld in: %d\n", $zlbas, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]]); + printf("completed reset_cmd (passthrough mode) zone %ld in: %d\n", $zlbas / $2, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]]); } if(@logging == 1 && $cmd != REQ_OP_DRV_OUT) { - printf("completed reset_cmd zlbas %ld in: %d\n", $zlbas, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]]); + printf("completed reset_cmd zone %ld in: %d\n", $zlbas / $2, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]]); } } } From 68bec7d450fec03831e9d4af6d65fa8befcb678c Mon Sep 17 00:00:00 2001 From: Nick Tehrany Date: Mon, 14 Nov 2022 14:16:49 +0000 Subject: [PATCH 4/5] Update --- zns.trace/trace.bt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zns.trace/trace.bt b/zns.trace/trace.bt index 3878483..75014c4 100644 --- a/zns.trace/trace.bt +++ b/zns.trace/trace.bt @@ -99,11 +99,11 @@ k:nvme_complete_rq / ((struct request *)arg0)->q->disk->disk_name == str($1) / { // If nvme device is in passthrough (e.g., qemu passthrough) Zone reset has flag REQ_OP_DRV_OUT if(@logging 
== 1 && $cmd == REQ_OP_DRV_OUT) { - printf("completed reset_cmd (passthrough mode) zone %ld in: %d\n", $zlbas / $2, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]]); + printf("completed reset_cmd (passthrough mode) zone %ld in (usec): %d\n", $zlbas / $2, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]] / 1000); } if(@logging == 1 && $cmd != REQ_OP_DRV_OUT) { - printf("completed reset_cmd zone %ld in: %d\n", $zlbas / $2, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]]); + printf("completed reset_cmd zone %ld in (usec): %d\n", $zlbas / $2, @z_reset_lat_map[$zlbas, @z_reset_ctr_map[$zlbas]] / 1000); } } } From 72fff678ff9424bf64d3aaeaae2b926391fdacf2 Mon Sep 17 00:00:00 2001 From: Nick Tehrany Date: Mon, 14 Nov 2022 14:43:15 +0000 Subject: [PATCH 5/5] Fix trace req type --- zns.trace/trace.bt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/zns.trace/trace.bt b/zns.trace/trace.bt index 75014c4..120f356 100644 --- a/zns.trace/trace.bt +++ b/zns.trace/trace.bt @@ -20,6 +20,7 @@ BEGIN { k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { $nvme_cmd = (struct nvme_command *)*(arg1+sizeof(struct request)); + $cmd = (((struct request *)arg1)->cmd_flags & REQ_OP_MASK); $opcode = (uint8)$nvme_cmd->rw.opcode; $secnum = ((struct request *)arg1)->__sector; @@ -27,7 +28,7 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { $zlbas = ($secnum & @ZONE_MASK); // Trace Write and Append command counters and I/O sizes - if($opcode == nvme_cmd_write || $opcode == nvme_cmd_zone_append) { + if($cmd == REQ_OP_WRITE || $cmd == REQ_OP_ZONE_APPEND) { // Store zone operation counter map under ZLBAS, operation 0x01 for write and append @z_rw_ctr_map[$zlbas, nvme_cmd_write]++; @@ -41,7 +42,7 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { } // Trace Read command counter and total I/O sizes - if($opcode == nvme_cmd_read) { + if($cmd == REQ_OP_READ) { // Store zone operation 
counter map under ZLBAS, operation 0x01 for write and append @z_rw_ctr_map[$zlbas, nvme_cmd_read]++; @@ -54,9 +55,6 @@ k:nvme_setup_cmd / ((struct request *)arg1)->q->disk->disk_name == str($1) / { } } - // Trace ZONE RESETS - $cmd = (((struct request *)arg1)->cmd_flags & REQ_OP_MASK); - // If nvme device is in passthrough (e.g., qemu passthrough) Zone reset has flag REQ_OP_DRV_OUT // therefore include more checks on nvme_zone_mgnt_action if($cmd == REQ_OP_ZONE_RESET || (($cmd == REQ_OP_DRV_OUT && $opcode == nvme_cmd_zone_mgmt_send) && $nvme_cmd->zms.zsa == NVME_ZONE_RESET)) {