Made benchmarking scripts more consistent and rounded average results to four decimal places.
JanCBrammer · Sep 29, 2020 · cb89f3a · cb89f3a
1 parent 6685c6e
commit cb89f3a
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 24 deletions.
diff --git a/biopeaks/benchmarks/benchmark_ECG_local.py b/biopeaks/benchmarks/benchmark_ECG_local.py
@@ -9,7 +9,7 @@
 
 data_dir = Path(".../experiment_data")    # replace with your local "experiment_data" directory once you've downloaded the database
 
-condition = "hand_bike"    # can be one of {"sitting", "maths", "walking", "hand_bike", "jogging"}
+condition = "sitting"    # can be one of {"sitting", "maths", "walking", "hand_bike", "jogging"}
 sfreq = 250
 tolerance = 1    # in samples
 print(f"Setting tolerance for match between algorithmic and manual annotation"
@@ -44,8 +44,8 @@
         fp = comparitor.fp
         fn = comparitor.fn
 
-        sensitivity.append(float(tp) / (tp + fn))
-        precision.append(float(tp) / (tp + fp))
+        sensitivity.append(tp / (tp + fn))
+        precision.append(tp / (tp + fp))
 
         print(f"\nResults {subject}")
         print("-" * len(str(subject)))
@@ -54,5 +54,5 @@
 
 print(f"\nAverage results over {len(precision)} records")
 print("-" * 31)
-print(f"sensitivity: mean = {np.mean(sensitivity)}, std = {np.std(sensitivity)}")
-print(f"precision: mean = {np.mean(precision)}, std = {np.std(precision)}")
+print(f"sensitivity: mean = {round(np.mean(sensitivity), 4)}, std = {round(np.std(sensitivity), 4)}")
+print(f"precision: mean = {round(np.mean(precision), 4)}, std = {round(np.std(precision), 4)}")
diff --git a/biopeaks/benchmarks/benchmark_ECG_stream.py b/biopeaks/benchmarks/benchmark_ECG_stream.py
@@ -1,8 +1,8 @@
 from biopeaks.heart import ecg_peaks
 from benchmark_utils import BenchmarkDetectorGUDB
 
-condition = "hand_bike"    # can be one of {"sitting", "maths", "walking", "hand_bike", "jogging"}
+condition = "sitting"    # can be one of {"sitting", "maths", "walking", "hand_bike", "jogging"}
 tolerance = 1    # in samples
 
 pipeline = BenchmarkDetectorGUDB(ecg_peaks, tolerance)
-pipeline.benchmark_records(condition, channel="cs_V2_V1", annotation="annotation_cs")
+pipeline.benchmark_records(condition, channel="einthoven_II", annotation="annotation_cables")
diff --git a/biopeaks/benchmarks/benchmark_PPG_local.py b/biopeaks/benchmarks/benchmark_PPG_local.py
@@ -25,13 +25,13 @@
 
     peaks = ppg_peaks(record, sfreq)
 
-    comparitor = compare_annotations(peaks, annotation, tolerance)
+    comparitor = compare_annotations(annotation, peaks, tolerance)
     tp = comparitor.tp
     fp = comparitor.fp
     fn = comparitor.fn
 
-    sensitivity.append(float(tp) / (tp + fn))
-    precision.append(float(tp) / (tp + fp))
+    sensitivity.append(tp / (tp + fn))
+    precision.append(tp / (tp + fp))
 
     print(f"\nResults {subject}")
     print("-" * len(str(subject)))
@@ -40,5 +40,5 @@
 
 print(f"\nAverage results over {len(precision)} records")
 print("-" * 31)
-print(f"sensitivity: mean = {np.mean(sensitivity)}, std = {np.std(sensitivity)}")
-print(f"precision: mean = {np.mean(precision)}, std = {np.std(precision)}")
+print(f"sensitivity: mean = {round(np.mean(sensitivity), 4)}, std = {round(np.std(sensitivity), 4)}")
+print(f"precision: mean = {round(np.mean(precision), 4)}, std = {round(np.std(precision), 4)}")
diff --git a/biopeaks/benchmarks/benchmark_utils.py b/biopeaks/benchmarks/benchmark_utils.py
@@ -61,7 +61,7 @@ async def score_record(self, record, annotation):
         """
         detector_annotation = self.detector(record, self.sfreq)
 
-        comparitor = compare_annotations(detector_annotation, annotation,
+        comparitor = compare_annotations(annotation, detector_annotation,
                                          self.tolerance)
         tp = comparitor.tp
         fp = comparitor.fp
@@ -170,16 +170,16 @@ async def benchmark_record(self):
         print(f"\nAverage results over {len(precisions)} records")
         print("-" * 31)
 
-        mean_avg_time = np.mean(avg_times)
-        std_avg_time = np.std(avg_times)
+        mean_avg_time = round(np.mean(avg_times), 4)
+        std_avg_time = round(np.std(avg_times), 4)
         print(f"average run time over {self.n_runs} runs: mean = {mean_avg_time}, std = {std_avg_time}")
 
-        mean_sensitivity = np.mean(sensitivities)
-        std_sensitivity = np.std(sensitivities)
+        mean_sensitivity = round(np.mean(sensitivities), 4)
+        std_sensitivity = round(np.std(sensitivities), 4)
         print(f"sensitivity: mean = {mean_sensitivity}, std = {std_sensitivity}")
 
-        mean_precision = np.mean(precisions)
-        std_precision = np.std(precisions)
+        mean_precision = round(np.mean(precisions), 4)
+        std_precision = round(np.std(precisions), 4)
         print(f"precision: mean = {mean_precision}, std = {std_precision}")
 
 

diff --git a/paper/paper.md b/paper/paper.md
@@ -57,14 +57,14 @@ the relevant local extrema are inhalation peaks and exhalation troughs. `biopeak
 using three biosignal-specific algorithms. Breathing extrema are detected using a variant of the "zero-crossing algorithm
 with amplitude threshold" [@khodadad]. Systolic peaks in PPG signals are identified using an implementation of "Method IV;
 Event-Related Moving Averages with Dynamic Threshold" introduced by Elgendi et al. [@elgendi]. Lastly, the ECG R-peak detector is a
-custom algorithm that has been evaluated on the Glasgow University Database (GUDB) [@gudb] which contains ECG signals along with R-peak annotations. The performance of the R-peak detector has been evaluated in terms of sensitivity (aka recall; i.e., how many of the correct extrema were detected?) and precision (i.e., how many of the detected extrema are correct extrema?). Peak detection has been evaluated on all 25 records using the ECG channel corresponding to Einthoven lead II. The tolerance for true positive peak detection was set to one sample. The GUDB has not been used to optimize the R-peak detector prior to the performance evaluation. The performance at rest (sitting) and in dynamic conditions (handbike) is as follows:
+custom algorithm that has been evaluated on the Glasgow University Database (GUDB) [@gudb], which contains ECG signals along with R-peak annotations. The performance of the R-peak detector has been evaluated in terms of sensitivity (also known as recall; i.e., how many of the correct extrema were detected?) and precision (i.e., how many of the detected extrema are correct extrema?). Peak detection has been evaluated on the records of all 25 participants included in the GUDB using the ECG channel corresponding to Einthoven lead II. The tolerance for true-positive peak detection was set to one sample. The GUDB was not used to optimize the R-peak detector prior to the performance evaluation. The performance at rest (sitting, 25 records) and in dynamic conditions (handbike, 24 records due to the missing R-peak annotations of participant 04) is as follows:
 
 |           |    |sitting|handbike|
 |:---------:|:--:|:-----:|:------:|
-|precision  |mean|.998   |.984    |
-|           |std |.002   |.022    |
-|sensitivity|mean|.998   |.984    |
-|           |std |.004   |.025    |
+|precision  |mean|.9995  |.9855   |
+|           |std |.0017  |.0234   |
+|sensitivity|mean|.9974  |.9853   |
+|           |std |.0037  |.0250   |
 
 The code for performance evaluation is included in the `biopeaks` installation and can be run without downloading the GUDB (the database is streamed).
 Despite the robust performance of the extrema detectors, algorithmically identified extrema can be misplaced (false positives) or extrema might be missed (false negatives),