-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdashboard.py
394 lines (319 loc) · 21.9 KB
/
dashboard.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
# Import necessary libraries
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import matplotlib.colors as mcolors
import plotly.express as px
from sklearn.decomposition import PCA
import seaborn as sns
import functions.helpers as helpers
# Inline stylesheet that removes the sidebar (and its page list) from view.
_HIDE_SIDEBAR_CSS = """
<style>
[data-testid="stSidebar"] {
display: none;
}
</style>
"""

# Page-level settings: browser tab title/icon, sidebar collapsed on load.
st.set_page_config(
    page_title='Cardiotocography Dashboard',
    page_icon='🩺',
    initial_sidebar_state="collapsed",
)

# Inject the stylesheet; raw HTML has to be explicitly allowed.
st.markdown(_HIDE_SIDEBAR_CSS, unsafe_allow_html=True)
def desaturate_color(color, amount=0.5):
    """
    Lighten a color by mixing it linearly with pure white.

    Parameters:
    color: str or tuple - Any color specification that matplotlib can parse.
    amount: float - Weight given to white (0.0 keeps the color as is, 1.0 yields white).

    Returns:
    tuple - The blended color as an (r, g, b) tuple.
    """
    base_rgb = np.array(mcolors.to_rgb(color))
    # Weighted average between the parsed color and white (1, 1, 1).
    blended = (1 - amount) * base_rgb + amount * np.array([1, 1, 1])
    return tuple(blended)
# Number of features pre-selected in every feature picker.
_DEFAULT_FEATURE_COUNT = 5

# Fill colors of the three NSP classes in the distribution plots.
_NSP_PALETTE = {'Normal': 'green', 'Suspect': 'blue', 'Pathologic': 'red'}

# Legend entries in display order; the last one is the dashed reference line.
_LEGEND_LABELS = ['Normal', 'Suspect', 'Pathologic', 'Normal reference value']

# Plot-title labels in the form "<column>: <description>".
_FEATURE_LABELS = [
    'LB: FHR baseline (beats per minute)',
    'AC: # of accelerations per second',
    'FM: # of fetal movements per second',
    'UC: # of uterine contractions per second',
    'DL: # of light decelerations per second',
    'DS: # of severe decelerations per second',
    'DP: # of prolongued decelerations per second',
    'ASTV: % time with abnormal short-term variability',
    'MSTV: mean value of short term variability',
    'ALTV: % time with abnormal long-term variability',
    'MLTV: mean value of long term variability',
    'Width: width of FHR histogram',
    'Min: minimum of FHR histogram',
    'Max: maximum of FHR histogram',
    'Nmax: # of histogram peaks',
    'Nzeros: # of histogram zeros',
    'Mode: histogram mode',
    'Mean: histogram mean',
    'Median: histogram median',
    'Variance: histogram variance',
    'Tendency: histogram tendency',
]

# Long-form description shown for each feature in the description section.
_FEATURE_DESCRIPTIONS = {
    'LB': 'Baseline value: The average heart rate during a 10-minute window, excluding accelerations and decelerations.',
    'AC': 'Accelerations: Temporary increases in FHR of at least 15 beats per minute above the baseline, lasting for at least 15 seconds.',
    'FM': 'Fetal movements: The number of times the fetus moves during the monitoring period.',
    'UC': 'Uterine contractions: The number of contractions during the monitoring period, used to correlate with FHR patterns.',
    'ASTV': 'Abnormal Short-Term Variability: The percentage of time with abnormal short term variability, indicating potential distress.',
    'MSTV': 'Mean Short-Term Variability: The average beat-to-beat variability of the fetal heart rate.',
    'ALTV': 'Abnormal Long-Term Variability: The percentage of time with abnormal long term variability, indicating potential distress.',
    'MLTV': 'Mean Long-Term Variability: The average variability over longer periods.',
    'DL': 'Light Decelerations: Temporary decreases in FHR, indicating potential distress but usually less severe.',
    'DS': 'Severe Decelerations: More significant decreases in FHR, indicating a higher level of distress.',
    'DP': 'Prolonged Decelerations: Extended periods of decreased FHR, indicating potential sustained distress.',
    'DR': 'Rebound Decelerations: Decreases in FHR that are followed by an increase in FHR, indicating potential sustained distress.',
    'Width' : 'The width of the FHR histogram represents the range of heart rate values observed over the monitoring period. It provides an indication of the variability in fetal heart rate, which is crucial for assessing fetal well-being.',
    'Min': 'The minimum (low frequency) of the FHR histogram represents the lowest recorded fetal heart rate during the monitoring period. It helps in identifying instances of significant bradycardia, which could indicate fetal distress.',
    'Max': 'The maximum (high frequency) of the FHR histogram represents the highest recorded fetal heart rate during the monitoring period. This can highlight episodes of fetal tachycardia, which may be associated with fetal or maternal conditions.',
    'NMax': 'number of histogram peaks',
    'Nzeros': 'number of histogram zeros',
    'Mode': 'The mode of the histogram is the most frequently occurring fetal heart rate value during the monitoring period. It represents the central tendency of the fetal heart rate distribution.',
    'Mean': 'mean of the histogram',
    'Median': 'median of the histogram',
    'Variance': 'The variance of the histogram measures the dispersion of the fetal heart rate values around the mean. High variance indicates greater variability in the fetal heart rate, which can be a sign of fetal well-being or distress.',
    'Tendency': 'Histogram tendency indicates the skewness of the fetal heart rate distribution. A value of -1 indicates left asymmetry, 0 indicates symmetry, and 1 indicates right asymmetry. This feature helps in understanding the distribution pattern of the heart rate.',
}

# Markdown source link shown underneath each feature description.
_FEATURE_SOURCES = {
    'LB': 'Source: [Bioengineering Journal](https://www.mdpi.com/2306-5354/11/4/368#B30-bioengineering-11-00368)',
    'AC': 'Source: [Frontiers in Bioengineering](https://www.frontiersin.org/articles/10.3389/fbioe.2022.887549/full)',
    'FM': 'Source: [Springer Books](https://link.springer.com/chapter/10.1007/978-3-030-54403-4_4)',
    'UC': 'Source: [Bioengineering Journal](https://www.mdpi.com/2306-5354/11/4/368#B30-bioengineering-11-00368)',
    'ASTV': 'Source: [Frontiers in Bioengineering](https://www.frontiersin.org/articles/10.3389/fbioe.2022.887549/full)',
    'MSTV': 'Source: [Springer Books](https://link.springer.com/chapter/10.1007/978-3-030-54403-4_4)',
    'ALTV': 'Source: [Bioengineering Journal](https://www.mdpi.com/2306-5354/11/4/368#B30-bioengineering-11-00368)',
    'MLTV': 'Source: [Frontiers in Bioengineering](https://www.frontiersin.org/articles/10.3389/fbioe.2022.887549/full)',
    'DL': 'Source: [Springer Books](https://link.springer.com/chapter/10.1007/978-3-030-54403-4_4)',
    'DS': 'Source: [Bioengineering Journal](https://www.mdpi.com/2306-5354/11/4/368#B30-bioengineering-11-00368)',
    'DP': 'Source: [Frontiers in Bioengineering](https://www.frontiersin.org/articles/10.3389/fbioe.2022.887549/full)',
    'DR': 'Source: [Springer Books](https://link.springer.com/chapter/10.1007/978-3-030-54403-4_4)',
    'Width' : 'Source: [Journal of Advanced Analytics in Healthcare Management](https://research.tensorgate.org/index.php/JAAHM/article/view/38/44)',
    'Min': 'Source: [Journal of Advanced Analytics in Healthcare Management](https://research.tensorgate.org/index.php/JAAHM/article/view/38/44)',
    'Max': 'Source: [Journal of Advanced Analytics in Healthcare Management](https://research.tensorgate.org/index.php/JAAHM/article/view/38/44)',
    'NMax': 'Source: [Research Article](https://www.researchgate.net/publication/357179891_Investigating_the_interpretability_of_fetal_status_assessment_using_antepartum_cardiotocographic_records)',
    'Nzeros': 'Source: [Research Article](https://www.researchgate.net/publication/357179891_Investigating_the_interpretability_of_fetal_status_assessment_using_antepartum_cardiotocographic_records )',
    'Mode': 'Source: [Journal of Advanced Analytics in Healthcare Management](https://research.tensorgate.org/index.php/JAAHM/article/view/38/44)',
    'Mean': 'Source: [Springer Books](https://link.springer.com/chapter/10.1007/978-3-030-54403-4_4)',
    'Median': 'Source: [Springer Books](https://link.springer.com/chapter/10.1007/978-3-030-54403-4_4)',
    'Variance': 'Source: [Journal of Advanced Analytics in Healthcare Management](https://research.tensorgate.org/index.php/JAAHM/article/view/38/44)',
    'Tendency': 'Source: [Journal of Advanced Analytics in Healthcare Management](https://research.tensorgate.org/index.php/JAAHM/article/view/38/44)',
}

# x positions of the dashed "normal reference value" lines, per feature.
_REFERENCE_LINES = {
    'LB': [110, 150],
    'AC': [0, 0.013],
    'UC': [0, 0.0083],
    'DL': [0.00167, 0],
    'ASTV': [20, 58],
    'MSTV': [0.5, 2.5],
    'ALTV': [0, 13],
    'MLTV': [4, 17],
    'Width': [25, 140],
}


def _lookup_ignoring_case(mapping, key, default):
    """Return mapping[key]; fall back to a case-insensitive match, then to default."""
    if key in mapping:
        return mapping[key]
    # The description tables spell one key 'NMax' while the plot labels use
    # 'Nmax'; matching case-insensitively serves either column spelling.
    wanted = str(key).casefold()
    for candidate, value in mapping.items():
        if candidate.casefold() == wanted:
            return value
    return default


def _display_label(column):
    """Return the "<column>: <description>" label for a column, or the column itself."""
    # Anchor on "<column>:" so a short code cannot match a longer one by prefix.
    prefix = f'{column}:'
    return next((label for label in _FEATURE_LABELS if label.startswith(prefix)), column)


def _feature_picker(all_features, *, state_key, label, show_label, show_key,
                    reset_label, reset_key):
    """Render 'show all' / 'reset' buttons plus one multiselect; return the selection."""
    default_selection = all_features[:_DEFAULT_FEATURE_COUNT]
    show_col, reset_col = st.columns(2)
    # A button is only True during the rerun right after the click, so the
    # choice is written into session state, where it survives later reruns
    # triggered by other widgets.
    if show_col.button(show_label, key=show_key, help='Click to show all features'):
        st.session_state[state_key] = list(all_features)
    if reset_col.button(reset_label, key=reset_key, help='Click to reset the selection'):
        st.session_state[state_key] = list(default_selection)
    if state_key not in st.session_state:
        st.session_state[state_key] = list(default_selection)
    # No `default=` here: the initial value comes from session state above.
    return st.multiselect(label, all_features, key=state_key)


def _legend_handles():
    """Build the proxy artists for the NSP legend, ordered like _LEGEND_LABELS."""
    handles = [
        Rectangle((0, 0), 2, 1, color=desaturate_color(_NSP_PALETTE[name], 0.5))
        for name in _LEGEND_LABELS[:-1]
    ]
    handles.append(plt.Line2D([0], [0], color='red', linestyle='--', linewidth=1))
    return handles


def _draw_distribution(ax, featured_df, target_df, column):
    """Draw the per-class density of one feature, with reference lines, onto ax."""
    sns.kdeplot(data=featured_df, x=column, hue=target_df['NSP_Label'], fill=True,
                palette=_NSP_PALETTE, ax=ax)
    # Dashed red lines mark the normal reference values, where defined.
    for value in _REFERENCE_LINES.get(column, ()):
        ax.axvline(value, color='red', linestyle='--', linewidth=1)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_title(f'Distribution of {_display_label(column)}', fontsize=18, fontweight='bold')
    # The figure gets one shared legend, so drop the per-axes one. It can be
    # missing when nothing was drawn, hence the None check.
    legend = ax.get_legend()
    if legend is not None:
        legend.remove()


def _render_pca_section(featured_df):
    """Render the PCA explanation text and the explained-variance bar chart."""
    st.markdown('### PCA - Explained Variance per Measurement')
    st.markdown('Principal Component Analysis (PCA) is a mathematical reduction technique that allows to illuminate the most important measurements in the big datasets. The graph below shows the explained variance for each measurement of a patient. The higher the explained variance, the more important that measurement could be for further treatment.')
    # NOTE(review): explained_variance_ratio_ holds one value per principal
    # component, not per input column, yet the bars below are labelled with
    # the column names. Confirm this pairing is what the chart should show.
    pca = PCA(n_components=len(featured_df.columns))
    pca.fit(featured_df)
    explained_variances = pca.explained_variance_ratio_
    features = featured_df.columns
    indices = np.argsort(explained_variances)[::-1]  # largest ratio first
    sorted_variances = explained_variances[indices]
    sorted_features = features[indices]
    fig, ax = plt.subplots()
    bars = ax.barh(sorted_features, sorted_variances, color='green')
    ax.set_xlabel('Explained Variance')
    ax.set_title('PCA - Explained Variance per Feature')
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_visible(False)
    # Print each value just right of its bar.
    for bar in bars:
        width = bar.get_width()
        ax.text(width + 0.02, bar.get_y() + bar.get_height() / 2, f'{width:.2f}', va='center')
    # Largest bar on top.
    ax.invert_yaxis()
    st.pyplot(fig)
    # Release the figure; otherwise one more stays open after every rerun.
    plt.close(fig)


def _render_description_section(all_features):
    """Render the feature picker and the description/source text per chosen feature."""
    st.markdown('### Description of all features')
    selected = _feature_picker(
        all_features,
        state_key='multiselect_desc',
        label="Choose features (desc):",
        show_label='Show all features (desc)',
        show_key='show_all_features_desc',
        reset_label='Reset selection (desc)',
        reset_key='reset_selection_desc',
    )
    if not selected:
        st.markdown('Please select features to see their descriptions.')
        return
    st.markdown('#### Detailed Descriptions of Selected Features')
    st.markdown(
        "**💡 How to use this feature description?**\n"
        "This section provides detailed descriptions of the selected features in the dataset.\n"
        "The descriptions include information about the measurement, its significance, and potential sources for further reading.\n"
        "Use this information to understand the context of the measurements and their importance in the dataset."
    )
    st.markdown('')
    for feature in selected:
        description = _lookup_ignoring_case(_FEATURE_DESCRIPTIONS, feature, 'No description available.')
        source = _lookup_ignoring_case(_FEATURE_SOURCES, feature, 'No source available.')
        st.markdown(f"**{feature}**: {description}\n{source}")


def _render_distribution_section(featured_df, target_df, all_features):
    """Render the feature picker and the per-class density plots of the chosen features."""
    st.markdown('### Overview of all measurements distribution')
    selected = _feature_picker(
        all_features,
        state_key='multiselect_overview',
        label="Choose features:",
        show_label='Show all features',
        show_key='show_all_features',
        reset_label='Reset selection',
        reset_key='reset_selection',
    )
    st.markdown("""**💡 How to use this overview?**""")
    st.markdown(
        "This overview displays the distribution of selected measurements in the dataset and should give you an idea of the range and spread of the data.\n"
        "In some cases, normal reference values are indicated by intermittent red lines. These values can help you interpret the data in the context of typical measurements."
    )
    if not selected:
        # Nothing to plot; show a hint in place of an empty figure.
        st.markdown('Please select features to see their distributions.')
        return

    handles = _legend_handles()
    if len(selected) > 1:
        # Two plots per row; round the row count up for an odd selection.
        n_cols = 2
        n_rows = (len(selected) + n_cols - 1) // n_cols
        # squeeze=False keeps `axes` two-dimensional even for a single row.
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(18, n_rows * 6),
                                 constrained_layout=True, squeeze=False)
        flat_axes = axes.flatten()
        for ax in flat_axes:
            for item in ([ax.title, ax.xaxis.label, ax.yaxis.label] +
                         ax.get_xticklabels() + ax.get_yticklabels()):
                item.set_fontsize(18)
        for ax, column in zip(flat_axes, selected):
            _draw_distribution(ax, featured_df, target_df, column)
        # The legend sits above the grid; its offset depends on the grid height.
        if n_rows == 1:
            anchor = (0, 1.38)
        else:
            anchor = (0, 1.2 - (len(selected) / 2) * 0.015)
        fig.legend(handles=handles, labels=_LEGEND_LABELS, loc='upper left',
                   bbox_to_anchor=anchor, fontsize=18, title='NSP Label', title_fontsize='18')
        # An odd selection leaves the last grid cell empty; remove it.
        for unused_ax in flat_axes[len(selected):]:
            fig.delaxes(unused_ax)
    else:
        fig, ax = plt.subplots(figsize=(12, 6))
        _draw_distribution(ax, featured_df, target_df, selected[0])
        fig.legend(handles=handles, labels=_LEGEND_LABELS, loc='upper right',
                   bbox_to_anchor=(0.9, 0.8), fontsize=11, title='NSP Label', title_fontsize='13')
    st.pyplot(fig)
    # Release the figure; otherwise one more stays open after every rerun.
    plt.close(fig)


def _render_correlation_section(featured_df, all_features):
    """Render the opt-in correlation heatmap of the chosen features."""
    show_correlation = st.selectbox('Are you interested to learn more about correlation in measurements?', ('Maybe later', 'Yes'))
    if show_correlation != 'Yes':
        return
    st.write('---')
    st.markdown('### Correlation Heatmap')
    selected = _feature_picker(
        all_features,
        state_key='multiselect_corr',
        label="Choose features:",
        show_label='Show all features',
        show_key='show_all_features_corr',
        reset_label='Reset selection',
        reset_key='reset_selection_corr',
    )
    # The heatmap is only drawn for two or more features.
    if len(selected) > 1:
        st.markdown('#### Correlation Heatmap of Selected Measurements')
        st.markdown(
            "\n"
            "**💡 How to use this correlation matrix?**\n"
            "This correlationheatmap displays the relationships between various measurements in a patient's CTG data.\n"
            "Darker shades of red signify stronger positive correlations, while lighter shades indicate weaker correlations.\n"
            "Look for strong positive or negative correlations, as they may indicate significant information.\n"
            "1 means positive correlation, -1 represents negative correlation, 0 indicates no correlation.\n"
        )
        corr_matrix = featured_df[selected].corr().round(2)
        heatmap_fig = px.imshow(corr_matrix, text_auto=True, labels=dict(x="Feature", y="Feature", color="Correlation"), aspect="auto", color_continuous_scale='RdBu_r', zmin=-1, zmax=1)
        st.plotly_chart(heatmap_fig, use_container_width=True)


def main(featured_df, target_df):
    """
    Render the Cardiotocography dashboard page.

    The page shows, top to bottom: dataset counts, a PCA explained-variance
    chart, feature descriptions, per-class density plots and an optional
    correlation heatmap, followed by a link to the '/tryout' page.

    Parameters:
    featured_df: DataFrame holding the feature columns; it is passed to PCA,
        plotted and correlated.
    target_df: DataFrame whose 'NSP_Label' column supplies the class used to
        color the density plots.

    Each feature picker keeps its selection in st.session_state, so a
    selection made with the "show all" / "reset" buttons survives reruns
    caused by other widgets.
    """
    st.title('Cardiotocography Dashboard')
    st.markdown('This dashboard provides an overview of a [Cardiotocography dataset](https://archive.ics.uci.edu/dataset/193/cardiotocography). The dataset contains features of fetal heart rate (FHR) and uterine contractions (UC) and the target variable Normal, Suspect, Pathologic (NSP). Feel free to explore the dataset by selecting a categorical variable from the dropdown menu below.')
    st.markdown('### Cardiotocography dataset overview')
    # Feature, sample and missing-value counts side by side on one row.
    col1, col2, col3 = st.columns(3)
    with col1:
        st.write(f'Number of features: {featured_df.shape[1]}')
    with col2:
        st.write(f'Number of samples: {featured_df.shape[0]}')
    with col3:
        st.write(f'Number of missing values: {featured_df.isnull().sum().sum()}')
    st.write('---')

    _render_pca_section(featured_df)
    st.write('---')

    # Only numeric columns are offered in the feature pickers.
    all_features = featured_df.select_dtypes(include=[np.number]).columns.tolist()
    _render_description_section(all_features)
    st.write('---')

    _render_distribution_section(featured_df, target_df, all_features)
    _render_correlation_section(featured_df, all_features)

    # Link to the page where users can try their own data.
    st.link_button('Try your own data', '/tryout')
if __name__ == '__main__':
    # Fetch the feature and target frames from the helper module, then draw the page.
    loaded_frames = helpers.loaddata()
    featured_df, target_df = loaded_frames
    main(featured_df=featured_df, target_df=target_df)