# real_hil_example_cfg.yml
general_paths:
  # Path to the state dict file of the DINO model (ctc_dino.pt).
  state_dict_path: "absolut_path_to_the_folder_containing_the_dino_model/ctc_dino.pt"

  # Example path patterns for the DataFrames of train and test cases (case = patient).
  # All case-specific DataFrames are assumed to follow one consistent directory pattern.
  # The labeled and unlabeled DataFrames of a case should be in the same directory.
  # Each DataFrame of a case must include the following columns:
  #   - "case":      unique case identifier
  #   - "label":     image label
  #   - "dapi_path": path to the DAPI channel image (cropped image, e.g. png; not the raw cartridge image)
  #   - "ck_path":   path to the CK channel image (cropped image, e.g. png; not the raw cartridge image)
  #   - "cd45_path": path to the CD45 channel image (cropped image, e.g. png; not the raw cartridge image)
  # NOTE(review): {train_case_id} / {test_case_id} are presumably substituted with the IDs
  # listed in train_cases / test_cases - confirm against the loading code.
  example_path_to_df_of_train_case: "absolute_path_to/{train_case_id}/dataframe/{train_case_id}_labeled_dataframe"
  example_path_to_df_of_test_case: "absolute_path_to/{test_case_id}/dataframe/{test_case_id}_labeled_dataframe"

  # Root path for the channel images. Only needed if the channel columns ("dapi_path", "ck_path",
  # "cd45_path") of the DataFrames referenced by example_path_to_df_of_train_case and
  # example_path_to_df_of_test_case hold relative paths.
  # If those columns hold absolute paths, leave root_path as an empty string: "".
  root_path: ""

  ####################################################################################################
  # OPTIONAL:
  # Absolute path to the DataFrame used to train the Noisy Classifier.
  # It should include columns like "label", "dapi_path", "ck_path", "cd45_path",
  # and optionally "case" as a case identifier.
  path_to_df_train_noisy_classifier: "absolute_path_to_the_dataframe"
  ####################################################################################################

  # Absolute path for storing results. Use the same path to resume a paused relabeling process.
  path_to_results_folder: "absolute_path_to_your_results_folder"
# Lists of case IDs for the test and the training cases.
# The IDs should match those used in example_path_to_df_of_train_case
# (and, for the test cases, in example_path_to_df_of_test_case).
test_cases:
  - 'case_1'
  - 'case_2'
  - 'case_3'
  - 'case_4'
  - 'case_5'
  - 'case_6'
  - 'case_7'
  - 'case_8'
  - 'case_9'
  - 'case_10'
# Case IDs of the training cases.
train_cases:
  - 'case_11'
  - 'case_12'
  - 'case_13'
  - 'case_14'
  - 'case_15'
  - 'case_16'
  - 'case_17'
  - 'case_18'
  - 'case_19'
  - 'case_20'
# NOTE(review): presumably the index of the compute device to use (e.g. GPU 0) - confirm against the code reading this key.
device: 0
noisy_classifier:
  # Label of a "not noisy" image. The value 2 is only an example; put your own label here.
  pos_label: 2
HiL_configurations:
  # Number of HiL runs (= repetitions).
  n_runs: 5
  # Initialization step, followed by the relabeling and sampling loops of each HiL run.
  n_loops: ["init", 1, 2, 3, 4]
  class_names:
    CTC: "1"  # Positive class / label.
    Non-CTC: "0"
  # Maximum number of images shown in the relabeling gallery.
  max_relabeling_pool_size: 1000
  # Set this to true to create a clustering plot in each loop. Keep in mind that this
  # can take a while, depending on your computer and the data you provide.
  clustering_plot_in_each_loop: false
  # Set this to true if no additional cells are identified for re-labeling after the
  # first complete HiL run.
  shuffle_after_one_HiL_run: false
experiment_setting:
  # Sampling strategy. Options: "cluster_specific" or "random".
  experiment_name: "random"
# NOTE(review): presumably forwarded to the data loader that feeds the images - confirm against the consuming code.
dataloader_parameters:
  batch_size: 1024
  num_workers: 2
# List of classifier configurations; each entry names a classifier and its parameters.
# NOTE(review): the parameter names match scikit-learn's SVC, so "params" is presumably
# passed to it as keyword arguments - confirm against the consuming code.
OPT:
  - classifier: SVC
    params:
      class_weight: balanced
      cache_size: 10000
      kernel: rbf
      max_iter: -1
      probability: true
      break_ties: true
# Settings for the PCA step.
PCA_parameters:
  n_components: 32
# Settings for the UMAP step.
UMAP_parameters:
  n_components: 2
  n_neighbors: 200
  min_dist: 0.8
  learning_rate: 1.0
  metric: "euclidean"
  random_state: 42
# Settings for the HDBSCAN clustering step.
HDBSCAN_parameters:
  min_cluster_size: 15
  min_samples: 90
  cluster_selection_method: "leaf"
  cluster_selection_epsilon: 0.25
  metric: "euclidean"
  prediction_data: true