forked from AmirHoseinSafari/LRCN-drug-resistance
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipe_line_gene.py
105 lines (89 loc) · 5.34 KB
/
pipe_line_gene.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
from loading_data import data_preprocess
from models import model_gene_based, wide_n_deep, ML_methods, deepAMR_run
from feature_importance import base_approach, lime
from data_analyzer import source_analysis
from dataset_creator import gene_dataset_creator
from functools import partial
from itertools import repeat
from multiprocessing import Pool, freeze_support
import multiprocessing
# Epoch count forwarded to the model_gene_based training entry points
# (currently passed as the third argument of run_model_kfold in train_shuffle).
epochs = 200
def train():
    """Run the currently enabled experiment: parallel LIME over 25 indices.

    The active code path does two things:

    1. Calls ``data_preprocess.process(38, gene_dataset=True)``. The returned
       ``df_train`` / ``labels`` are not consumed by the active code below;
       the call is kept in case it has side effects (NOTE(review): confirm
       whether it is still needed).
    2. Maps ``lime.main_function`` over the integers 0..24 using a
       25-process ``multiprocessing.Pool``.

    The commented-out calls are alternative experiments kept for reference;
    uncomment the one you want to run.
    """
    # deepAMR_run.main()
    df_train, labels = data_preprocess.process(38, gene_dataset=True)

    # --- alternative data loading / analysis / model runs -------------------
    # gene_dataset_creator.main()
    # df_train, labels = data_preprocess.process(38)
    # source_analysis.main(df_train)
    # df_train, labels = data_preprocess.process(38, shuffle_operon_group=True)
    # ML_methods.model_run(df_train, labels)
    # model_gene_based.run_model(df_train, labels, epochs)
    # wide_n_deep.run_bayesian(df_train, labels)
    # model_gene_based.run_bayesian(df_train, labels, comp_iter=1)
    # model_gene_based.run_bayesian_kfold(df_train, labels, comp_iter=1)
    # model_gene_based.run_bayesian_single(df_train, labels)
    # model_gene_based.run_all(df_train, labels, epochs)
    # model_gene_based.run_model_kfold(df_train,labels,epochs)

    # --- alternative feature-importance runs --------------------------------
    # base_approach.run_feature_importance(df_train, labels)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/lime_all.csv', k=200)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_all_test.csv', k=200)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_test_200.csv', k=200)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_train.csv', k=200)
    #
    # print("______")
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/lime_all.csv', k=100)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_all_test.csv',
    #                                       k=100)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_test_200.csv',
    #                                       k=100)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_train.csv',
    #                                       k=100)
    # print("______")
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/lime_all.csv', k=50)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_all_test.csv',
    #                                       k=50)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_test_200.csv',
    #                                       k=50)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_train.csv',
    #                                       k=50)
    # print("______")
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/lime_all.csv', k=20)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_all_test.csv',
    #                                       k=20)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_test_200.csv',
    #                                       k=20)
    # base_approach.find_feature_importance(file_name='feature_importance/score_results/feature_scores_lime_train.csv',
    #                                       k=20)
    # for i in range(1, 8):
    #     print(i)
    #     base_approach.find_feature_importance(file_name='feature_importance/score_results/split/feature_scores_lime_train_150_' + str(i) + '.csv', k=200)
    #
    # for i in range(1, 8):
    #     print(i)
    #     base_approach.find_feature_importance(file_name='feature_importance/score_results/split/feature_scores_lime_train_150_' + str(i) + '.csv', k=100)
    #
    # for i in range(1, 8):
    #     print(i)
    #     base_approach.find_feature_importance(file_name='feature_importance/score_results/split/feature_scores_lime_train_150_' + str(i) + '.csv', k=50)
    #
    # for i in range(1, 8):
    #     print(i)
    #     base_approach.find_feature_importance(file_name='feature_importance/score_results/split/feature_scores_lime_train_150_' + str(i) + '.csv', k=20)

    # --- active experiment ---------------------------------------------------
    # One task per index 0..24; the pool is sized to give each task its own
    # worker process.
    task_indices = list(range(25))
    print(task_indices)
    with multiprocessing.Pool(processes=len(task_indices)) as pool:
        pool.map(lime.main_function, task_indices)
    # lime.main_function(df_train, labels)
def train_shuffle():
    """Run k-fold training once per shuffled index file (indices 0 to 3).

    For each index, the data is loaded with
    ``data_preprocess.process(38, shuffle_index=True, index_file=<index>)``
    and passed to ``model_gene_based.run_model_kfold`` together with the
    module-level ``epochs`` and the same ``index``. Runs are sequential, in
    ascending index order.
    """
    for shuffle_id in range(4):
        df_train, labels = data_preprocess.process(
            38, shuffle_index=True, index_file=shuffle_id
        )
        model_gene_based.run_model_kfold(
            df_train, labels, epochs, index=shuffle_id
        )
# Script entry point. train() spawns a multiprocessing.Pool, so the guard
# keeps worker processes that re-import this module from starting the
# pipeline again.
if __name__ == "__main__":
    train()
    # train_shuffle()