-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
99 lines (76 loc) · 2.61 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import train
import numpy as np
from PFDFile import *
import os
# Candidate directories. main() reads the first with pular=True and the second
# with pular=False, but spells the paths out itself instead of using this list.
DIRS = ['./p309p_pfd', './p309n_pfd']
def make_data(dir, pular, most_file_num=1000):
    """Build a feature matrix and a label column from the files in a directory.

    Every file is loaded with ``PFD`` and reduced to one feature row laid out
    as ``[sum of the profile, reduced chi^2, subband sums along axis 1...]``.

    Args:
        dir: Directory holding the candidate files. The name shadows the
            builtin but is kept so keyword callers keep working.
        pular: Truthy labels every row 1, falsy labels every row 0.
        most_file_num: Upper bound on the number of files that are read.

    Returns:
        A tuple ``(X, Y)``: ``X`` is 2-D with one row per file that was read,
        ``Y`` has shape ``(n_rows, 1)`` and holds the constant label.

    Raises:
        ValueError: If no file was read (empty directory or
            ``most_file_num <= 0``), or if the feature rows do not all have
            the same length and therefore cannot be stacked.
    """
    rows = []
    # os.listdir returns names in arbitrary order; sorting makes both the
    # selection under most_file_num and the row order reproducible.
    for cnt, file in enumerate(sorted(os.listdir(dir))):
        if cnt >= most_file_num:
            break
        print(cnt, file)
        # os.path.join instead of `dir + './' + file`, which produced
        # "<dir>./<file>" and pointed at a non-existent directory on POSIX.
        cand = PFD(os.path.join(dir, file))

        # presumably an array of shape (32, ?) -- TODO confirm against PFDFile
        subband = cand.get_subbands()
        subband_sum = np.sum(subband, axis=1)

        # presumably a sequence of profile values of unknown length -- TODO confirm
        profile = cand.getprofile()
        profile_sum = np.sum(profile)

        # a single number: the reduced chi^2 of the current summed profile
        chi2 = cand.calc_redchi2()

        # NOTE: cand.get_subints() and cand.plot_chi2_vs_DM() were considered
        # as further feature sources but are not used.
        rows.append(np.hstack((np.array([profile_sum, chi2]), subband_sum)))

    if not rows:
        raise ValueError(
            'no candidate files were read from %r (most_file_num=%r)'
            % (dir, most_file_num)
        )

    # Stack once at the end: linear instead of quadratic copying, and the
    # result is 2-D even for a single file, so X.shape[0] is always the
    # number of samples.
    X = np.vstack(rows)
    label = 1 if pular else 0
    Y = np.full((X.shape[0], 1), label, dtype=int)
    return X, Y
def save_data(X, Y):
    """Dump ``(X, Y)`` as a single tuple to ``saved_data.fzy``.

    The file is written with joblib into the current working directory and a
    confirmation line is printed afterwards.

    Args:
        X: Feature matrix to persist.
        Y: Labels to persist alongside ``X``.
    """
    # sklearn.externals.joblib was deprecated and later removed from
    # scikit-learn; prefer the standalone package and fall back to the
    # bundled copy only on old installations.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    with open('saved_data.fzy', 'wb') as fo:
        joblib.dump((X, Y), fo)
    print('data saved')
def get_data():
    """Load the ``(X, Y)`` tuple stored in ``saved_data.fzy``.

    Returns:
        The tuple ``(X, Y)`` read from ``saved_data.fzy`` in the current
        working directory.
    """
    # sklearn.externals.joblib was deprecated and later removed from
    # scikit-learn; prefer the standalone package and fall back to the
    # bundled copy only on old installations.
    try:
        import joblib
    except ImportError:
        from sklearn.externals import joblib
    # SECURITY: joblib.load unpickles arbitrary objects; only load a file
    # that this project produced itself.
    X, Y = joblib.load('saved_data.fzy')
    return X, Y
def main():
    """Build the dataset, save it, and run several classifiers on it.

    Steps:
      1. Read './p309p_pfd' with pular=True and './p309n_pfd' with
         pular=False via make_data, and stack the two results.
      2. Persist the combined arrays with save_data.
      3. Split 80/20 with a fixed random_state.
      4. For each classifier call train.train(...) and then train.test(...)
         with the same name. Presumably train.train fits and stores the model
         under that name and train.test evaluates it -- confirm in train.py.
    """
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.ensemble import GradientBoostingClassifier
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import train_test_split
    from sklearn.neural_network import MLPClassifier
    from sklearn.svm import SVC
    from sklearn.tree import DecisionTreeClassifier

    X1, Y1 = make_data('./p309p_pfd', pular=True)
    X2, Y2 = make_data('./p309n_pfd', pular=False)
    X = np.vstack((X1, X2))
    # make_data returns labels as a column; flatten to the 1-D vector that
    # is handed to train_test_split and the classifiers.
    Y = np.vstack((Y1, Y2))
    Y = Y.reshape(Y.shape[0])
    save_data(X, Y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=.2, random_state=0)

    models = [
        ('SVC', SVC()),
        ('RandomForest', RandomForestClassifier(n_estimators=10)),
        ('GradientBoosting',
         GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                    max_depth=1, random_state=0)),
        ('AdaBoost',
         AdaBoostClassifier(DecisionTreeClassifier(max_depth=3),
                            algorithm="SAMME", n_estimators=200)),
    ]
    for name, clf in models:
        train.train(X_train, y_train, clf, name)
        # The original called `tain.test` for every model after SVC, which
        # raised NameError; the module is `train`.
        train.test(X_test, y_test, name)

    # NOTE(review): the MLP is constructed but never trained or evaluated,
    # exactly as in the original; add it to `models` if that was intended.
    MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(8, 8, 8), random_state=1)
# Script entry point. NOTE(review): the call is not guarded by
# `if __name__ == "__main__":`, so importing this module also runs the
# whole pipeline.
main()