-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmainPreprocessing.py
32 lines (26 loc) · 996 Bytes
/
mainPreprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
from utils.DatasetOptions import DatasetOptions
from preprocessing.Preprocessor import Preprocessor
import helpers.constants as constantsPATREC
# Project root. Currently pinned to a machine-specific absolute path; the
# location-independent variant (parent of the directory holding this file)
# is kept here for reference:
#   dirProject = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + '/'
dirProject = '/home/thomas/fusessh/scicore/projects/patrec'

# All dataset files are looked up below <project root>/data.
dirData = os.path.join(dirProject, 'data')

# Configuration handed to DatasetOptions. The meaning of each key is defined
# by DatasetOptions / Preprocessor, not by this script.
dict_dataset_options = {
    'dir_data': dirData,
    'data_prefix': 'patrec',
    'dataset': '20122015',
    # Optional restriction, currently disabled:
    # 'subgroups': ['DK'],
    'grouping': 'verylightgrouping',
    'encoding': 'categorical',
    'newfeatures': {'names': constantsPATREC.NEW_FEATURES},
    'featurereduction': None,
    'filtering': None,
}

options = DatasetOptions(dict_dataset_options)
preproc = Preprocessor(options)

# Run the preprocessing stages one after another. The order is taken as-is
# from the original script; presumably each stage depends on the output of
# the previous one -- confirm against Preprocessor before reordering.
preproc.splitColumns()
preproc.clean()
preproc.group()
preproc.createFeatureSet()
preproc.encodeFeatures()
preproc.fuse()