"""SAMoSA_data_prepare.py

Load the pickled audio dataset ``ad_{audio_sr}_{duration}.pkl`` from the
configured dataset directory, pass every stored waveform through
``vggish_input.wavform_to_concat_examples`` and save the result next to the
input file with a ``_samosa`` suffix.
"""
import os
import hydra
import joblib
import numpy as np
import SAMoSA_data_utils.vggish_input as vggish_input
def SAMoSA_audio_data_prep(wave):
    """Convert one waveform with the project's ``vggish_input`` front-end.

    Args:
        wave: A single audio waveform, forwarded unchanged as the first
            positional argument of ``vggish_input.wavform_to_concat_examples``.

    Returns:
        Whatever ``vggish_input.wavform_to_concat_examples`` returns for
        ``wave`` with the fixed settings below.

    NOTE(review): ``sr`` is fixed at 16000 here, while ``main`` reads
    ``audio_sr`` from the config to pick the input file. Confirm that the
    stored waveforms are always sampled at 16 kHz.
    """
    # Fixed front-end settings used for every waveform.
    frontend_settings = {
        "lower_edge_hertz": 10,
        "upper_edge_hertz": 8000,
        "sr": 16000,
    }
    return vggish_input.wavform_to_concat_examples(wave, **frontend_settings)
@hydra.main(config_path="configs", config_name='config', version_base='1.3')
def main(cfg):
    """Convert the pickled audio dataset with ``SAMoSA_audio_data_prep``.

    Reads ``cfg.Dataset`` for ``dataset_dir``, ``duration`` and the optional
    ``seed`` (default 0) and ``audio_sr`` (default 16000). Loads
    ``<dataset_dir>/ad_{audio_sr}_{duration}.pkl``, converts every waveform
    and writes the result to ``<dataset_dir>/ad_{audio_sr}_{duration}_samosa.pkl``.

    The loaded object is walked as nested mappings
    ``user -> label -> audio_file -> sequence of waveforms``; the output keeps
    the same keys, with each waveform replaced by its converted form.

    Args:
        cfg: Hydra configuration object exposing a ``Dataset`` section.

    Raises:
        ValueError: If the dataset directory or the input pickle file does
            not exist.
    """
    dataset_cfg = cfg.Dataset

    seed = getattr(dataset_cfg, 'seed', 0)
    np.random.seed(seed)

    audio_sr = getattr(dataset_cfg, 'audio_sr', 16000)
    snippet_duration = dataset_cfg.duration
    dataset_dir = dataset_cfg.dataset_dir
    if not os.path.exists(dataset_dir):
        raise ValueError("The dataset directory does not exist, please run 1-prep_data.py and 2-mel_converter.py first")

    audio_dataset_path = os.path.join(
        dataset_dir, f'ad_{audio_sr}_{snippet_duration}.pkl'
    )
    print(audio_dataset_path)
    if not os.path.exists(audio_dataset_path):
        # It is the pickle file, not the directory, that is missing here.
        raise ValueError(
            f"The audio dataset file '{audio_dataset_path}' does not exist, "
            "please run prep_data.py first"
        )

    # NOTE(review): joblib.load unpickles arbitrary objects; only point this
    # at dataset files produced by the project's own preparation scripts.
    audio_data = joblib.load(audio_dataset_path)

    # Rebuild the nested structure with every waveform converted. The name is
    # rebound so the raw data can be released once the conversion is done.
    audio_data = {
        user: {
            label: {
                audio_file: [SAMoSA_audio_data_prep(wave) for wave in waves]
                for audio_file, waves in files.items()
            }
            for label, files in labels.items()
        }
        for user, labels in audio_data.items()
    }

    # Insert the suffix before the extension only; str.replace would also
    # rewrite any '.pkl' occurring in a directory name.
    root, ext = os.path.splitext(audio_dataset_path)
    joblib.dump(audio_data, f'{root}_samosa{ext}')
# Script entry point: run the Hydra-decorated main only when executed directly.
if __name__ == "__main__":
    main()