Debug preprocessing scripts and restructured the whole directory to be more readable.
glam-imperial · Jun 9, 2020 · 8090701 · 8090701
1 parent 3e9fdb1
commit 8090701
Show file tree

Hide file tree

Showing 4 changed files with 622 additions and 0 deletions.
diff --git a/config.yaml b/config.yaml
@@ -0,0 +1,61 @@
+# Run configuration, grouped into model / loss / optimizer / logs / data sections.
+model:
+
+      name: "test111"
+      num_feats: 36
+      batch_size: 4
+      use_speaker: False
+      use_dimension: False
+      num_classes: 3
+
+loss:
+
+    lambda_gp: 5
+    lambda_g_emo_cls: 1
+    lambda_g_spk_cls: 1
+    lambda_g_dim_cls: 1
+    lambda_cycle: 3
+    lambda_id: 2
+
+    num_iters: 20
+    num_iters_decay: 10
+    resume_iters: 0
+
+    train_classifier: True
+    c_to_g_ratio: 1
+    c_to_d_ratio: 1
+
+optimizer:
+
+    beta1: 0.5
+    beta2: 0.999
+
+    g_lr: 0.0001
+    d_lr: 0.0001
+    emo_cls_lr: 0.0001
+    speaker_cls_lr: 0.0001
+    dim_cls_lr: 0.0001
+
+
+logs:
+
+    use_tensorboard: True
+
+    log_dir: './logs'
+    sample_dir: './samples'
+    model_save_dir: './checkpoints'
+
+    log_every: 1
+    sample_every: 1000
+    test_every: 1000
+    model_save_every: 1000
+
+data:
+
+    # dataset_dir and sample_set_dir are read by get_samples_and_labels in
+    # utils/data_preprocessing_utils.py to build the annotation and sample
+    # audio paths. Both are relative paths.
+    dataset_dir: "../data"
+    sample_set_dir: '../data/samples/originals'
+    train_test_split: 0.9
+    normalise_mels: True
+    type: 'world'
+
+verbose: False
+# NOTE(review): YAML loads this value as the plain string "torch.device('cuda')",
+# not as a torch.device object - confirm the consumer converts it.
+device: torch.device('cuda')
diff --git a/utils/__init__.py b/utils/__init__.py
diff --git a/utils/data_preprocessing_utils.py b/utils/data_preprocessing_utils.py
@@ -0,0 +1,334 @@
+'''
+data_preprocessing_utils.py
+
+Author - Max Elliott
+
+Functions for pre-processing the IEMOCAP dataset. Can make mel-specs, WORLD
+features, and labels for each audio clip.
+'''
+
+import torch
+
+from utils import audio_utils
+
+import numpy as np
+import os
+from librosa.util import find_files
+
+
+def get_speaker_from_filename(filename):
+    '''
+    Map an utterance filename to its integer speaker id (0-9).
+
+    The session digit is taken from filename[4] and the F/M letter from
+    filename[-8]; together they select one of the ten speaker ids.
+    Raises KeyError when that two-character code is not a known speaker.
+    '''
+    # Build the '1F' -> 0, '1M' -> 1, ..., '5M' -> 9 lookup table.
+    speaker_ids = {}
+    for session in range(1, 6):
+        for offset, letter in enumerate(('F', 'M')):
+            speaker_ids[str(session) + letter] = 2 * (session - 1) + offset
+
+    session_and_letter = filename[4] + filename[-8]
+    return speaker_ids[session_and_letter]
+
+
+def get_emotion_from_label(category):
+    '''
+    Convert a three-letter emotion code into an integer class label.
+
+    Returns 0-3 for 'ang', 'sad', 'hap' and 'neu' respectively, and -1 for
+    the codes that are excluded ('xxx', 'dis', 'fea', 'oth', 'exc', 'fru',
+    'sur'). Any other code raises KeyError.
+    '''
+    excluded_codes = ('xxx', 'dis', 'fea', 'oth', 'exc', 'fru', 'sur')
+    if category in excluded_codes:
+        return -1
+
+    return {'ang':0, 'sad':1, 'hap':2, 'neu':3}[category]
+
+
+def getOneHot(label, n_labels):
+    '''
+    Return a length-n_labels numpy vector of zeros with a 1 at index `label`.
+    '''
+    encoded = np.zeros(n_labels)
+    encoded[label] = 1
+    return encoded
+
+
+def cont2list(cont, binned = False):
+    '''
+    Parse a bracketed triple of numbers into a list of three values.
+
+    cont   -- string of the form "[2.5000, 2.6667, 3.3333]"; surrounding
+              whitespace (e.g. a trailing newline) is ignored.
+    binned -- when True, each value is replaced by a discrete level:
+              0 (value <= 2), 1 (2 < value < 4) or 2 (value >= 4).
+
+    Returns a list of three floats, or three ints when binned.
+    Raises ValueError if the string does not hold exactly three numbers.
+
+    The values are split on commas rather than taken from fixed character
+    offsets, so all decimal digits are kept and other field widths work.
+    '''
+    fields = cont.strip().strip('[]').split(',')
+    if len(fields) != 3:
+        raise ValueError("expected three comma-separated values, got: {!r}".format(cont))
+
+    values = [float(field) for field in fields]
+    if not binned:
+        return values
+
+    # Option to make the values discrete: low(0), med(1) or high(2)
+    return [0 if val <= 2 else 1 if val < 4 else 2 for val in values]
+
+
+def concatenate_labels(emo, speaker, dims, dims_dis):
+    '''
+    Pack all labels for one utterance into a single torch tensor of size 8.
+
+    Layout: [emo, speaker, dims[0], dims[1], dims[2],
+             dims_dis[0], dims_dis[1], dims_dis[2]].
+    '''
+    ordered_values = (
+        emo,
+        speaker,
+        dims[0], dims[1], dims[2],
+        dims_dis[0], dims_dis[1], dims_dis[2],
+    )
+
+    packed = torch.zeros(8)
+    for position, value in enumerate(ordered_values):
+        packed[position] = value
+
+    return packed
+
+
+def _read_utterance_labels(label_path, filename):
+    '''
+    Find the annotation row for `filename` in `label_path` and return its
+    label tensor (as built by concatenate_labels).
+
+    Only rows starting with '[' are considered; a row matches when its second
+    tab-separated field equals the filename without its 4-character extension.
+    Raises ValueError when no row matches, instead of failing later on an
+    unbound local variable.
+    '''
+    utterance_id = filename[:-4]
+
+    with open(label_path, 'r') as label_file:
+        for row in label_file:
+            if not row.startswith('['):
+                continue
+
+            split = row.split("\t")
+            if split[1] != utterance_id:
+                continue
+
+            category = get_emotion_from_label(split[2])
+            dimensions = cont2list(split[3])
+            dimensions_dis = cont2list(split[3], binned = True)
+            speaker = get_speaker_from_filename(filename)
+
+            return concatenate_labels(category, speaker, dimensions, dimensions_dis)
+
+    raise ValueError("no annotation for {!r} found in {!r}".format(utterance_id, label_path))
+
+
+def get_wav_and_labels(filename, data_dir):
+    '''
+    Load one utterance from the dataset together with its labels.
+
+    filename -- wav filename; the audio is read from <data_dir>/audio/ and
+                the annotations from <data_dir>/annotations/<filename[:-9]>.txt
+    data_dir -- root directory of the dataset
+
+    Returns (audio, labels): audio as a float32 numpy array and labels as the
+    size-8 tensor from concatenate_labels. Raises ValueError if the
+    annotation file has no row for this utterance.
+    '''
+    wav_path = os.path.join(data_dir, "audio", filename)
+    label_path = os.path.join(data_dir, "annotations", filename[:-9] + ".txt")
+
+    labels = _read_utterance_labels(label_path, filename)
+
+    audio = audio_utils.load_wav(wav_path)
+    audio = np.array(audio, dtype = np.float32)
+
+    return audio, labels
+
+
+def get_samples_and_labels(filename, config):
+    '''
+    Load one utterance from the sample set together with its labels.
+
+    filename -- wav filename inside config['data']['sample_set_dir']
+    config   -- loaded configuration; config['data']['sample_set_dir'] and
+                config['data']['dataset_dir'] are read
+
+    Returns (audio, labels) in the same form as get_wav_and_labels. Raises
+    ValueError if the annotation file has no row for this utterance.
+    '''
+    wav_path = config['data']['sample_set_dir'] + "/" + filename
+    folder = filename[:-9]
+    # NOTE(review): this uses "Annotations" while get_wav_and_labels uses
+    # "annotations" - confirm which directory name is correct.
+    label_path = config['data']['dataset_dir'] + "/Annotations/" + folder + ".txt"
+
+    labels = _read_utterance_labels(label_path, filename)
+
+    audio = audio_utils.load_wav(wav_path)
+    audio = np.array(audio, dtype = np.float32)
+
+    return audio, labels
+
+
+def get_filenames(data_dir):
+    '''
+    List the wav files found under data_dir by find_files, as base names
+    with the last four characters (the ".wav" extension) removed.
+    '''
+    return [os.path.basename(path)[:-4]
+            for path in find_files(data_dir, ext = 'wav')]
+
+
+if __name__ == '__main__':
+
+    # Length bounds kept from earlier analysis; not used by the code below.
+    min_length = 0 # actual is 59
+    max_length = 688
+
+    data_dir = '/Users/Max/MScProject/data'
+    # Despite the name, this is the audio directory that is scanned for wavs.
+    annotations_dir = os.path.join(data_dir, "audio")
+    files = find_files(annotations_dir, ext = 'wav')
+
+    filenames = [os.path.basename(path) for path in files]
+
+
+
+    ############################################
+    #      Code for making mels and labels     #
+    ############################################
+    found = 0
+    durations = []
+    longest_length = 0
+    longest_name = ""
+    for index, name in enumerate(filenames):
+        # Debug output for any file past the 10000th.
+        if index > 10000:
+            print(name)
+
+        wav, labels = get_wav_and_labels(name, data_dir)
+        labels = np.array(labels)
+
+        # Keep only utterances with one of the four emotion classes (0-3).
+        if labels[0] in range(0,4) and name[0:3] == 'Ses':
+
+            # Duration in seconds, assuming 16 kHz audio - TODO confirm
+            # against audio_utils.load_wav.
+            seconds = wav.shape[0]/16000.
+            durations.append(seconds)
+            found += 1
+
+            if seconds > longest_length:
+                longest_length = seconds
+                longest_name = name
+
+        processed = index + 1
+        if processed % 100 == 0:
+            print(processed, " complete.")
+            print(found, "found.")
+
+    print(found, "found.")
+    print(f"longest + {longest_name}")
+
+    # Sum only the shortest 90% of the kept utterances.
+    shortest_first = sorted(durations)
+    kept = shortest_first[:int(len(shortest_first)*0.9)]
+    print("Total seconds =", np.sum(kept))
+
+    # n, bins, patches = plt.hist(lengths, bins = 32)
+    # plt.xlabel('Sequence length / seconds')
+    # plt.xlim(0, 32)
+    # plt.ylabel('Count')
+    # plt.title(r'Histogram of sequence lengths for 4 emotional categories')
+    # plt.show()
+
+    ############################################
+    #      Loop through mels for analysis      #
+    ############################################
+    # files = find_files(data_dir + "/mels", ext = 'npy')
+    # lengths = []
+    # for f in files:
+    #
+    #     mel = np.load(f)
+    #     lengths.append(mel.shape[1])
+    #     # print(mel.shape)
+    #
+    # n, bins, patches = plt.hist(lengths, bins = 22)
+    # plt.xlabel('Sequence length')
+    # plt.ylabel('Count')
+    # plt.title(r'New histogram of sequence lengths for 4 emotional categories')
+    # plt.show()
+
+    ############################################
+    #     Loop through labels for analysis     #
+    ############################################
+    # files = find_files(data_dir + "/labels", ext = 'npy')
+    # category_counts = np.zeros((4))
+    # speaker_counts = np.zeros((10))
+    # for f in files:
+    #
+    #     labels = np.load(f)
+    #     cat = int(labels[0])
+    #     speaker = int(labels[1])
+    #     category_counts[cat] += 1
+    #     speaker_counts[speaker] += 1
+    #
+    # print(category_counts)
+    # print(speaker_counts)
+    # #### RESULTS ####
+    # # [ 549.  890.  996. 1605.] 4040 total
+    # # [416. 425. 353. 364. 448. 480. 342. 370. 473. 369.]
+    # #### # # # # ####
+    #
+    # def make_autopct(values):
+    #
+    #     def my_autopct(pct):
+    #         total = sum(values)
+    #         val = int(round(pct*total/100.0))
+    #         return '{p:.2f}%  ({v:d})'.format(p=pct,v=val)
+    #
+    #     return my_autopct
+    #
+    # plt.pie(category_counts, labels = ['Happy','Sad','Angry','Neutral'],
+    #         autopct =make_autopct(category_counts), shadow=False)
+    # plt.show()
+    #
+    # plt.pie(speaker_counts, labels = ['Ses01F','Ses01M','Ses02F','Ses02M','Ses03F',
+    #                                 'Ses03M','Ses04F','Ses04M','Ses05F','Ses05M'],
+    #         autopct ='%1.1f%%', shadow=False)
+    # plt.show()
+
+    # 1.34591066837310
+
+
+    ############################################
+    #   Finding min and max intensity of mels  #
+    ############################################
+    # i = 0
+    # mels_made = 0
+    # mel_lengths = []
+    #
+    # max_intensity = 0
+    # min_intensity = 99999999
+    #
+    # for f in filenames:
+    #
+    #     wav, labels = get_wav_and_labels(f, data_dir)
+    #     mel = audio_utils.wav2melspectrogram(wav)
+    #     labels = np.array(labels)
+    #     if labels[0] != -1:
+    #
+    #         # mel_lengths.append(mel.shape[1])
+    #         max_val = np.max(mel)
+    #         min_val = np.min(mel)
+    #
+    #         if max_val > max_intensity:
+    #             max_intensity = max_val
+    #         if min_val < min_intensity:
+    #             min_intensity = min_val
+    #         mels_made += 1
+    #
+    #     i += 1
+    #     if i % 100 == 0:
+    #         # print(mel_lengths[mels_made-1])
+    #         print(mel[:, 45])
+    #         print(max_intensity, ", ", min_intensity)
+    #         print(i, " complete.")
+    #         print(mels_made, "mels made.")
+    #
+    # print("max = {}".format(max_intensity))
+    # print("min = {}".format(min_intensity))
+    #
+    # np.save('./stats/all_mel_lengths', np.array(mel_lengths))
+    #
+    # n, bins, patches = plt.hist(mel_lengths, bins = 22)
+    # plt.xlabel('Sequence length')
+    # plt.ylabel('Count')
+    # plt.title(r'Histogram of sequence lengths for 4 emotional categories')
+    # plt.show()
+    #
+    # mel_lengths = sorted(mel_lengths)
+    # print(mel_lengths[0:30])
+    # split_index = int(len(mel_lengths)*0.9)
+    # print(mel_lengths[split_index])  # IS MAX LENGTH OF mels
+    # print(mel_lengths[0])  # IS MIN LENGTH OF mels