Skip to content

Commit

Permalink
Cleaning up....
Browse files Browse the repository at this point in the history
  • Loading branch information
max-elliott committed Jun 13, 2020
1 parent 1c9464d commit bd833ff
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 356 deletions.
120 changes: 58 additions & 62 deletions stargan/classifiers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
'''
"""
classifiers.py
Author - Max Elliott
Expand All @@ -7,7 +7,7 @@
auxiliary classifier for categorical emotion recognition. Dimension_Classifier
is the auxiliary classifier for dimensional emotion recognition (didn't end up
being used in this thesis).
'''
"""

import torch
import torch.nn as nn
Expand All @@ -17,12 +17,12 @@


class Emotion_Classifier(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, num_classes, bi = False,
device = 'cuda'):
'''
def __init__(self, input_size, hidden_size, num_layers, num_classes, bi=False,
device='cuda'):
"""
NOTE: input size must be directly divisible by 4
Is also used for speaker classifier
'''
"""
super(Emotion_Classifier, self).__init__()
self.hidden_size = hidden_size
self.input_size = input_size # == n_mels/world feats
Expand All @@ -35,21 +35,21 @@ def __init__(self, input_size, hidden_size, num_layers, num_classes, bi = False,

kernel = 7
padding = int((kernel-1)/2)
self.conv1 = nn.Conv2d(1, 16, kernel, padding = padding)
self.maxpool1 = nn.MaxPool2d(2, stride = 2)
self.conv2 = nn.Conv2d(16, 24, kernel, padding = padding)
self.maxpool2 = nn.MaxPool2d(2, stride = 2)
self.conv3 = nn.Conv2d(24, self.num_outchannels, kernel, padding = padding)
self.maxpool3 = nn.MaxPool2d(2, stride = 2)

self.lstm1 = nn.LSTM(input_size = self.num_outchannels*(self.input_size//8),
hidden_size = self.hidden_size, num_layers = self.num_layers,
batch_first = True, bidirectional = bi)
self.conv1 = nn.Conv2d(1, 16, kernel, padding=padding)
self.maxpool1 = nn.MaxPool2d(2, stride=2)
self.conv2 = nn.Conv2d(16, 24, kernel, padding=padding)
self.maxpool2 = nn.MaxPool2d(2, stride=2)
self.conv3 = nn.Conv2d(24, self.num_outchannels, kernel, padding=padding)
self.maxpool3 = nn.MaxPool2d(2, stride=2)

self.lstm1 = nn.LSTM(input_size=self.num_outchannels*(self.input_size//8),
hidden_size=self.hidden_size, num_layers=self.num_layers,
batch_first=True, bidirectional=bi)
self.att = Average_Weighted_Attention(self.hidden_size*self.m_factor)

self.fc = nn.Linear(self.m_factor*hidden_size, 64)
self.drop = nn.Dropout(p = 0.2)
self.out = nn.Linear(64,self.num_classes)
self.drop = nn.Dropout(p=0.2)
self.out = nn.Linear(64, self.num_classes)

def forward(self, x_data, x_lens):
"""
Expand All @@ -63,7 +63,6 @@ def forward(self, x_data, x_lens):
curr_device = x_data.device

# Convolutional layers
# x_data = x.unsqueeze(1)
x_data = self.maxpool1(F.relu(self.conv1(x_data)))
x_data = self.maxpool2(F.relu(self.conv2(x_data)))
x_data = self.maxpool3(F.relu(self.conv3(x_data)))
Expand All @@ -75,7 +74,6 @@ def forward(self, x_data, x_lens):
x_data = x_data.contiguous().view(batch_size, -1, self.num_outchannels*(no_features//8))
# Now x = (B, max_l//8, channels*(n_mels//8))


x_data = nn.utils.rnn.pack_padded_sequence(x_data, x_lens,
batch_first=True,
enforce_sorted=True)
Expand All @@ -86,8 +84,8 @@ def forward(self, x_data, x_lens):
c0 = torch.zeros(self.m_factor*self.num_layers, batch_size,
self.hidden_size).to(device=curr_device, dtype=torch.float)

#LSTM returns: (seq_len, batch, num_directions * hidden_size),
# ((num_layers * num_directions, batch, hidden_size), c_n)
# LSTM returns: (seq_len, batch, num_directions * hidden_size),
# ((num_layers * num_directions, batch, hidden_size), c_n)
x_data, _ = self.lstm1(x_data, (h0, c0))

x_data, x_lens = torch.nn.utils.rnn.pad_packed_sequence(x_data, batch_first=True)
Expand All @@ -110,113 +108,111 @@ class Dimension_Classifier(nn.Module):
Uses three conv2d->maxpooling layers, into two separate sequential modelling
networks for prediction of valence and arousal.
"""
def __init__(self, input_size, hidden_size, num_layers, bi = False, device = 'cuda'):
'''
def __init__(self, input_size, hidden_size, num_layers, bi=False, device='cuda'):
"""
NOTE: input size must be directly divisible by 8 (three stride-2 max-pooling layers)
'''
"""
super(Dimension_Classifier, self).__init__()
self.hidden_size = hidden_size
self.input_size = input_size # == n_mels
self.input_size = input_size # == n_mels
self.num_layers = num_layers
self.num_outchannels = 32
self.m_factor = 2 if bi else 1

kernel = 7
padding = int((kernel-1)/2)
self.conv1 = nn.Conv2d(1, 16, kernel, padding = padding)
self.maxpool1 = nn.MaxPool2d(2, stride = 2)
self.conv2 = nn.Conv2d(16, 24, kernel, padding = padding)
self.maxpool2 = nn.MaxPool2d(2, stride = 2)
self.conv3 = nn.Conv2d(24, self.num_outchannels, kernel, padding = padding)
self.maxpool3 = nn.MaxPool2d(2, stride = 2)
self.conv1 = nn.Conv2d(1, 16, kernel, padding=padding)
self.maxpool1 = nn.MaxPool2d(2, stride=2)
self.conv2 = nn.Conv2d(16, 24, kernel, padding=padding)
self.maxpool2 = nn.MaxPool2d(2, stride=2)
self.conv3 = nn.Conv2d(24, self.num_outchannels, kernel, padding=padding)
self.maxpool3 = nn.MaxPool2d(2, stride=2)

self.valence_predictor = Single_Dimension_Classifier(
input_size = self.input_size*(self.num_outchannels//8),
hidden_size = self.hidden_size,
num_layers = self.num_layers,
bi = bi, device = device)
input_size=self.input_size*(self.num_outchannels//8),
hidden_size=self.hidden_size,
num_layers=self.num_layers,
bi=bi, device=device)
self.arousal_predictor = Single_Dimension_Classifier(
input_size = self.input_size*(self.num_outchannels//8),
hidden_size = self.hidden_size,
num_layers = self.num_layers,
bi = bi, device = device)
input_size=self.input_size*(self.num_outchannels//8),
hidden_size=self.hidden_size,
num_layers=self.num_layers,
bi=bi, device=device)
# self.dominance_predictor = Single_Dimension_Classifier(
# input_size = (self.input_size*self.num_outchannels)//8,
# hidden_size = self.hidden_size,
# num_layers = self.num_layers,
# bi = bi, device = device)

def forward(self, x_data, x_lens):
'''
"""
x[0] is size (batch_size, max_seq_length, feature_dim)
x[1] is size (batch_size, 1), contains seq_lens
batch is in descending seq_len order
'''
"""

batch_size = x_data.size(0)
no_features = x_data.size(2)

#Convolutional layers
# x_data = x_data.unsqueeze(1)
# Convolutional layers
x_data = self.maxpool1(F.relu(self.conv1(x_data)))
x_data = self.maxpool2(F.relu(self.conv2(x_data)))
x_data = self.maxpool3(F.relu(self.conv3(x_data)))
x_lens = x_lens//8 # seq_lens are now ~8 times shorter after three 2x max-pools
# x = (B, channels, max_l//8, n_mels//8)

#Recurrent layers
# Recurrent layers

x_data = x_data.permute(0,2,1,3)
x_data = x_data.contiguous().view(batch_size, -1, self.num_outchannels*no_features//8)
#Now x = (B, max_l//4, channels*n_mels//4)

# Now x = (B, max_l//8, channels*n_mels//8)

x_data = nn.utils.rnn.pack_padded_sequence(x_data, x_lens,
batch_first=True,
enforce_sorted=True)

#PASS INTO 3 single_dim_predictors
# Pass into the 2 active single_dim_predictors (valence, arousal); dominance is disabled
x_val = self.valence_predictor(x_data, batch_size)
x_aro = self.arousal_predictor(x_data, batch_size)
# x_dom = self.dominance_predictor(x_data, batch_size)


return x_val, x_aro#, x_dom
return x_val, x_aro #, x_dom


class Single_Dimension_Classifier(nn.Module):
def __init__(self, input_size, hidden_size, num_layers, bi = False, device = 'cuda'):
def __init__(self, input_size, hidden_size, num_layers, bi=False, device='cuda'):
super(Single_Dimension_Classifier, self).__init__()

self.hidden_size = hidden_size
self.input_size = input_size # == n_mels
self.input_size = input_size # == n_mels
self.num_layers = num_layers
self.num_outchannels = 32
self.m_factor = 2 if bi else 1

self.device = device
self.device = device # Now legacy as isn't used

self.lstm1 = nn.LSTM(input_size = self.input_size,
hidden_size = self.hidden_size, num_layers = self.num_layers,
batch_first = True, bidirectional = bi)
self.lstm1 = nn.LSTM(input_size=self.input_size,
hidden_size=self.hidden_size, num_layers=self.num_layers,
batch_first=True, bidirectional=bi)
self.att = Average_Weighted_Attention(self.hidden_size*self.m_factor)

self.fc1 = nn.Linear(self.hidden_size*self.m_factor, (self.hidden_size*self.m_factor)//2)
self.fc2 = nn.Linear((self.hidden_size*self.m_factor)//2, 3)

def forward(self, x, batch_size):

curr_device = x.device

h0 = torch.zeros(self.m_factor*self.num_layers, batch_size,
self.hidden_size)#.to(device = self.device, dtype=torch.float)
self.hidden_size).to(device=curr_device, dtype=torch.float)

c0 = torch.zeros(self.m_factor*self.num_layers, batch_size,
self.hidden_size)#.to(device = self.device, dtype=torch.float)
self.hidden_size).to(device=curr_device, dtype=torch.float)

#LSTM returns: (seq_len, batch, num_directions * hidden_size),
# ((num_layers * num_directions, batch, hidden_size), c_n)
x_data,_ = self.lstm1(x, (h0,c0))
# x_data,_ = self.gru1(x_data, h0)
# LSTM returns: (seq_len, batch, num_directions * hidden_size),
# ((num_layers * num_directions, batch, hidden_size), c_n)
x_data, _ = self.lstm1(x, (h0, c0))

x_data, x_lens = torch.nn.utils.rnn.pad_packed_sequence(x_data, batch_first=True)

Expand All @@ -229,4 +225,4 @@ def forward(self, x, batch_size):


if __name__ == '__main__':
pass
pass
6 changes: 2 additions & 4 deletions stargan/logger.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
'''
"""
logger.py
Altered version of Logger.py by hujinsen. Original source can be found at:
https://github.com/hujinsen/pytorch-StarGAN-VC
'''
"""
import tensorflow as tf

import os


Expand All @@ -26,4 +25,3 @@ def scalar_summary(self, tag, value, step):
"""Add scalar summary."""
summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
self.writer.add_summary(summary, step)
# print("scalar_summary called.")
Loading

0 comments on commit bd833ff

Please sign in to comment.