-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnerve_objective_batches.py
120 lines (101 loc) · 4.2 KB
/
nerve_objective_batches.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Script setup: load the raw train/test interaction CSVs for the dataset named
# by sys.argv[1], build the triplet dataset used for batch sampling, and derive
# the id-space sizes passed to the model further down.
#
# Command-line arguments used across this script:
#   argv[1]  dataset directory name under /data/sidana/recnet_draft/
#   argv[2]  alpha (parsed with int() when the model is constructed)
#   argv[3]  beta  (parsed with int() when the model is constructed)
#   argv[4]  number of training batches
import random
import numpy as np
# NOTE(review): `random` is imported a second time here; the repeat is a no-op.
import random
import pickle
import letor_metrics
import pyximport
import sys
from tqdm import tqdm
# Install the pyximport import hook before the remaining imports run.
# Presumably required by a Cython (.pyx) module imported below — TODO confirm which one.
pyximport.install()
import matplotlib
# Both CSVs are comma-separated and have a single header row that is skipped.
raw_data_train = np.loadtxt('/data/sidana/recnet_draft/'+sys.argv[1]+'/recnet_batches/train_all_raw.csv', skiprows = 1, delimiter=',')
raw_data_test = np.loadtxt('/data/sidana/recnet_draft/'+sys.argv[1]+'/recnet_batches/test_all_raw.csv', skiprows = 1, delimiter=',')
# Train and test rows stacked together; used below only to find the largest ids.
raw_data = np.concatenate((raw_data_train, raw_data_test))
from dataset_tt_static import TripletsDataset
# Dataset helper that later provides ds.sample_train_batch(...) for training.
ds = TripletsDataset(raw_data_train, raw_data_test, threshold_user=60, rnd_seed=42)
ds.train_test_split()
ds.init_cached_random()
import tensorflow as tf
import bprnn
import imp
# Sizes are (largest value in the column) + 1, taken over train and test together.
# Column 0 is treated as the user id and column 1 as the item id; this assumes
# ids are usable as 0-based indices — TODO confirm against bprnn.
N_USERS = int(max(raw_data[:, 0])) + 1
N_ITEMS = int(max(raw_data[:, 1])) + 1
N_EMBEDDINGS = 18
import tensorflow.contrib.slim as slim
# Re-executes the bprnn module right after importing it; presumably a leftover
# from interactive (cell-based) use. NOTE(review): `imp` is deprecated in
# Python 3 and removed in 3.12.
imp.reload(bprnn)
#%%
def inner_network(user_emb, item_emb):
    """Map a user/item embedding pair to a single-unit output.

    The two inputs are concatenated along axis 1, passed through one
    32-unit fully connected ReLU layer, and then through a 1-unit fully
    connected layer with no activation.

    Args:
        user_emb: user embedding tensor (presumably 2-D, batch first — TODO
            confirm against bprnn.BPR_NN, which receives this function).
        item_emb: item embedding tensor, concatenated with ``user_emb``.

    Returns:
        The output tensor of the final 1-unit linear layer.
    """
    # NOTE(review): (axis, values) is the pre-1.0 TensorFlow argument order for
    # tf.concat; TF >= 1.0 expects (values, axis). Confirm the pinned version
    # before touching this call.
    pair_features = tf.concat(1, [user_emb, item_emb])
    hidden = slim.fully_connected(
        inputs=pair_features,
        num_outputs=32,
        activation_fn=tf.nn.relu,
    )
    # A wider 64-unit hidden layer was tried here previously and left disabled.
    return slim.fully_connected(
        inputs=hidden,
        num_outputs=1,
        activation_fn=None,
    )
# Build the BPR_NN model, train it for argv[4] mini-batches and print the mean
# of the value fetched from model.target over those batches.
#
# Command-line arguments read here:
#   argv[2], argv[3]  alpha and beta handed to bprnn.BPR_NN (parsed with int())
#   argv[4]           number of training batches
n_train_batches = int(sys.argv[4])
if n_train_batches <= 0:
    # The printed result is a mean over batches, which is undefined for an
    # empty run. Fail before the graph is built rather than hitting a
    # ZeroDivisionError (or printing a meaningless -0.0) at the very end.
    raise ValueError(
        'number of batches (argv[4]) must be positive, got %r' % sys.argv[4]
    )

model = bprnn.BPR_NN(
    N_USERS, N_ITEMS, N_EMBEDDINGS,
    alpha=int(sys.argv[2]), beta=int(sys.argv[3]),
    alpha_reg=0.01, inner_net=inner_network
)
model.build_graph()
model.initialize_session()

# One (embedding_loss, net_loss, regularization, target) tuple per batch.
losses = []
batch_size = 512
average_objective_function = 0.0
# Single training stage; the list form allows (n_batches, optimizer) stages.
# NOTE: the final average divides by n_train_batches, so it is only correct
# while this list holds exactly one stage.
for n_batches, cur_optim in [(n_train_batches, model.trainer_3)]:
    for i in tqdm(range(n_batches)):
        batch = ds.sample_train_batch(n_samples=batch_size)
        fd = {
            model.user_ids: batch['users'],
            model.left_ids: batch['left_items'],
            model.right_ids: batch['right_items'],
            model.target_y: batch['y'],
        }
        # Running cur_optim performs the update step; its fetched value is discarded.
        el, nl, reg, t, _ = model.session.run(
            [model.embedding_loss, model.net_loss, model.regularization, model.target, cur_optim],
            feed_dict=fd
        )
        losses.append((el, nl, reg, t))
        average_objective_function = average_objective_function + t
        if i % 500 == 0:
            # Diagnostics for the progress prints below, which are currently
            # disabled; the norms are computed but not reported.
            user_norm = np.linalg.norm(model.weights_u)
            item_norm = np.linalg.norm(model.weights_i)
            # print('[it {}] weight norms, users: {}, items: {}'.format(i, user_norm, item_norm))
            # print('[it {}] metrics (emb_loss, net_loss, reg, target): {}'.format(i, losses[-1]))

# Turn the accumulated sum into a per-batch mean, reusing the validated count
# instead of re-parsing argv[4].
average_objective_function = average_objective_function / float(n_train_batches)
print(average_objective_function)
#%%
# NOTE: the prediction-export / NDCG evaluation code below is disabled (commented out).
# export_basename = '/data/sidana/recnet_draft/'+sys.argv[1]+'/recnet_batches/vectors/'
# export_pred = open(export_basename + 'pr_'+sys.argv[1]+'_'+sys.argv[2]+sys.argv[3], 'w')
# export_true = open(export_basename + 'gt_'+sys.argv[1]+'_'+sys.argv[2]+sys.argv[3], 'w')
#
# ndcg_vals = []
# for u in tqdm(ds.data_keys, desc='Prediction', leave=True):
# if not u in ds.test:
# continue
# response = np.zeros(len(ds.test[u]))
# fd = {
# model.user_ids: (np.ones(len(ds.test[u]))*u).astype(np.int32),
# model.left_ids: np.array([i for (i, r) in ds.test[u]]).astype(np.int32),
# }
# response += model.session.run(model.embedding_left, feed_dict=fd)[:, 0]
# response += model.session.run(model.left_output, feed_dict=fd)[:, 0]
#
#
#
# # make relevances
# relevances = np.array([r for (i, r) in ds.test[u]])
# items = np.array([i for (i, r) in ds.test[u]]) # it's already sorted by true relevance
# itemsGroundTruth = np.array([i for (i,r) in ds.test[u] if r == 1])
# predicted_ranking = np.argsort(-response)
#
# # write down predictions
# export_pred.write(' '.join(map(str, [u] + list(items[predicted_ranking]))) + '\n')
# export_true.write(' '.join(map(str, [u] + list(itemsGroundTruth))) + '\n')
#
# # calc score
# gain = letor_metrics.ndcg_from_ranking(relevances, predicted_ranking, 10)
# ndcg_vals.append(gain)
#
#
# # In[32]:
#
# print(ndcg_vals)
# In[ ]: