-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnerve_alpha.py
109 lines (86 loc) · 3.73 KB
/
nerve_alpha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import random
import numpy as np
import random
import pickle
import letor_metrics
import pyximport
import sys
from tqdm import tqdm
pyximport.install()
import matplotlib
# Per-dataset directory; the dataset name is the first command-line argument.
dataset_dir = '/data/sidana/recnet/recnet_alpha/' + sys.argv[1]

# Both CSV files are comma-separated and start with one header row to skip.
raw_data_train = np.loadtxt(dataset_dir + '/train_all_raw.csv', delimiter=',', skiprows=1)
raw_data_test = np.loadtxt(dataset_dir + '/test_all_raw.csv', delimiter=',', skiprows=1)

# Train rows followed by test rows in a single array.
raw_data = np.concatenate([raw_data_train, raw_data_test])

from dataset_tt_static import TripletsDataset

# Build the triplet sampler from the two splits, then run its split and
# random-cache initialisation steps (both implemented in dataset_tt_static).
ds = TripletsDataset(
    raw_data_train,
    raw_data_test,
    threshold_user=60,
    rnd_seed=42
)
ds.train_test_split()
ds.init_cached_random()
import tensorflow as tf
import bprnn
import imp
# One more than the largest value found in column 0 / column 1 of the
# combined train+test array, so every id from either split is below the count.
# ndarray.max() is used instead of the builtin max(), which would walk the
# NumPy column element by element in Python.
N_USERS = int(raw_data[:, 0].max()) + 1
N_ITEMS = int(raw_data[:, 1].max()) + 1
# Embedding size is the fourth command-line argument.
N_EMBEDDINGS = int(sys.argv[4])
import tensorflow.contrib.slim as slim
# Re-execute the bprnn module that was imported just above. Presumably a
# leftover from interactive (cell-by-cell) use, where it picks up edits to
# bprnn without restarting the interpreter.
# NOTE(review): `imp` is deprecated in Python 3 and removed in 3.12;
# importlib.reload is the documented replacement.
imp.reload(bprnn)
#%%
def inner_network(user_emb, item_emb):
    """Map a batch of (user, item) embedding pairs to one raw score each.

    The two inputs are concatenated along axis 1, fed through a single
    ReLU fully connected layer whose width is read from the sixth
    command-line argument, and then through a one-unit fully connected
    layer with no activation.

    Returns the output tensor of that final one-unit layer.
    """
    pair = tf.concat([user_emb, item_emb], 1)
    hidden_units = int(sys.argv[6])
    # (A fixed 64-unit hidden layer was used here at one point instead.)
    hidden = slim.fully_connected(
        inputs=pair,
        num_outputs=hidden_units,
        activation_fn=tf.nn.relu
    )
    return slim.fully_connected(inputs=hidden, num_outputs=1, activation_fn=None)
# Hyper-parameters come from the command line:
#   argv[2] -> alpha, argv[3] -> beta, argv[5] -> alpha_reg.
# inner_network is handed over as the callable that builds the scoring net.
model = bprnn.BPR_NN(
    N_USERS,
    N_ITEMS,
    N_EMBEDDINGS,
    alpha=float(sys.argv[2]),
    beta=float(sys.argv[3]),
    alpha_reg=float(sys.argv[5]),
    inner_net=inner_network
)
# Graph construction first, then session start-up.
model.build_graph()
model.initialize_session()
# History of (embedding loss, net loss, regularization, target), one entry
# per training step.
losses = []
batch_size = 512

# Training schedule as (number of steps, optimizer op) stages; there is
# currently a single stage of 10000 steps driven by model.trainer_3.
for n_batches, cur_optim in [(10000, model.trainer_3)]:
    for i in tqdm(range(int(n_batches))):
        batch = ds.sample_train_batch(n_samples=batch_size)
        fd = {
            model.user_ids: batch['users'],
            model.left_ids: batch['left_items'],
            model.right_ids: batch['right_items'],
            model.target_y: batch['y'],
        }
        # The optimizer op is fetched together with the metrics; its own
        # result is discarded.
        fetches = [
            model.embedding_loss,
            model.net_loss,
            model.regularization,
            model.target,
            cur_optim,
        ]
        el, nl, reg, t, _ = model.session.run(fetches, feed_dict=fd)
        losses.append((el, nl, reg, t))

        # Progress report only on every 500th step (including step 0).
        if i % 500 != 0:
            continue
        user_norm = np.linalg.norm(model.weights_u)
        item_norm = np.linalg.norm(model.weights_i)
        print('[it {}] weight norms, users: {}, items: {}'.format(i, user_norm, item_norm))
        print('[it {}] metrics (emb_loss, net_loss, reg, target): {}'.format(i, losses[-1]))
#%%
# Evaluation: score every user's held-out items, write the predicted ranking
# and the relevant items to two text files, and collect NDCG@10 per user.
# Each output line is "<user> <item> <item> ...", space separated.
export_basename = '/data/sidana/recnet/recnet_alpha/'+sys.argv[1]+'/vectors/'
ndcg_vals = []

# `with` guarantees both files are flushed and closed, including when an
# exception interrupts the loop (the handles were previously never closed).
with open(export_basename + 'pr_'+sys.argv[1], 'w') as export_pred, \
        open(export_basename + 'gt_'+sys.argv[1], 'w') as export_true:
    for u in tqdm(ds.data_keys, desc='Prediction', leave=True):
        # Users without any test entries contribute nothing.
        if u not in ds.test or not ds.test[u]:
            continue

        # (item, relevance) pairs for this user, looked up once.
        test_pairs = ds.test[u]

        # make relevances
        relevances = np.array([r for (i, r) in test_pairs])
        # Per the original author's note, this is already sorted by true relevance.
        items = np.array([i for (i, r) in test_pairs])
        itemsGroundTruth = np.array([i for (i, r) in test_pairs if r == 1])

        fd = {
            model.user_ids: (np.ones(len(test_pairs))*u).astype(np.int32),
            model.left_ids: items.astype(np.int32),
        }
        # Both score components share the same feed, so fetch them in a
        # single session call instead of two.
        emb_score, net_score = model.session.run(
            [model.embedding_left, model.left_output],
            feed_dict=fd
        )
        # Accumulate into a float64 buffer, exactly as before, so the
        # summation precision (and therefore the ranking) is unchanged.
        response = np.zeros(len(test_pairs))
        response += emb_score[:, 0]
        response += net_score[:, 0]

        # Indices of the test items ordered from highest to lowest score.
        predicted_ranking = np.argsort(-response)

        # write down predictions
        export_pred.write(' '.join(map(str, [u] + list(items[predicted_ranking]))) + '\n')
        export_true.write(' '.join(map(str, [u] + list(itemsGroundTruth))) + '\n')

        # calc score
        gain = letor_metrics.ndcg_from_ranking(relevances, predicted_ranking, 10)
        ndcg_vals.append(gain)
# In[32]:
print(ndcg_vals)
# In[ ]: