-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcart_prod_form.py
43 lines (34 loc) · 1.3 KB
/
cart_prod_form.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import pandas as pd
import numpy as np
import sys
# Build the full user x item cartesian product for a test split.
#
# Usage: python cart_prod_form.py <dataset>
# Reads  /data/sidana/recnet_draft/<dataset>/recnet_all/test_all_raw.csv
# Writes /data/sidana/recnet_draft/<dataset>/recnet_all/test_all_cart_raw.csv

# Input ratings at or above this value are treated as clicks (positives).
POSITIVE_THRESHOLD = 4
# Rating values written to the output for clicked / non-clicked pairs.
POSITIVE_RATING = 4
NEGATIVE_RATING = 1
OUTPUT_COLUMNS = ['userId', 'movieId', 'rating', 'timestamp']


def build_cartesian_ratings(df):
    """Return one row per (user, item) pair drawn from *df*.

    Every distinct ``userId`` is paired with every distinct ``movieId``.
    A pair gets rating ``POSITIVE_RATING`` when *df* holds at least one row
    for it with ``rating >= POSITIVE_THRESHOLD``; every other pair gets
    ``NEGATIVE_RATING``.

    Args:
        df: DataFrame with ``userId``, ``movieId`` and ``rating`` columns.
            A ``timestamp`` column is not needed: the earlier version sorted
            each user's rows by it, but the result never depended on row
            order.

    Returns:
        A new DataFrame with columns ``userId``, ``movieId``, ``rating`` and
        ``timestamp``, where ``timestamp`` is a running counter starting
        at 1. An input without rows yields an empty frame with the same
        columns.
    """
    users = list(set(df['userId']))
    items_all = list(set(df['movieId']))

    # Collect the positive pairs in a single pass instead of re-filtering
    # the whole frame once per user.
    clicked = df[df['rating'] >= POSITIVE_THRESHOLD]
    positives = set(zip(clicked['userId'], clicked['movieId']))

    # NOTE(review): the indentation of the original script was lost, so the
    # nesting of its `ts += 1` is not visible. The counter is assumed to
    # advance once per emitted row -- confirm against the original file.
    pairs = ((user, item) for user in users for item in items_all)
    rows = [
        [
            user,
            item,
            POSITIVE_RATING if (user, item) in positives else NEGATIVE_RATING,
            ts,
        ]
        for ts, (user, item) in enumerate(pairs, start=1)
    ]
    return pd.DataFrame(rows, columns=OUTPUT_COLUMNS)


def main(argv=None):
    """Read the raw test CSV for a dataset and write its cartesian product.

    Args:
        argv: Command-line vector; defaults to ``sys.argv``. ``argv[1]`` is
            the dataset directory name under ``/data/sidana/recnet_draft/``.

    Raises:
        SystemExit: If the dataset name is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        raise SystemExit('usage: cart_prod_form.py <dataset>')

    base_dir = '/data/sidana/recnet_draft/' + argv[1] + '/recnet_all/'
    df = pd.read_csv(base_dir + 'test_all_raw.csv', sep=',', header=0)
    df2 = build_cartesian_ratings(df)
    df2.to_csv(base_dir + 'test_all_cart_raw.csv', index=False)


if __name__ == '__main__':
    main()