-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_deal.py
112 lines (93 loc) · 2.56 KB
/
data_deal.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# -*- coding: utf-8 -*-
"""
Created on Thu Feb 09 16:53:24 2017
@author: Lily
"""
# -*- coding: utf-8 -*-
import csv
import re
import sys

import pandas as pd
# ---------------------------------------------------------------------------
# Clean the scraped book table in 'books_2.csv' and write 'book_data_2.csv'.
#
# Input columns are read by position:
#   0 -> ID         every run of digits found in the field (kept as text)
#   1 -> NAME       copied unchanged
#   2 -> PRICE      last decimal number found in the field; None for USD/$
#   3 -> SCORE      float, or None when the field is the text 'None'
#   4 -> PL         every run of digits found in the field, as int
#   5 -> WEB_PRICE  every number found in the field, as float; None when the
#                   field is the text 'None'
#
# NOTE(review): the indentation of the original script was lost.  This
# version assumes the PL and WEB_PRICE parsing ran for every data row, not
# only for rows with a numeric rating -- confirm against the original file.
# ---------------------------------------------------------------------------

# Header labels of the first four columns.  The original tested each column
# against its own label; here a row is skipped as a header when any of these
# labels sits in its column.
HEADER_LABELS = ('book_id', 'book_name', 'book_pub', 'book_rating')

# Patterns are compiled once here rather than once per CSV row.
INTEGER_RE = re.compile(r'\d+')
# Digits with an optional decimal part.  The previous pattern r'\d+.\d*' left
# the dot unescaped, so it matched any character: it could capture text that
# float() rejects and it never matched a single-digit number.
WEB_PRICE_RE = re.compile(r'\d+(?:\.\d*)?')
# A decimal point is required here; plain integers in the field are ignored.
PUB_PRICE_RE = re.compile(r'\d+\.\d*')


def _append_numbers(column, pattern, text, convert=None):
    """Append every match of `pattern` in `text` to the list `column`.

    Each match is passed through `convert` when one is given, otherwise it
    is stored as the matched text.  When nothing matches, a single None is
    appended so the row still takes up a slot in the column.  (The original
    script's `else: ...append(None)` hung off a test that was always true,
    so rows without a number silently shifted every later value up.)
    """
    matches = pattern.findall(text)
    if not matches:
        column.append(None)
        return
    for match in matches:
        column.append(match if convert is None else convert(match))


ids = []
name = []
score = []
pl = []
price = []
web_price = []

# The csv module wants a binary file on Python 2 and a text file opened with
# newline='' on Python 3.  On Python 3 the platform default encoding is used
# to decode the file; pass an explicit encoding if the data needs one.
if sys.version_info[0] >= 3:
    _open_kwargs = {'mode': 'r', 'newline': ''}
else:
    _open_kwargs = {'mode': 'rb'}

# `open` replaces the Python-2-only `file` builtin, and the `with` block
# closes the handle, which the original never did.
with open('books_2.csv', **_open_kwargs) as f:
    reader = csv.reader(f)
    for line in reader:
        # Skip blank rows (csv yields [] for them) and header rows.
        if not line or any(cell == label
                           for cell, label in zip(line, HEADER_LABELS)):
            continue

        _append_numbers(ids, INTEGER_RE, line[0])
        name.append(line[1])

        # The rating column holds either the text 'None' or a number.
        score.append(None if line[3] == 'None' else float(line[3]))

        # take out the numbers
        _append_numbers(pl, INTEGER_RE, line[4], int)

        if line[5] == 'None':
            web_price.append(None)
        else:
            _append_numbers(web_price, WEB_PRICE_RE, line[5], float)

        # exclude the number in USD: prices marked 'USD' or '$' are recorded
        # as None.  (The original multiplied them by a fixed rate and then
        # discarded the result; that dead computation is removed.)
        detail_price = line[2]
        is_usd = 'USD' in detail_price or '$' in detail_price
        listed_prices = PUB_PRICE_RE.findall(detail_price)
        if listed_prices and not is_usd:
            price.append(float(listed_prices[-1]))
        else:
            price.append(None)

# Series rather than plain lists: a field with several numbers contributes
# several entries, so the columns can differ in length, and a dict of Series
# is aligned on the index instead of being rejected for unequal lengths.
s_name = pd.Series(name)
s_price = pd.Series(price)
s_wp = pd.Series(web_price)
s_score = pd.Series(score)
s_pl = pd.Series(pl)
s_id = pd.Series(ids)
df = pd.DataFrame({'ID': s_id,
                   'NAME': s_name,
                   'PRICE': s_price,
                   'WEB_PRICE': s_wp,
                   'SCORE': s_score,
                   'PL': s_pl})
df.to_csv('book_data_2.csv')