-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathstats.py
36 lines (24 loc) · 1.12 KB
/
stats.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import pandas as pd
import json
import seaborn
import matplotlib.pyplot as plt
# Disable pandas' multi-line wrapping of wide DataFrame reprs when printing.
pd.set_option('display.expand_frame_repr', False)

# Load the product table and keep only the rows whose "volume" cell is not
# the literal string "{}".
data = pd.read_csv("data/product_data.csv").loc[
    lambda frame: frame["volume"] != "{}"
]
def get_json_property(series: pd.Series, field_name: str) -> pd.Series:
    """Extract one field from every dict-like string in *series*.

    Each element is first parsed the way this script always has: single
    quotes are swapped for double quotes, backslashes are dropped, and the
    result is handed to ``json.loads``.  When that fails (for example because
    a value contains an apostrophe, or the text uses Python literals such as
    ``None``/``True``), the untouched string is parsed with
    ``ast.literal_eval`` instead.

    Args:
        series: Series of strings, each encoding a mapping.
        field_name: Key to look up in every parsed mapping.

    Returns:
        The result of applying the lookup element-wise to *series*.

    Raises:
        json.JSONDecodeError: If an element can be parsed by neither strategy.
        KeyError: If a parsed mapping has no *field_name* key.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import ast

    def extract(raw):
        # Legacy normalisation first, so every input that used to parse
        # keeps producing exactly the same value.
        normalized = raw.replace("'", '"').replace("\\", "")
        try:
            record = json.loads(normalized)
        except json.JSONDecodeError as json_error:
            # The quote swap corrupts values such as "Jack Daniel's"; the raw
            # text may still be a valid Python literal.
            try:
                record = ast.literal_eval(raw)
            except (ValueError, SyntaxError):
                # Surface the same exception type callers saw before.
                raise json_error from None
        return record[field_name]

    # Single pass over the series instead of three chained ``apply`` calls.
    return series.apply(extract)
# Pull the "value" entry out of each dict-like column, in the same order as
# before: alcohol, then price, then volume.
alcohol, price, volume = (
    get_json_property(data[column], "value")
    for column in ("alcohol", "price", "volume")
)

# NOTE(review): the name assumes volume is expressed in liters -- confirm
# against the units used in the CSV.
price_per_liter = price.div(volume)
# Alcohol percentage -> fraction, divided by the unit price, scaled by 1000.
ml_alcohol_per_krone = alcohol.div(100).div(price_per_liter).mul(1000)

# Attach the metric and a product-page link built from the product code.
data = data.assign(
    ml_alcohol_per_krone=ml_alcohol_per_krone,
    link=data["code"].map(lambda code: f"https://www.vinmonopolet.no/p/{code}"),
)

# Print the ranking, highest metric first.
data = data.sort_values("ml_alcohol_per_krone", ascending=False)
print(data.loc[:, ["code", "name", "ml_alcohol_per_krone", "link"]])

# Add the category name, order rows by it, then draw one box per category.
data = data.assign(
    category=get_json_property(data["main_category"], "name")
).sort_values("category")

seaborn.catplot(kind="box", data=data, y="category", x="ml_alcohol_per_krone")
plt.show()