-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathexists_temperature.py
82 lines (63 loc) · 2.56 KB
/
exists_temperature.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import collections
import csv
import os
import requests
# stations = sys.argv[1].split(",")
# years = [int(year) for year in sys.argv[2].split("-")]
# start_year = years[0]
# end_year = years[1]
# URL pattern for one station-year CSV under the global-hourly access path on
# www.ncei.noaa.gov; filled in with str.format(year=..., station=...).
TEMPLATE_URL = "https://www.ncei.noaa.gov/data/global-hourly/access/{year}/{station}.csv"
# Local file name pattern (relative to the current working directory) used both
# to store a downloaded station-year CSV and to read it back later.
TEMPLATE_FILE = "station_{station}_{year}.csv"
def download_data(station, year):
    """Download one station-year CSV and store it under its local file name.

    The URL is built from ``TEMPLATE_URL`` and the destination from
    ``TEMPLATE_FILE``. If the server answers with anything other than
    HTTP 200, nothing is written and the function returns silently.

    Args:
        station: Station identifier substituted into the URL and file name.
        year: Year substituted into the URL and file name.

    Raises:
        requests.exceptions.RequestException: On connection problems or when
            the server stops responding for longer than the timeout.
    """
    my_url = TEMPLATE_URL.format(station=station, year=year)
    # Without a timeout, requests can block forever on a stalled connection.
    req = requests.get(my_url, timeout=60)
    if req.status_code != 200:
        return  # not found
    # The context manager closes the file even if the write fails part-way.
    with open(TEMPLATE_FILE.format(station=station, year=year), "wt") as w:
        w.write(req.text)
def download_all_data(stations, start_year, end_year):
    """Fetch every station-year file that is not already present locally.

    Walks all stations and, for each, every year from ``start_year`` to
    ``end_year`` inclusive. A station-year whose local file already exists
    is skipped; otherwise ``download_data`` is called for it.
    """
    station_years = (
        (station_id, yr)
        for station_id in stations
        for yr in range(start_year, end_year + 1)
    )
    for station_id, yr in station_years:
        local_name = TEMPLATE_FILE.format(station=station_id, year=yr)
        if os.path.exists(local_name):
            # Already on disk from an earlier run.
            continue
        download_data(station_id, yr)
# pandas would be more standard
def get_file_temperatures(file_name):
    """Yield the accepted temperature readings from one station-year CSV.

    The first row is treated as the header and must contain a ``TMP``
    column. Each ``TMP`` cell is expected to look like ``"<value>,<status>"``;
    only cells whose status is ``"1"`` are kept. The integer value is divided
    by 10 before being yielded.

    Args:
        file_name: Path of the CSV file to read.

    Yields:
        float: One temperature per accepted row, in file order.

    Raises:
        ValueError: If the header has no ``TMP`` column, or a ``TMP`` cell
            does not split into exactly two comma-separated parts, or the
            value part is not an integer.
    """
    # newline="" lets the csv module handle line endings itself.
    with open(file_name, "rt", newline="") as f:
        reader = csv.reader(f)
        # A bare next() on an empty file would raise StopIteration inside
        # this generator, which Python turns into RuntimeError (PEP 479).
        header = next(reader, None)
        if header is None:
            return
        # The column position is the same for every row; look it up once.
        tmp_index = header.index("TMP")
        for row in reader:
            temperature, status = row[tmp_index].split(",")
            if status != "1":
                continue
            yield int(temperature) / 10
def get_all_temperatures(stations, start_year, end_year):
    """Collect the temperatures of every station over a range of years.

    For each station and each year from ``start_year`` to ``end_year``
    inclusive, the matching local CSV is read with
    ``get_file_temperatures`` and its readings are appended to that
    station's list.

    Station-years with no local file are skipped: ``download_data`` writes
    nothing when the server does not return HTTP 200, so such gaps are
    expected and should not abort the whole collection.

    Args:
        stations: Iterable of station identifiers.
        start_year: First year to include.
        end_year: Last year to include.

    Returns:
        collections.defaultdict: Maps each station that produced at least
        one reading to the list of its temperatures, in file order.
    """
    temperatures = collections.defaultdict(list)
    for station in stations:
        for year in range(start_year, end_year + 1):
            file_name = TEMPLATE_FILE.format(station=station, year=year)
            if not os.path.exists(file_name):
                # Nothing was downloaded for this station-year.
                continue
            for temperature in get_file_temperatures(file_name):
                temperatures[station].append(temperature)
    return temperatures
# --- Driver: fetch the data and build the list of temperatures -------------
# NOTE: the %timeit lines below are IPython magics; this part of the file
# only runs inside IPython/Jupyter, not with the plain python interpreter.
stations = ['01044099999']
start_year = 2005
end_year = 2021
# Downloads only the station-year files that are not already on disk.
download_all_data(stations, start_year, end_year)
all_temperatures = get_all_temperatures(stations, start_year, end_year)
first_all_temperatures = all_temperatures[stations[0]]
# Number of readings, then the highest and the lowest reading.
print(len(first_all_temperatures), max(first_all_temperatures), min(first_all_temperatures))
# Membership test on the list for two probe values.
# NOTE(review): presumably -10.7 occurs in the data and -100 does not -
# this depends on the downloaded files; confirm against the printed min/max.
%timeit (-10.7 in first_all_temperatures)
%timeit (-100 in first_all_temperatures)
# Same two probes against a set built from the list (duplicates collapse,
# so the printed length is the number of distinct readings).
set_first_all_temperatures = set(first_all_temperatures)
print(len(set_first_all_temperatures))
%timeit (-10.7 in set_first_all_temperatures)
%timeit (-100 in set_first_all_temperatures)
# Synthetic comparison: the integers 0..99999 as a list and as a set.
a_list_range = list(range(100000))
a_set_range = set(a_list_range)
# 50000 is present (in the middle of the list).
%timeit 50000 in a_list_range
%timeit 50000 in a_set_range
# 500000 is absent, so the list lookup has to scan every element.
%timeit 500000 in a_list_range
%timeit 500000 in a_set_range