-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
190 lines (164 loc) · 9.26 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import threading
import logging
import requests
import time
import datetime as dt
from datetime import time as dtTime
import json
from math import floor
from line_manager import Line_Manager
import pprint
class Scraper:
    """Poll a departure API endpoint and forward every result to a line manager.

    ``get_data`` loops forever: it fetches the JSON document served at
    ``daglfing_sbahn_api``, stores it in ``raw_api_data``, passes a parsed
    copy to ``line_manager.new_data`` and then sleeps for the number of
    seconds returned by ``get_adaptive_period``.
    """

    # NOTE: the commented-out legacy helpers (get_minutes, check_as_usual,
    # get_next_exptected_s8_times) and their station lists were dead code in
    # this class and have been removed; recover them from version control if
    # they are ever needed again.

    data_folder = "/home/pi/Documents/mvg_departure_monitor/data/"
    daglfing_sbahn_api = "https://www.mvg.de/api/fahrinfo/departure/de:09162:700"

    # Seconds to wait before retrying after a failed fetch.
    retry_delay_seconds = 10
    # Timeout in seconds handed to requests.get.
    request_timeout_seconds = 15.0

    # Moment of the last successful fetch; None until one has succeeded.
    last_refresh = None
    # Despite its name this holds a datetime.timedelta (time elapsed since
    # last_refresh); None until a fetch has succeeded.
    minutes_since_last_refresh = None
    # Most recently parsed API response.
    raw_api_data = dict()

    def __init__(self, line_manager: "Line_Manager") -> None:
        """Remember the line manager that receives every new API response."""
        self.line_manager = line_manager

    def get_data(self):
        """Fetch and forward data forever, pausing between rounds.

        This method never returns.
        """
        while True:
            self.fetch_data()
            time.sleep(self.get_adaptive_period())

    def fetch_data(self):
        """Fetch one API response, retrying until a usable one arrives.

        A request error, an empty body or a body that is not valid JSON counts
        as a failed attempt: it is reported on stdout and retried after
        ``retry_delay_seconds``. On success the parsed document is stored in
        ``raw_api_data``, the refresh bookkeeping is reset and the data is
        passed to ``line_manager.new_data``.
        """
        while True:
            try:
                resp = requests.get(self.daglfing_sbahn_api,
                                    timeout=self.request_timeout_seconds)
            except requests.exceptions.RequestException as e:
                self._record_failed_fetch(str(e))
                time.sleep(self.retry_delay_seconds)
                continue

            # Test the body, not the response object itself: the truthiness
            # of a requests.Response reflects the HTTP status, not whether
            # any content was received.
            body = resp.content
            if not body:
                self._record_failed_fetch()
                time.sleep(self.retry_delay_seconds)
                continue

            try:
                parsed = json.loads(body)
            except ValueError as e:
                # json.JSONDecodeError is a ValueError; a malformed body must
                # not terminate the polling loop.
                self._record_failed_fetch(
                    "response body is not valid JSON (" + str(e) + ")")
                time.sleep(self.retry_delay_seconds)
                continue
            break

        fetched_at = dt.datetime.now()
        logging.debug("Fetched data at %s!", fetched_at.strftime("%H:%M:%S"))
        self.raw_api_data = parsed
        self.last_refresh = fetched_at
        self.minutes_since_last_refresh = dt.datetime.now() - self.last_refresh
        # Parsed a second time, as before, so the line manager receives an
        # object distinct from raw_api_data (whether new_data mutates its
        # argument is not visible from this file).
        self.line_manager.new_data(json.loads(body))

    def _record_failed_fetch(self, reason=None):
        """Report a failed fetch attempt and update the elapsed-time tracker.

        ``reason`` is an optional description appended to the printed
        message. The elapsed time is only updated once a fetch has succeeded;
        before that ``last_refresh`` is None and there is nothing to
        subtract from.
        """
        now = dt.datetime.now()
        message = "Failed to fetch at " + str(now)
        if reason is not None:
            message += "\n" + "Reason: " + reason
        print(message)
        if self.last_refresh is not None:
            self.minutes_since_last_refresh = now - self.last_refresh

    def get_adaptive_period(self, now_time=None):
        """Return the pause in seconds between two fetches.

        ``now_time`` is an optional ``datetime.time`` to evaluate; when it is
        omitted the current UTC time of day is used. Returns 57 when the time
        lies within 02:30-05:10 (both inclusive) and 29 otherwise.
        """
        if now_time is None:
            # NOTE(review): the window is compared against UTC while the rest
            # of the class works with local time -- confirm which is meant.
            now_time = dt.datetime.now(dt.timezone.utc).time()
        if dtTime(2, 30) <= now_time <= dtTime(5, 10):
            return 57
        return 29
if __name__ == "__main__":
    # Script entry point: build one Departures object per S8 direction, wrap
    # them in a Line_Manager and poll forever (get_data never returns).
    from departures import Departures
    from line_manager import Line_Manager
    from stations import s8

    # NOTE(review): the airport entry passes s8.to_airport_times as its second
    # argument, where the city entry passes s8.into_city. This looks like a
    # copy-paste slip, but the stations module is not visible here -- confirm
    # the intended attribute before changing it.
    directions = [
        Departures(s8.name, s8.into_city, s8.into_city_warning,
                   s8.into_city_times),
        Departures(s8.name, s8.to_airport_times, s8.to_airport_warning,
                   s8.to_airport_times),
    ]
    Scraper(Line_Manager(directions)).get_data()