Skip to content

Commit

Permalink
Leo+Arriva dataframes
Browse files (browse the repository at this point in the history)
  • Loading branch information
michalkasparek committed Nov 8, 2024
1 parent 58c01ae commit 05a2f2a
Show file tree
Hide file tree
Showing 18 changed files with 173,276 additions and 111 deletions.
3 changes: 2 additions & 1 deletion 001_cd_scrapovani.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ def cd(odkud, kam, pocet_dni=0):
("České Budějovice", "Zürich"),
("Praha", "Záhřeb"),
("Praha", "Lublaň"),
("Praha", "Amsterdam")
("Praha", "Amsterdam"),
("Praha","Krakow")
]

random.shuffle(trasy_b)
Expand Down
2 changes: 1 addition & 1 deletion 004_rj_cisteni.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2762,7 +2762,7 @@
" kam = \"data\" \n",
" os.makedirs(kam, exist_ok=True)\n",
" den = []\n",
" rj = [y for y in os.listdir(f\"downloads/{x}\") if \"rj_\" in y] \n",
" rj = [y for y in os.listdir(f\"downloads/{x}\") if y[0:3] == \"rj_\"] \n",
" print(f\"{x}: {len(rj)}\")\n",
" for y in rj:\n",
" print(y)\n",
Expand Down
62 changes: 46 additions & 16 deletions 005_le_scrapovani.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@
from selenium.webdriver.common.keys import Keys
from pyvirtualdisplay import Display

def leo(odkud, kam, pocet_dni):
def leo(odkud1, odkud2, kam1, kam2, pocet_dni):

try:

display = Display(visible=0, size=(1920, 1080))
display.start()
driver = webdriver.Chrome()

# driver = webdriver.Chrome()

driver = webdriver.Firefox(executable_path="/usr/local/bin/geckodriver")

sablona = "https://www.leoexpress.com/cs/vysledky-vyhledavani?date=KDY_POJEDEME&from=MISTO_ODKUD1&fromCountry=&fromName=MISTO_ODKUD2&persons=%5B%7B%22adult%22%3A%5B%5D%2C%22combine%22%3A1%2C%22count%22%3A1%2C%22parentTariffs%22%3Anull%7D%5D&returnDate=&services=%5B%7B%22service_id%22%3A%223%22%2C%22count%22%3A0%7D%2C%7B%22service_id%22%3A%224%22%2C%22count%22%3A0%7D%5D&to=MISTO_KAM1&toCountry=&toName=MISTO_KAM2&toggleDiscounts=false"""
den = data(pocet_dni)
url = sablona.replace('KDY_POJEDEME',den).replace('MISTO_ODKUD1',odkud.upper()).replace('MISTO_ODKUD2',odkud).replace('MISTO_KAM1',kam.upper()).replace('MISTO_KAM2',kam)
url = sablona.replace('KDY_POJEDEME',den).replace('MISTO_ODKUD1',odkud1).replace('MISTO_ODKUD2',odkud2).replace('MISTO_KAM1',kam1).replace('MISTO_KAM2',kam2)

slozka = f"downloads/{datetime.now().strftime('%Y-%m-%d')}"
os.makedirs(slozka, exist_ok=True)
Expand All @@ -30,17 +32,41 @@ def leo(odkud, kam, pocet_dni):
wait = WebDriverWait(driver, 10)
sleep(random.randint(4, 6))

try:
driver.execute_script(
"""document.getElementById('CybotCookiebotDialog').style.display = 'none';"""
)
except:
pass

element = WebDriverWait(driver, 10).until(
EC.presence_of_element_located((By.XPATH, f"//*[text()='Přímý spoj']"))
)
element.click()

try:

elements = driver.find_elements(By.XPATH, "//*[contains(text(), 'Přímý spoj')]")
for element in elements[1:]:
sleep(1)
element.click()

except:
pass

print("Dál už to nejde.")

with open(
os.path.join(
slozka,
f"le_{odkud}_{kam}_D{pocet_dni:02}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.html",
f"le_{odkud2}_{kam2}_D{pocet_dni:02}_{datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.html",
),
"w+",
encoding="utf-8",
) as ven:
ven.write(driver.page_source)

print(f"Uloženo: {odkud}-{kam} {pocet_dni} D")
print(f"Uloženo: {odkud2}-{kam2} {pocet_dni} D")

except Exception as E:
print(E)
Expand All @@ -59,25 +85,29 @@ def leo(odkud, kam, pocet_dni):

def data(days):
    """Return the date `days` days after today as a ``DD.MM.YYYY`` string.

    Args:
        days: Offset from today in days (0 means today).

    Returns:
        The target date formatted with ``%d.%m.%Y``, e.g. ``"08.11.2024"``.
        The caller substitutes this value for the ``KDY_POJEDEME``
        placeholder in the search URL template.
    """
    future_date = date.today() + timedelta(days=days)
    return future_date.strftime("%d.%m.%Y")

# Day offsets to scrape: the next four days exactly, then one randomly
# chosen day from each progressively wider window further out.
odstup = [0, 1, 2, 3]
odstup.append(random.randint(4, 7))
odstup.append(random.randint(8, 15))
odstup.append(random.randint(16, 30))
odstup.append(random.randint(31, 90))

# Each route is a pair of endpoints; each endpoint is a two-item list whose
# first item is passed to leo() as odkud1/kam1 (the `from=`/`to=` query
# value) and whose second item is passed as odkud2/kam2 (the
# `fromName=`/`toName=` query value, also used in the saved file name).
trasy = [
    (['5457076','Praha'],['OSTRAVA','Ostrava'])
]

# Secondary routes: only one of them, picked at random, is scraped per run.
trasy_b = [
    (['5457076','Praha'],['5100028','Krak%C3%B3w%20G%C5%82%C3%B3wny']),
    (['PARDUBICE','Pardubice'],['KOSICE','Ko%C5%A1ice']),
]

random.shuffle(trasy)
random.shuffle(trasy_b)

trasy = trasy + [trasy_b[0]]

for t in trasy:
    for o in odstup:
        # Scrape both directions of the route for this day offset.
        leo(t[0][0], t[0][1], t[1][0], t[1][1], o)
        leo(t[1][0], t[1][1], t[0][0], t[0][1], o)
Loading

0 comments on commit 05a2f2a

Please sign in to comment.