Skip to content

Commit

Permalink
finální update 4. 12. 2024
Browse files Browse the repository at this point in the history
  • Loading branch information
michalkasparek committed Dec 4, 2024
1 parent 37614da commit bc4aa57
Show file tree
Hide file tree
Showing 10 changed files with 272,021 additions and 286,078 deletions.
12 changes: 6 additions & 6 deletions 002_cd_cisteni.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 1,
"id": "e27c863b-fd97-4b32-b4b3-0825cb67f49e",
"metadata": {},
"outputs": [],
Expand All @@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 2,
"id": "ad771ad2-9650-42bf-a22b-b9249d883ccd",
"metadata": {},
"outputs": [],
Expand All @@ -30,7 +30,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 3,
"id": "4d3db07f-3600-48ac-b158-70ea455b365a",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -116,7 +116,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 4,
"id": "e297e728-2198-4215-af23-6cf12ab2802f",
"metadata": {
"scrolled": true
Expand All @@ -126,8 +126,8 @@
"name": "stdout",
"output_type": "stream",
"text": [
"cd_2024-11-26.parquet\n",
"2024-11-26: 84\n"
"cd_2024-12-01.parquet\n",
"2024-12-01: 825\n"
]
}
],
Expand Down
8 changes: 4 additions & 4 deletions 004_rj_cisteni.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 1,
"id": "dd9891c3-234b-46f1-8aa5-b37b77890ab0",
"metadata": {},
"outputs": [],
Expand All @@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 2,
"id": "6bf4b9fb-fdf5-40e9-a492-9cfc761a1ec4",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -79,7 +79,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 3,
"id": "4001b228-a1f4-40d8-97b8-e8152ce730dc",
"metadata": {},
"outputs": [
Expand All @@ -93,7 +93,7 @@
"2024-11-03: 0\n",
"2024-11-04: 0\n",
"2024-11-05: 0\n",
"2024-11-26: 673\n"
"2024-12-01: 199\n"
]
}
],
Expand Down
8 changes: 4 additions & 4 deletions 006_le_cisteni.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 1,
"id": "b0a1dca1-f4fa-4174-aa26-f2b8cbb5ea79",
"metadata": {},
"outputs": [],
Expand All @@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 2,
"id": "dcfe8419-4463-470c-aed0-b72de890da5e",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -103,15 +103,15 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 3,
"id": "ace7b62e-9cf2-4268-aac8-29a94271508f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2024-11-26: 254\n"
"2024-12-01: 291\n"
]
}
],
Expand Down
23 changes: 6 additions & 17 deletions 008_ar_cisteni.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 1,
"id": "052ba83e-db70-4a7d-bb1d-df25d257950c",
"metadata": {},
"outputs": [],
Expand All @@ -15,21 +15,10 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 2,
"id": "b595d7f5-6aab-41dd-8a96-32bbef2c4b90",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"<>:11: SyntaxWarning: invalid escape sequence '\\d'\n",
"<>:11: SyntaxWarning: invalid escape sequence '\\d'\n",
"C:\\Users\\micha\\AppData\\Local\\Temp\\ipykernel_15272\\1036985530.py:11: SyntaxWarning: invalid escape sequence '\\d'\n",
" datum = re.search(\"\\d{1,2}\\.\\s\\w{4,15}\\s20\\d{2}\", radek).group(0)\n"
]
}
],
"outputs": [],
"source": [
"def oscrapuj_ar(slozka, soubor):\n",
" with open(os.path.join(slozka,soubor), \"r\", encoding=\"utf-8\") as spojeni:\n",
Expand All @@ -41,7 +30,7 @@
" for index, radek in enumerate(spojeni.split(\">\")):\n",
" if len(radek) < 1000:\n",
" if re.search(r\"\\d{1,2}\\.\\s\\w{4,15}\\s20\\d{2}\", radek):\n",
" datum = re.search(\"\\d{1,2}\\.\\s\\w{4,15}\\s20\\d{2}\", radek).group(0)\n",
" datum = re.search(r\"\\d{1,2}\\.\\s\\w{4,15}\\s20\\d{2}\", radek).group(0)\n",
" datum = datum.replace(\"leden\",\"1.\").replace(\"únor\",\"2.\").replace(\"březen\",\"3.\").replace(\"duben\",\"4.\").replace(\"květen\",\"5.\").replace(\"červen\",\"6.\").replace('červenec','7.').replace('srpen','8.').replace('září','9.').replace('říjen','10.').replace('listopad','11.').replace('prosinec','12.').replace(' ','')\n",
" elif ('class=\"departure\"' in radek) or ('</html' in radek):\n",
" if spoj:\n",
Expand Down Expand Up @@ -81,7 +70,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 3,
"id": "6e76aab9-797d-4448-9284-a6db0dea9b71",
"metadata": {},
"outputs": [
Expand All @@ -96,7 +85,7 @@
"2024-11-04: 0\n",
"2024-11-05: 0\n",
"2024-11-06: 0\n",
"2024-11-26: 659\n"
"2024-12-01: 686\n"
]
}
],
Expand Down
Loading

0 comments on commit bc4aa57

Please sign in to comment.