From 87b2379523b746c9891281f8bb0a9bcbb1b2d491 Mon Sep 17 00:00:00 2001
From: "Li, Jiayi" <76764434+jiayili7@users.noreply.github.com>
Date: Sat, 23 Nov 2024 12:33:44 -0800
Subject: [PATCH] fix: comment out code for milestone 1 release
---
yellow_taxi_analysis.ipynb | 1001 +++++++++++++++++++++++++++++++++++-
yellow_taxi_analysis.pdf | Bin 169802 -> 2177156 bytes
2 files changed, 977 insertions(+), 24 deletions(-)
diff --git a/yellow_taxi_analysis.ipynb b/yellow_taxi_analysis.ipynb
index 1d86d1f..61b5e12 100644
--- a/yellow_taxi_analysis.ipynb
+++ b/yellow_taxi_analysis.ipynb
@@ -112,9 +112,208 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " VendorID | \n",
+ " tpep_pickup_datetime | \n",
+ " tpep_dropoff_datetime | \n",
+ " passenger_count | \n",
+ " trip_distance | \n",
+ " RatecodeID | \n",
+ " store_and_fwd_flag | \n",
+ " PULocationID | \n",
+ " DOLocationID | \n",
+ " payment_type | \n",
+ " fare_amount | \n",
+ " extra | \n",
+ " mta_tax | \n",
+ " tip_amount | \n",
+ " tolls_amount | \n",
+ " improvement_surcharge | \n",
+ " total_amount | \n",
+ " congestion_surcharge | \n",
+ " Airport_fee | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2688292 | \n",
+ " 2 | \n",
+ " 2024-01-30 17:40:12 | \n",
+ " 2024-01-30 17:47:05 | \n",
+ " 2.0 | \n",
+ " 0.78 | \n",
+ " 1.0 | \n",
+ " N | \n",
+ " 230 | \n",
+ " 186 | \n",
+ " 2 | \n",
+ " 7.90 | \n",
+ " 2.5 | \n",
+ " 0.5 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 14.40 | \n",
+ " 2.5 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2956481 | \n",
+ " 2 | \n",
+ " 2024-01-29 12:25:03 | \n",
+ " 2024-01-29 12:53:32 | \n",
+ " NaN | \n",
+ " 7.21 | \n",
+ " NaN | \n",
+ " None | \n",
+ " 236 | \n",
+ " 211 | \n",
+ " 0 | \n",
+ " 34.73 | \n",
+ " 0.0 | \n",
+ " 0.5 | \n",
+ " 0.00 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 38.73 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2203446 | \n",
+ " 2 | \n",
+ " 2024-01-25 16:21:38 | \n",
+ " 2024-01-25 16:36:41 | \n",
+ " 1.0 | \n",
+ " 2.21 | \n",
+ " 1.0 | \n",
+ " N | \n",
+ " 211 | \n",
+ " 68 | \n",
+ " 1 | \n",
+ " 15.60 | \n",
+ " 2.5 | \n",
+ " 0.5 | \n",
+ " 4.42 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 26.52 | \n",
+ " 2.5 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2436695 | \n",
+ " 2 | \n",
+ " 2024-01-27 20:06:28 | \n",
+ " 2024-01-27 20:41:05 | \n",
+ " 1.0 | \n",
+ " 4.85 | \n",
+ " 1.0 | \n",
+ " N | \n",
+ " 68 | \n",
+ " 236 | \n",
+ " 1 | \n",
+ " 32.40 | \n",
+ " 1.0 | \n",
+ " 0.5 | \n",
+ " 3.74 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 41.14 | \n",
+ " 2.5 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ " 2063582 | \n",
+ " 2 | \n",
+ " 2024-01-24 09:47:54 | \n",
+ " 2024-01-24 10:03:31 | \n",
+ " 1.0 | \n",
+ " 1.40 | \n",
+ " 1.0 | \n",
+ " N | \n",
+ " 161 | \n",
+ " 186 | \n",
+ " 1 | \n",
+ " 14.20 | \n",
+ " 0.0 | \n",
+ " 0.5 | \n",
+ " 3.64 | \n",
+ " 0.0 | \n",
+ " 1.0 | \n",
+ " 21.84 | \n",
+ " 2.5 | \n",
+ " 0.0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " VendorID tpep_pickup_datetime tpep_dropoff_datetime passenger_count \\\n",
+ "2688292 2 2024-01-30 17:40:12 2024-01-30 17:47:05 2.0 \n",
+ "2956481 2 2024-01-29 12:25:03 2024-01-29 12:53:32 NaN \n",
+ "2203446 2 2024-01-25 16:21:38 2024-01-25 16:36:41 1.0 \n",
+ "2436695 2 2024-01-27 20:06:28 2024-01-27 20:41:05 1.0 \n",
+ "2063582 2 2024-01-24 09:47:54 2024-01-24 10:03:31 1.0 \n",
+ "\n",
+ " trip_distance RatecodeID store_and_fwd_flag PULocationID \\\n",
+ "2688292 0.78 1.0 N 230 \n",
+ "2956481 7.21 NaN None 236 \n",
+ "2203446 2.21 1.0 N 211 \n",
+ "2436695 4.85 1.0 N 68 \n",
+ "2063582 1.40 1.0 N 161 \n",
+ "\n",
+ " DOLocationID payment_type fare_amount extra mta_tax tip_amount \\\n",
+ "2688292 186 2 7.90 2.5 0.5 0.00 \n",
+ "2956481 211 0 34.73 0.0 0.5 0.00 \n",
+ "2203446 68 1 15.60 2.5 0.5 4.42 \n",
+ "2436695 236 1 32.40 1.0 0.5 3.74 \n",
+ "2063582 186 1 14.20 0.0 0.5 3.64 \n",
+ "\n",
+ " tolls_amount improvement_surcharge total_amount \\\n",
+ "2688292 0.0 1.0 14.40 \n",
+ "2956481 0.0 1.0 38.73 \n",
+ "2203446 0.0 1.0 26.52 \n",
+ "2436695 0.0 1.0 41.14 \n",
+ "2063582 0.0 1.0 21.84 \n",
+ "\n",
+ " congestion_surcharge Airport_fee \n",
+ "2688292 2.5 0.0 \n",
+ "2956481 NaN NaN \n",
+ "2203446 2.5 0.0 \n",
+ "2436695 2.5 0.0 \n",
+ "2063582 2.5 0.0 "
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"import pandas as pd\n",
"import altair as alt\n",
@@ -138,7 +337,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -158,9 +357,280 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " VendorID | \n",
+ " tpep_pickup_datetime | \n",
+ " tpep_dropoff_datetime | \n",
+ " passenger_count | \n",
+ " trip_distance | \n",
+ " RatecodeID | \n",
+ " PULocationID | \n",
+ " DOLocationID | \n",
+ " payment_type | \n",
+ " fare_amount | \n",
+ " extra | \n",
+ " mta_tax | \n",
+ " tip_amount | \n",
+ " tolls_amount | \n",
+ " improvement_surcharge | \n",
+ " total_amount | \n",
+ " congestion_surcharge | \n",
+ " Airport_fee | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 21000.000000 | \n",
+ " 21000 | \n",
+ " 21000 | \n",
+ " 19973.000000 | \n",
+ " 21000.000000 | \n",
+ " 19973.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 21000.000000 | \n",
+ " 19973.000000 | \n",
+ " 19973.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 1.752190 | \n",
+ " 2024-01-17 01:47:00.986333 | \n",
+ " 2024-01-17 02:02:19.647285 | \n",
+ " 1.333250 | \n",
+ " 3.205553 | \n",
+ " 2.033695 | \n",
+ " 165.763762 | \n",
+ " 165.775762 | \n",
+ " 1.169524 | \n",
+ " 18.099862 | \n",
+ " 1.453586 | \n",
+ " 0.483095 | \n",
+ " 3.318175 | \n",
+ " 0.521250 | \n",
+ " 0.973790 | \n",
+ " 26.693330 | \n",
+ " 2.251089 | \n",
+ " 0.142730 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 1.000000 | \n",
+ " 2024-01-01 00:06:10 | \n",
+ " 2024-01-01 00:10:03 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " -150.000000 | \n",
+ " -7.500000 | \n",
+ " -0.500000 | \n",
+ " 0.000000 | \n",
+ " -21.940000 | \n",
+ " -1.000000 | \n",
+ " -155.250000 | \n",
+ " -2.500000 | \n",
+ " -1.750000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 2.000000 | \n",
+ " 2024-01-09 17:18:02 | \n",
+ " 2024-01-09 17:37:35.750000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 132.000000 | \n",
+ " 114.000000 | \n",
+ " 1.000000 | \n",
+ " 8.600000 | \n",
+ " 0.000000 | \n",
+ " 0.500000 | \n",
+ " 0.932500 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 15.400000 | \n",
+ " 2.500000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 2.000000 | \n",
+ " 2024-01-17 11:00:56.500000 | \n",
+ " 2024-01-17 11:16:55 | \n",
+ " 1.000000 | \n",
+ " 1.680000 | \n",
+ " 1.000000 | \n",
+ " 162.000000 | \n",
+ " 163.000000 | \n",
+ " 1.000000 | \n",
+ " 12.800000 | \n",
+ " 1.000000 | \n",
+ " 0.500000 | \n",
+ " 2.720000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 20.120000 | \n",
+ " 2.500000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 2.000000 | \n",
+ " 2024-01-24 18:57:54 | \n",
+ " 2024-01-24 19:12:33.250000 | \n",
+ " 1.000000 | \n",
+ " 3.090000 | \n",
+ " 1.000000 | \n",
+ " 234.000000 | \n",
+ " 235.000000 | \n",
+ " 1.000000 | \n",
+ " 19.985000 | \n",
+ " 2.500000 | \n",
+ " 0.500000 | \n",
+ " 4.140000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 28.400000 | \n",
+ " 2.500000 | \n",
+ " 0.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 6.000000 | \n",
+ " 2024-01-31 23:56:14 | \n",
+ " 2024-02-01 00:15:56 | \n",
+ " 8.000000 | \n",
+ " 68.480000 | \n",
+ " 99.000000 | \n",
+ " 265.000000 | \n",
+ " 265.000000 | \n",
+ " 4.000000 | \n",
+ " 600.000000 | \n",
+ " 11.750000 | \n",
+ " 4.000000 | \n",
+ " 93.270000 | \n",
+ " 62.750000 | \n",
+ " 1.000000 | \n",
+ " 601.000000 | \n",
+ " 2.500000 | \n",
+ " 1.750000 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 0.433951 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " 0.826226 | \n",
+ " 4.344470 | \n",
+ " 9.661687 | \n",
+ " 63.727982 | \n",
+ " 69.307081 | \n",
+ " 0.600034 | \n",
+ " 18.255949 | \n",
+ " 1.814640 | \n",
+ " 0.122477 | \n",
+ " 3.857285 | \n",
+ " 2.133357 | \n",
+ " 0.226317 | \n",
+ " 22.870276 | \n",
+ " 0.839181 | \n",
+ " 0.489739 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " VendorID tpep_pickup_datetime tpep_dropoff_datetime \\\n",
+ "count 21000.000000 21000 21000 \n",
+ "mean 1.752190 2024-01-17 01:47:00.986333 2024-01-17 02:02:19.647285 \n",
+ "min 1.000000 2024-01-01 00:06:10 2024-01-01 00:10:03 \n",
+ "25% 2.000000 2024-01-09 17:18:02 2024-01-09 17:37:35.750000 \n",
+ "50% 2.000000 2024-01-17 11:00:56.500000 2024-01-17 11:16:55 \n",
+ "75% 2.000000 2024-01-24 18:57:54 2024-01-24 19:12:33.250000 \n",
+ "max 6.000000 2024-01-31 23:56:14 2024-02-01 00:15:56 \n",
+ "std 0.433951 NaN NaN \n",
+ "\n",
+ " passenger_count trip_distance RatecodeID PULocationID \\\n",
+ "count 19973.000000 21000.000000 19973.000000 21000.000000 \n",
+ "mean 1.333250 3.205553 2.033695 165.763762 \n",
+ "min 0.000000 0.000000 1.000000 1.000000 \n",
+ "25% 1.000000 1.000000 1.000000 132.000000 \n",
+ "50% 1.000000 1.680000 1.000000 162.000000 \n",
+ "75% 1.000000 3.090000 1.000000 234.000000 \n",
+ "max 8.000000 68.480000 99.000000 265.000000 \n",
+ "std 0.826226 4.344470 9.661687 63.727982 \n",
+ "\n",
+ " DOLocationID payment_type fare_amount extra mta_tax \\\n",
+ "count 21000.000000 21000.000000 21000.000000 21000.000000 21000.000000 \n",
+ "mean 165.775762 1.169524 18.099862 1.453586 0.483095 \n",
+ "min 1.000000 0.000000 -150.000000 -7.500000 -0.500000 \n",
+ "25% 114.000000 1.000000 8.600000 0.000000 0.500000 \n",
+ "50% 163.000000 1.000000 12.800000 1.000000 0.500000 \n",
+ "75% 235.000000 1.000000 19.985000 2.500000 0.500000 \n",
+ "max 265.000000 4.000000 600.000000 11.750000 4.000000 \n",
+ "std 69.307081 0.600034 18.255949 1.814640 0.122477 \n",
+ "\n",
+ " tip_amount tolls_amount improvement_surcharge total_amount \\\n",
+ "count 21000.000000 21000.000000 21000.000000 21000.000000 \n",
+ "mean 3.318175 0.521250 0.973790 26.693330 \n",
+ "min 0.000000 -21.940000 -1.000000 -155.250000 \n",
+ "25% 0.932500 0.000000 1.000000 15.400000 \n",
+ "50% 2.720000 0.000000 1.000000 20.120000 \n",
+ "75% 4.140000 0.000000 1.000000 28.400000 \n",
+ "max 93.270000 62.750000 1.000000 601.000000 \n",
+ "std 3.857285 2.133357 0.226317 22.870276 \n",
+ "\n",
+ " congestion_surcharge Airport_fee \n",
+ "count 19973.000000 19973.000000 \n",
+ "mean 2.251089 0.142730 \n",
+ "min -2.500000 -1.750000 \n",
+ "25% 2.500000 0.000000 \n",
+ "50% 2.500000 0.000000 \n",
+ "75% 2.500000 0.000000 \n",
+ "max 2.500000 1.750000 \n",
+ "std 0.839181 0.489739 "
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"train_df.describe()"
]
@@ -181,9 +651,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "DataTransformerRegistry.enable('vegafusion')"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# !pip install \"vegafusion[embed]>=1.5.0\"\n",
"\n",
@@ -192,9 +673,88 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "\n",
+ "