-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathapp.py
165 lines (131 loc) · 6.08 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
import pandas as pd
import plotly.express as px
import numpy as np
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
# Load the ride records used for every calculation below.
data = pd.read_csv("dynamic_pricing.csv")

# --- Demand multiplier ------------------------------------------------------
# Rides whose rider count exceeds the high percentile are scaled by that
# percentile; every other ride is scaled by the low percentile.
high_demand_percentile = 75
low_demand_percentile = 25
rider_counts = data['Number_of_Riders']
riders_high_cutoff = np.percentile(rider_counts, high_demand_percentile)
riders_low_cutoff = np.percentile(rider_counts, low_demand_percentile)
data['demand_multiplier'] = np.where(
    rider_counts > riders_high_cutoff,
    rider_counts / riders_high_cutoff,
    rider_counts / riders_low_cutoff,
)

# --- Supply multiplier ------------------------------------------------------
# NOTE(review): the condition tests against the LOW percentile while the
# matching branch divides the HIGH percentile by the driver count. This is
# asymmetric with the demand rule above -- confirm it is intended.
high_supply_percentile = 75
low_supply_percentile = 25
driver_counts = data['Number_of_Drivers']
drivers_high_cutoff = np.percentile(driver_counts, high_supply_percentile)
drivers_low_cutoff = np.percentile(driver_counts, low_supply_percentile)
data['supply_multiplier'] = np.where(
    driver_counts > drivers_low_cutoff,
    drivers_high_cutoff / driver_counts,
    drivers_low_cutoff / driver_counts,
)

# --- Price adjustment thresholds --------------------------------------------
# Only demand_threshold_low and supply_threshold_high are used in the cost
# formula below; the other two are defined but not referenced in this section.
demand_threshold_high = 1.2  # Higher demand threshold
demand_threshold_low = 0.8  # Lower demand threshold
supply_threshold_high = 0.8  # Higher supply threshold
supply_threshold_low = 1.2  # Lower supply threshold

# --- Adjusted ride cost -----------------------------------------------------
# Each multiplier is floored at its threshold before the two are combined and
# applied to the historical cost.
price_factor = (
    np.maximum(data['demand_multiplier'], demand_threshold_low)
    * np.maximum(data['supply_multiplier'], supply_threshold_high)
)
data['adjusted_ride_cost'] = data['Historical_Cost_of_Ride'] * price_factor

# --- Profit analysis --------------------------------------------------------
# Percentage change of the adjusted cost relative to the historical cost.
cost_delta = data['adjusted_ride_cost'] - data['Historical_Cost_of_Ride']
data['profit_percentage'] = (cost_delta / data['Historical_Cost_of_Ride']) * 100

# Strictly positive change -> profitable; strictly negative -> loss.
# Rides with exactly zero change fall into neither group.
profitable_rides = data[data['profit_percentage'] > 0]
loss_rides = data[data['profit_percentage'] < 0]
profitable_count = len(profitable_rides)
loss_count = len(loss_rides)

# Labels and values for a profitable-vs-loss chart (the chart itself is not
# drawn in this section).
labels = ['Profitable Rides', 'Loss Rides']
values = [profitable_count, loss_count]
def data_preprocessing_pipeline(data):
    """Impute missing values and dampen numeric outliers in a DataFrame.

    Steps, in order:

    1. Numeric columns (``float``/``int`` dtypes): missing values are
       replaced by the column mean.
    2. Numeric columns: values outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` are
       replaced by the column mean (computed after step 1, outliers
       included).
    3. Categorical columns (``object`` dtype): missing values are replaced
       by the column's most frequent value.

    The frame is modified in place and also returned.

    Args:
        data: pandas DataFrame to clean.

    Returns:
        The same DataFrame object, after cleaning.
    """
    # Identify numeric and categorical features
    numeric_features = data.select_dtypes(include=['float', 'int']).columns
    categorical_features = data.select_dtypes(include=['object']).columns

    # Handle missing values in numeric features
    if len(numeric_features) > 0:
        data[numeric_features] = data[numeric_features].fillna(
            data[numeric_features].mean()
        )

    # Detect and handle outliers in numeric features using IQR
    for feature in numeric_features:
        Q1 = data[feature].quantile(0.25)
        Q3 = data[feature].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - (1.5 * IQR)
        upper_bound = Q3 + (1.5 * IQR)
        column_mean = data[feature].mean()
        is_outlier = (data[feature] < lower_bound) | (data[feature] > upper_bound)
        data[feature] = np.where(is_outlier, column_mean, data[feature])

    # Handle missing values in categorical features.
    # Guard both "no categorical columns" and "no mode at all" (every value
    # missing): in either case mode() has no first row, and indexing it with
    # .iloc[0] would raise IndexError.
    if len(categorical_features) > 0:
        modes = data[categorical_features].mode()
        if not modes.empty:
            data[categorical_features] = data[categorical_features].fillna(
                modes.iloc[0]
            )

    return data
# Encode the vehicle type as a numeric feature (Premium -> 1, Economy -> 0).
vehicle_type_codes = {"Premium": 1, "Economy": 0}
data["Vehicle_Type"] = data["Vehicle_Type"].map(vehicle_type_codes)

# splitting data
from sklearn.model_selection import train_test_split

# Model inputs and the single target column, kept as 2-D arrays until after
# the split.
feature_columns = [
    "Number_of_Riders",
    "Number_of_Drivers",
    "Vehicle_Type",
    "Expected_Ride_Duration",
]
target_columns = ["adjusted_ride_cost"]
x = np.array(data[feature_columns])
y = np.array(data[target_columns])

# Hold out 20% of the rows for evaluation; the split itself is seeded.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# The target arrays come back as column vectors; flatten them to 1-D.
y_train = y_train.ravel()
y_test = y_test.ravel()

# Train a random forest regressor on the training partition.
# NOTE(review): no random_state is passed to the regressor, so fitted models
# may differ between runs even though the split is fixed.
model = RandomForestRegressor()
model.fit(x_train, y_train)
def get_vehicle_type_numeric(vehicle_type):
    """Translate a vehicle type label into its numeric code.

    Args:
        vehicle_type: Label to look up ("Premium" or "Economy").

    Returns:
        1 for "Premium", 0 for "Economy", or None for any other label.
    """
    codes_by_label = {"Premium": 1, "Economy": 0}
    return codes_by_label.get(vehicle_type)
# Predicting using user input values
def predict_price(number_of_riders, number_of_drivers, vehicle_type, Expected_Ride_Duration):
    """Predict a ride price with the module-level trained ``model``.

    Args:
        number_of_riders: Rider count for the request.
        number_of_drivers: Driver count for the request.
        vehicle_type: Vehicle type label, converted to a numeric code via
            ``get_vehicle_type_numeric``.
        Expected_Ride_Duration: Expected ride duration for the request.

    Returns:
        The value returned by ``model.predict`` for the single input row.

    Raises:
        ValueError: If ``vehicle_type`` has no numeric code.
    """
    vehicle_code = get_vehicle_type_numeric(vehicle_type)
    if vehicle_code is None:
        raise ValueError("Invalid vehicle type")

    # One row, with features in the same order the model was trained on.
    feature_row = [
        number_of_riders,
        number_of_drivers,
        vehicle_code,
        Expected_Ride_Duration,
    ]
    return model.predict(np.array([feature_row]))
# Example prediction using user input values
user_number_of_riders = 50
user_number_of_drivers = 25
user_vehicle_type = "Economy"
Expected_Ride_Duration = 30
predicted_price = predict_price(
    user_number_of_riders,
    user_number_of_drivers,
    user_vehicle_type,
    Expected_Ride_Duration,
)
print("Predicted price:", predicted_price)

# Predict on the held-out test set
y_pred = model.predict(x_test)

# Actual targets as a flat array, plus their extremes for the reference line.
actual_values = y_test.flatten()
lowest_actual = min(actual_values)
highest_actual = max(actual_values)

# Scatter of actual (x axis) against predicted (y axis) values
fig = go.Figure()
actual_vs_predicted = go.Scatter(
    x=actual_values,
    y=y_pred,
    mode='markers',
    name='Actual vs Predicted',
)
fig.add_trace(actual_vs_predicted)

# Dashed red diagonal marking where predicted == actual
ideal_line = go.Scatter(
    x=[lowest_actual, highest_actual],
    y=[lowest_actual, highest_actual],
    mode='lines',
    name='Ideal',
    line=dict(color='red', dash='dash'),
)
fig.add_trace(ideal_line)

fig.update_layout(
    title='Actual vs Predicted Values',
    xaxis_title='Actual Values',
    yaxis_title='Predicted Values',
    showlegend=True,
)
fig.show()