-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
97 lines (81 loc) · 4.11 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pdk
# Locations of the two input CSV files (relative to the working directory)
city_data_csv = "./data/city_data.csv"
ride_data_csv = "./data/ride_data.csv"

# Load each CSV into its own DataFrame, city data first, then ride data
city_data, ride_data = (
    pdk.read_csv(csv_path, low_memory=False)
    for csv_path in (city_data_csv, ride_data_csv)
)

# Left-join the city columns onto every ride row via the shared "city" column.
# how="left" keeps all ride rows; it was chosen to match the reference
# screenshot mentioned in the problem statement.
ride_city_merge = ride_data.merge(city_data, how="left", on="city")

# Quick sanity check: show the first five merged rows
print(ride_city_merge.head())
# -----------------Bubble Plot of Ride Sharing Data----------------------------------------------------------------
# Build one summary row per city: the plot's x/y coordinates plus the
# driver count and city type.
rides_by_city = ride_city_merge.groupby("city")

# y-axis: mean fare per city, as a two-column frame (city, avg_fare)
average_fare = rides_by_city["fare"].mean()
average_fare_df = average_fare.rename("avg_fare").reset_index()

# x-axis: number of rides per city, as a two-column frame (city, total_rides)
total_rides = rides_by_city["ride_id"].count()
total_rides_df = total_rides.rename("total_rides").reset_index()

# driver_count and type are taken from the first merged row seen for each city
first_row_per_city = ride_city_merge.drop_duplicates("city")
total_drivers = first_row_per_city[["city", "driver_count"]]
city_type = first_row_per_city[["city", "type"]]

# Stitch the four per-city pieces together on the "city" key
final_df = (
    average_fare_df
    .merge(total_rides_df, on="city")
    .merge(total_drivers, on="city")
    .merge(city_type, on="city")
)
# Split the per-city summary by city type; each type is drawn as its own
# scatter layer so it gets its own colour and legend entry.
urban_group = final_df.loc[final_df["type"] == "Urban"]
suburban_group = final_df.loc[final_df["type"] == "Suburban"]
rural_group = final_df.loc[final_df["type"] == "Rural"]

# Styling shared by all three scatter layers
bubble_style = dict(
    kind="scatter",
    x="total_rides",
    y="avg_fare",
    alpha=0.5,
    edgecolor="black",
    linewidths=1,
)

# Bubble sizes must come from the same subset that is being plotted.
# Passing the sizes of the full final_df gives an array whose length differs
# from the subset's x/y, which matplotlib rejects (or, in older releases,
# silently pairs with the wrong cities).
ax1 = urban_group.plot(
    color="lightcoral",
    s=urban_group["driver_count"] * 5,
    label="Urban",
    **bubble_style,
)
# The remaining layers are drawn onto the axes created by the first one
ax2 = suburban_group.plot(
    color="lightskyblue",
    s=suburban_group["driver_count"] * 5,
    label="Suburban",
    ax=ax1,
    **bubble_style,
)
ax3 = rural_group.plot(
    color="gold",
    s=rural_group["driver_count"] * 5,
    label="Rural",
    ax=ax1,
    **bubble_style,
)

# Titles and axis labels
plt.title("Pyber Ride Sharing Data (2016)")
plt.xlabel("Total Number of Rides (Per City)")
plt.ylabel("Average Fare ($)")

# Incorporate a text label regarding circle size
textstr = 'Note:\nCircle size correlates with driver count per city.'
plt.xlim(0, 42)
plt.ylim(19, 45)
print(textstr)
# x=43 is just past the x-axis limit of 42, so the note sits to the right of
# the plot area; shrinking the axes to 55% of the figure width leaves room.
plt.text(43, 32, textstr, fontsize=10)
plt.subplots_adjust(right=0.55)

# Create a legend
plt.legend(title='City Types')
plt.grid(True)

# Save the figure before showing it
plt.savefig("./data/01_Bubble_ride_share.png")
plt.show()
# ------------------Total Fares by City Type ------------------------------------------------------------
# Sum the fares overall and per city type
total_fares = ride_city_merge["fare"].sum()
fare_by_type = ride_city_merge.groupby("type")["fare"].sum()
# .get(..., 0.0) keeps a missing city type at zero instead of raising KeyError
urban_fare = fare_by_type.get("Urban", 0.0)
rural_fare = fare_by_type.get("Rural", 0.0)
suburban_fare = fare_by_type.get("Suburban", 0.0)

# Exact percentage shares. They are deliberately NOT rounded here: the pie
# labels are formatted to one decimal place by autopct, and rounding to whole
# percents first would make that decimal always ".0" and could make the
# wedges sum to something other than 100 (which pie() would then renormalise).
urban_fare_p = urban_fare / total_fares * 100
rural_fare_p = rural_fare / total_fares * 100
suburban_fare_p = suburban_fare / total_fares * 100

# Build Pie Chart; labels, sizes, explode and colors are all in the same order
labels = 'Urban', 'Rural', 'Suburban'
sizes = [urban_fare_p, rural_fare_p, suburban_fare_p]
explode = (0.1, 0, 0)  # pull the Urban wedge out slightly
fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=275,
    colors=["lightcoral", "gold", "lightskyblue"],
)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title("% of Total Fares by City Type")

# Save Figure
plt.savefig("./data/02_Pie_city_type.png")

# Show Figure
plt.show()