Distribution of Client types by time
dfv7 %>%
group_by(member_casual) %>%
summarise(count = length(ride_id),
pct = length(ride_id)/nrow(dfv7)) %>%
ggplot(aes(x=member_casual, y =count, fill = member_casual))+
geom_col()+
scale_fill_manual(values = c("#B67B49", "#4984B6"))+
labs(title = "Proportion of client type",
subtitle = "Time period (Aug 21 - Jul 22)",
x = "Client Type",
y = "Count") +
scale_y_continuous(labels = comma)+
theme_economist()+
theme(legend.position = "none",
axis.title.y = element_text(vjust = 3),
axis.title = element_text(face = "bold", size = 12),
axis.text = element_text(face = "bold", size = 12),
title = element_text(face = "bold",size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"))+
annotate(geom = "text", x= 1.00, y =1200000, label = "2.2 M\n 40% ",
color = "white", size = 15, fontface = "bold")+
annotate(geom = "text", x = 2.03, y= 1700000, label = "3.3 M \n 60 %",
color = "white", size = 15, fontface = "bold")+
scale_x_discrete(labels=c("casual" = "Casual", "member" = "Member"))

table_1 <- dfv7 %>%
group_by(member_casual) %>%
summarise(Count = length(ride_id),Pct = Count/nrow(dfv7)*100) %>%
adorn_totals(where = "row", name = "Totals")
table_1 <- table_1 %>%
gt() %>%
tab_header(title = "Proportions of client type") %>%
cols_label(member_casual = "Client", Count = "Count", Pct = "%") %>%
tab_style(style = list(cell_fill(color = "#F4F4F4")),
locations = cells_body(columns = member_casual))
table_1
| Client |
Count |
% |
| casual |
2225158 |
40.02074 |
| member |
3334854 |
59.97926 |
| Totals |
5560012 |
100.00000 |
From a total of around 5.5 M rides over the past 12 months, this is
the client type split for the period Aug 21 to Jul 22. Casual clients
were at 40 % and Member clients 60 %.
#Reorder months
dfv7$month_year <- format(as.Date(dfv7$date),"%h-%y")
dfv7$month_year <- ordered(dfv7$month_year,levels=c ("Aug-21",
"Sep-21",
"Oct-21",
"Nov-21",
"Dec-21",
"Jan-22",
"Feb-22",
"Mar-22",
"Apr-22",
"May-22",
"Jun-22",
"Jul-22" ))
dfv7 <- dfv7 %>%
mutate(month_year_int = as.numeric(month_year))
dfv7 %>%
group_by(member_casual,month_year)%>%
summarise(count_month_year = length(ride_id),
PCT = count_month_year/nrow(dfv7)*100) %>%
ggplot(aes(x = month_year, y = count_month_year,
fill = member_casual, width = .9))+
geom_col(position = "dodge" )+
theme_economist()+
scale_fill_manual(values = c("#B67B49", "#4984B6"))+
labs(title = "Distribution of client type by month",
subtitle = "Time period (Aug 21 - Jul 22)",
x = " Month",
y = " Count")+
theme(legend.title = element_blank(),
axis.title = element_text(face = "bold", size = 11),
axis.text = element_text(face = "bold"),
axis.text.y = element_text(vjust = .2, hjust = 1),
axis.text.x = element_text(vjust = 2, hjust = .5),
title = element_text(face = "bold", size = 12),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"),
axis.title.x = element_text(vjust = -2),
axis.title.y = element_text(vjust = 3))+
scale_y_continuous(labels = comma)

casual_month <- dfv7%>%
group_by(month_year,member_casual)%>%
summarise(Count = length(ride_id),
PCT = Count/nrow(dfv7)*100) %>%
subset(member_casual=="casual")
member_month <- dfv7 %>%
group_by(month_year,member_casual)%>%
summarise(Count = length(ride_id),
PCT = Count/nrow(dfv7)*100) %>%
subset(member_casual=="member")
member_month <- member_month %>%
rename(count_member= Count, PCT_member = PCT, month = month_year) %>%
select(-c(member_casual))
casual_month<- casual_month %>%
rename(count_casual=Count, PCT_casual = PCT, month = month_year) %>%
select(-c(member_casual))
clients_month <- full_join(member_month,casual_month)
clients_month <- clients_month %>%
adorn_totals(where = "row", name = "Totals")%>%
gt()%>%
tab_header(title = "Distribution of client type by the month") %>%
cols_label(month = "Month",count_casual = "Casual", PCT_casual = "Casual %",
count_member = "Member", PCT_member= " Member %")%>%
tab_style(
style = list(cell_fill(color = "#F4F4F4")),
locations = cells_body(columns = month )
)
clients_month
| Month |
Member |
Member % |
Casual |
Casual % |
| Aug-21 |
386882 |
6.958294 |
355522 |
6.3942668 |
| Sep-21 |
387479 |
6.969032 |
318686 |
5.7317502 |
| Oct-21 |
369507 |
6.645795 |
228278 |
4.1057106 |
| Nov-21 |
250175 |
4.499541 |
97175 |
1.7477480 |
| Dec-21 |
175778 |
3.161468 |
63346 |
1.1393141 |
| Jan-22 |
84035 |
1.511418 |
16983 |
0.3054490 |
| Feb-22 |
92624 |
1.665896 |
19314 |
0.3473734 |
| Mar-22 |
191643 |
3.446809 |
78927 |
1.4195473 |
| Apr-22 |
241330 |
4.340458 |
110603 |
1.9892583 |
| May-22 |
349650 |
6.288655 |
245153 |
4.4092171 |
| Jun-22 |
394367 |
7.092916 |
327750 |
5.8947715 |
| Jul-22 |
411384 |
7.398977 |
363421 |
6.5363348 |
| Totals |
3334854 |
59.979259 |
2225158 |
40.0207410 |
Looking at plot 2 “Distribution of client type by month”, we can see
there is a predictable decline throughout the winter months for both
client types. We can observe the usage of members remain proportionally
higher than that of the casuals. We could hypothesize that this is the
result of the members clients using the bikes to get to work.
#Reordering days of week
dfv7$weekday <-ordered(dfv7$weekday,levels=c("Monday","Tuesday","Wednesday",
"Thursday","Friday","Saturday",
"Sunday"))
plot2 <-dfv7 %>%
group_by(weekday,member_casual)%>%
summarise(count=length(ride_id),
pct = (count/nrow(dfv7)*100))
plot2$pct <- paste(round(plot2$pct,0),"%")
# dfv7 %>%
# group_by(member_casual,weekday)%>%
# summarise(count=length(ride_id),
# pct = count/nrow(dfv7)*100)
#pct = count/nrow(dfv7)*100)
plot2 %>%
ggplot((aes(x=weekday,y=count, fill= member_casual, width = .9)))+
geom_bar(stat="identity", position = "dodge")+
scale_fill_manual(values = c("#B67B49", "#4984B6"))+
labs(title = "Distribution of client type per day of week",
subtitle = "Time period (Aug 21 - Jul 22)",
x = "Day of Week",
y = " Count")+
theme_economist()+
theme(legend.title = element_blank(),
axis.title.x = element_text(vjust = -2),
axis.title = element_text(face = "bold", size = 11),
axis.title.y = element_text(vjust = 3),
title = element_text(face = "bold",size = 12),
axis.text = element_text(face = "bold", size = 10),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"))+
scale_y_continuous(labels = comma)+
geom_text(aes(label = plot2$pct), position = position_dodge(0.8),hjust =.4, vjust = -0.2,
size = 4, color = "black", fontface= "bold")

dfv7$weekday <-ordered(dfv7$weekday,levels=c("Monday","Tuesday","Wednesday",
"Thursday","Friday","Saturday",
"Sunday"))
Casualv2 <- dfv7 %>%
group_by(weekday,member_casual) %>%
summarise(Count_casual = length(ride_id),
PCT_casual = Count_casual/nrow(dfv7)*100)%>%
subset(member_casual == "casual")
Casualv2 <- Casualv2 %>%
select(-c(member_casual))
Memberv2 <- dfv7 %>%
group_by(weekday,member_casual) %>%
summarise(Count_member = length(ride_id),
PCT_member = Count_member/nrow(dfv7)*100) %>%
subset(member_casual == "member")
Memberv2 <- Memberv2 %>%
select(-c(member_casual))
weekday_clients <- full_join(Casualv2,Memberv2)
weekday_clients<- weekday_clients %>%
adorn_totals(where = "row", name = "Totals") %>%
gt()%>%
tab_header(title = "Distribution of client type per day of week") %>%
cols_label(weekday = "Weekday",Count_casual = "Casual", PCT_casual = "Casual %",
Count_member = "Member", PCT_member= " Member %")%>%
tab_style(
style = list(cell_fill(color = "#F4F4F4")),
locations = cells_body(columns = weekday )
)
weekday_clients
| Weekday |
Casual |
Casual % |
Member |
Member % |
| Monday |
263032 |
4.730781 |
466454 |
8.389442 |
| Tuesday |
246600 |
4.435242 |
517047 |
9.299386 |
| Wednesday |
255259 |
4.590979 |
516049 |
9.281437 |
| Thursday |
285665 |
5.137849 |
516334 |
9.286563 |
| Friday |
310285 |
5.580653 |
460561 |
8.283453 |
| Saturday |
456117 |
8.203525 |
446757 |
8.035180 |
| Sunday |
408200 |
7.341711 |
411652 |
7.403797 |
| Totals |
2225158 |
40.020741 |
3334854 |
59.979259 |
Plot 3, “Distribution of client type per day of week” shows
distribution of both client types throughout the week. We can observe
relative stability of usage from the member clients on weekdays and a
drop in usage on the weekend. Causal clients show a trend of increased
usage though the week, peaking on the weekend. This might further
suggest that the member clients are using the service to commute to and
from work.
dfv7 %>%
group_by(member_casual, start_hour) %>%
summarise(count_hour = length(ride_id)) %>%
ggplot(aes(x=start_hour, y=count_hour, color = member_casual))+
geom_line( size = 3.5, alpha =.85) +
theme_economist()+
scale_color_manual(values = c("#B67B49", "#4984B6"))+
labs(title = "Distribution of clients by start hour",
subtitle = "Time period (Aug 21 - Jul 22)",
x = "Start Hour (24H Time)",
y = "Count")+
theme(legend.title = element_blank(),
axis.title = element_text(face = "bold"),
axis.text = element_text(face = "bold", size =13),
axis.text.x = element_text(size=13, vjust = 2),
axis.text.y = element_text(hjust =1.2, vjust = -.1),
title = element_text(face = "bold", size = 14),
axis.title.y = element_text(vjust = 3, size = 14),
axis.title.x = element_text(vjust = -2, size = 14),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"))+
scale_y_continuous(labels = comma)

Plot 4 is the “Distribution of both client types by start hour”. We
can see the highest period of activity for both member and casual
clients is between 3pm - 8pm. Member clients have a noticeable increase
relative to the casual clients at the time periods of 6am - 9am and
3pm-8pm.
Average ride times
average_ride_times_biketype <- dfv7 %>%
group_by(member_casual,rideable_type)%>%
summarize(ave_type=mean(ride_time))
average_ride_times_biketype$ave_type <- round(average_ride_times_biketype$ave_type, digits = 2)
ggplot(average_ride_times_biketype,aes(x=rideable_type,y = ave_type,
fill=member_casual))+
theme_economist()+
geom_col(position = "dodge")+
geom_text(aes(x = rideable_type, y = ave_type, label = ave_type, size = 14),
stat = "sum", position = position_dodge(.9),vjust=-.2, show.legend = F)+
labs(title = "Average ride time for client and bike types",
subtitle = "Time period (Aug 21 - Jul 22)",
x = "Bike Type",
y = "Average ride minutes")+
scale_fill_manual(values = c("#B67B49", "#4984B6"))+
theme(legend.title = element_blank(),
axis.title = element_text(face = "bold", size = 10),
title = element_text(face = "bold", size = 12),
axis.title.y = element_text(vjust = 3, size = 12, face = "bold"),
axis.title.x = element_text(size = 12),
axis.text.x = element_text(size = 10.5, face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"))+
scale_x_discrete(labels = c("classic_bike" = " Classic Bike", "electric_bike" = "Electric Bike"))

#table showing average ride times.
Plot 6 shows the average ride time for each client and bike type. We
can see a higher average ride time for casual clients compared with
member clients. Lets explore how this trend plays out by the month and
weekday .
dfv7$month_year <- format(as.Date(dfv7$date),"%h-%y")
dfv7$month_year <- ordered(dfv7$month_year,levels=c ("Jul-22",
"Jun-22",
"May-22",
"Apr-22",
"Mar-22",
"Feb-22",
"Jan-22",
"Dec-21",
"Nov-21",
"Oct-21",
"Sep-21",
"Aug-21" ))
month_year_ride <- dfv7 %>%
group_by(month_year, member_casual, rideable_type) %>%
summarize(count = n(),
ave = mean(ride_time))
ggplot(month_year_ride, mapping = aes(x = month_year, y = ave,
fill = member_casual))+
geom_col(position = "dodge")+
theme_economist()+
scale_fill_manual(values = c("#B67B49", "#4984B6"))+
labs(title = "classic")+
facet_wrap(~rideable_type)+
labs(title = "Average ride time by month for client and bike types",
subtitle = "Time period (Aug 21 - Jul - 22)",
y = "Average ride minutes",
x = "Month")+
theme(legend.title = element_blank(),
axis.title.y = element_text(vjust = 3,size = 13),
axis.title.x = element_text(vjust = -1),
title = element_text(face = "bold", size = 13),
axis.text = element_text(size = 12, face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"))+
coord_flip()

Plot 7 shows the average ride time in minutes for each month for both
client types. The chart on the left represents the classic bike, on the
right the electric bike.
#col
dfv7$weekday <-ordered(dfv7$weekday,levels=c("Sunday","Saturday","Friday",
"Thursday","Wednesday","Tuesday",
"Monday"))
dfv7 %>%
group_by(weekday,rideable_type,member_casual) %>%
summarize(ave_ride_week = mean(ride_time),
count_ = length(ride_id)) %>%
ggplot(mapping = aes(x = weekday, y = ave_ride_week,fill = member_casual))+
geom_col(position = "dodge")+
facet_wrap(~rideable_type)+
labs(title = "Average ride time by weekday for client and bike type",
subtitle = "Time period (Aug 21 - Jul 22)",
x="Weekday",
y="Average ride minutes")+
theme_economist()+
scale_fill_manual(values = c("#B67B49", "#4984B6"))+
theme(legend.title = element_blank(),
axis.title = element_text(face = "bold", size = 12),
title = element_text(face = "bold", size = 13),
axis.title.x = element_text(vjust = -1),
axis.title.y = element_text(size =13),
axis.text.y = element_text(hjust = 1.2),
axis.text = element_text(face = "bold"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_rect(fill = "whitesmoke"),
plot.background = element_rect(fill = "whitesmoke"))+
coord_flip()

Plot 8 shows the average ride time by weekday for both client and
bike types. We can see the average ride time for member clients
throughout the week is around 13 mins for both bike types, peaking
slightly on the weekend. Casual clients have a higher overall ride time
than that of members about 18 mins with average ride time on the classic
bike`s being the longest.
Geo Location
library(sp)
library(leaflet)
#Random Sample
random_sample <- sample_n(dfv7,100000)
random_sample$start_lat_r <- round(random_sample$start_lat, digits = 3)
random_sample$start_lon_r <- round(random_sample$start_lng, digits = 3)
#Subset of each client tyoe
ran_cas <- random_sample %>%
filter(member_casual == "casual")
ran_mem <- random_sample %>%
filter(member_casual == "member")
factpal <- colorFactor(topo.colors(2), random_sample$rideable_type)
#Leaflet
m <- leaflet() %>%
setView(lat = 41.888,lng = -87.629, zoom=11)%>%
addTiles(group = "OSM (default)")%>%
addProviderTiles(providers$Stamen.TonerLite, group = "Light") %>%
addProviderTiles(providers$CartoDB.DarkMatter, group = "Dark") %>%
addCircles(data = ran_cas, lng = ~start_lon_r,
lat = ~start_lat_r,
popup = ~paste("<h3> Info:</h3>","<b>Bike Type:</b>",
rideable_type,"<br>","<b>Ride Time:</b>",
ride_time, sep = " "),color = "#B67B49",opacity = .01,
radius = 150, group = "Casual",fillOpacity = .01,stroke = F) %>%
addCircles(data = ran_mem, lng = ~start_lon_r,
lat = ~start_lat_r,
popup = ~paste("<h3> Info:</h3>","<b>Bike Type:</b>",
rideable_type,"<br>","<b>Ride Time:</b>",
ride_time, sep = " "),color = "#4984B6",
opacity = .01, radius = 150,group = "Member",fillOpacity = .01,
stroke = F) %>%
addCircleMarkers(data = ran_cas, lng = ~start_lon_r,
lat = ~start_lat_r,
popup = ~paste("<h3> Info:</h3>","<b>Bike Type:</b>",
rideable_type,"<br>","<b>Ride Time:</b>",
ride_time, sep = " "),
clusterOptions = markerClusterOptions(), group = "Casual_Cluster")%>%
addCircleMarkers(data = ran_mem, lng = ~start_lon_r,
lat = ~start_lat_r,
popup = ~paste("<h3> Info:</h3>","<b>Bike Type:</b>",
rideable_type,"<br>","<b>Ride Time:</b>",
ride_time, sep = " "),
clusterOptions = markerClusterOptions(), group = "Member_Cluster") %>%
addCircleMarkers(data = random_sample,lng = ~start_lon_r,lat=~start_lat_r,
color = ~factpal(rideable_type) ,
popup = ~paste("<h3> Info:</h3>","<b>Bike Type:</b>",
rideable_type,"<br>","<b>Ride Time:</b>",
ride_time, sep = " "),
group = "Bike Type",opacity = 0.01,fillOpacity = 0.01, stroke = F) %>%
addLegend(pal = factpal, position = "bottomright", values= random_sample$rideable_type, group = "Bike Type")%>%
addLayersControl(overlayGroups = c("Casual","Casual_Cluster", "Member","Member_Cluster","Bike Type"),
baseGroups = c("OSM (default)", "Light", "Dark"),
options=layersControlOptions(collapsed = F)) %>%
hideGroup(c("Member", "Member_Cluster", "Bike Type"))
#Call map
m