library(tidyverse)
library(skimr)
library(janitor)
library(scales)
library(ggpubr)
library(cowplot)
# Load each month's trip data into its own variable for easier manipulation,
# keeping only the ride ID, ride type, trip start/end times, and membership type.
# Read one monthly Divvy trip file, returning only the five columns this
# analysis uses: ride ID, ride type, start/end timestamps and membership type.
#   path - path to a monthly "*-divvy-tripdata.csv" file
# Selecting the columns in read_csv() itself (col_select) replaces the previous
# read-everything-then-subset() pattern that was repeated for every month.
read_trip_columns <- function(path) {
  read_csv(
    path,
    col_select = c(ride_id, rideable_type, started_at, ended_at, member_casual)
  )
}

jan <- read_trip_columns("202401-divvy-tripdata.csv")
feb <- read_trip_columns("202402-divvy-tripdata.csv")
mar <- read_trip_columns("202403-divvy-tripdata.csv")
apr <- read_trip_columns("202404-divvy-tripdata.csv")
may <- read_trip_columns("202405-divvy-tripdata.csv")
jun <- read_trip_columns("202406-divvy-tripdata.csv")
jul <- read_trip_columns("202407-divvy-tripdata.csv")
aug <- read_trip_columns("202408-divvy-tripdata.csv")
sep <- read_trip_columns("202409-divvy-tripdata.csv")
oct <- read_trip_columns("202410-divvy-tripdata.csv")
nov <- read_trip_columns("202411-divvy-tripdata.csv")
dec <- read_trip_columns("202412-divvy-tripdata.csv")
# Stack the monthly data frames into calendar quarters.
# Row-bind each run of three consecutive months into one data frame per
# calendar quarter of 2024.
Q1_2024 <- bind_rows(list(jan, feb, mar))
Q2_2024 <- bind_rows(list(apr, may, jun))
Q3_2024 <- bind_rows(list(jul, aug, sep))
Q4_2024 <- bind_rows(list(oct, nov, dec))
# Add a `ride_length` column: the difference between `ended_at` and `started_at`.
# Add `ride_length` = trip duration (ended_at - started_at) to every quarter.
# The units are pinned to seconds: a bare `ended_at - started_at` lets R choose
# the difftime units from the data (secs, mins, hours or days), which would
# silently break the later divide-by-60 conversion to minutes.
Q1_2024 <- Q1_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
Q2_2024 <- Q2_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
Q3_2024 <- Q3_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
Q4_2024 <- Q4_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
# Add a `day_of_week` column giving the weekday on which each trip started.
# Return the English weekday name ("Monday", ..., "Sunday") for each timestamp.
#   timestamps - date-time vector (here the POSIXct `started_at` column)
# weekdays() returns names in the session's locale, so on a non-English locale
# they would not match the English factor levels used by the weekday plots.
# Indexing a fixed English vector keeps the result locale-independent.
english_weekday <- function(timestamps) {
  day_names <- c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
  # POSIXlt's `wday` field counts from Sunday = 0, hence the + 1L.
  day_names[as.POSIXlt(timestamps)$wday + 1L]
}

# Add `day_of_week` to every quarter. The column is referenced directly inside
# mutate() rather than through `Q1_2024$...`, so each step depends only on the
# data frame being piped in.
Q1_2024 <- Q1_2024 %>% mutate(day_of_week = english_weekday(started_at))
Q2_2024 <- Q2_2024 %>% mutate(day_of_week = english_weekday(started_at))
Q3_2024 <- Q3_2024 %>% mutate(day_of_week = english_weekday(started_at))
Q4_2024 <- Q4_2024 %>% mutate(day_of_week = english_weekday(started_at))
# Omit any trips with a zero or negative ride_length, which are likely data-entry errors.
# Keep only the trips whose ride_length is strictly greater than zero.
Q1_2024 <- filter(Q1_2024, ride_length > 0)
Q2_2024 <- filter(Q2_2024, ride_length > 0)
Q3_2024 <- filter(Q3_2024, ride_length > 0)
Q4_2024 <- filter(Q4_2024, ride_length > 0)
# Convert ride_length from `difftime` to `numeric` so it can be converted to minutes.
# Convert ride_length from difftime to a plain number of seconds.
# `units = "secs"` makes the result independent of whatever unit the difftime
# currently carries; a bare as.numeric() would return minutes, hours or days
# unchanged if R had picked one of those units, and the later division by 60
# assumes seconds.
Q1_2024$ride_length <- as.numeric(Q1_2024$ride_length, units = "secs")
Q2_2024$ride_length <- as.numeric(Q2_2024$ride_length, units = "secs")
Q3_2024$ride_length <- as.numeric(Q3_2024$ride_length, units = "secs")
Q4_2024$ride_length <- as.numeric(Q4_2024$ride_length, units = "secs")
# Convert `ride_length` to minutes (dividing by 60 assumes the values are in seconds) for a more manageable scale.
# Divide ride_length by 60 and round to one decimal place, in every quarter.
Q1_2024 <- Q1_2024 %>% mutate(ride_length = round(ride_length / 60, 1))
Q2_2024 <- Q2_2024 %>% mutate(ride_length = round(ride_length / 60, 1))
Q3_2024 <- Q3_2024 %>% mutate(ride_length = round(ride_length / 60, 1))
Q4_2024 <- Q4_2024 %>% mutate(ride_length = round(ride_length / 60, 1))
# Plot the mean `ride_length` of casual riders and members for
# each quarter of 2024.
# Build a bar chart of mean ride length by membership type for one quarter.
#   trips         - data frame with `member_casual` and `ride_length` columns
#   quarter_label - text appended to the plot title, e.g. "Q1 2024"
# Returns the ggplot object (nothing is drawn until it is printed/arranged).
# One helper replaces four copy-pasted plot definitions that differed only in
# their data frame and title suffix.
plot_mean_ride_length <- function(trips, quarter_label) {
  ggplot(trips, aes(x = member_casual, y = ride_length)) +
    # stat = "summary" with fun = "mean" draws each bar at the group mean.
    geom_bar(stat = "summary", fun = "mean") +
    labs(
      y = "Mean Ride Length (minutes)",
      x = "Membership Type",
      title = paste(
        "Mean Ride Lengths Between Casual and Membered Riders",
        quarter_label
      )
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 11),
      plot.margin = margin(10, 10, 10, 10)
    )
}

# Q1
plt_Q1_time <- plot_mean_ride_length(Q1_2024, "Q1 2024")
# Q2
plt_Q2_time <- plot_mean_ride_length(Q2_2024, "Q2 2024")
# Q3
plt_Q3_time <- plot_mean_ride_length(Q3_2024, "Q3 2024")
# Q4
plt_Q4_time <- plot_mean_ride_length(Q4_2024, "Q4 2024")
# Arrange all four plots in a 2x2 grid.
# Lay the four mean-ride-length plots out on a 2x2 grid with equal column
# widths.
ggarrange(
  plotlist = list(plt_Q1_time, plt_Q2_time, plt_Q3_time, plt_Q4_time),
  ncol = 2,
  nrow = 2,
  widths = c(1.2, 1.2)
)
# Next, plot how casual riders and members differ in the type of ride they use (classic bike, electric bike, electric scooter).
# Fixed fill colour for each rideable type, shared by all four quarterly plots
# so that a given type has the same colour in every panel.
colors <- c(
  "classic_bike" = "#1b9e77",
  "electric_bike" = "#d95f02",
  "electric_scooter" = "#7570b3"
)

# Build a stacked bar chart of ride counts by membership type, with each bar
# split by rideable type, for one quarter.
#   trips         - data frame with `member_casual` and `rideable_type` columns
#   quarter_label - text appended to the plot title, e.g. "Q1 2024"
#   fill_colors   - named colour vector keyed by rideable type
# Returns the ggplot object.
# One helper replaces four copy-pasted plot definitions that differed only in
# their data frame and title suffix.
# NOTE(review): the title says "Proportion" but the bars show stacked counts
# (y = "Count", position = "stack") - confirm which is intended.
plot_ride_types <- function(trips, quarter_label, fill_colors = colors) {
  ggplot(trips, aes(x = member_casual, fill = rideable_type)) +
    geom_bar(stat = "count", position = "stack") +
    # Show thousands separators on the y axis instead of scientific notation.
    scale_y_continuous(labels = comma) +
    labs(
      y = "Count",
      x = "Membership Type",
      title = paste(
        "Proportion of Ride Types Between Casual and Membered Riders",
        quarter_label
      )
    ) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 11),
      plot.margin = margin(10, 10, 10, 10)
    ) +
    scale_fill_manual(values = fill_colors)
}

# Q1
plt_Q1_ride <- plot_ride_types(Q1_2024, "Q1 2024")
# Q2
plt_Q2_ride <- plot_ride_types(Q2_2024, "Q2 2024")
# Q3
plt_Q3_ride <- plot_ride_types(Q3_2024, "Q3 2024")
# Q4
plt_Q4_ride <- plot_ride_types(Q4_2024, "Q4 2024")
# Arrange all four plots in a 2x2 grid.
# Lay the four ride-type plots out on a 2x2 grid, with each legend placed to
# the right.
ggarrange(
  plotlist = list(plt_Q1_ride, plt_Q2_ride, plt_Q3_ride, plt_Q4_ride),
  ncol = 2,
  nrow = 2,
  legend = "right"
)
# Next, plot how the number of rides taken by members and casual riders
# differs across the days of the week.
# Define the weekday order:
# Display order for the x axis: Monday first, Sunday last.
weekday_order <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)

# Build a dodged bar chart of ride counts per weekday, with side-by-side bars
# for each membership type, for one quarter.
#   trips       - data frame with `day_of_week` and `member_casual` columns
#   plot_title  - plot title, e.g. "Q1 2024"
#   day_levels  - weekday names in the order they should appear on the x axis
# Returns the ggplot object.
# One helper replaces four copy-pasted plot definitions that differed only in
# their data frame and title.
plot_rides_by_weekday <- function(trips, plot_title,
                                  day_levels = weekday_order) {
  # Converting to a factor with explicit levels fixes the x-axis order;
  # otherwise the day names would be sorted alphabetically.
  ggplot(
    trips,
    aes(x = factor(day_of_week, levels = day_levels), fill = member_casual)
  ) +
    geom_bar(position = "dodge") +
    labs(title = plot_title, x = "Day of Week", y = "Number of Rides") +
    theme_minimal() +
    # Tilt the day labels so they do not overlap in the small grid panels.
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

# Q1
plt_Q1_day <- plot_rides_by_weekday(Q1_2024, "Q1 2024")
# Q2
plt_Q2_day <- plot_rides_by_weekday(Q2_2024, "Q2 2024")
# Q3
plt_Q3_day <- plot_rides_by_weekday(Q3_2024, "Q3 2024")
# Q4
plt_Q4_day <- plot_rides_by_weekday(Q4_2024, "Q4 2024")
# Arrange all four plots in a 2x2 grid.
# Lay the four weekday plots out on a 2x2 grid sharing a single legend on the
# right.
ggarrange(
  plotlist = list(plt_Q1_day, plt_Q2_day, plt_Q3_day, plt_Q4_day),
  ncol = 2,
  nrow = 2,
  common.legend = TRUE,
  legend = "right"
)