Loaded Libraries

library(tidyverse)
library(skimr)
library(janitor)
library(scales)
library(ggpubr)
library(cowplot)

Assigned monthly data into variables for easier manipulation
Included only ride id, ride type, membership and trip times

# Columns kept from every monthly trip file: ride id, ride type, trip start
# and end times, and membership type.
trip_columns <- c(
  "ride_id", "rideable_type", "started_at", "ended_at", "member_casual"
)

# Read one monthly trip CSV and return a tibble holding only `trip_columns`.
# `col_select` lets readr skip the unused columns while reading instead of
# loading them all and discarding them afterwards. `all_of()` makes the read
# fail if any expected column is missing from the file.
read_trip_columns <- function(path) {
  read_csv(path, col_select = all_of(trip_columns))
}

jan <- read_trip_columns("202401-divvy-tripdata.csv")
feb <- read_trip_columns("202402-divvy-tripdata.csv")
mar <- read_trip_columns("202403-divvy-tripdata.csv")
apr <- read_trip_columns("202404-divvy-tripdata.csv")
may <- read_trip_columns("202405-divvy-tripdata.csv")
jun <- read_trip_columns("202406-divvy-tripdata.csv")
jul <- read_trip_columns("202407-divvy-tripdata.csv")
aug <- read_trip_columns("202408-divvy-tripdata.csv")
sep <- read_trip_columns("202409-divvy-tripdata.csv")
oct <- read_trip_columns("202410-divvy-tripdata.csv")
nov <- read_trip_columns("202411-divvy-tripdata.csv")
dec <- read_trip_columns("202412-divvy-tripdata.csv")

Cleaning and Processing

Stacked the monthly data sets into one data set per quarter of 2024

# Row-bind each run of three consecutive months into one quarterly table.
Q1_2024 <- list(jan, feb, mar) %>% bind_rows()
Q2_2024 <- list(apr, may, jun) %>% bind_rows()
Q3_2024 <- list(jul, aug, sep) %>% bind_rows()
Q4_2024 <- list(oct, nov, dec) %>% bind_rows()

Included a new column called ‘ride_length’ by calculating the difference between ‘ended_at’ and ‘started_at’

# Add `ride_length` = ended_at - started_at as a difftime in seconds.
# Plain `ended_at - started_at` lets R choose the unit automatically from the
# data (secs, mins, hours, ...), so the unit could differ between quarters.
# Pinning it to seconds keeps the later division by 60 a valid conversion
# to minutes.
Q1_2024 <- Q1_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
Q2_2024 <- Q2_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
Q3_2024 <- Q3_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))
Q4_2024 <- Q4_2024 %>%
  mutate(ride_length = difftime(ended_at, started_at, units = "secs"))

Included a new column called ‘day_of_week’ holding the name of the weekday on which each trip started (derived from ‘started_at’)

# English weekday names in ISO order (position 1 = Monday).
iso_weekday_names <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"
)

# Return `trips` with a `day_of_week` column holding the English weekday name
# of `started_at`.
# weekdays() returns names in the session locale, which would not match the
# English weekday levels used for plotting when the locale is not English.
# format(x, "%u") gives the ISO weekday number as text ("1" = Monday), which
# is locale-independent and is used here to index the English names.
add_day_of_week <- function(trips) {
  # Fail loudly if the timestamps were not parsed as dates/date-times.
  stopifnot(inherits(trips$started_at, c("POSIXt", "Date")))
  trips %>%
    mutate(
      day_of_week = iso_weekday_names[as.integer(format(started_at, "%u"))]
    )
}

Q1_2024 <- add_day_of_week(Q1_2024)
Q2_2024 <- add_day_of_week(Q2_2024)
Q3_2024 <- add_day_of_week(Q3_2024)
Q4_2024 <- add_day_of_week(Q4_2024)

Omitted any trips with a zero or negative ride_length, which are likely the result of input errors

# Keep only trips whose ride_length is strictly greater than zero.
# filter() also drops rows where the comparison evaluates to NA.
Q1_2024 <- filter(Q1_2024, ride_length > 0)
Q2_2024 <- filter(Q2_2024, ride_length > 0)
Q3_2024 <- filter(Q3_2024, ride_length > 0)
Q4_2024 <- filter(Q4_2024, ride_length > 0)

Converted ride_length from ‘difftime’ to ‘numeric’ to allow for conversion to minutes

# Convert ride_length from difftime to a plain numeric count of seconds.
# Passing `units = "secs"` makes the conversion explicit: without it,
# as.numeric() returns the value in whatever unit the difftime happens to
# carry, which would silently break the later division by 60.
Q1_2024$ride_length <- as.numeric(Q1_2024$ride_length, units = "secs")
Q2_2024$ride_length <- as.numeric(Q2_2024$ride_length, units = "secs")
Q3_2024$ride_length <- as.numeric(Q3_2024$ride_length, units = "secs")
Q4_2024$ride_length <- as.numeric(Q4_2024$ride_length, units = "secs")

Converted ‘ride_length’ from seconds to minutes (rounded to one decimal place) for more manageable scales

# Divide ride_length by 60 and round to one decimal place.
# Assumes ride_length is a numeric number of seconds at this point.
Q1_2024 <- Q1_2024 %>% mutate(ride_length = round(ride_length / 60, digits = 1))
Q2_2024 <- Q2_2024 %>% mutate(ride_length = round(ride_length / 60, digits = 1))
Q3_2024 <- Q3_2024 %>% mutate(ride_length = round(ride_length / 60, digits = 1))
Q4_2024 <- Q4_2024 %>% mutate(ride_length = round(ride_length / 60, digits = 1))

Data Visualisation

Difference in trip times

Plotted the average ‘ride_length’ of casual riders and members for each quarter of 2024
Q1

# Q1: one bar per membership type; bar height is the mean of ride_length.
plt_Q1_time <- Q1_2024 %>%
  ggplot(
    mapping = aes(x = member_casual, y = ride_length, fill = member_casual)
  ) +
  # stat = "summary" with fun = "mean" collapses each group to its mean
  geom_bar(stat = "summary", fun = "mean") +
  labs(
    title = "Mean Ride Lengths Between Casual and Membered Riders Q1 2024",
    x = "Membership Type",
    y = "Mean Ride Length (minutes)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Q2

# Q2: one bar per membership type; bar height is the mean of ride_length.
plt_Q2_time <- Q2_2024 %>%
  ggplot(
    mapping = aes(x = member_casual, y = ride_length, fill = member_casual)
  ) +
  # stat = "summary" with fun = "mean" collapses each group to its mean
  geom_bar(stat = "summary", fun = "mean") +
  labs(
    title = "Mean Ride Lengths Between Casual and Membered Riders Q2 2024",
    x = "Membership Type",
    y = "Mean Ride Length (minutes)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Q3

# Q3: one bar per membership type; bar height is the mean of ride_length.
plt_Q3_time <- Q3_2024 %>%
  ggplot(
    mapping = aes(x = member_casual, y = ride_length, fill = member_casual)
  ) +
  # stat = "summary" with fun = "mean" collapses each group to its mean
  geom_bar(stat = "summary", fun = "mean") +
  labs(
    title = "Mean Ride Lengths Between Casual and Membered Riders Q3 2024",
    x = "Membership Type",
    y = "Mean Ride Length (minutes)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Q4

# Q4: one bar per membership type; bar height is the mean of ride_length.
plt_Q4_time <- Q4_2024 %>%
  ggplot(
    mapping = aes(x = member_casual, y = ride_length, fill = member_casual)
  ) +
  # stat = "summary" with fun = "mean" collapses each group to its mean
  geom_bar(stat = "summary", fun = "mean") +
  labs(
    title = "Mean Ride Lengths Between Casual and Membered Riders Q4 2024",
    x = "Membership Type",
    y = "Mean Ride Length (minutes)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Arranged all plots in a 2x2 grid

# Lay the four quarterly mean-ride-length plots out on a 2 x 2 grid
# (both columns get the same relative width) and display the result.
plt_time_arrange <- ggarrange(
  plotlist = list(plt_Q1_time, plt_Q2_time, plt_Q3_time, plt_Q4_time),
  ncol = 2,
  nrow = 2,
  widths = c(1.2, 1.2)
)
plt_time_arrange

Difference in Classic vs. Electric Bike usage

Next plotted how casual riders and members differed in transportation methods (classic, electric, electric_scooter)

# Fixed fill colour per rideable_type so every quarterly plot uses the same
# colour for the same ride type.
colors_bike <- c(
  "classic_bike" = "#1b9e77",
  "electric_bike" = "#d95f02",
  "electric_scooter" = "#7570b3"
)

Q1

# Q1: number of rides per membership type, stacked by rideable_type.
# NOTE(review): the title says "Proportion" but the bars show stacked counts
# (position = "stack", y = "Count") — confirm which one is intended.
plt_Q1_ride <- Q1_2024 %>%
  ggplot(mapping = aes(x = member_casual, fill = rideable_type)) +
  geom_bar(stat = "count", position = "stack") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colors_bike) +
  labs(
    title = "Proportion of Ride Types Between Casual and Membered Riders Q1 2024",
    x = "Membership Type",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Q2

# Q2: number of rides per membership type, stacked by rideable_type.
# NOTE(review): the title says "Proportion" but the bars show stacked counts
# (position = "stack", y = "Count") — confirm which one is intended.
plt_Q2_ride <- Q2_2024 %>%
  ggplot(mapping = aes(x = member_casual, fill = rideable_type)) +
  geom_bar(stat = "count", position = "stack") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colors_bike) +
  labs(
    title = "Proportion of Ride Types Between Casual and Membered Riders Q2 2024",
    x = "Membership Type",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Q3

# Q3: number of rides per membership type, stacked by rideable_type.
# NOTE(review): the title says "Proportion" but the bars show stacked counts
# (position = "stack", y = "Count") — confirm which one is intended.
plt_Q3_ride <- Q3_2024 %>%
  ggplot(mapping = aes(x = member_casual, fill = rideable_type)) +
  geom_bar(stat = "count", position = "stack") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colors_bike) +
  labs(
    title = "Proportion of Ride Types Between Casual and Membered Riders Q3 2024",
    x = "Membership Type",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Q4

# Q4: number of rides per membership type, stacked by rideable_type.
# NOTE(review): the title says "Proportion" but the bars show stacked counts
# (position = "stack", y = "Count") — confirm which one is intended.
plt_Q4_ride <- Q4_2024 %>%
  ggplot(mapping = aes(x = member_casual, fill = rideable_type)) +
  geom_bar(stat = "count", position = "stack") +
  scale_y_continuous(labels = comma) +
  scale_fill_manual(values = colors_bike) +
  labs(
    title = "Proportion of Ride Types Between Casual and Membered Riders Q4 2024",
    x = "Membership Type",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 11),
    plot.margin = margin(t = 10, r = 10, b = 10, l = 10)
  )

Arranged all plots in a 2x2 grid

# Lay the four quarterly ride-type plots out on a 2 x 2 grid, each with its
# legend on the right, and display the result.
plt_ride_arrange <- ggarrange(
  plotlist = list(plt_Q1_ride, plt_Q2_ride, plt_Q3_ride, plt_Q4_ride),
  ncol = 2,
  nrow = 2,
  legend = "right"
)
plt_ride_arrange

Difference via day of week

Next, plotted how the number of rides on each day of the week differs between members and casual riders
Define weekday order:

# Factor levels for day_of_week so the x axis runs Monday through Sunday
# rather than alphabetically.
weekday_order <- c(
  "Monday",
  "Tuesday",
  "Wednesday",
  "Thursday",
  "Friday",
  "Saturday",
  "Sunday"
)

Q1

# Q1: number of rides per weekday, with side-by-side bars per membership type.
# day_of_week is turned into a factor so days follow weekday_order.
plt_Q1_day <- Q1_2024 %>%
  ggplot(
    mapping = aes(
      x = factor(day_of_week, levels = weekday_order),
      fill = member_casual
    )
  ) +
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Day of Week",
    y = "Number of Rides",
    title = "No. Rides per day Q1 2024"
  ) +
  theme_minimal() +
  # Tilt the day labels so they do not overlap in the 2 x 2 grid
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Q2

# Q2: number of rides per weekday, with side-by-side bars per membership type.
# day_of_week is turned into a factor so days follow weekday_order.
plt_Q2_day <- Q2_2024 %>%
  ggplot(
    mapping = aes(
      x = factor(day_of_week, levels = weekday_order),
      fill = member_casual
    )
  ) +
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Day of Week",
    y = "Number of Rides",
    title = "No. Rides per day Q2 2024"
  ) +
  theme_minimal() +
  # Tilt the day labels so they do not overlap in the 2 x 2 grid
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Q3

# Q3: number of rides per weekday, with side-by-side bars per membership type.
# day_of_week is turned into a factor so days follow weekday_order.
plt_Q3_day <- Q3_2024 %>%
  ggplot(
    mapping = aes(
      x = factor(day_of_week, levels = weekday_order),
      fill = member_casual
    )
  ) +
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Day of Week",
    y = "Number of Rides",
    title = "No. Rides per day Q3 2024"
  ) +
  theme_minimal() +
  # Tilt the day labels so they do not overlap in the 2 x 2 grid
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Q4

# Q4: number of rides per weekday, with side-by-side bars per membership type.
# day_of_week is turned into a factor so days follow weekday_order.
plt_Q4_day <- Q4_2024 %>%
  ggplot(
    mapping = aes(
      x = factor(day_of_week, levels = weekday_order),
      fill = member_casual
    )
  ) +
  geom_bar(position = "dodge") +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Day of Week",
    y = "Number of Rides",
    title = "No. Rides per day Q4 2024"
  ) +
  theme_minimal() +
  # Tilt the day labels so they do not overlap in the 2 x 2 grid
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Arranged all plots in a 2x2 grid

# Lay the four quarterly day-of-week plots out on a 2 x 2 grid that shares a
# single legend placed on the right, then display the result.
plt_day_arrange <- ggarrange(
  plotlist = list(plt_Q1_day, plt_Q2_day, plt_Q3_day, plt_Q4_day),
  nrow = 2,
  ncol = 2,
  legend = "right",
  common.legend = TRUE
)
plt_day_arrange