N.B. * In the data, members are referred to as a “Subscriber” and casual riders are referred to as a “Customer”.
Summarize mean_trip_duration and group by user_type
# Mean trip duration for each user type.
# drop_na() is called without column arguments, so a row with an NA in any
# column is excluded before the mean is taken.
# NOTE(review): this summary reads bike_trips_2019_2020, whereas the later
# summaries read bike_trips_2019_2020_v2 -- confirm which table is intended.
bike_usage_usertype_avg <- bike_trips_2019_2020 %>%
  drop_na() %>%
  group_by(user_type) %>%
  summarise(
    mean_trip_duration = mean(trip_duration),
    .groups = "drop"
  )
# Render Table 1: give the columns presentation-friendly headers, then style
# the kable output as a non-full-width table inside a scroll box.
bike_usage_usertype_avg %>%
  rename(all_of(c(
    User_Type = "user_type",
    Mean_Trip_Duration = "mean_trip_duration"
  ))) %>%
  kable(caption = "Table 1: Mean Trip Duration by User Type") %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
| User_Type | Mean_Trip_Duration |
|---|---|
| Customer | 1822.938 |
| Subscriber | 659.095 |
# Graph 1: bar chart of mean trip duration per user type.
# mean_trip_duration is divided by 60 so the y axis matches its "minutes"
# label. The fill legend is titled "User Type" (instead of the raw column
# name) so it reads the same as the legends of Graphs 3 and 5.
ggplot(
  bike_usage_usertype_avg,
  aes(x = user_type, y = mean_trip_duration / 60, fill = user_type)
) +
  geom_col(width = 0.6) +
  labs(
    title = "Graph 1: Average Trip Duration by User Type",
    x = "User Type",
    y = "Mean Trip Duration (minutes)",
    fill = "User Type"
  ) +
  scale_fill_manual(
    values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")
  ) +
  theme_minimal()
Key Takeaway:
Possible reasons:
Summarize mean_trip_duration and group by user_type and year to compare
# Mean trip duration for each user type / year combination.
# drop_na() is called without column arguments, so a row with an NA in any
# column is excluded before the mean is taken.
bike_usage_usertype_year_avg <- bike_trips_2019_2020_v2 %>%
  drop_na() %>%
  group_by(user_type, year) %>%
  summarise(
    mean_trip_duration = mean(trip_duration),
    .groups = "drop"
  )
# Render Table 2: give the columns presentation-friendly headers, then style
# the kable output as a non-full-width table inside a scroll box.
bike_usage_usertype_year_avg %>%
  rename(all_of(c(
    User_Type = "user_type",
    Year = "year",
    Mean_Trip_Duration = "mean_trip_duration"
  ))) %>%
  kable(caption = "Table 2: Mean Trip Duration by User Type and Year") %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
| User_Type | Year | Mean_Trip_Duration |
|---|---|---|
| Customer | 2019 | 1911.8329 |
| Customer | 2020 | 1780.0174 |
| Subscriber | 2019 | 648.7644 |
| Subscriber | 2020 | 668.4237 |
# Graph 2: dodged bars of mean trip duration per year, split by user type.
# year is converted to a factor (and stored back on the summary table) so it
# is plotted as discrete categories rather than a continuous axis.
bike_usage_usertype_year_avg <- bike_usage_usertype_year_avg %>%
  mutate(year = as.factor(year))

# mean_trip_duration is divided by 60 so the y axis matches its "minutes"
# label. The fill legend is titled "User Type" (instead of the raw column
# name) so it reads the same as the legends of Graphs 3 and 5.
ggplot(
  bike_usage_usertype_year_avg,
  aes(x = year, y = mean_trip_duration / 60, fill = user_type)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Graph 2: Average Trip Duration by Year and User Type",
    x = "Year",
    y = "Mean Trip Duration (minutes)",
    fill = "User Type"
  ) +
  scale_fill_manual(
    values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")
  ) +
  theme_minimal()
Key Takeaways:
Count the number of rides for each day of the week and calculate the mean trip_duration for each user type on each day of the week
# Trip count and mean trip duration for each user type / day-of-week pair.
# drop_na() is called without column arguments, so a row with an NA in any
# column is excluded from both the count and the mean.
bike_usage_usertype_dow_count_avg <- bike_trips_2019_2020_v2 %>%
  drop_na() %>%
  group_by(user_type, day_of_week) %>%
  summarise(
    trip_count = n(),
    mean_trip_duration = mean(trip_duration),
    .groups = "drop"
  )
Use pivot_wider() and kable() to make the table more readable; use mutate() to add total usage time columns (trip count × mean trip duration, in seconds) for each user type to compare
# Reshape the day-of-week summary to one row per day, with a trip-count and
# a mean-duration column per user type, then derive total usage per user type.
#
# The counts and means are rounded first, so the totals below are products of
# the rounded values. mean_trip_duration is not unit-converted here, so the
# total_minutes_* columns are in the same unit as trip_duration (the Table 3
# headers label them as seconds) despite their names.
bike_usage_usertype_dow_count_avg_wide3 <- bike_usage_usertype_dow_count_avg %>%
  pivot_wider(
    names_from = user_type,
    values_from = c(trip_count, mean_trip_duration),
    names_sep = "_"
  ) %>%
  mutate(
    across(
      c(starts_with("trip_count_"), starts_with("mean_trip_duration_")),
      round
    ),
    total_minutes_Customer =
      trip_count_Customer * mean_trip_duration_Customer,
    total_minutes_Subscriber =
      trip_count_Subscriber * mean_trip_duration_Subscriber
  )
# Render Table 3: map the wide summary's column names to presentation headers
# (new name = existing column), then style the kable output as a
# non-full-width table inside a scroll box.
bike_usage_usertype_dow_count_avg_wide3 %>%
  rename(all_of(c(
    Day = "day_of_week",
    Customer_Trips = "trip_count_Customer",
    Subscriber_Trips = "trip_count_Subscriber",
    Customer_Avg_Sec = "mean_trip_duration_Customer",
    Subscriber_Avg_Sec = "mean_trip_duration_Subscriber",
    Customer_Total_Sec = "total_minutes_Customer",
    Subscriber_Total_Sec = "total_minutes_Subscriber"
  ))) %>%
  kable(
    caption = "Table 3: Bike Usage Metrics by User Type and Day of Week"
  ) %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
| Day | Customer_Trips | Subscriber_Trips | Customer_Avg_Sec | Subscriber_Avg_Sec | Customer_Total_Sec | Subscriber_Total_Sec |
|---|---|---|---|---|---|---|
| Monday | 6632 | 110350 | 1353 | 643 | 8973096 | 70955050 |
| Tuesday | 7881 | 127861 | 1572 | 650 | 12388932 | 83109650 |
| Wednesday | 8254 | 121816 | 1867 | 650 | 15410218 | 79180400 |
| Thursday | 7665 | 125137 | 1549 | 643 | 11873085 | 80463091 |
| Friday | 8378 | 115056 | 1618 | 637 | 13555604 | 73290672 |
| Saturday | 13297 | 59333 | 1984 | 709 | 26381248 | 42067097 |
| Sunday | 18434 | 60146 | 2170 | 754 | 40001780 | 45350084 |
# Graph 3: dodged bars of trip count per day of week, split by user type.
# The y axis uses comma-formatted labels for the large counts.
bike_usage_usertype_dow_count_avg %>%
  ggplot(aes(x = day_of_week, y = trip_count, fill = user_type)) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")
  ) +
  scale_y_continuous(labels = scales::label_comma()) +
  labs(
    title = "Graph 3: Trip Count by Day of Week and User Type",
    x = "Day of Week",
    y = "Trip Count",
    fill = "User Type"
  ) +
  theme_minimal()
# Graph 4: dodged bars of total usage time per day of week, split by user type.
#
# The total_minutes_* columns are trip_count * mean_trip_duration, and
# mean_trip_duration is in seconds (the other graphs divide it by 60 to get
# minutes). The stacked values are therefore seconds and are converted to
# minutes here so the plotted values agree with the "Total Minutes" axis label.
bike_usage_usertype_dow_count_avg_wide3 %>%
  rename(Day = day_of_week) %>%
  pivot_longer(
    cols = c(total_minutes_Customer, total_minutes_Subscriber),
    names_to = "User_Type",
    values_to = "Total_Seconds"
  ) %>%
  mutate(
    # Turn the source column name back into the user type label.
    User_Type = ifelse(
      User_Type == "total_minutes_Customer", "Customer", "Subscriber"
    ),
    Total_Minutes = Total_Seconds / 60
  ) %>%
  ggplot(aes(x = Day, y = Total_Minutes, fill = User_Type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Graph 4: Total Usage Minutes by Day and User Type",
    x = "Day of Week",
    y = "Total Minutes",
    # Legend title without the underscore of the column name.
    fill = "User Type"
  ) +
  scale_fill_manual(
    values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")
  ) +
  scale_y_continuous(labels = scales::label_comma()) +
  theme_minimal()
# Graph 5: one line (with point markers) per user type showing mean trip
# duration across the days of the week. mean_trip_duration is divided by 60
# so the y axis matches its "minutes" label; group = user_type connects the
# points of each user type into a single line.
ggplot(
  bike_usage_usertype_dow_count_avg,
  aes(
    x = day_of_week,
    y = mean_trip_duration / 60,
    color = user_type,
    group = user_type
  )
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  scale_color_manual(
    values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")
  ) +
  labs(
    title = "Graph 5: Average Trip Duration by Day and User Type",
    x = "Day of Week",
    y = "Mean Trip Duration (minutes)",
    color = "User Type"
  ) +
  theme_minimal()
Key Takeaways:
Average Trip Duration:
Count:
Key takeaways (total minutes):
Key Insights on Consumer Behaviour (Subscribers vs. Customers):
Trip Duration Differences:
Trip Frequency:
Total Usage Time:
Weekday vs. Weekend Behaviour:
Recommendations:
Target Weekday-Riding Customers with promotional marketing for subscription model
Serve repeat Customers prompts to subscribe and personalized offers based on their consumption patterns
Target recreational weekend customers with promotional material about using bikes during the week, promote commuter usage
Future Data Collection Suggestions: