Goals:

  1. Calculate the average trip duration for members (Subscribers) and casual riders (Customers)
  2. Compare difference in usage between years
  3. Calculate the number of trips & average trip length for different user types on different days of the week

N.B. * In the data, members are referred to as a “Subscriber” and casual riders are referred to as a “Customer”.

1. Calculate the average trip_duration for members (Subscribers) and casual riders (Customers) & compare across years

Summarize mean_trip_duration and group by user_type


# Mean trip duration per user type (one row per user_type).
# NOTE: drop_na() is called without column arguments, so a row is discarded
# when ANY of its columns is NA, not only user_type or trip_duration.
bike_usage_usertype_avg <- bike_trips_2019_2020 %>%
  drop_na() %>%
  group_by(user_type) %>%
  summarise(
    mean_trip_duration = mean(trip_duration),
    .groups = "drop"
  )

# Render Table 1: presentation-friendly column names, then kable styling.
bike_usage_usertype_avg %>%
  rename(
    all_of(c(
      User_Type = "user_type",
      Mean_Trip_Duration = "mean_trip_duration"
    ))
  ) %>%
  kable(caption = "Table 1: Mean Trip Duration by User Type") %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
Table 1: Mean Trip Duration by User Type
User_Type Mean_Trip_Duration
Customer 1822.938
Subscriber 659.095


# Graph 1: mean trip duration per user type.
# mean_trip_duration is divided by 60 so the y axis matches its "minutes" label.
# The fill legend title is set explicitly so it reads "User Type" (as in
# Graphs 3 and 5) instead of the raw column name `user_type`.
bike_usage_usertype_avg %>%
  ggplot(aes(x = user_type, y = mean_trip_duration / 60, fill = user_type)) +
  geom_col(width = 0.6) +
  labs(
    title = "Graph 1: Average Trip Duration by User Type",
    x = "User Type",
    y = "Mean Trip Duration (minutes)",
    fill = "User Type"
  ) +
  scale_fill_manual(values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")) +
  theme_minimal()


Key Takeaway:

Possible reasons:

2. Compare difference in usage between years

Summarize mean_trip_duration and group by user_type and year to compare


# Mean trip duration for every user_type / year combination.
# NOTE: drop_na() is called without column arguments, so a row is discarded
# when ANY of its columns is NA.
bike_usage_usertype_year_avg <- bike_trips_2019_2020_v2 %>%
  drop_na() %>%
  group_by(user_type, year) %>%
  summarise(
    mean_trip_duration = mean(trip_duration),
    .groups = "drop"
  )

# Render Table 2: presentation-friendly column names, then kable styling.
bike_usage_usertype_year_avg %>%
  rename(
    all_of(c(
      User_Type = "user_type",
      Year = "year",
      Mean_Trip_Duration = "mean_trip_duration"
    ))
  ) %>%
  kable(caption = "Table 2: Mean Trip Duration by User Type and Year") %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
Table 2: Mean Trip Duration by User Type and Year
User_Type Year Mean_Trip_Duration
Customer 2019 1911.8329
Customer 2020 1780.0174
Subscriber 2019 648.7644
Subscriber 2020 668.4237


# Convert year to a factor (in place, on the summary table) so it is plotted
# as a discrete x axis rather than a continuous one.
bike_usage_usertype_year_avg <- bike_usage_usertype_year_avg %>%
  mutate(year = as.factor(year))

# Graph 2: mean trip duration per year, bars dodged by user type.
# mean_trip_duration is divided by 60 so the y axis matches its "minutes" label.
# The fill legend title is set explicitly so it reads "User Type" (as in
# Graphs 3 and 5) instead of the raw column name `user_type`.
bike_usage_usertype_year_avg %>%
  ggplot(aes(x = year, y = mean_trip_duration / 60, fill = user_type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Graph 2: Average Trip Duration by Year and User Type",
    x = "Year",
    y = "Mean Trip Duration (minutes)",
    fill = "User Type"
  ) +
  scale_fill_manual(values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")) +
  theme_minimal()


Key Takeaways:

3. Calculate the number of trips & average trip length for different user types on different days of the week

Count the number of trips for each day of the week, and calculate the average trip_duration for each user type on each day of the week

# Trip count and mean trip duration for every user_type / day_of_week pair.
# NOTE: drop_na() is called without column arguments, so a row is discarded
# when ANY of its columns is NA.
bike_usage_usertype_dow_count_avg <- bike_trips_2019_2020_v2 %>%
  drop_na() %>%
  group_by(user_type, day_of_week) %>%
  summarise(
    trip_count = n(),
    mean_trip_duration = mean(trip_duration),
    .groups = "drop"
  )

Use pivot_wider() and kable() to make the table more readable, and use mutate() to add total usage time columns (trip count × mean trip duration, in seconds) for each user type to compare


# Wide, one-row-per-weekday version of the day-of-week summary.
#
# Columns produced (suffix = user type):
#   trip_count_*          number of trips
#   mean_trip_duration_*  mean trip duration, rounded to a whole number
#   total_minutes_*       trip_count * mean_trip_duration, i.e. total usage
#                         time. NOTE: despite the column name, the value is in
#                         the same unit as trip_duration (the table below
#                         labels it as seconds); the name is kept because
#                         later chunks refer to it.
#
# Totals are computed from the UNROUNDED means and rounded afterwards.
# Multiplying by an already-rounded mean would shift each total by up to
# 0.5 * trip_count.
bike_usage_usertype_dow_count_avg_wide3 <- bike_usage_usertype_dow_count_avg %>%
  pivot_wider(
    names_from = user_type,
    values_from = c(trip_count, mean_trip_duration),
    names_sep = "_"
  ) %>%
  mutate(
    total_minutes_Customer = trip_count_Customer * mean_trip_duration_Customer,
    total_minutes_Subscriber = trip_count_Subscriber * mean_trip_duration_Subscriber
  ) %>%
  mutate(
    across(
      starts_with(c("trip_count_", "mean_trip_duration_", "total_minutes_")),
      round
    )
  )

# Render Table 3: presentation-friendly column names, then kable styling.
# The total_minutes_* columns are labelled "*_Total_Sec" in the output.
bike_usage_usertype_dow_count_avg_wide3 %>%
  rename(
    all_of(c(
      Day = "day_of_week",
      Customer_Trips = "trip_count_Customer",
      Subscriber_Trips = "trip_count_Subscriber",
      Customer_Avg_Sec = "mean_trip_duration_Customer",
      Subscriber_Avg_Sec = "mean_trip_duration_Subscriber",
      Customer_Total_Sec = "total_minutes_Customer",
      Subscriber_Total_Sec = "total_minutes_Subscriber"
    ))
  ) %>%
  kable(caption = "Table 3: Bike Usage Metrics by User Type and Day of Week") %>%
  kable_styling(full_width = FALSE) %>%
  scroll_box(width = "100%")
Table 3: Bike Usage Metrics by User Type and Day of Week
Day Customer_Trips Subscriber_Trips Customer_Avg_Sec Subscriber_Avg_Sec Customer_Total_Sec Subscriber_Total_Sec
Monday 6632 110350 1353 643 8973096 70955050
Tuesday 7881 127861 1572 650 12388932 83109650
Wednesday 8254 121816 1867 650 15410218 79180400
Thursday 7665 125137 1549 643 11873085 80463091
Friday 8378 115056 1618 637 13555604 73290672
Saturday 13297 59333 1984 709 26381248 42067097
Sunday 18434 60146 2170 754 40001780 45350084


# Graph 3: number of trips per weekday, bars dodged by user type.
bike_usage_usertype_dow_count_avg %>%
  ggplot(aes(x = day_of_week, y = trip_count, fill = user_type)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")) +
  # Thousands separators keep the large trip counts readable on the y axis.
  scale_y_continuous(labels = scales::label_comma()) +
  labs(
    title = "Graph 3: Trip Count by Day of Week and User Type",
    x = "Day of Week",
    y = "Trip Count",
    fill = "User Type"
  ) +
  theme_minimal()

# Graph 4: total usage time per weekday, bars dodged by user type.
#
# The total_minutes_* columns are trip_count * mean_trip_duration, which is
# in SECONDS (the table above labels them "*_Total_Sec"). They are divided by
# 60 here so the plotted values really are minutes, matching the title and
# the y-axis label.
bike_usage_usertype_dow_count_avg_wide3 %>%
  rename(Day = day_of_week) %>%
  pivot_longer(
    cols = c(total_minutes_Customer, total_minutes_Subscriber),
    names_to = "User_Type",
    # Strip the column prefix so User_Type is "Customer" / "Subscriber".
    names_prefix = "total_minutes_",
    values_to = "Total_Seconds"
  ) %>%
  mutate(Total_Minutes = Total_Seconds / 60) %>%
  ggplot(aes(x = Day, y = Total_Minutes, fill = User_Type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Graph 4: Total Usage Minutes by Day and User Type",
    x = "Day of Week",
    y = "Total Minutes",
    fill = "User Type"
  ) +
  scale_fill_manual(values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")) +
  scale_y_continuous(labels = scales::label_comma()) +
  theme_minimal()

# Graph 5: mean trip duration per weekday, one line per user type.
# mean_trip_duration is divided by 60 so the y axis matches its "minutes" label.
ggplot(
  bike_usage_usertype_dow_count_avg,
  aes(
    x = day_of_week,
    y = mean_trip_duration / 60,
    colour = user_type,
    # Explicit grouping so each user type gets one connected line across
    # the discrete weekday axis.
    group = user_type
  )
) +
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  scale_colour_manual(values = c("Customer" = "#E69F00", "Subscriber" = "#56B4E9")) +
  labs(
    title = "Graph 5: Average Trip Duration by Day and User Type",
    x = "Day of Week",
    y = "Mean Trip Duration (minutes)",
    colour = "User Type"
  ) +
  theme_minimal()

Key Takeaways:

Average Trip Duration:

Count:

Key takeaways (total minutes):

Final Conclusions

Key Insights on Consumer Behaviour (Subscribers vs. Customers):

Trip Duration Differences:

Trip Frequency:

Total Usage Time:

Weekday vs. Weekend Behaviour:

Recommendations:

  1. Target Weekday-Riding Customers with promotional marketing for subscription model

    • Workweek Plans / Commuter Plans - allow users to save money on regular travel, but encourage more frequent usage
  2. Serve repeat customers prompts to subscribe, with personalized offers based on consumption patterns

    • Customers who take multiple trips within a week, especially on weekdays, are high-conversion prospects
    • Send a notification after a 2nd or 3rd trip in a short period of time, notifying them that they could save on overall costs with a subscription - encourage increased quantity of commuter/errand rides
  3. Target recreational weekend customers with promotional material about using bikes during the week, promote commuter usage

    • Promote regular usage and convenience factor

Future Data Collection Suggestions: