# Set the knitr chunk default so code is echoed in the rendered report.
knitr::opts_chunk$set(echo = TRUE)
#install packages
#install.packages("tidyverse")
#install.packages("janitor")
#install.packages("lubridate")
#install.packages("skimr")
#install.packages("readr")
#load packages
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.1 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.2.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(lubridate)
library(skimr)
library(readr)
# Load data: 2019 Q1 trip export ----
trips_2019_q1 <- read_csv(
  file = "Divvy_Trips_2019_Q1 - Divvy_Trips_2019_Q1.csv"
)
## Rows: 365069 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): start_time, end_time, from_station_name, to_station_name, usertype,...
## dbl (5): trip_id, bikeid, from_station_id, to_station_id, birthyear
## num (1): tripduration
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load data: 2020 Q1 trip export (uses the newer column schema)
trips_2020_q1 <- read_csv(
  file = "Divvy_Trips_2020_Q1 - Divvy_Trips_2020_Q1.csv"
)
## Rows: 426887 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ride_id, rideable_type, started_at, ended_at, start_station_name, e...
## dbl (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, en...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Inspect column names and types of the 2019 data
trips_2019_q1 %>%
  glimpse()
## Rows: 365,069
## Columns: 12
## $ trip_id <dbl> 21742443, 21742444, 21742445, 21742446, 21742447, 21…
## $ start_time <chr> "2019-01-01 0:04:37", "2019-01-01 0:08:13", "2019-01…
## $ end_time <chr> "2019-01-01 0:11:07", "2019-01-01 0:15:34", "2019-01…
## $ bikeid <dbl> 2167, 4386, 1524, 252, 1170, 2437, 2708, 2796, 6205,…
## $ tripduration <dbl> 390, 441, 829, 1783, 364, 216, 177, 100, 1727, 336, …
## $ from_station_id <dbl> 199, 44, 15, 123, 173, 98, 98, 211, 150, 268, 299, 2…
## $ from_station_name <chr> "Wabash Ave & Grand Ave", "State St & Randolph St", …
## $ to_station_id <dbl> 84, 624, 644, 176, 35, 49, 49, 142, 148, 141, 295, 4…
## $ to_station_name <chr> "Milwaukee Ave & Grand Ave", "Dearborn St & Van Bure…
## $ usertype <chr> "Subscriber", "Subscriber", "Subscriber", "Subscribe…
## $ gender <chr> "Male", "Female", "Female", "Male", "Male", "Female"…
## $ birthyear <dbl> 1989, 1990, 1994, 1993, 1994, 1983, 1984, 1990, 1995…
# Inspect column names and types of the 2020 data
trips_2020_q1 %>%
  glimpse()
## Rows: 426,887
## Columns: 13
## $ ride_id <chr> "EACB19130B0CDA4A", "8FED874C809DC021", "789F3C21E4…
## $ rideable_type <chr> "docked_bike", "docked_bike", "docked_bike", "docke…
## $ started_at <chr> "2020-01-21 20:06:59", "2020-01-30 14:22:39", "2020…
## $ ended_at <chr> "2020-01-21 20:14:30", "2020-01-30 14:26:22", "2020…
## $ start_station_name <chr> "Western Ave & Leland Ave", "Clark St & Montrose Av…
## $ start_station_id <dbl> 239, 234, 296, 51, 66, 212, 96, 96, 212, 38, 117, 1…
## $ end_station_name <chr> "Clark St & Leland Ave", "Southport Ave & Irving Pa…
## $ end_station_id <dbl> 326, 318, 117, 24, 212, 96, 212, 212, 96, 100, 632,…
## $ start_lat <dbl> 41.9665, 41.9616, 41.9401, 41.8846, 41.8856, 41.889…
## $ start_lng <dbl> -87.6884, -87.6660, -87.6455, -87.6319, -87.6418, -…
## $ end_lat <dbl> 41.9671, 41.9542, 41.9402, 41.8918, 41.8899, 41.884…
## $ end_lng <dbl> -87.6674, -87.6644, -87.6530, -87.6206, -87.6343, -…
## $ member_casual <chr> "member", "member", "member", "member", "member", "…
# Normalise column names in both raw tables with janitor::clean_names()
trips_2019_q1 <- clean_names(trips_2019_q1)
trips_2020_q1 <- clean_names(trips_2020_q1)
# Reshape the 2019 table into the 2020 column layout ----
# Values that need computing are built in mutate(); plain renames happen
# inside select(), which also fixes the final column order.
trips_2019_clean <- trips_2019_q1 %>%
  mutate(
    # 2020 ride ids are character, so the numeric trip id is converted to match
    trip_id = as.character(trip_id),
    # every 2019 trip gets the constant "docked_bike" label
    rideable_type = "docked_bike",
    # "Subscriber" becomes "member"; any other non-missing value becomes "casual"
    member_casual = if_else(usertype == "Subscriber", "member", "casual")
  ) %>%
  select(
    ride_id = trip_id,
    rideable_type,
    started_at = start_time,
    ended_at = end_time,
    start_station_name = from_station_name,
    start_station_id = from_station_id,
    end_station_name = to_station_name,
    end_station_id = to_station_id,
    member_casual
  )
# Keep only the columns that the reshaped 2019 table also has,
# in the same order, so the two tables can be stacked.
trips_2020_clean <- trips_2020_q1 %>%
  select(all_of(c(
    "ride_id", "rideable_type", "started_at", "ended_at",
    "start_station_name", "start_station_id",
    "end_station_name", "end_station_id",
    "member_casual"
  )))
# Stack both quarters and derive ride-level fields ----
all_trips <- bind_rows(trips_2019_clean, trips_2020_clean) %>%
  # timestamps arrive as character; parse them into date-times first
  mutate(
    started_at = ymd_hms(started_at),
    ended_at = ymd_hms(ended_at)
  ) %>%
  # then compute everything that depends on the parsed timestamps
  mutate(
    ride_length = as.numeric(difftime(ended_at, started_at, units = "mins")),
    date = as.Date(started_at),
    day_of_week = wday(started_at, label = TRUE, abbr = FALSE),
    month = month(started_at, label = TRUE, abbr = TRUE)
  ) %>%
  # remove bad/strange rides: keep only positive durations shorter than
  # 24 hours (in minutes); filter() also drops rows where the test is NA
  filter(ride_length > 0 & ride_length < 24 * 60)
# Ride counts and ride-length statistics (minutes) per rider type
summary_by_type <- all_trips %>%
  group_by(member_casual) %>%
  summarise(
    n_rides = n(),
    # one output column per statistic, named <statistic>_length
    across(
      ride_length,
      list(mean = mean, median = median, max = max, min = min),
      .names = "{.fn}_length"
    )
  )
summary_by_type
## # A tibble: 2 × 6
## member_casual n_rides mean_length median_length max_length min_length
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 casual 71138 36.5 22.0 1436. 0.0167
## 2 member 720126 11.4 8.47 1433. 0.0167
# Number of rides for every rider type / weekday combination
rides_by_day <- count(all_trips, member_casual, day_of_week)
rides_by_day
## # A tibble: 14 × 3
## member_casual day_of_week n
## <chr> <ord> <int>
## 1 casual Sunday 18578
## 2 casual Monday 6672
## 3 casual Tuesday 7949
## 4 casual Wednesday 8328
## 5 casual Thursday 7729
## 6 casual Friday 8466
## 7 casual Saturday 13416
## 8 member Sunday 60178
## 9 member Monday 110412
## 10 member Tuesday 127946
## 11 member Wednesday 121879
## 12 member Thursday 125198
## 13 member Friday 115132
## 14 member Saturday 59381
# Plot rides by day of week ----
# Uses the rides_by_day counts already computed earlier in the script rather
# than counting all_trips a second time, so the printed table and the chart
# are guaranteed to show the same numbers.
rides_by_day %>%
  ggplot(aes(x = day_of_week, y = n, fill = member_casual)) +
  # side-by-side bars so both rider types are comparable within each day
  geom_col(position = "dodge") +
  labs(
    title = "Rides by Day of Week and Rider Type",
    x = "Day of Week",
    y = "Number of Rides",
    fill = "Rider Type"
  ) +
  theme_minimal()
# Plot average ride length by day of week ----
# One line per rider type; mean ride_length is in minutes.
all_trips %>%
  group_by(member_casual, day_of_week) %>%
  summarise(mean_length = mean(ride_length), .groups = "drop") %>%
  ggplot(
    aes(
      x = day_of_week,
      y = mean_length,
      color = member_casual,
      # day_of_week is a factor, so an explicit group is needed for
      # geom_line() to connect the points of each rider type
      group = member_casual
    )
  ) +
  geom_line(linewidth = 1) +
  geom_point() +
  labs(
    title = "Average Ride Length by Day of Week",
    x = "Day of Week",
    y = "Average Ride Length (minutes)",
    color = "Rider Type"
  ) +
  # same theme as the rides-by-day chart so the report's figures match
  theme_minimal()
# Plot rides per month by rider type ----
# month is derived from started_at without the year, so each bar pools the
# same calendar month from both the 2019 and 2020 quarters.
all_trips %>%
  count(member_casual, month) %>%
  ggplot(aes(x = month, y = n, fill = member_casual)) +
  geom_col(position = "dodge") +
  labs(
    title = "Rides by Month and Rider Type",
    x = "Month",
    y = "Number of Rides",
    fill = "Rider Type"
  ) +
  # same theme as the rides-by-day chart so the report's figures match
  theme_minimal()
Based on the combined 2019 Q1 and 2020 Q1 data, annual members and casual riders show clearly different usage patterns:
Ride duration: Casual riders take longer rides on average, while annual members take shorter, more frequent trips.
Day of week: Casual riders’ usage peaks on weekends, suggesting more recreational and leisure rides. Annual members ride more consistently on weekdays, which aligns with commuting and regular transportation.
Consistency: Members generate a larger number of total rides and use the service more regularly across the quarter, whereas casual riders appear more occasional.
These patterns support the idea that casual riders largely behave like tourists or leisure users, while annual members behave like daily commuters or utility riders.
The ride patterns suggest several reasons why casual riders might convert to annual memberships:
Frequent usage becomes expensive: Casual riders who take repeated rides (especially on weekends or over several weeks) would save money with a fixed-price annual membership instead of paying per ride.
Longer leisure trips: Because casual riders tend to ride longer, they are more exposed to overtime fees. An annual membership with included ride time would reduce the cost of these longer rides.
Convenience: Members do not need to think about per-ride payments and can unlock bikes quickly. For casual riders who start using the service regularly, this convenience becomes valuable.
Lifestyle fit: Casual riders who use Cyclistic for exercise, local travel, or regular weekend activities may find that a membership better supports their ongoing habits.
In summary, casual riders are most likely to buy an annual membership when they ride often enough that the membership would save them money and make their experience more convenient.
Using the behavioral insights from the data, Cyclistic can design targeted digital campaigns:
Personalized in-app and email messaging
After a casual rider completes several rides within a month, the app or email can show a message such as: “You rode 6 times this month – with an annual membership, you could have saved $X.”
After long rides, emphasize that membership reduces or simplifies extra time fees.
Segmented campaigns by behavior
Weekend-focused riders: Promote recreational benefits and “unlimited weekend rides” messaging.
Frequent short-trip riders: Highlight commuting benefits, reliability, and cost savings compared to other transport modes.
Social media and location-based ads
Use platforms like Instagram and Facebook to target people living or working near high-usage stations.
Show simple visuals comparing the cost of a few casual rides vs the cost of a monthly or annual membership.
Limited-time offers and trials
Offer a discounted first month or seasonal pass to lower the barrier to trying membership.
Combine offers with referral programs so existing members are rewarded for bringing in casual riders.
By combining behavior-based personalization (using trip data) with clear, simple marketing messages about cost and convenience, Cyclistic can increase the likelihood that casual riders upgrade to annual memberships.