library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
## Loading required package: timechange
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Load the processed bike data from its RDS file, then preview the first rows.
bike <- read_rds("data-processed/01-bike.rds")
head(bike)
Filtering to trips that lasted at least 1 minute and at most 1 hour
# Keep only rows whose trip duration is between 1 and 60 (both inclusive).
bike_trip <- bike %>%
  filter(trip_duration_minutes >= 1 & trip_duration_minutes <= 60)
head(bike_trip)
Creating a separate hour column
# Add an `hour` column made of the first two characters of `checkout_time`.
# The result is a character column, not a number.
bike_hour <- mutate(
  bike_trip,
  hour = str_sub(checkout_time, 1, 2)
)
Getting rid of the bad year column, creating a clean yr one, and keeping only trips from 2013 through 2022
# Derive `yr` from `checkout_date`, drop the existing `year` column, and keep
# rows where `yr` is at least 2013 and below 2023.
bike_yr <- bike_hour %>%
  mutate(yr = year(checkout_date)) %>%
  select(-year) %>%
  filter(yr >= 2013 & yr < 2023)
bike_yr
Finding what bikes have been checked out the most overall.
# Checkouts per bike across all memberships, largest count first.
bike_yr %>%
  group_by(bicycle_id) %>%
  tally(name = "checkout_amount", sort = TRUE)

# The same ranking restricted to student-membership trips.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(bicycle_id) %>%
  tally(name = "checkout_amount", sort = TRUE)
Bike 599 was the bike most checked out overall. Bike 19271 was the bike most checked out by students.
Finding the bike that has traveled the most minutes
# Total trip minutes per bike, largest total first.
# Sorting is required so the bike with the most minutes is the first row;
# without it the rows come back in `bicycle_id` group order.
bike_yr %>%
  group_by(bicycle_id) %>%
  summarize(total_trip_time = sum(trip_duration_minutes)) %>%
  arrange(desc(total_trip_time))
Bike 004G has traveled the most minutes.
Finding what kiosks in Austin bikes are checked out the most overall
# Checkouts per checkout kiosk across all memberships, busiest kiosk first.
group_by(bike_yr, checkout_kiosk_clean) %>%
  summarise(checkout_number = n(), .groups = "drop") %>%
  arrange(desc(checkout_number))
Finding what kiosks in Austin bikes are checked out the most by students
# Checkouts per checkout kiosk for student-membership trips, busiest first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(checkout_kiosk_clean) %>%
  tally(name = "checkout_number", sort = TRUE)
Finding what kiosks in Austin bikes are returned to the most overall
# Returns per return kiosk across all memberships, busiest kiosk first.
bike_yr %>%
  group_by(return_kiosk_clean) %>%
  tally(name = "return_number", sort = TRUE)
Finding what kiosks in Austin bikes are returned to the most by students
# Returns per return kiosk for student-membership trips, busiest first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(return_kiosk_clean) %>%
  summarise(return_number = n(), .groups = "drop") %>%
  arrange(desc(return_number))
The 21st & Speedway at the PCL kiosk is the kiosk with the most checked out and returned bikes, both overall and for students.
Finding the most common kiosk pair for checkouts and returns overall
# Trips per (checkout kiosk, return kiosk) pair, most common pair first.
# `count()` groups by its arguments itself, so a separate `group_by()` is not
# needed; dropping it also returns an ungrouped tibble. The count is in `n`.
bike_yr %>%
  count(checkout_kiosk_clean, return_kiosk_clean, sort = TRUE)
Finding the most common kiosk pair for checkouts and returns for students
# Student-membership trips per (checkout kiosk, return kiosk) pair, most
# common pair first. `count()` groups by its arguments itself, so no separate
# `group_by()` is needed and the result comes back ungrouped, count in `n`.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  count(checkout_kiosk_clean, return_kiosk_clean, sort = TRUE)
The most used checkout/return kiosk pair overall and by students is checkout at 21st/Speedway @ PCL and return at Dean Keeton/Speedway.
Finding how many trips happen each year on all memberships
# Number of trips in each `yr` across all memberships.
total_trips <- bike_yr %>%
  group_by(yr) %>%
  tally(name = "trips")
total_trips
Writing a CSV for Datawrapper
# Export the yearly trip totals to "total_trips.csv" in the working directory.
# The data frame is passed once and the destination goes in `file`; readr
# deprecated the `path` argument in 1.4.0.
write_csv(total_trips, file = "total_trips.csv")
Datawrapper chart 1: https://www.datawrapper.de/_/BDSNw/
Finding how many trips happen each year on student memberships
# Student-membership trips per year, busiest year first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(yr) %>%
  tally(name = "trips", sort = TRUE)
Overall, 2018 had the most trips, with 359,936. For students, it was also 2018, with 232,923 trips.
Finding how many rides happen on each membership.
# Trips per membership category, most-used category first.
group_by(bike_yr, membership_category) %>%
  summarise(trips = n(), .groups = "drop") %>%
  arrange(desc(trips))
After the student membership, the most trips happen on single rides and annual memberships.
Finding how many trips happen each month on all memberships
# Trips per `month` value across all memberships, busiest month first.
bike_yr %>%
  group_by(month) %>%
  tally(name = "trips", sort = TRUE)
Finding how many trips happen each month on student memberships
# Student-membership trips per `month` value, busiest month first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(month) %>%
  summarise(trips = n(), .groups = "drop") %>%
  arrange(desc(trips))
For all memberships, March has the most trips overall with 231,284. For just students, it is September with 70,656.
Finding what time bikes are checked out the most on all memberships
# Checkouts per distinct `checkout_time` value, most common time first.
bike_yr %>%
  group_by(checkout_time) %>%
  tally(name = "checkout_number", sort = TRUE)
Finding what time bikes are checked out the most on student memberships
# Student-membership checkouts per distinct `checkout_time` value, most
# common time first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(checkout_time) %>%
  summarise(checkout_number = n(), .groups = "drop") %>%
  arrange(desc(checkout_number))
Bikes have been checked out the most at 4:12pm overall. For students, they have been checked out the most at 1:51pm.
Finding what hour bikes are checked out the most on all memberships
# Rides per checkout `hour` across all memberships, busiest hour first.
# Sorted like the other rankings in this file so the top hour is the first
# row instead of being buried in `hour` order.
bike_yr %>%
  group_by(hour) %>%
  summarize(rides = n()) %>%
  arrange(desc(rides))
Finding what hour bikes are checked out the most on student memberships
# Student-membership rides per checkout `hour`, busiest hour first.
# Sorted like the other rankings in this file so the top hour is the first
# row instead of being buried in `hour` order.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(hour) %>%
  summarize(rides = n()) %>%
  arrange(desc(rides))
On all memberships, bikes are checked out the most between 5-5:59pm. On student memberships, bikes are checked out the most between 6-6:59pm.
Finding the average number of checkouts each hour on student memberships
# Average number of student-membership checkouts per hour of the day.
# Step 1 counts rides for every (checkout_date, hour) pair; step 2 averages
# those counts within each hour and rounds to one decimal place.
# NOTE: only date/hour pairs with at least one ride appear in step 1, so
# hours with no rides on a given date are not averaged in as zeros.
average_hour <- bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(checkout_date, hour) %>%
  # Drop the grouping explicitly rather than relying on the default, which
  # keeps `checkout_date` grouped and prints a message about it.
  summarize(rides = n(), .groups = "drop") %>%
  group_by(hour) %>%
  summarize(average_trips = mean(rides)) %>%
  mutate(average_trips = round(average_trips, 1))
average_hour
Writing a CSV for Datawrapper
# Export the hourly averages to "average_hour.csv" in the working directory.
# The data frame is passed once and the destination goes in `file`; readr
# deprecated the `path` argument in 1.4.0.
write_csv(average_hour, file = "average_hour.csv")
Datawrapper chart 2: https://www.datawrapper.de/_/4iYS7/
On student memberships, bikes are checked out the most between 6 and 6:59pm, with an average of 24.4 checkouts a day during that hour.
Finding what day had the most rides overall
# The checkout date (or dates, if tied) with the highest ride count overall.
bike_yr %>%
  group_by(checkout_date) %>%
  tally(name = "rides") %>%
  slice_max(rides, n = 1)
Finding what day had the most rides on student memberships
# The checkout date (or dates, if tied) with the highest ride count among
# student-membership trips.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(checkout_date) %>%
  summarise(rides = n(), .groups = "drop") %>%
  slice_max(rides, n = 1)
The day with the most rides overall is March 19, 2015. The day with the most student rides overall is March 6, 2018.
Finding the average trip duration overall
# Mean of `trip_duration_minutes` over every trip in `bike_yr`.
summarise(bike_yr, average_trip = mean(trip_duration_minutes))
Finding the average trip duration for students
# Mean of `trip_duration_minutes` over student-membership trips only.
filter(bike_yr, membership_category == "student membership") %>%
  summarise(average_trip = mean(trip_duration_minutes))
The average overall trip duration is 16.2 minutes. For students, it is 8.8 minutes.
Finding the average trip duration overall each year
# Mean trip duration per year across all memberships, longest average first.
group_by(bike_yr, yr) %>%
  summarise(
    average_trip = mean(trip_duration_minutes),
    .groups = "drop"
  ) %>%
  arrange(desc(average_trip))
Finding the average trip duration for students each year
# Mean trip duration per year for student-membership trips, longest first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(yr) %>%
  summarise(
    average_trip = mean(trip_duration_minutes),
    .groups = "drop"
  ) %>%
  arrange(desc(average_trip))
For all trips 2020 has the highest average trip duration with 22.7 minutes. For students, it is also 2020 with 10.5 minutes.
Finding the average trip duration overall each month
# Mean trip duration per `month` value across all memberships, longest first.
group_by(bike_yr, month) %>%
  summarise(
    average_trip = mean(trip_duration_minutes),
    .groups = "drop"
  ) %>%
  arrange(desc(average_trip))
Finding the average trip duration for students each month
# Mean trip duration per `month` value for student-membership trips,
# longest average first.
bike_yr %>%
  filter(membership_category == "student membership") %>%
  group_by(month) %>%
  summarise(
    average_trip = mean(trip_duration_minutes),
    .groups = "drop"
  ) %>%
  arrange(desc(average_trip))
For all trips, July has the highest average trip duration, at 17.7 minutes. For student trips, it is March (the minutes figure is missing here and needs to be filled in from the student table above).
MetroBike use among students is on the rise again: 154,338 student rides were taken last year. That is the second-highest annual total, behind the more than 200,000 student rides taken in 2018, when the student membership was introduced.