Ask

Note: The Business task The Company wants to maximize profit by knowing how annual members and casual riders use Cyclistic bikes differently. This would help them make decisions on the next marketing campaign Notes: Consider key stakeholders The main stakeholders here are the executive team, director of marketing, and manager Lily Moreno.

Prepare

Notes: Downloaded 12 months of trip data from 2020 04-2021 03 under the license https://www.divvybikes.com/data-license-agreement

Process

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.3     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
td01<-read_csv("202004-divvy-tripdata.csv")
## Rows: 84776 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td02<-read_csv("202005-divvy-tripdata.csv")
## Rows: 200274 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td03<-read_csv("202006-divvy-tripdata.csv")
## Rows: 343005 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td04<-read_csv("202007-divvy-tripdata.csv")
## Rows: 551480 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td05<-read_csv("202008-divvy-tripdata.csv")
## Rows: 622361 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td06<-read_csv("202009-divvy-tripdata.csv")
## Rows: 532958 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td07<-read_csv("202010-divvy-tripdata.csv")
## Rows: 388653 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td08<-read_csv("202011-divvy-tripdata.csv")
## Rows: 259716 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): ride_id, rideable_type, start_station_name, end_station_name, memb...
## dbl  (6): start_station_id, end_station_id, start_lat, start_lng, end_lat, e...
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td09<-read_csv("202012-divvy-tripdata.csv")
## Rows: 131573 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): ride_id, rideable_type, start_station_name, start_station_id, end_...
## dbl  (4): start_lat, start_lng, end_lat, end_lng
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td10<-read_csv("202101-divvy-tripdata.csv")
## Rows: 96834 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): ride_id, rideable_type, start_station_name, start_station_id, end_...
## dbl  (4): start_lat, start_lng, end_lat, end_lng
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td11<-read_csv("202102-divvy-tripdata.csv")
## Rows: 49622 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): ride_id, rideable_type, start_station_name, start_station_id, end_...
## dbl  (4): start_lat, start_lng, end_lat, end_lng
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
td12<-read_csv("202103-divvy-tripdata.csv")
## Rows: 228496 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): ride_id, rideable_type, start_station_name, start_station_id, end_...
## dbl  (4): start_lat, start_lng, end_lat, end_lng
## dttm (2): started_at, ended_at
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

combined all the 12 dataset with ‘rbind’

all_rides<-rbind(td01,td02,td03,td04,td05,td06,td07,td08,td09,td10,td11,td12)

Analyze

Notes: identify trends and relationships

all_rides2<-all_rides%>%rename(user_type=member_casual)
all_rides2<-janitor::remove_empty(all_rides2,which = c("cols"))
all_rides2<-janitor::remove_empty(all_rides2,which = c("rows"))
all_rides2$started_at<-lubridate::ymd_hms(all_rides2$started_at)
all_rides2$ended_at<-lubridate::ymd_hms(all_rides2$ended_at)

Created another columns for start hour and end hour

all_rides2$start_hour<-lubridate::hour(all_rides2$started_at)
all_rides2$end_hour<-lubridate::hour(all_rides2$ended_at)

Calculate the mean ride length

all_rides_r3<-all_rides2%>%group_by(user_type,start_hour)%>%summarise(mean_end_hour=mean(end_hour,na.rm =TRUE))
## `summarise()` has grouped output by 'user_type'. You can override using the `.groups` argument.

Share

Notes: Data visualization with ggplot

ggplot(data=all_rides_r3)+geom_col(mapping=aes(x=start_hour,y=mean_end_hour,color=user_type))+labs(title="Cyclistic Tripdata:start vs. ride length")+facet_wrap(~user_type)

## With this data viz we can see that the casual riders use the rides as much as riders with membership. The casual riders use the bikes more for leisure during weekends, while the riders with membership use the bikes for daily commute to work and weekends for leisure.

Act