In 2016, Cyclistic launched a successful bike-share offering. Since then, the program has grown to a fleet of 5,824 bicycles that are geotracked and locked into a network of 692 stations across Chicago. Lily Moreno is the director of marketing. Annual members are much more profitable than casual riders and Moreno believes that maximizing the number of annual members will be key to future growth of the company.
Draw insights from the data to identify trends in bike-share use that can help convert casual riders into annual members.
library(tidyverse)
library(lubridate)
library(janitor)
library(dplyr)
library(ggpubr)
library(skimr)
library(hydroTSM)
library(reshape2)
library(patchwork)
library(rmarkdown)
library(scales)
# Read the twelve monthly 2022 trip files, one table per month.
# The September file is the only one whose name uses "publictripdata".
jan22 <- read_csv(file = "r/New folder/202201-divvy-tripdata.csv")
feb22 <- read_csv(file = "r/New folder/202202-divvy-tripdata.csv")
mar22 <- read_csv(file = "r/New folder/202203-divvy-tripdata.csv")
apr22 <- read_csv(file = "r/New folder/202204-divvy-tripdata.csv")
may22 <- read_csv(file = "r/New folder/202205-divvy-tripdata.csv")
jun22 <- read_csv(file = "r/New folder/202206-divvy-tripdata.csv")
jul22 <- read_csv(file = "r/New folder/202207-divvy-tripdata.csv")
aug22 <- read_csv(file = "r/New folder/202208-divvy-tripdata.csv")
sep22 <- read_csv(file = "r/New folder/202209-divvy-publictripdata.csv")
oct22 <- read_csv(file = "r/New folder/202210-divvy-tripdata.csv")
nov22 <- read_csv(file = "r/New folder/202211-divvy-tripdata.csv")
dec22 <- read_csv(file = "r/New folder/202212-divvy-tripdata.csv")
# Print the column names of each monthly table in turn. The recorded output
# below lists the same 14 names, in the same order, for all twelve months,
# which is what allows the tables to be stacked row-wise afterwards.
colnames(jan22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(feb22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(mar22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(apr22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(may22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(jun22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(jul22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(aug22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(sep22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(oct22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(nov22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
colnames(dec22)
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
# Stack the twelve monthly tables into one table, January first.
bike_data <- bind_rows(
  list(
    jan22, feb22, mar22, apr22, may22, jun22,
    jul22, aug22, sep22, oct22, nov22, dec22
  )
)
# List the column names of the combined table.
bike_data %>%
  colnames()
## [1] "ride_id" "rideable_type" "started_at"
## [4] "ended_at" "start_station_name" "start_station_id"
## [7] "end_station_name" "end_station_id" "start_lat"
## [10] "start_lng" "end_lat" "end_lng"
## [13] "member_casual" "ride_length"
# Show the dimensions and the type of every column of the combined table.
bike_data %>%
  str()
## spc_tbl_ [5,667,717 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ ride_id : chr [1:5667717] "C2F7DD78E82EC875" "A6CF8980A652D272" "BD0F91DFF741C66D" "CBB80ED419105406" ...
## $ rideable_type : chr [1:5667717] "electric_bike" "electric_bike" "classic_bike" "classic_bike" ...
## $ started_at : chr [1:5667717] "13-01-2022 11:59" "10-01-2022 08:41" "25-01-2022 04:53" "04-01-2022 00:18" ...
## $ ended_at : chr [1:5667717] "13-01-2022 12:02" "10-01-2022 08:46" "25-01-2022 04:58" "04-01-2022 00:33" ...
## $ start_station_name: chr [1:5667717] "Glenwood Ave & Touhy Ave" "Glenwood Ave & Touhy Ave" "Sheffield Ave & Fullerton Ave" "Clark St & Bryn Mawr Ave" ...
## $ start_station_id : chr [1:5667717] "525" "525" "TA1306000016" "KA1504000151" ...
## $ end_station_name : chr [1:5667717] "Clark St & Touhy Ave" "Clark St & Touhy Ave" "Greenview Ave & Fullerton Ave" "Paulina St & Montrose Ave" ...
## $ end_station_id : chr [1:5667717] "RP-007" "RP-007" "TA1307000001" "TA1309000021" ...
## $ start_lat : num [1:5667717] 42 42 41.9 42 41.9 ...
## $ start_lng : num [1:5667717] -87.7 -87.7 -87.7 -87.7 -87.6 ...
## $ end_lat : num [1:5667717] 42 42 41.9 42 41.9 ...
## $ end_lng : num [1:5667717] -87.7 -87.7 -87.7 -87.7 -87.6 ...
## $ member_casual : chr [1:5667717] "casual" "casual" "member" "casual" ...
## $ ride_length : 'hms' num [1:5667717] 00:02:00 00:04:00 00:04:00 00:14:00 ...
## ..- attr(*, "units")= chr "secs"
## - attr(*, "spec")=
## .. cols(
## .. ride_id = col_character(),
## .. rideable_type = col_character(),
## .. started_at = col_character(),
## .. ended_at = col_character(),
## .. start_station_name = col_character(),
## .. start_station_id = col_character(),
## .. end_station_name = col_character(),
## .. end_station_id = col_character(),
## .. start_lat = col_double(),
## .. start_lng = col_double(),
## .. end_lat = col_double(),
## .. end_lng = col_double(),
## .. member_casual = col_character(),
## .. ride_length = col_time(format = "")
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first six rows of the combined table.
bike_data %>%
  head(n = 6)
## # A tibble: 6 × 14
## ride_id ridea…¹ start…² ended…³ start…⁴ start…⁵ end_s…⁶ end_s…⁷ start…⁸
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 C2F7DD78E82EC… electr… 13-01-… 13-01-… Glenwo… 525 Clark … RP-007 42.0
## 2 A6CF8980A652D… electr… 10-01-… 10-01-… Glenwo… 525 Clark … RP-007 42.0
## 3 BD0F91DFF741C… classi… 25-01-… 25-01-… Sheffi… TA1306… Greenv… TA1307… 41.9
## 4 CBB80ED419105… classi… 04-01-… 04-01-… Clark … KA1504… Paulin… TA1309… 42.0
## 5 DDC963BFDDA51… classi… 20-01-… 20-01-… Michig… TA1309… State … TA1305… 41.9
## 6 A39C6F6CC0586… classi… 11-01-… 11-01-… Wood S… 637 Honore… TA1305… 41.9
## # … with 5 more variables: start_lng <dbl>, end_lat <dbl>, end_lng <dbl>,
## # member_casual <chr>, ride_length <time>, and abbreviated variable names
## # ¹rideable_type, ²started_at, ³ended_at, ⁴start_station_name,
## # ⁵start_station_id, ⁶end_station_name, ⁷end_station_id, ⁸start_lat
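The combined bike_data dataset has 14 variables and, per the str() output above, 5,667,717 observations (the figure of 4,369,291 observations appears to be the row count after the cleaning steps below). It describes ride type, ride start time, ride end time, rider type and ride length.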
On inspection, some observations contain blank values, so the data is not fully reliable; it is nevertheless analysed here for practice purposes. The data is provided by a second party, “Bikeshare”, and under its license riders' personal information has been excluded, which clears the dataset with respect to licensing, privacy, security and accessibility.
The dataset has certain impurities that need to be cleaned.
# Drop the four start/end latitude and longitude columns; all other columns
# are kept.
bike_data <- select(
  bike_data,
  -c(start_lat, start_lng, end_lat, end_lng)
)
The start and end latitude/longitude variables are removed because they are not needed for this analysis.
# Keep one copy of any row that is repeated in full.
bike_data <- distinct(bike_data)
# Count the missing cells across every column of the table.
# is.na() tests each cell. The earlier is.null() form tested only whether the
# data frame object itself is NULL, which is a single FALSE, so its sum was
# always 0 regardless of how many values were missing.
sum(is.na(bike_data))
# NOTE: the output recorded for the earlier is.null() version was `[1] 0`;
# re-run this chunk to obtain the actual count of missing values.
# Discard every row that has a missing value in any column.
bike_data <- drop_na(bike_data)
# Lower-case every column name.
bike_data <- bike_data %>%
  rename_with(tolower)
# Derive calendar fields from the ride start time.
#   date    - started_at parsed as a date-time; the text is stored in
#             day-month-year hour:minute form, hence dmy_hm()
#   month   - full month name of the ride's start date
#   weekday - full weekday name of the ride's start date
#   season  - season label returned by hydroTSM::time2season()
# time2season() is documented as taking a Date vector, so it is given the same
# as.Date() value used for month and weekday rather than the date-time.
bike_data <- bike_data %>%
  mutate(
    date = dmy_hm(started_at),
    month = format(as.Date(date), "%B"),
    weekday = format(as.Date(date), "%A"),
    season = time2season(as.Date(date), out.fmt = "seasons")
  )
As per SeasonsYear.com “Seasons of the year” project [https://seasonsyear.com/USA/Illinois/Chicago]