library(tidyverse)
library(lubridate)
library(ggplot2)
# Show the current working directory, then switch to the project folder so
# the relative CSV file names used by the read_csv() calls below resolve.
# NOTE(review): this absolute path is machine-specific; anyone else running
# the script has to adjust it (or run from an RStudio project instead).
getwd()
setwd("C:/Users/Satinder/Documents/Rstudio/Bike sharing Analysis")
# Load the three quarterly trip extracts (file names are relative to the
# working directory).
q3_2019 <- read_csv("Divvy_Trips_2019_Q3.csv")
q4_2019 <- read_csv("Divvy_Trips_2019_Q4.csv")
q1_2020 <- read_csv("Divvy_Trips_2020_Q1.csv")

# Print the column names of each quarter so they can be compared before the
# tables are stacked into one data frame.
colnames(q3_2019)
colnames(q4_2019)
colnames(q1_2020)
# Give both 2019 tables the same column names (new_name = old_name), presumably
# the names already used by q1_2020 -- confirm against the colnames() output.
# Wrapping each assignment in parentheses also prints the renamed table.
(q4_2019 <- q4_2019 %>%
  rename(
    ride_id = trip_id,
    rideable_type = bikeid,
    started_at = start_time,
    ended_at = end_time,
    start_station_name = from_station_name,
    start_station_id = from_station_id,
    end_station_name = to_station_name,
    end_station_id = to_station_id,
    member_casual = usertype
  ))

(q3_2019 <- q3_2019 %>%
  rename(
    ride_id = trip_id,
    rideable_type = bikeid,
    started_at = start_time,
    ended_at = end_time,
    start_station_name = from_station_name,
    start_station_id = from_station_id,
    end_station_name = to_station_name,
    end_station_id = to_station_id,
    member_casual = usertype
  ))
# Inspect the structure (column types) of each quarterly table.
str(q1_2020)
str(q4_2019)
str(q3_2019)

# Convert ride_id and rideable_type to character in the 2019 tables.
# bind_rows() needs compatible column types, so this presumably matches the
# types in q1_2020 -- confirm against the str() output above.
q4_2019 <- mutate(
  q4_2019,
  ride_id = as.character(ride_id),
  rideable_type = as.character(rideable_type)
)
q3_2019 <- mutate(
  q3_2019,
  ride_id = as.character(ride_id),
  rideable_type = as.character(rideable_type)
)
# Stack the three quarterly tables into a single data frame.
all_trips <- bind_rows(q3_2019, q4_2019, q1_2020)

# Drop the coordinate and rider-demographic columns; they are not used in
# the rest of this script.
all_trips <- all_trips %>%
  select(-c(start_lat, start_lng, end_lat, end_lng, birthyear, gender))

# Inspect the combined table: column names, size, first rows, structure and
# per-column summaries.
colnames(all_trips)
nrow(all_trips)
dim(all_trips)
head(all_trips)
str(all_trips)
summary(all_trips)

# Count the rows for each rider-type label before the labels are recoded.
table(all_trips$member_casual)
# Collapse the two labelling schemes into one: "Subscriber" becomes "member"
# and "Customer" becomes "casual"; any other value is left unchanged.
all_trips <- all_trips %>%
  mutate(
    member_casual = recode(
      member_casual,
      "Subscriber" = "member",
      "Customer" = "casual"
    )
  )

# Check to make sure the proper number of observations were reassigned
table(all_trips$member_casual)
# Derive calendar fields from the ride start time so rides can be aggregated
# by month, day, year or weekday.
all_trips$date <- as.Date(all_trips$started_at)
all_trips$month <- format(all_trips$date, "%m")
all_trips$day <- format(all_trips$date, "%d")
all_trips$year <- format(all_trips$date, "%Y")
# "%A" yields the full weekday name in the session's locale.
all_trips$day_of_week <- format(all_trips$date, "%A")

# Ride duration. The unit is pinned to seconds because difftime()'s default
# ("auto") picks the unit from the data, which would make the numbers
# reported further down depend on which rides happen to be present.
all_trips$ride_length <- difftime(
  all_trips$ended_at,
  all_trips$started_at,
  units = "secs"
)

# Inspect the structure again to confirm the new columns.
str(all_trips)
# Build the analysis table: drop rides that start at the "HQ QR" station or
# that have a negative duration.
is_excluded <- all_trips$start_station_name == "HQ QR" |
  all_trips$ride_length < 0
# A missing station name or duration makes the test NA, and an NA in a logical
# row subscript would insert an all-NA phantom row. Keep such rides instead:
# only rows that are known to match the exclusion rule are removed.
all_trips_v2 <- all_trips[!is_excluded | is.na(is_excluded), ]
# Overall ride-length statistics.
mean(all_trips_v2$ride_length)
median(all_trips_v2$ride_length)
max(all_trips_v2$ride_length)
min(all_trips_v2$ride_length)

# The same statistics split by rider type. Passing `data =` keeps the output
# column names short (ride_length, member_casual).
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = mean)
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = median)
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = max)
aggregate(ride_length ~ member_casual, data = all_trips_v2, FUN = min)

# Average ride length by rider type and weekday (weekdays still unordered).
aggregate(
  ride_length ~ member_casual + day_of_week,
  data = all_trips_v2,
  FUN = mean
)

# Put the weekdays in calendar order, Sunday first.
# NOTE(review): day_of_week was produced with format(..., "%A"), so these
# English level names only match under an English locale -- confirm.
all_trips_v2$day_of_week <- ordered(
  all_trips_v2$day_of_week,
  levels = c(
    "Sunday", "Monday", "Tuesday", "Wednesday",
    "Thursday", "Friday", "Saturday"
  )
)

# Repeat the weekday breakdown, now listed in calendar order.
aggregate(
  ride_length ~ member_casual + day_of_week,
  data = all_trips_v2,
  FUN = mean
)
# Ridership by rider type and weekday: number of rides and average duration.
all_trips_v2 %>%
  # creates weekday field using wday()
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  # groups by usertype and weekday
  group_by(member_casual, weekday) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length),
    .groups = "drop" # return an ungrouped table and silence the grouping note
  ) %>%
  arrange(member_casual, weekday)

# Plot the number of rides per weekday, with one bar per rider type.
all_trips_v2 %>%
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  group_by(member_casual, weekday) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length),
    .groups = "drop"
  ) %>%
  arrange(member_casual, weekday) %>%
  ggplot(aes(x = weekday, y = number_of_rides, fill = member_casual)) +
  geom_col(position = "dodge")
# Plot the average ride duration per weekday, with one bar per rider type.
all_trips_v2 %>%
  mutate(weekday = wday(started_at, label = TRUE)) %>%
  group_by(member_casual, weekday) %>%
  summarise(
    number_of_rides = n(),
    average_duration = mean(ride_length),
    .groups = "drop" # return an ungrouped table and silence the grouping note
  ) %>%
  arrange(member_casual, weekday) %>%
  ggplot(aes(x = weekday, y = average_duration, fill = member_casual)) +
  geom_col(position = "dodge")