Source file ⇒ Assignment_7.Rmd

# Load the station table. mosaic::read.file() reported
# "Reading data with read.csv()" when this file was read.
Stations <- mosaic::read.file("http://tiny.cc/dcf/DC-Stations.csv")

# Load the trip history from a remote, compressed .rds file. The connection is
# kept in a variable so that it is closed explicitly once the read finishes
# (or fails) instead of being left open for the garbage collector to reclaim.
data_site <- "http://tiny.cc/dcf/2014-Q4-Trips-History-Data.rds"
trips_con <- gzcon(url(data_site))
Trips <- tryCatch(readRDS(trips_con), finally = close(trips_con))
View(head(Trips))

Time of Day 1)

# Density of `sdate` across the whole data set.
graph_1 <- ggplot(Trips, aes(x = sdate)) +
  geom_density()
graph_1

# Density of `sdate` by time of day, expressed in fractional hours
# (hour + minute / 60).
table_2_time_of_day <- mutate(
  Trips,
  time_of_day = lubridate::hour(sdate) + lubridate::minute(sdate) / 60
)
table_2_time_of_day %>%
  ggplot(aes(x = time_of_day)) +
  geom_density()

# Add the day of the week (as returned by lubridate::wday()) and the time of
# day in fractional hours to every trip. The table is built once and shared by
# the three plots below; it does not change between them.
table_3 <- Trips %>%
  mutate(
    week_day = lubridate::wday(sdate),
    time_of_day = lubridate::hour(sdate) + lubridate::minute(sdate) / 60
  )

# Time-of-day density, one panel per day of the week.
ggplot(table_3, aes(x = time_of_day)) +
  geom_density() +
  facet_wrap(~week_day)

# Same panels, with one translucent filled density per client type, overlaid.
ggplot(table_3, aes(x = time_of_day)) +
  geom_density(aes(fill = client), alpha = 0.4, color = NA) +
  facet_wrap(~week_day)

# Same panels again, with the client densities stacked instead of overlaid.
ggplot(table_3, aes(x = time_of_day)) +
  geom_density(
    aes(fill = client),
    alpha = 0.4,
    color = NA,
    position = position_stack()
  ) +
  facet_wrap(~week_day)

The second one, which uses position_stack(), is cleaner to look at and more helpful.

# Label every trip as "weekend" or "weekday": wday() values 1 and 7 count as
# the weekend (Sunday and Saturday under lubridate's default week start).
new_table_weekend_vs_weekday <- mutate(
  Trips,
  week_day = lubridate::wday(sdate),
  time_of_day = lubridate::hour(sdate) + lubridate::minute(sdate) / 60,
  wday = ifelse(week_day %in% c(1, 7), "weekend", "weekday")
)

# Stacked time-of-day densities by client type, weekend vs. weekday panels.
new_table_weekend_vs_weekday %>%
  ggplot(aes(x = time_of_day)) +
  geom_density(
    aes(fill = client),
    position = position_stack(),
    color = NA,
    alpha = 0.4
  ) +
  facet_wrap(~wday)

Trip Distance 1)

View(head(Stations))

# Two copies of the station coordinates: one named for the start of a trip
# (sstation, lat, long) and one for the end (estation, lat2, long2).
Stations_Left <- Stations %>%
  rename(sstation = name) %>%
  select(sstation, lat, long)
Stations_Right <- Stations %>%
  rename(estation = name, lat2 = lat, long2 = long) %>%
  select(estation, lat2, long2)

# The two tables share no column names, so merge() returns their Cartesian
# product: every start station paired with every end station. `by = NULL`
# states that intent explicitly instead of relying on the empty name overlap.
joined_Stations <- merge(Stations_Left, Stations_Right, by = NULL)
View(head(joined_Stations))

# NOTE(review): this executes R code fetched over plain HTTP at run time;
# consider keeping a reviewed local copy of haversine.R instead.
source("http://tiny.cc/dcf/haversine.R")

# Distance between the two stations of every pair, as computed by haversine().
Distances <- joined_Stations %>%
  mutate(dist = haversine(lat, long, lat2, long2))

# Attach the station-pair columns (including dist) to every trip. The keys are
# written out; they are the ones dplyr previously inferred and reported as
# 'Joining by: c("sstation", "estation")'.
join_Stations_to_Trips <- left_join(
  Trips,
  Distances,
  by = c("sstation", "estation")
)
View(head(join_Stations_to_Trips))

Distributions of Distances 1)

# Density of the hour of `sdate`, in a grid of client type (rows) by
# weekend/weekday (columns). wday() values 1 and 7 are labelled "weekend".
join_Stations_to_Trips %>%
  mutate(
    hour = lubridate::hour(sdate),
    wday = ifelse(lubridate::wday(sdate) %in% c(1, 7), "weekend", "weekday")
  ) %>%
  ggplot(aes(x = hour)) +
  geom_density() +
  facet_grid(client ~ wday)

# Distance by hour in the same grid: one boxplot per hour with a smoothed
# trend drawn on top, and the y axis limited to the range 0 to 5.
join_Stations_to_Trips %>%
  mutate(
    hour = lubridate::hour(sdate),
    wday = ifelse(lubridate::wday(sdate) %in% c(1, 7), "weekend", "weekday")
  ) %>%
  ggplot(aes(x = hour, y = dist)) +
  geom_boxplot(aes(group = hour), outlier.size = 1, fill = "grey90") +
  stat_smooth(color = "blue") +
  ylim(0, 5) +
  facet_grid(client ~ wday)
# The recorded run of this plot emitted two warnings, one from stat_boxplot and
# one from stat_smooth, each reporting that 16942 rows containing non-finite
# values were removed.