library(tidyverse)
library(leaflet) # maps
library(leaflet.extras) # maps
library(geosphere)
library(lubridate)
library(viridis)
library(alluvial)
library(ggridges)
library(DT)
library(RColorBrewer)
library(patchwork)
# =========
# ---- Load the raw trips and derive trip-level features ----

# Fixed seed so the optional down-sampling below is reproducible when enabled.
set.seed(7)
raw_data <- read_csv("D:/R Datawarehouse/Tasty Tuesday/TFL Cycle Hire 2017.csv")
# raw_data <- sample_frac(raw_data, .10)

# Coordinates are selected as (longitude, latitude), the column order that
# geosphere::distCosine() expects for its point arguments.
station_start_coord <- raw_data %>%
  select(`StartStation Lon`, `StartStation Lat`)
station_end_coord <- raw_data %>%
  select(`EndStation Lon`, `EndStation Lat`)

# Great-circle distance between start and end station (metres with
# distCosine()'s default earth radius).
data1 <- raw_data %>%
  mutate(Distance = distCosine(station_start_coord, station_end_coord))

data1 <- data1 %>%
  mutate(
    # m/s -> km/h; assumes Duration is recorded in seconds -- TODO confirm.
    # NOTE(review): a Duration of 0 yields Inf/NaN here.
    Speed = round(Distance / Duration * 3.6, 2),
    StartDate = ymd_hms(StartDate),
    EndDate = ymd_hms(EndDate),
    # `EndDate - StartDate` chooses its unit (secs/mins/hours/days)
    # automatically, so dividing the raw difference by 3600 is only right when
    # that unit happens to be seconds. Ask for hours explicitly instead.
    Date_Diff_Hour = as.numeric(difftime(EndDate, StartDate, units = "hours")),
    Start_Date_format = date(StartDate),
    End_Date_format = date(EndDate),
    Date_Diff_day = End_Date_format - Start_Date_format,
    Wday_Start = wday(Start_Date_format, label = TRUE),
    hour_Start = hour(StartDate),
    # Keep only the text after the last ", " in the station name.
    Suburb_Start = str_replace(`StartStation Name`, ".*, ", ""),
    Suburb_End = str_replace(`EndStation Name`, ".*, ", ""),
    # Hours 0-11 are AM, so the midnight hour (0) is AM rather than PM.
    AM_PM = as.factor(ifelse(hour_Start < 12, "AM", "PM")),
    Start_End_Same_Stat = as.factor(
      ifelse(`StartStation Id` == `EndStation Id`, "Yes", "No")
    ),
    # Metres -> kilometres. Must come after Speed, which uses metres.
    Distance = round(Distance / 1000, 2)
  )

# Preview the first five rows of the enriched data.
datatable(data1[1:5, ], options = list(scrollX = TRUE, paging = FALSE))

# Distance was calculated as the shortest distance between the start station
# and the end station on a spherical earth [1]. Average speed was then
# calculated as Distance over Time.
# ---- Heatmap: median speed by weekday and hour of the day ----
data1 %>%
  group_by(Wday_Start, hour_Start) %>%
  summarise(med_speed = median(Speed, na.rm = TRUE)) %>%
  ggplot(aes(hour_Start, Wday_Start)) +
  geom_tile(aes(fill = med_speed)) +
  scale_fill_viridis() +
  labs(
    x = "Hour of the Day",
    y = "Day of the week",
    fill = "Median Speed (Km/h)"
  ) +
  theme(legend.position = "bottom")

# ---- Alluvial plot: AM/PM x weekday x "returned to the same station" ----

# Trip counts for every combination of the three factors, restricted to trips
# with a positive Duration.
allu_sample1 <- data1 %>%
  filter(Duration > 0) %>%
  group_by(AM_PM, Wday_Start, Start_End_Same_Stat) %>%
  count() %>%
  ungroup()

# The renamed columns become the axis labels; the counts are passed separately
# through `freq`, so `n` is dropped from the data handed to alluvial().
allu_sample1 %>%
  rename(
    Time = AM_PM,
    `W.Day` = Wday_Start,
    `End Same Station` = Start_End_Same_Stat
  ) %>%
  select(-n) %>%
  alluvial(
    freq = allu_sample1$n,
    # border = ifelse(allu_sample1$AM_PM == "AM", "green", "blue"),
    col = ifelse(allu_sample1$AM_PM == "AM", "green", "blue"),
    cex = 0.65
  )

# Vertical sizes of the blocks are proportional to the frequency, and so are
# the widths of the alluvia.
# I wanted to find out whether riders return their bicycle to the same station
# they picked it up from, depending on the time of day and the day of the week.
# For the majority of rides, the pick-up and drop-off stations are different.
# We can also see that weekend rides mostly take place in the afternoon (PM).
# The ten start stations with the most trips, most frequent first.
# `wt = Freq` is given explicitly: without it top_n() ranks by whichever column
# happens to be last and prints a "Selecting by ..." message. Ties at the
# cut-off are kept, so more than ten rows are possible.
Top_10_Start_Freq <- data1 %>%
  group_by(`StartStation Name`) %>%
  summarise(Freq = n()) %>%
  top_n(10, Freq) %>%
  arrange(desc(Freq))
# ---- Map of the most frequently used start stations ----

# Flag icon used to highlight the single most popular station.
no.1 <- makeAwesomeIcon(icon = "flag", markerColor = "blue", iconColor = "red")

# Plot the top start stations as clustered markers. The Belgrove Street station
# is removed from the regular markers and drawn separately with the flag icon.
# NOTE(review): the flag's coordinates are hard-coded; presumably they are the
# Belgrove Street station's position -- verify against the data.
data1 %>%
  filter(`StartStation Name` %in% Top_10_Start_Freq$`StartStation Name`) %>%
  select(`StartStation Lat`, `StartStation Lon`, `StartStation Name`) %>%
  distinct() %>%
  filter(`StartStation Name` != "Belgrove Street , King's Cross") %>%
  leaflet() %>%
  addTiles() %>%
  addMarkers(
    lat = ~`StartStation Lat`,
    lng = ~`StartStation Lon`,
    clusterOptions = markerClusterOptions()
  ) %>%
  addProviderTiles(providers$OpenStreetMap.Mapnik) %>%
  addScaleBar() %>%
  addAwesomeMarkers(
    lat = 51.52994,
    lng = -0.123616,
    label = "Most Popular Bike Station for Pick up & Drop off",
    icon = no.1
  )

# "Belgrove Street , King's Cross" is the most popular pick-up and drop-off
# station for cyclists, based on frequency.