1 Load Data & Library

library(tidyverse)
library(leaflet) # maps
library(leaflet.extras) # maps
library(geosphere)
library(lubridate)
library(viridis)
library(alluvial)
library(ggridges)
library(DT)
library(RColorBrewer)
library(patchwork)
# =========
# Fix the RNG seed so any random step (such as the optional sub-sample
# below) gives the same result on every run.
set.seed(7)

# Read the cycle-hire CSV from its local absolute path.
raw_data <- read_csv(file = "D:/R Datawarehouse/Tasty Tuesday/TFL Cycle Hire 2017.csv")
# Optional: continue with a 10% random sample of the rows instead.
# raw_data <- sample_frac(raw_data, .10)

# Two-column (longitude, latitude) tables for the start and end station of
# every trip, in the column order passed to distCosine() below.
station_start_coord <- select(raw_data, `StartStation Lon`, `StartStation Lat`)
station_end_coord <- select(raw_data, `EndStation Lon`, `EndStation Lat`)

# Row-wise distance between each trip's start and end station, computed with
# geosphere::distCosine() (its documented default output is in metres).
data1 <- mutate(
  raw_data,
  Distance = distCosine(station_start_coord, station_end_coord)
)

# Derive the trip-level features used by the plots and maps below.
data1 <- data1 %>%
  mutate(
    # Distance is still in metres at this point; multiplying m/s by 3.6 gives
    # km/h. NOTE(review): assumes Duration is recorded in seconds - confirm.
    Speed = round(Distance / Duration * 3.6, 2),
    # Trips with Duration == 0 yield Inf/NaN; store them as NA so they cannot
    # distort summaries.
    Speed = replace(Speed, !is.finite(Speed), NA),
    StartDate = ymd_hms(StartDate),
    EndDate = ymd_hms(EndDate),
    # Ask for hours explicitly: `EndDate - StartDate` lets R pick the unit
    # (secs/mins/hours/days) from the data, which made the former /60/60
    # conversion correct only when the unit happened to be seconds.
    Date_Diff_Hour = as.numeric(difftime(EndDate, StartDate, units = "hours")),
    Start_Date_format = date(StartDate),
    End_Date_format = date(EndDate),
    # Number of calendar days between the start date and the end date.
    Date_Diff_day = End_Date_format - Start_Date_format,
    Wday_Start = wday(Start_Date_format, label = TRUE),
    hour_Start = hour(StartDate),
    # Keep only the text after the last ", " of the station name.
    Suburb_Start = str_replace(`StartStation Name`, ".*, ", ""),
    Suburb_End = str_replace(`EndStation Name`, ".*, ", ""),
    # Hours 0-11 are AM; the midnight hour (0) was previously labelled PM.
    AM_PM = as.factor(ifelse(hour_Start < 12, "AM", "PM")),
    Start_End_Same_Stat = as.factor(
      ifelse(`StartStation Id` == `EndStation Id`, "Yes", "No")
    ),
    # Convert metres to kilometres only after Speed has been computed.
    Distance = round(Distance / 1000, 2)
  )

2 Preview the first 5 observations of the dataset

# Interactive preview of the first five rows. head() returns fewer rows when
# the data is shorter (no NA padding); `options` is spelled out in full rather
# than relying on partial argument matching.
datatable(head(data1, 5), options = list(scrollX = TRUE, paging = FALSE))

3 Cycling Speed

Distance was calculated as the shortest distance between the start station and the end station on a spherical earth.[1] Average speed was then calculated as distance divided by time.

# Heat map of the median trip speed for every weekday / start-hour pair.
data1 %>%
  group_by(Wday_Start, hour_Start) %>%
  summarise(med_speed = median(Speed, na.rm = TRUE)) %>%
  ggplot(mapping = aes(x = hour_Start, y = Wday_Start, fill = med_speed)) +
  geom_tile() +
  scale_fill_viridis() +
  labs(
    x = "Hour of the Day",
    y = "Day of the week",
    fill = "Median Speed (Km/h)"
  ) +
  theme(legend.position = "bottom")

4 Do riders return their bicycles to the station where they picked them up?

# Number of trips (column `n`) for each combination of AM/PM, weekday and
# same-station flag, ignoring trips with a non-positive duration.
# count() with explicit columns returns an ungrouped tibble, so no grouping
# leaks into later steps.
allu_sample1 <- data1 %>%
  filter(Duration > 0) %>%
  count(AM_PM, Wday_Start, Start_End_Same_Stat)

# Alluvial diagram: one axis per selected column, ribbon width given by the
# trip count, ribbons coloured green for AM and blue for PM.
allu_sample1 %>%
  select(
    Time = AM_PM,
    `W.Day` = Wday_Start,
    `End Same Station` = Start_End_Same_Stat
  ) %>%
  alluvial(
    freq = allu_sample1$n,
    # border = ifelse(allu_sample1$AM_PM == "AM", "green", "blue"),
    col = ifelse(allu_sample1$AM_PM == "AM", "green", "blue"),
    cex = 0.65
  )

Vertical sizes of the blocks are proportional to the frequency, and so are the widths of the alluvia.

I wanted to find out whether riders return their bicycles to the station where they picked them up, and whether this depends on the time of day and the day of the week.

For the majority of rides, the pick-up and drop-off stations are different. We can also see that weekends are mostly for afternoon riders.

5 Top 10 Pick up Stations

# The ten start stations with the most trips (ties at the cut-off are kept),
# ordered from most to least frequent.
Top_10_Start_Freq <- data1 %>%
  group_by(`StartStation Name`) %>%
  summarise(Freq = n()) %>%
  # Rank on Freq explicitly rather than letting top_n() guess the last column.
  top_n(10, wt = Freq) %>%
  arrange(desc(Freq))

# Flag icon used to highlight the single most popular station on the map.
no.1 <- makeAwesomeIcon(icon = "flag", markerColor = "blue", iconColor = "red")

# Map of the top-10 pick-up stations. The station named in the filter below is
# left out of the clustered markers and drawn separately with the flag icon.
data1 %>%
  filter(`StartStation Name` %in% Top_10_Start_Freq$`StartStation Name`) %>%
  select(`StartStation Lat`, `StartStation Lon`, `StartStation Name`) %>%
  distinct() %>%
  filter(`StartStation Name` != "Belgrove Street , King's Cross") %>%
  leaflet() %>%
  # A single OpenStreetMap base layer; the extra default addTiles() layer
  # stacked the same map underneath and has been removed.
  addProviderTiles(providers$OpenStreetMap.Mapnik) %>%
  addMarkers(
    lat = ~`StartStation Lat`,
    lng = ~`StartStation Lon`,
    clusterOptions = markerClusterOptions()
  ) %>%
  addScaleBar() %>%
  # NOTE(review): hard-coded coordinates, presumably those of the excluded
  # Belgrove Street station - confirm against the data.
  addAwesomeMarkers(
    lat = 51.52994,
    lng = -0.123616,
    label = "Most Popular Bike Station for Pick up & Drop off",
    icon = no.1
  )

Belgrove Street , King’s Cross is the most popular pick-up and drop-off station for cyclists, based on trip frequency.