knitr::opts_chunk$set(echo = TRUE)
library(feather)
library(dplyr)
library(tidyr)
library(ggplot2)
library(stlplus)
library(lubridate)
library(anytime)
library(fpp2)
library(GGally)
library(cowplot)
library(kableExtra)
library(knitr)
library(zoo)
library(gridExtra)
library(grid)
#Data intake- Grab data Create total tolls column
myData <- read_feather("NYTrafficData.feather")
myData <- myData[!duplicated(myData),]
myData$Date <- lubridate::mdy(myData$Date)
myData2 <- myData %>%
mutate(total_count=`cash-count`+ `etc-count`)
# convert datetime
wideData <- myData2 %>%
select(Date, id, total_count) %>%
mutate(Date=anydate(Date))
rfkbridge <- wideData %>%
filter(id==2) %>%
arrange(Date)
# convert count to by 1k people and display total count
rfkbridge$total_count <- rfkbridge$total_count / 1000
rfkbridge <- rfkbridge[,c("Date",'total_count')]
kable(rfkbridge[1:5,],caption= "Edited dataframe")
# Create day column with name of day I.E Monday.Tuesday...
rfkbridge$day = strftime(rfkbridge$Date,'%A')
daily <- rfkbridge %>% group_by(day) %>%
summarize(weekday_total=mean(total_count))
daily$day <- ordered(daily$day, levels=c("Monday", "Tuesday", "Wednesday", "Thursday",
"Friday", "Saturday", "Sunday"))
ggplot(daily, aes(day,weekday_total))+
geom_point()+
ggtitle("Average Tolls By Day From 2012-2016")
# Average Tolls By Month From 2012-2016
monthly <- rfkbridge %>% group_by(month=floor_date(Date, "month")) %>%
summarize(amount=mean(total_count))
plot(monthly$month,monthly$amount,type="o",main="Average Tolls For Each Month From 2012-2016")
# barplot averaged monthly data
rfkbridge$month = strftime(rfkbridge$Date,'%B')
monthly <- rfkbridge %>% group_by(month) %>%
summarize(month_total=mean(total_count))
monthly$month <- ordered(monthly$month, levels=c("January", "February", "March", "April",
"May", "June", "July","August","September","October","November","December"))
ggplot(monthly, aes(month,month_total))+
geom_bar(stat="identity")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
coord_cartesian(ylim=c(60,100))+
ggtitle("Barplot of Average Tolls By Month Aggregated over 2012-2016")
# Create full date range to merge into df to deal with missing dates
fullDateRange <- as.data.frame(seq(rfkbridge$Date[1],rfkbridge$Date[1617], by="days"))
colnames(fullDateRange) <- c("Date")
rfkbridge <- left_join(fullDateRange, rfkbridge, by='Date')
kable(rfkbridge[1:5,])
#Convert to ts and plot ts
mytimeseries <- ts(rfkbridge[,'total_count'],
frequency =356)
autoplot(mytimeseries)+
ggtitle("yearly trends starting in March")
plot_grid(autoplot(mytimeseries),ggAcf(mytimeseries,lag=28))
# graphed by year then week
plot(stl(mytimeseries,na.action = na.approx, s.window ="periodic"),main="Frequency by year with stl")
mytimeseries <- ts(rfkbridge[,'total_count'],
frequency =52)
plot(stl(mytimeseries,na.action = na.approx, s.window ="periodic"),main="Frequency by month with stl")
# plug into stlplus
weekDays <- c("Su", "M", "Tu","W", "Th", "F", "Sa")
stlDaily <- stlplus(rfkbridge$total_count,t=rfkbridge$Date,
n.p=7, s.window="periodic",
sub.labels=weekDays, sub.start=1)
plot(stlDaily, xlab="Date", ylab="Daily Vehicles (thous.)",main="stlplus call")
#Plot seasonal function with periodic
plot_seasonal(stlDaily,main="Plot Seasonal Call With s.window= Periodic")
stlDaily <- stlplus(rfkbridge$total_count,t=rfkbridge$Date,
n.p=7, s.window=25,
sub.labels=weekDays, sub.start=1)
#Plot seasonal function with n=25
plot_seasonal(stlDaily,main="Plot Seasonal Call With s.window= 25")
## Taken from turorial linked at beginning
normalizedData <- rfkbridge
normalizedData$Total <- normalizedData$total_count - stlDaily$data$seasonal
day(normalizedData$Date) <- 1
normalizedData <- normalizedData %>%
group_by(Date) %>%
summarise_each(funs(mean(., na.rm=T)))
monthNames <- c("Ja", "F", "Mr", "Ap", "Ma", "Jn", "Jl", "Au", "S", "O", "N", "D")
stlNormalizedMonthly <- stlplus(normalizedData$Total, t=normalizedData$Date,
n.p=12, s.window=25,
sub.start=3, sub.labels = monthNames)
plot_seasonal(stlNormalizedMonthly, xlab="Date", ylab="Daily Vehicles (thous.)", main="Monthly Datapoints By Year")
p1 <- plot_cycle(stlNormalizedMonthly, ylim=c(-17, 17), ylab="Yearly Seasonality")
p2 <- plot_cycle(stlDaily, ylim=c(-17, 17), ylab="Weekly Seasonality")
grid.arrange(p1,p2, ncol=2,top = textGrob("Normalized daily and monthly averages s.window=25",gp=gpar(fontsize=20,font=3)))
weekDays <- c("Su", "M", "Tu","W", "Th", "F", "Sa")
stlDaily <- stlplus(rfkbridge$total_count,t=rfkbridge$Date,
n.p=7, s.window="periodic",
sub.labels=weekDays, sub.start=1)
normalizedData <- rfkbridge
normalizedData$Total <- normalizedData$total_count - stlDaily$data$seasonal
day(normalizedData$Date) <- 1
normalizedData <- normalizedData %>%
group_by(Date) %>%
summarise_each(funs(mean(., na.rm=T)))
monthNames <- c("Ja", "F", "Mr", "Ap", "Ma", "Jn", "Jl", "Au", "S", "O", "N", "D")
stlNormalizedMonthly <- stlplus(normalizedData$Total, t=normalizedData$Date,
n.p=12, s.window="periodic",
sub.start=3, sub.labels = monthNames)
p1 <- plot_cycle(stlNormalizedMonthly, ylim=c(-17, 17), ylab="Yearly Seasonality")
p2 <- plot_cycle(stlDaily, ylim=c(-17, 17), ylab="Weekly Seasonality")
grid.arrange(p1,p2, ncol=2,top=textGrob("Normalized daily and monthly averages s.window=per",gp=gpar(fontsize=20,font=3)))