Install the twitter anomaly detection package.
#pacman::p_install_gh("twitter/AnomalyDetection")
#devtools::install_github("twitter/AnomalyDetection")
library(readxl)
library(readr)
library(plotly)
library(lubridate)
library("rvest")
library(tibble)
library(tidyr)
library(foreach)
library(doParallel)
library(ggjoy)
library(ggfortify)
library(readxl)
library(tidyquant)
library(forecast) # forecasting pkg
library(sweep)# Broom tidiers for forecast pkg
library(DT)
library(timekit)
library(tseries)
library(bsts)
library(reticulate)
pacman::p_load("rio","tidyverse","stringr","ggthemes","RColorBrewer","viridis","data.table","kableExtra","stringr","scales","purrr",
"knitr","mlr",imputeTS,"gam",broom,sweep)
library(ggpubr)
theme_set(theme_pubclean())
# Calculate the number of cores
no_cores <- detectCores() - 1
cl<-makeCluster(no_cores)
registerDoParallel(cl)
Data<- rio::import("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/actvsfcdatajuly311.xlsx",sheet="Sheet4")
Data<-Data%>%mutate(Date=as.Date(Date))
Data%>%head()
## Date Actual
## 1 2016-01-04 NA
## 2 2016-01-11 26
## 3 2016-01-18 27
## 4 2016-01-25 28
## 5 2016-02-01 22
## 6 2016-02-08 27
We can visualize the missing values.
plotNA.gapsize(Data%>%tk_ts())
box()
Fig. 30
#plotNA.imputations(modelseries)
statsNA(Data%>%tk_ts())
## [1] "Length of time series:"
## [1] 118
## [1] "-------------------------"
## [1] "Number of Missing Values:"
## [1] 4
## [1] "-------------------------"
## [1] "Percentage of Missing Values:"
## [1] "3.39%"
## [1] "-------------------------"
## [1] "Stats for Bins"
## [1] " Bin 1 (30 values from 1 to 30) : 1 NAs (3.33%)"
## [1] " Bin 2 (30 values from 31 to 60) : 3 NAs (10%)"
## [1] " Bin 3 (30 values from 61 to 90) : 0 NAs (0%)"
## [1] " Bin 4 (28 values from 91 to 118) : 0 NAs (0%)"
## [1] "-------------------------"
## [1] "Longest NA gap (series of consecutive NAs)"
## [1] "2 in a row"
## [1] "-------------------------"
## [1] "Most frequent gap size (series of consecutive NA series)"
## [1] "1 NA in a row (occuring 2 times)"
## [1] "-------------------------"
## [1] "Gap size accounting for most NAs"
## [1] "2 NA in a row (occuring 1 times, making up for overall 2 NAs)"
## [1] "-------------------------"
## [1] "Overview NA series"
## [1] " 1 NA in a row: 2 times"
## [1] " 2 NA in a row: 1 times"
plotNA.distributionBar(Data%>%tk_ts())
box()
Fig. 30
plotNA.distribution(Data%>%tk_ts())
box()
Fig. 30
Data$Actual[which(is.na(Data$Actual))]
## [1] NA NA NA NA
#
#Uses Kalman Smoothing on structural time series models (or on the state space representation of an arima model) for imputation
modelseries=Data%>%tk_ts()
modelseries=na.kalman(modelseries)
## check that the time series no longer have
## missing values
sum(is.na(modelseries))
## [1] 0
Data["Actual"]<-as.numeric(modelseries)
mlr::summarizeColumns(Data)
## name type na mean disp median mad min max nlevs
## 1 Date Date 0 NA NA NA NA 1 1 118
## 2 Actual numeric 0 34.13857 9.591503 32 7.413 21 58 0
Data%>%
ggplot(aes(y=Actual,x=""))+
# theme_minimal()+
geom_boxplot(width=0.5,fill="white")+
geom_jitter(width = 0.1,size=1)+
labs(y="Number of Vehicles",x="")
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/boxplot.png")
Data%>%
drop_na()%>%
summarise(`2.5th`=quantile(Actual,probs = 0.025),
`5th`=quantile(Actual,probs = 0.05),
`50th`=quantile(Actual,probs = 0.5),
`95th`=quantile(Actual,probs = 0.95),
`97.5th`=quantile(Actual,probs = 0.975),
Average=mean(Actual),
StandardDeviation=sd(Actual)
)
## 2.5th 5th 50th 95th 97.5th Average StandardDeviation
## 1 22.925 23 32 55 56 34.13857 9.591503
mycolors=viridis_pal(option = "D")(4)
p<-Data%>%
drop_na()%>%
ggplot(aes(y=Actual,x=Date))+
# theme_minimal()+
geom_point(alpha = 0.5, color = palette_light()[[1]], shape=20,size=2)+
geom_line()+
labs(y="Number of Vehicles",x="")
#Interquartile Range= Q3-Q1
IQR<-quantile(modelseries,probs = c(0.25, 0.75))
Data1<-Data[ Data["Actual"]>IQR[2],]
Data2<-Data[Data["Actual"]<IQR[1] ,]
Data3<-rbind.data.frame(Data1,Data2)%>%drop_na()
p+geom_point(aes(y=Actual,x=Date,color="Above or below IQR"),size=2,data = Data3,shape=1)+
theme(
legend.position="top",
legend.direction="horizontal",
legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
labs(x="Time",fill="",title="Weekly Vehicles Recorded")+
#scale_x_date(labels = date_format("%b-%Y"))
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))
Fig. 30
#[1] "#440154FF" "#482878FF" "#3E4A89FF" "#31688EFF" "#26828EFF" "#1F9E89FF"
# [7] "#35B779FF" "#6DCD59FF" "#B4DE2CFF" "#FDE725FF"
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A1.png")
Split data into training set( first 85%) and test set (next 15%). Then impute any missing values with a kalman filter.
index<-round(0.9*dim(Data)[1])
train<-Data[1:index,]%>%tk_ts()%>%na.kalman()
test<-Data[(1+index):dim(Data)[1],]%>%tk_ts()%>%na.kalman()
#test<-modelseries[(index+1):length(modelseries),]
#modelseries=Data%>%tk_ts()
#modelseries=na.kalman(modelseries)
n=length(test)
length(train)
## [1] 106
length(test)
## [1] 12
# Add time series signature
train_augmented <- Data[1:index,] %>%
tk_augment_timeseries_signature()
# Model using auto.arima()
# log removes any multiplicative effect
fit_arima <- log(train)%>%auto.arima()
fit_arima
## Series: .
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.2308
## s.e. 0.0975
##
## sigma^2 estimated as 0.009572: log likelihood=95.55
## AIC=-187.1 AICc=-186.99 BIC=-181.8
Xreg<-train_augmented[,c("mweek","week","day","month","quarter","half","year")]
Xregg<-train_augmented[,c("week","day","month","quarter","half","year")]
#Y<-train_augmented[,"Actual"]%>%tk_zoo()
fit_arima2 <- auto.arima(y=log(train),xreg =Xreg )
fit_arima2
## Series: log(train)
## Regression with ARIMA(2,0,0) errors
##
## Coefficients:
## ar1 ar2 intercept mweek week day month
## 0.7167 0.1733 -555.2275 -0.0178 0.1164 -0.0137 -0.4917
## s.e. 0.1004 0.1017 235.1557 0.0084 0.1841 0.0262 0.7999
## quarter half year
## 0.0470 -0.0245 0.2772
## s.e. 0.0541 0.0813 0.1165
##
## sigma^2 estimated as 0.009412: log likelihood=101.4
## AIC=-180.81 AICc=-178 BIC=-151.51
fit_arima4 <- auto.arima(y=train )
fit_arima4
## Series: train
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.1839
## s.e. 0.0974
##
## sigma^2 estimated as 9.915: log likelihood=-268.94
## AIC=541.88 AICc=542 BIC=547.19
# sw_glance for model accuracy
sw_glance(fit_arima)
## # A tibble: 1 x 12
## model.desc sigma logLik AIC BIC ME RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 ARIMA(0,1,1) 0.0978 95.6 -187 -182 0.00904 0.0969 0.0745 0.200 2.18
## # ... with 2 more variables: MASE <dbl>, ACF1 <dbl>
sw_tidy(fit_arima2)
## # A tibble: 10 x 2
## term estimate
## <chr> <dbl>
## 1 ar1 0.717
## 2 ar2 0.173
## 3 intercept -555
## 4 mweek - 0.0178
## 5 week 0.116
## 6 day - 0.0137
## 7 month - 0.492
## 8 quarter 0.0470
## 9 half - 0.0245
## 10 year 0.277
sw_glance(fit_arima2)
## # A tibble: 1 x 12
## model.desc sigma logLik AIC BIC ME RMSE MAE MPE MAPE
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Regressio… 0.0970 101 -181 -152 -0.00119 0.0923 0.0734 -0.111 2.15
## # ... with 2 more variables: MASE <dbl>, ACF1 <dbl>
fitdata=cbind.data.frame(Date=Data$Date[1:index] ,sweep::sw_augment(fit_arima))%>%mutate(fitted=exp(.fitted))
Stochastic and deterministic trends
#### make prediction dates
idx_future<-train%>%tk_zoo() %>% tk_index()%>%
tk_make_future_timeseries(n_future = n)
#The deterministic trend model is obtained as follows:
fit_arima3 <- auto.arima(y=train,xreg =Xregg,d=0 )
fit_arima3
## Series: train
## Regression with ARIMA(1,0,0) errors
##
## Coefficients:
## ar1 intercept week day month quarter half
## 0.8991 -20853.435 4.4557 -0.6071 -18.5153 0.8672 -1.2304
## s.e. 0.0426 8617.621 6.2057 0.8861 26.9836 1.8018 2.7778
## year
## 10.3634
## s.e. 4.2691
##
## sigma^2 estimated as 10.28: log likelihood=-270.56
## AIC=559.12 AICc=561 BIC=583.09
#Alternatively, the stochastic trend model can be estimated.
(fit_stochastic<- auto.arima(y=train, d=1))
## Series: train
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.1839
## s.e. 0.0974
##
## sigma^2 estimated as 9.915: log likelihood=-268.94
## AIC=541.88 AICc=542 BIC=547.19
fitdat=cbind.data.frame(Date=Data$Date[1:index] ,sweep::sw_augment(fit_arima3))
# Twelve period forecast
test_augmented <- Data[(1+index):dim(Data)[1],] %>%
tk_augment_timeseries_signature()
Xreg2<-test_augmented[,c("week","day","month","quarter","half","year")]
fcast3 <- forecast(fit_arima3, h = n,xreg=Xreg2)%>%data.frame()
fcast3
## Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 107 54.21182 50.10339 58.32025 47.92852 60.49512
## 108 53.52395 47.99901 59.04889 45.07428 61.97362
## 109 52.92626 46.47981 59.37271 43.06726 62.78525
## 110 52.71341 45.60882 59.81800 41.84787 63.57894
## 111 52.26971 44.67463 59.86479 40.65404 63.88539
## 112 51.89158 43.92200 59.86115 39.70317 64.07998
## 113 51.57238 43.31245 59.83230 38.93991 64.20484
## 114 49.78873 41.30133 58.27613 36.80837 62.76908
## 115 49.57018 40.90325 58.23711 36.31525 62.82511
## 116 49.39448 40.58508 58.20388 35.92166 62.86729
## 117 49.25730 40.33438 58.18021 35.61088 62.90372
## 118 50.32567 41.31204 59.33931 36.54050 64.11084
dfuture2=data.frame(Date=idx_future,fcast3)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdat)+
labs(y="Number of Vehicles",x="",title="Regression with ARIMA Errors")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
# scale_fill_viridis(discrete = TRUE,option = "D")+
#scale_fill_manual(values = palette_light()[1:3])+
scale_color_manual(values=mycolors)+
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+
geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture2)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Point.Forecast,color="12 Week Forecast"), data =dfuture2, size=1.5, linetype=1)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-03-15"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A7.png")
#### make prediction dates
idx_future<-train%>%tk_zoo() %>% tk_index()%>%
tk_make_future_timeseries(n_future = n)
# Ten period forecast
fcast <- forecast(fit_arima, h = n)%>%data.frame()
# raise predictions back to exponents because of log taken initially
fcast<- apply(fcast,2, function(x) exp(x))
test_augmented <- Data[(1+index):dim(Data)[1],] %>%
tk_augment_timeseries_signature()
Xreg2<-test_augmented[,c("mweek","week","day","month","quarter","half","year")]
Xreg3=rbind(rep( apply(Xreg2,2,mean),8))
#fcast <- forecast(fit_arima4, h = n,Xreg2)
#predict(fit_arima2,Xreg)
as.matrix(rep( apply(Xreg2,2,mean),8),7,8)
## [,1]
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
## mweek 3.416667
## week 8.500000
## day 16.000000
## month 2.250000
## quarter 1.083333
## half 1.000000
## year 2018.000000
dfuture=data.frame(Date=idx_future,fcast)
#as.data.frame.list(rep( apply(Xreg2,2,mean)))
#as.data.frame.matrix(rep( apply(Xreg2,2,mean)),7,8)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
labs(y="Number of Vehicles",x="",title="ARIMA(0,1,1) Model")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
# scale_fill_viridis(discrete = TRUE,option = "D")+
#scale_fill_manual(values = palette_light()[1:3])+
scale_color_manual(values=mycolors)+
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+
geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Point.Forecast,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-03-21"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A2.png")
#l.start Start value for level (a[0]).
#b.start Start value for trend (b[0]).
#s.start Vector of start values for the seasonal component (s_1[0] … s_p[0])
# non-seasonal implies gamma=FLASE
HW=HoltWinters(train, gamma=FALSE, l.start=mean(modelseries), b.start=1)
sweep::sw_augment(HW)%>%head()
## # A tibble: 6 x 4
## index .actual .fitted .resid
## <int> <dbl> <dbl> <dbl>
## 1 1 26.0 NA NA
## 2 2 26.0 NA NA
## 3 3 27.0 35.1 - 8.14
## 4 4 28.0 29.2 - 1.16
## 5 5 22.0 29.0 - 6.99
## 6 6 27.0 23.8 3.21
fc=forecast(HW,h=n)
fitdata=cbind.data.frame(Date=Data$Date[1:index] ,sweep::sw_augment(HW))
#### make prediction dates
idx_future<-train%>%tk_zoo() %>% tk_index()%>%
tk_make_future_timeseries(n_future = n)
dfuture=data.frame(Date=idx_future,fc)
dfuture
## Date Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 107 2018-01-15 55.11551 50.89825 59.33278 48.66576 61.56527
## 108 2018-01-22 55.63069 50.07383 61.18755 47.13221 64.12917
## 109 2018-01-29 56.14587 49.46520 62.82654 45.92866 66.36308
## 110 2018-02-05 56.66105 48.97602 64.34607 44.90781 68.41428
## 111 2018-02-12 57.17622 48.56346 65.78898 44.00414 70.34831
## 112 2018-02-19 57.69140 48.20417 67.17864 43.18193 72.20087
## 113 2018-02-26 58.20658 47.88380 68.52936 42.41925 73.99390
## 114 2018-03-05 58.72176 47.59287 69.85065 41.70159 75.74192
## 115 2018-03-12 59.23693 47.32470 71.14916 41.01875 77.45511
## 116 2018-03-19 59.75211 47.07446 72.42976 40.36331 79.14091
## 117 2018-03-26 60.26729 46.83845 73.69612 39.72966 80.80492
## 118 2018-04-02 60.78246 46.61386 74.95107 39.11345 82.45148
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
labs(y="Number of Vehicles",x="",title="Holt-Winters Exponential Smoothing Model")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
scale_color_manual(values=mycolors)+
#scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+
geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Point.Forecast,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-02-26"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A3.png")
forecast::ggtsdisplay(train)
Fig. 30
forecast::ggtsdisplay(fit_stochastic$residuals)
Fig. 30
#checkresiduals(HW)
The trouble with using the ACF is that there can be other reasons for significant spikes, not just seasonality. So it is indicative but cannot be conclusive.
If the data had a small seasonal period (such as 4 for quarterly data or 12 for monthly data) then a simple approach is to use the ets function in the forecast package for R. If there is a seasonal pattern, it will choose a seasonal model.
The non-integer period will be a problem for any solution that assumes period=52, because the difference between 52 and 365/7 will become apparent with long series.
One approach is to use the tbats model, also in the forecast package in R. It will handle weekly seasonality and will automatically determine if a seasonal pattern is present. For example:
The autocorrelations die down and as they do not cross 0.2. This suggest the absence of seasonality.
#check seasonality/periodicity of time series
# freqeuncy is weekly data 365/7
x <- ts(modelseries, frequency=365/7)
fit <- tbats(modelseries)
seasonal <- !is.null(fit$seasonal)
seasonal
## [1] FALSE
forecast::ggtsdisplay(residuals(HW), lag.max=80, main='HoltWinters Exponenetial smoothing Residuals')
Fig. 30
#find frequency
forecast::findfrequency(modelseries)
## [1] 1
#beer2 <- window(ausbeer, start=1992)
#fit.beer <- tslm(ts(train) ~ trend + season)
fit <- tslm(ts(train) ~ trend )
fcast <- forecast(fit)
autoplot(fcast) +
ggtitle("Forecasts using regression")
Fig. 30
checkresiduals(fit, lag.max=80)
Fig. 30
##
## Breusch-Godfrey test for serial correlation of order up to 10
##
## data: Residuals from Linear regression model
## LM test = 86.817, df = 10, p-value = 2.286e-14
#stl(modelseries)
forecast::tsoutliers(modelseries)
## $index
## integer(0)
##
## $replacements
## numeric(0)
#forecast::stlm(modelseries)
#forecast::ggseasonplot(modelseries)
#forecast::stlf(modelseries)
#stlRes <- stl(ts(na.omit(Data[,2])), s.window = "periodic")
#modelseries$Actual
#decomposedRes <- decompose(na.omit(Data[,2])), type="additive") # use type = "additive/mult" for additive components
#plot (decomposedRes)
adf.test(modelseries)
##
## Augmented Dickey-Fuller Test
##
## data: modelseries
## Dickey-Fuller = -1.9833, Lag order = 4, p-value = 0.5835
## alternative hypothesis: stationary
tseries::adf.test(diff(log(modelseries)), alternative="stationary", k=0)
##
## Augmented Dickey-Fuller Test
##
## data: diff(log(modelseries))
## Dickey-Fuller = -12.878, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
# Automated forecasting using an exponential model
fit_ets <- ets(modelseries)
fit_ets
## ETS(A,N,N)
##
## Call:
## ets(y = modelseries)
##
## Smoothing parameters:
## alpha = 0.8393
##
## Initial states:
## l = 26.0702
##
## sigma: 3.0461
##
## AIC AICc BIC
## 831.8146 832.0251 840.1267
fc=forecast(fit_ets,h=12)
fc%>%data.frame()%>%head(4)
## Point.Forecast Lo.80 Hi.80 Lo.95 Hi.95
## 119 41.65585 37.75208 45.55963 35.68555 47.62616
## 120 41.65585 36.55939 46.75232 33.86148 49.45023
## 121 41.65585 35.59710 47.71461 32.38979 50.92192
## 122 41.65585 34.76796 48.54375 31.12173 52.18998
#Model <- auto.ssarima(modelseries,lags=c(1,12),
# h=12,holdout=TRUE,intervals="np")
#Model=auto.ces(modelseries,h=12,holdout=FALSE)
#Model%>%summary()
#forecast(Model)%>%tk_tbl()
# Add time series signature
train_augmented <- Data[1:index,] %>%
tk_augment_timeseries_signature()
train_augmented["Actual"]<-as.numeric(train)
train_augmented%>%head(4)
## Date Actual index.num diff year half quarter month month.xts
## 1 2016-01-04 26.04576 1451865600 NA 2016 1 1 1 0
## 2 2016-01-11 26.00000 1452470400 604800 2016 1 1 1 0
## 3 2016-01-18 27.00000 1453075200 604800 2016 1 1 1 0
## 4 2016-01-25 28.00000 1453680000 604800 2016 1 1 1 0
## month.lbl day hour minute second hour12 am.pm wday wday.xts wday.lbl
## 1 January 4 0 0 0 0 1 2 1 Monday
## 2 January 11 0 0 0 0 1 2 1 Monday
## 3 January 18 0 0 0 0 1 2 1 Monday
## 4 January 25 0 0 0 0 1 2 1 Monday
## mday qday yday mweek week week.iso week2 week3 week4 mday7
## 1 4 4 4 2 1 1 1 1 1 1
## 2 11 11 11 3 2 2 0 2 2 2
## 3 18 18 18 4 3 3 1 0 3 3
## 4 25 25 25 5 4 4 0 1 0 4
# Model using the augmented features
fit_lm<-lm(Actual~ year+half+month+quarter+day+mweek+week, data = train_augmented)
broom::glance(fit_lm)
## r.squared adj.r.squared sigma statistic p.value df logLik
## 1 0.5012841 0.4656616 6.527044 14.0721 1.616699e-12 8 -345.0996
## AIC BIC deviance df.residual
## 1 708.1993 732.1702 4175.026 98
broom::tidy(fit_lm)
## term estimate std.error statistic p.value
## 1 (Intercept) -1.371847e+04 3600.7292615 -3.8099141 0.0002426712
## 2 year 6.821454e+00 1.7748385 3.8434224 0.0002156030
## 3 half -4.537905e+00 2.9285478 -1.5495411 0.1244751747
## 4 month -1.609079e+01 32.7436512 -0.4914171 0.6242302509
## 5 quarter 7.148341e+00 2.7007311 2.6468168 0.0094680925
## 6 day -6.038622e-01 1.0812576 -0.5584814 0.5777895211
## 7 mweek 5.003200e-01 0.7198003 0.6950817 0.4886489145
## 8 week 3.631563e+00 7.5328878 0.4820944 0.6308138722
broom::augment_columns(fit_lm,data = train_augmented)%>%head()
## Date Actual index.num diff year half quarter month month.xts
## 1 2016-01-04 26.04576 1451865600 NA 2016 1 1 1 0
## 2 2016-01-11 26.00000 1452470400 604800 2016 1 1 1 0
## 3 2016-01-18 27.00000 1453075200 604800 2016 1 1 1 0
## 4 2016-01-25 28.00000 1453680000 604800 2016 1 1 1 0
## 5 2016-02-01 22.00000 1454284800 604800 2016 1 1 2 1
## 6 2016-02-08 27.00000 1454889600 604800 2016 1 1 2 1
## month.lbl day hour minute second hour12 am.pm wday wday.xts wday.lbl
## 1 January 4 0 0 0 0 1 2 1 Monday
## 2 January 11 0 0 0 0 1 2 1 Monday
## 3 January 18 0 0 0 0 1 2 1 Monday
## 4 January 25 0 0 0 0 1 2 1 Monday
## 5 February 1 0 0 0 0 1 2 1 Monday
## 6 February 8 0 0 0 0 1 2 1 Monday
## mday qday yday mweek week week.iso week2 week3 week4 mday7 .fitted
## 1 4 4 4 2 1 1 1 1 1 1 22.31875
## 2 11 11 11 3 2 2 0 2 2 2 22.22360
## 3 18 18 18 4 3 3 1 0 3 3 22.12845
## 4 25 25 25 5 4 4 0 1 0 4 22.03330
## 5 1 32 32 6 5 5 1 2 1 1 24.56708
## 6 8 39 39 2 6 6 0 0 2 2 21.97033
## .se.fit .resid .hat .sigma .cooksd .std.resid
## 1 1.730559 3.727008 0.07029746 6.548853 0.003314738 0.5922046
## 2 1.537008 3.776400 0.05545222 6.548729 0.002600781 0.5953185
## 3 1.517496 4.871552 0.05405330 6.540861 0.004206303 0.7673924
## 4 1.678107 5.966705 0.06610072 6.530582 0.007916845 0.9459496
## 5 3.389952 -2.567080 0.26974541 6.553509 0.009780500 -0.4602414
## 6 1.638020 5.029673 0.06298041 6.539356 0.005324325 0.7960656
fitdata=cbind.data.frame(Date=Data$Date[1:index] ,broom::augment(fit_lm))
library(ggfortify)
ggplot2::autoplot(fit_lm, which = 1:6, nrow=2, label.size = F,na.rm=TRUE)
Fig. 30
broom::tidy(fit_lm) %>%
gather(x, y, estimate:p.value) %>%
ggplot(aes(x = term, y = y, color = x, fill = x)) +
facet_wrap(~ x, scales = "free", ncol = 2) +
geom_bar(stat = "identity", alpha = 0.8) +
scale_color_manual(values = palette_light()) +
scale_fill_manual(values = palette_light()) +
theme_tq() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
Fig. 30
checkresiduals(fit_lm)
Fig. 30
##
## Breusch-Godfrey test for serial correlation of order up to 11
##
## data: Residuals
## LM test = 80.171, df = 11, p-value = 1.368e-12
Prediction Interval : level (1 − α)% n-2 df \(yˆ(x∗) ± t_{1−α/2,n−2}SE_{yˆ(x∗)}\)
\(prediction ± t_{1−α/2,n−2}\sqrt{(prediction-Actual)^{2}}\)
# confidence Interval for Predictions
# Calculate standard deviation of residuals
#test_resid_sd <- sd(test_residuals)
# Add time series signature
test_augmented <- Data[(1+index):dim(Data)[1],] %>%
tk_augment_timeseries_signature()
predlm<- predict(fit_lm,test_augmented,interval="prediction",se.fit=T)%>%data.frame()
dfuture<-cbind.data.frame(test_augmented,predlm)%>%rename(Predicted=fit.fit)
dfuture3<-
dfuture%>%
#add_column(Actual=as.numeric(test)) %>%
mutate(residual = Actual- Predicted)%>%
mutate(
lo.95 = Predicted - 1.96 * residual,
lo_95 = Predicted + qt(.025,n-2)* residual,
hi_95 = Predicted + qt(.975,n-2)* residual,
hi.95 = Predicted + 1.96 *residual
)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
labs(y="Number of Vehicles",x="",title="Linear Regression Model")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
scale_color_manual(values=mycolors)+
#scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-12"), linetype=2)+
# geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Predicted,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
geom_ribbon(aes(ymin=fit.lwr, ymax=fit.upr), fill = "grey", alpha=0.5,data=dfuture3)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-03-01"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A4.png")
fit_lOESS<-gam(Actual~lo(quarter)+lo(month)+lo(day)+lo(mweek), data = train_augmented)
predicted<-predict(fit_lOESS,test_augmented)
dfuture<-cbind.data.frame(test_augmented,Predicted=predict(fit_lm,test_augmented))
fitdata=cbind.data.frame(Date=Data$Date[1:index] ,broom::augment(fit_lOESS))
fit_gam<-gam::gam(Actual~ s(Date)+s(quarter)+s(month)+s(day)+s(day)+s(mweek)+s(week)
, data =train_augmented )
fitdata=cbind.data.frame(Date=Data$Date[1:index] ,broom::augment(fit_gam))
predgam<- predict(fit_lm,test_augmented,interval="prediction",se.fit=T,quantiles = c(.025, .975))%>%data.frame()
dfuture<-cbind.data.frame(test_augmented,predgam)%>%rename(Predicted=fit.fit)
dfuture3<-
dfuture%>%
#add_column(Actual=as.numeric(test)) %>%
mutate(residual = Actual - Predicted)%>%
mutate(
lo.95 = Predicted - 1.96 * residual,
lo.80 = Predicted - 1.28 * residual,
hi.80 = Predicted + 1.28 * residual,
hi.95 = Predicted + 1.96 *residual
)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
labs(y="Number of Vehicles",x="",title="Generalized Additive Model")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
scale_color_manual(values=mycolors)+
#scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+
# geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Predicted,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
geom_ribbon(aes(ymin=fit.lwr, ymax=fit.upr ), fill = "grey", alpha=0.5,data=dfuture3)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-02-28"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A5.png")
# the AddNamedHolidays requires a zoo object input
train<-Data[1:index,]%>%tk_zoo()%>%na.kalman()
# fitting weekly data. number of seasons is 52
train_augmented["Actual"]<-as.numeric(train)
ss<-AddSemilocalLinearTrend(list(), train)
ss<-AddSeasonal(ss, train, nseasons = 52)
#Add a random-walk holiday model to a state specification.
ss <- AddNamedHolidays(ss, named.holidays = NamedHolidays(), train)
timestamps=as.Date(Data[1:index,1])
modelbsts<-bsts( Actual~ year+half+month+quarter+day+mweek+week, state.specification = ss, niter = 1000,timestamps = timestamps,data = train_augmented)
## =-=-=-=-= Iteration 0 Wed Apr 11 18:45:49 2018 =-=-=-=-=
## =-=-=-=-= Iteration 100 Wed Apr 11 18:45:51 2018 =-=-=-=-=
## =-=-=-=-= Iteration 200 Wed Apr 11 18:45:53 2018 =-=-=-=-=
## =-=-=-=-= Iteration 300 Wed Apr 11 18:45:54 2018 =-=-=-=-=
## =-=-=-=-= Iteration 400 Wed Apr 11 18:45:56 2018 =-=-=-=-=
## =-=-=-=-= Iteration 500 Wed Apr 11 18:45:58 2018 =-=-=-=-=
## =-=-=-=-= Iteration 600 Wed Apr 11 18:46:00 2018 =-=-=-=-=
## =-=-=-=-= Iteration 700 Wed Apr 11 18:46:01 2018 =-=-=-=-=
## =-=-=-=-= Iteration 800 Wed Apr 11 18:46:03 2018 =-=-=-=-=
## =-=-=-=-= Iteration 900 Wed Apr 11 18:46:05 2018 =-=-=-=-=
burn<-SuggestBurn(0.1, modelbsts)
Xreg2<-test_augmented[,c("mweek","week","day","month","quarter","half","year")]
# horizon specifies the number of points to predict into the future
pred<-predict(modelbsts, horizon = n, burn = burn, quantiles = c(.025, .975),newdata =Xreg2)
dfuture=data.frame(idx_future,pred$mean,t(pred$interval),pred$median)
names(dfuture) <- c("Date", "Mean" , "lower95", "upper95","median")
fitdata=cbind.data.frame(Actual=pred$original.series, fitted=-colMeans(modelbsts$one.step.prediction.errors[-(1:burn),])+as.numeric(train),Date=index(train))
#fitdata=cbind.data.frame(Date=index(train),fitted=pred$mean)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
labs(y="Number of Vehicles",x="",title="Bayesian Structural Time Series Model")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
scale_color_manual(values=mycolors)+
#scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Mean,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
geom_ribbon(aes(ymin=lower95, ymax=upper95), fill = "grey", alpha=0.5,data=dfuture)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-02-28"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A6.png")
#Single Layer Neural Network
train<-Data[1:index,]%>%tk_ts()%>%na.kalman()
Xreg1<-train_augmented[,c("mweek","week","day","month","quarter","half","year")]
fit_singlelayer <- nnetar(train, lambda=0.5 )
#simulation of 9 possible future sample paths for the train data. Each sample path covers the next 30 years after the observed data.
sim <- ts(matrix(0, nrow=1000L, ncol=12L),
start=end(train)[1L]+1L)
for(i in seq(n))
sim[,i] <- simulate(fit_singlelayer, nsim=1000L)
library(ggplot2)
autoplot(train) + forecast::autolayer(sim)+
theme(legend.position="none")
Fig. 30
fit <- nnetar(train, lambda=0)
autoplot(forecast(fit,h=n))
Fig. 30
cbind.data.frame(Data[(index+1):dim(Data)[1],],nnet=apply(sim, 2,mean))
## Date Actual nnet
## 107 2018-01-15 55 52.42865
## 108 2018-01-22 53 43.24573
## 109 2018-01-29 51 49.74955
## 110 2018-02-05 49 46.23346
## 111 2018-02-12 47 50.90919
## 112 2018-02-19 47 47.44900
## 113 2018-02-26 44 52.03859
## 114 2018-03-05 42 51.99048
## 115 2018-03-12 43 51.36121
## 116 2018-03-19 46 49.68019
## 117 2018-03-26 45 50.84598
## 118 2018-04-02 41 50.29773
apply(sim, 1, mean)[1:5]
## [1] 53.67398 53.06146 51.05385 51.91090 49.79054
Bootstrapping and bagging
rwf(y=as.numeric(train), h=10, drift=TRUE)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 107 55.27575 51.18995 59.36156 49.02706 61.52445
## 108 55.55151 49.74586 61.35715 46.67254 64.43048
## 109 55.82726 48.68337 62.97116 44.90162 66.75291
## 110 56.10302 47.81550 64.39054 43.42835 68.77768
## 111 56.37877 47.07025 65.68730 42.14261 70.61493
## 112 56.65453 46.41088 66.89817 40.98822 72.32083
## 113 56.93028 45.81570 68.04486 39.93200 73.92857
## 114 57.20604 45.27065 69.14143 38.95243 75.45964
## 115 57.48179 44.76601 70.19757 38.03468 76.92890
## 116 57.75755 44.29476 71.22033 37.16799 78.34710
Gradient Boosting Machines
library(caret)
library(corrplot) # plot correlations
library(doParallel) # parallel processing
library(dplyr) # Used by caret
library(gbm) # GBM Models
library(pROC) # plot the ROC curve
library(xgboost)
ctrl <- trainControl(method = "repeatedcv", # 10fold cross validation
number = 5, # do 5 repititions of c
allowParallel = TRUE)
grid <- expand.grid(interaction.depth=c(1,2), # Depth of variable interactions
n.trees=c(10,20), # Num trees to fit
shrinkage=c(0.01,0.1), # Try 2 values for learning rate
n.minobsinnode = 20)
gbm.tune=train(x=Xreg,y=as.numeric(train),
method = "gbm",
trControl = ctrl,
tuneGrid=grid,
verbose=FALSE)
summary(gbm.tune)
Fig. 30
## var rel.inf
## week week 60.524389
## year year 35.565282
## day day 2.576255
## half half 1.334075
## mweek mweek 0.000000
## month month 0.000000
## quarter quarter 0.000000
predgbm<- predict(gbm.tune,test_augmented)%>%data.frame()
names(predgbm)<-"predgbm"
fitdata<-predict(gbm.tune,train_augmented)%>%data.frame()
names(fitdata)<-"fit"
fitdata<-cbind.data.frame(fitdata,Data[1:index,])
dfuture<-cbind.data.frame(Data[(index+1):dim(Data)[1],],predgbm)%>%
rename(Predicted=predgbm)
dfuture3<-
dfuture%>%
#add_column(Actual=as.numeric(test)) %>%
mutate(residual= Actual- Predicted)%>%
mutate(
lo.95 = Predicted - 1.96 * residual,
lo_95 = Predicted + qt(.025,n-2)* residual,
hi_95 = Predicted + qt(.975,n-2)* residual,
hi.95 = Predicted + 1.96 *residual
)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
geom_line(aes(y= fit,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
labs(y="Number of Vehicles",x="",title="Linear Regression Model")+
theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
(scale_x_date(breaks=date_breaks("2 month"),
labels=date_format("%b %d %y")))+
scale_color_manual(values=mycolors)+
#scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-12"), linetype=2)+
# geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Predicted,color="12 Week Forecast"), data =dfuture3, size=1.5, linetype=1)+
theme(
legend.position="top",
legend.direction="horizontal",
#legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
legend.title = element_blank(),
text=element_text(size=8, family="Comic Sans MS"),
#axis.ticks.x = element_blank(),
#axis.text.x = element_blank(), #remove x-qxis text
#axis.title.x = element_blank(),
legend.text = element_text(size = 7),# legend title size
#legend.text = element_blank(),# remove legend text
plot.title = element_text(hjust = 0.5), #position legend in the middle
axis.text.x = element_text(angle = 30, hjust = 1)
)+
geom_ribbon(aes(ymin=lo.95, ymax= hi.95), fill = "grey", alpha=0.5,data=dfuture3)+
# geom_rect(xmin = as.numeric(ymd("2017-09-25")),
# xmax = as.numeric(ymd("2018-01-15")),
# ymin = 20, ymax = 65,
# alpha = 0.01 ,fill = "white" ) +
annotate("text", x = ymd("2016-03-17"), y = 90,
label = "Train Region",color="red",size=2.5,angle=0) +
annotate("text", x = ymd("2018-03-01"), y = 90,
label = "Test Region",color="red",size=2.5,angle=0)
Fig. 30
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/Agbm.png")
Non Linear Regression Models
h <- 12
ts_train<-tk_ts(train)
fit.lin <- tslm(ts_train ~ trend)
fcasts.lin <- forecast(fit.lin, h = h)
fit.exp <- tslm(ts_train ~ trend, lambda = 0)
fcasts.exp <- forecast(fit.exp, h = h)
t <- time(ts_train)
t.break1 <-2017-06-19
t.break2 <-2017-10-16
tb1 <- ts(pmax(0, t - t.break1), start = 2016-01-04)
tb2 <- ts(pmax(0, t - t.break2), start = 2016-01-04)
fit.pw <- tslm(ts_train ~ t + tb1 + tb2)
t.new <- t[length(t)] + seq(h)
tb1.new <- tb1[length(tb1)] + seq(h)
tb2.new <- tb2[length(tb2)] + seq(h)
newdata <- cbind(t=t.new, tb1=tb1.new, tb2=tb2.new) %>%
as.data.frame()
fcasts.pw <- forecast(fit.pw, newdata = newdata)
fit.spline <- tslm(ts_train ~ t + I(t^2) + I(t^3) +
I(tb1^3) + I(tb2^3))
fcasts.spl <- forecast(fit.spline, newdata = newdata)
autoplot(ts_train) +
autolayer(fitted(fit.lin), series = "Linear") +
autolayer(fitted(fit.exp), series = "Exponential") +
autolayer(fitted(fit.pw), series = "Piecewise") +
autolayer(fitted(fit.spline), series = "Cubic Spline") +
autolayer(fcasts.pw, series="Piecewise") +
autolayer(fcasts.lin, series="Linear", PI=FALSE) +
autolayer(fcasts.exp, series="Exponential", PI=FALSE) +
autolayer(fcasts.spl, series="Cubic Spline", PI=FALSE) +
xlab("Time") + ylab("Number of Vehicles") +
ggtitle("Vehicle Forecast") +
guides(colour = guide_legend(title = " "))
Fig. 30
ts_train %>%
splinef(lambda=0) %>%
autoplot()
Fig. 30
ts_train %>%
splinef() %>%
autoplot()+geom_line()
Fig. 30
f=ts_train %>%
splinef(lambda=0)
checkresiduals(as.vector(f$residuals))
Fig. 30
ggtsdisplay(as.vector(f$residuals))
Fig. 30
Harmonic regression allows the seasonal pattern to be modelled using Fourier terms with short-term time series dynamics handled by an ARMA error.
# plots <- list()
# for (i in seq(6)) {
# fit <- auto.arima(train, xreg = fourier(train, K = i),
# seasonal = FALSE, lambda = 0)
# plots[[i]] <- autoplot(forecast(fit,xreg=fourier(train, K=i, h=24))) +
# xlab(paste("K=",i," AICC=",round(fit$aicc,2))) +
# ylab("") + ylim(1.5,4.7)
# }
#
# gridExtra::grid.arrange(plots[[1]],plots[[2]],plots[[3]],
# plots[[4]],plots[[5]],plots[[6]], nrow=3)
# n=12
#
#
# NewData<- rio::import("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/actvsfcdatajuly311.xlsx",sheet="Sheet5")
#
# NewData[,"Date"]<-as.Date(NewData[,"Date"])
#
# NewData%>%head()
#
#
# train_aug<-tk_augment_timeseries_signature(NewData)
#
#
# Xreg4<-train_aug[,c("mweek","week","day","month","quarter","half","year")]
#
# Xreg4
#
# #### make prediction dates
#
# Dates_future<-NewData%>%tk_zoo() %>% tk_index()%>%
# tk_make_future_timeseries(n_future = n)%>%data.frame()
#
# Dates_future
#
# test_aug<-tk_augment_timeseries_signature(Dates_future)
#
# test_aug
#
# Xreg5<-test_aug[,c("mweek")]
#
# Xreg5
#
# #The deterministic trend model is obtained as follows:
#
#
# Train<-NewData%>%tk_ts()%>%na.kalman()
#
# fit_arima6 <- auto.arima(y=Train,d=0 )
# fit_arima6
# Add time series signature
#train_augmented <- NewData[,"Date"] %>%
# tk_augment_timeseries_signature()
#idx_future<-train%>%tk_zoo() %>% tk_index()%>%
# tk_make_future_timeseries(n_future = n)
# # Add time series signature
# future_aug <- Dates_future %>%
# tk_augment_timeseries_signature()
#
#
#
# Train<-NewData%>%tk_ts()%>%na.kalman()
#
# #The deterministic trend model is obtained as follows:
# fit_arima6 <- auto.arima(y=Train,xreg =Xreg4,d=0 )
# fit_arima6
#
#
# # Add time series signature
# train_aug <- NewData[,"Date"]%>%tk_ts()%>%
# tk_augment_timeseries_signature()
#
# idx=Train%>%tk_index()%>%tk_get_timeseries_signature()
# idx
#
# #### make prediction dates
#
# Dates_future<-NewData%>%tk_zoo() %>% tk_index()%>%
# tk_make_future_timeseries(n_future = n)
#
#
#
#
# fcast6 <- forecast(fit_arima6, h = n,xreg=future_aug)%>%data.frame()
# fcast6
#
#
# #seq.POSIXt(start=as.Date("2017-10-30"))
#spring March 20
#summer june 21
#fall september 23
# december 21
stopImplicitCluster()