Load Required Packages

If needed, install the Twitter AnomalyDetection package from GitHub (the install commands below are commented out), then load the required packages.

#pacman::p_install_gh("twitter/AnomalyDetection")


#devtools::install_github("twitter/AnomalyDetection")
library(readxl)
library(readr)
library(plotly)
library(lubridate)
library("rvest")
library(tibble)
library(tidyr)
library(foreach)
library(doParallel)
library(ggjoy)
library(ggfortify)
library(readxl)
library(tidyquant)
library(forecast)   #  forecasting pkg
library(sweep)# Broom tidiers for forecast pkg
library(DT)
library(timekit)
library(tseries)
library(bsts)
library(reticulate)
pacman::p_load("rio","tidyverse","stringr","ggthemes","RColorBrewer","viridis","data.table","kableExtra","scales","purrr",
               "knitr","mlr","imputeTS","gam","broom","sweep")


library(ggpubr)
theme_set(theme_pubclean())

# Calculate the number of cores
no_cores <- detectCores() - 1

cl<-makeCluster(no_cores)
registerDoParallel(cl)

Read in Data

Data<- rio::import("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/actvsfcdatajuly311.xlsx",sheet="Sheet4")


Data<-Data%>%mutate(Date=as.Date(Date))


Data%>%head()
##         Date Actual
## 1 2016-01-04     NA
## 2 2016-01-11     26
## 3 2016-01-18     27
## 4 2016-01-25     28
## 5 2016-02-01     22
## 6 2016-02-08     27

Impute Missing Values

We can visualize the missing values.

plotNA.gapsize(Data%>%tk_ts())
box()

#plotNA.imputations(modelseries)

statsNA(Data%>%tk_ts())
## [1] "Length of time series:"
## [1] 118
## [1] "-------------------------"
## [1] "Number of Missing Values:"
## [1] 4
## [1] "-------------------------"
## [1] "Percentage of Missing Values:"
## [1] "3.39%"
## [1] "-------------------------"
## [1] "Stats for Bins"
## [1] "  Bin 1 (30 values from 1 to 30) :      1 NAs (3.33%)"
## [1] "  Bin 2 (30 values from 31 to 60) :      3 NAs (10%)"
## [1] "  Bin 3 (30 values from 61 to 90) :      0 NAs (0%)"
## [1] "  Bin 4 (28 values from 91 to 118) :      0 NAs (0%)"
## [1] "-------------------------"
## [1] "Longest NA gap (series of consecutive NAs)"
## [1] "2 in a row"
## [1] "-------------------------"
## [1] "Most frequent gap size (series of consecutive NA series)"
## [1] "1 NA in a row (occuring 2 times)"
## [1] "-------------------------"
## [1] "Gap size accounting for most NAs"
## [1] "2 NA in a row (occuring 1 times, making up for overall 2 NAs)"
## [1] "-------------------------"
## [1] "Overview NA series"
## [1] "  1 NA in a row: 2 times"
## [1] "  2 NA in a row: 1 times"
plotNA.distributionBar(Data%>%tk_ts())
box()

plotNA.distribution(Data%>%tk_ts())
box()

Data$Actual[which(is.na(Data$Actual))]
## [1] NA NA NA NA
# Use Kalman smoothing on a structural time series model (or on the state-space
# representation of an ARIMA model) to impute the missing values


modelseries=Data%>%tk_ts()

modelseries=na.kalman(modelseries)


## check that the time series no longer has missing values

sum(is.na(modelseries))
## [1] 0
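Before overwriting the data, the Kalman imputation can be sanity-checked against simpler imputeTS alternatives; a short optional sketch (commented out):

# imp_interp <- na.interpolation(Data %>% tk_ts())   # linear interpolation
# imp_ma     <- na.ma(Data %>% tk_ts())              # moving-average imputation
# plotNA.imputations(Data %>% tk_ts(), modelseries)  # original NAs vs. imputed values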
Data["Actual"]<-as.numeric(modelseries)

Summary Statistics

mlr::summarizeColumns(Data)
##     name    type na     mean     disp median   mad min max nlevs
## 1   Date    Date  0       NA       NA     NA    NA   1   1   118
## 2 Actual numeric  0 34.13857 9.591503     32 7.413  21  58     0
Data%>%
ggplot(aes(y=Actual,x=""))+
  
# theme_minimal()+
  
  geom_boxplot(width=0.5,fill="white")+
  
  geom_jitter(width = 0.1,size=1)+
  
  labs(y="Number of Vehicles",x="")

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/boxplot.png")
Data%>%
  drop_na()%>%
  summarise(`2.5th`=quantile(Actual,probs = 0.025),
        `5th`=quantile(Actual,probs = 0.05),
            `50th`=quantile(Actual,probs = 0.5),
            `95th`=quantile(Actual,probs = 0.95),
            `97.5th`=quantile(Actual,probs = 0.975),
            Average=mean(Actual),
            StandardDeviation=sd(Actual)
            )
##    2.5th 5th 50th 95th 97.5th  Average StandardDeviation
## 1 22.925  23   32   55     56 34.13857          9.591503
mycolors=viridis_pal(option = "D")(4)


p<-Data%>%
  drop_na()%>%
ggplot(aes(y=Actual,x=Date))+
  
# theme_minimal()+
  
  geom_point(alpha = 0.5, color = palette_light()[[1]], shape=20,size=2)+
  
  geom_line()+
  
  labs(y="Number of Vehicles",x="")

# Q1 and Q3 quartiles (the interquartile range itself would be Q3 - Q1);
# flag observations above Q3 or below Q1
quartiles<-quantile(modelseries,probs = c(0.25, 0.75))
Data1<-Data[ Data["Actual"]>quartiles[2],]
Data2<-Data[Data["Actual"]<quartiles[1] ,]
Data3<-rbind.data.frame(Data1,Data2)%>%drop_na()

p+geom_point(aes(y=Actual,x=Date,color="Above or below IQR"),size=2,data = Data3,shape=1)+
  
      theme(
    legend.position="top",
    legend.direction="horizontal",
    legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
  labs(x="Time",fill="",title="Weekly Vehicles Recorded")+
   #scale_x_date(labels = date_format("%b-%Y"))
  (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))

#[1] "#440154FF" "#482878FF" "#3E4A89FF" "#31688EFF" "#26828EFF" "#1F9E89FF"
# [7] "#35B779FF" "#6DCD59FF" "#B4DE2CFF" "#FDE725FF"
ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A1.png")
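The Twitter AnomalyDetection package loaded at the top offers a more formal alternative to the quartile flags above. A hedged sketch (commented out since the package comes from GitHub; AnomalyDetectionTs expects a two-column data frame of POSIXct timestamps and values):

# anoms <- AnomalyDetection::AnomalyDetectionTs(
#   data.frame(timestamp = as.POSIXct(Data$Date), count = Data$Actual),
#   max_anoms = 0.05, direction = "both", plot = TRUE)
# anoms$plot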

Split the data into a training set (the first 90%) and a test set (the remaining 10%), then impute any missing values with a Kalman filter.

index<-round(0.9*dim(Data)[1])


train<-Data[1:index,]%>%tk_ts()%>%na.kalman()



test<-Data[(1+index):dim(Data)[1],]%>%tk_ts()%>%na.kalman()

#test<-modelseries[(index+1):length(modelseries),]

#modelseries=Data%>%tk_ts()

#modelseries=na.kalman(modelseries)

n=length(test)

length(train)
## [1] 106
length(test)
## [1] 12

ARIMA Modelling

# Add time series signature
train_augmented <- Data[1:index,] %>%
    tk_augment_timeseries_signature()

# Model using auto.arima()
# a log transform stabilizes multiplicative effects
fit_arima <- log(train)%>%auto.arima()
fit_arima
## Series: . 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.2308
## s.e.   0.0975
## 
## sigma^2 estimated as 0.009572:  log likelihood=95.55
## AIC=-187.1   AICc=-186.99   BIC=-181.8
Xreg<-train_augmented[,c("mweek","week","day","month","quarter","half","year")]


Xregg<-train_augmented[,c("week","day","month","quarter","half","year")]

#Y<-train_augmented[,"Actual"]%>%tk_zoo()
  
fit_arima2 <- auto.arima(y=log(train),xreg =Xreg )
fit_arima2
## Series: log(train) 
## Regression with ARIMA(2,0,0) errors 
## 
## Coefficients:
##          ar1     ar2  intercept    mweek    week      day    month
##       0.7167  0.1733  -555.2275  -0.0178  0.1164  -0.0137  -0.4917
## s.e.  0.1004  0.1017   235.1557   0.0084  0.1841   0.0262   0.7999
##       quarter     half    year
##        0.0470  -0.0245  0.2772
## s.e.   0.0541   0.0813  0.1165
## 
## sigma^2 estimated as 0.009412:  log likelihood=101.4
## AIC=-180.81   AICc=-178   BIC=-151.51
fit_arima4 <- auto.arima(y=train )
fit_arima4
## Series: train 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.1839
## s.e.   0.0974
## 
## sigma^2 estimated as 9.915:  log likelihood=-268.94
## AIC=541.88   AICc=542   BIC=547.19
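As a quick out-of-sample check, a sketch using forecast::accuracy() to compare the plain ARIMA fit against the held-out test values (training and test error measures are reported together):

accuracy(forecast(fit_arima4, h = n), as.numeric(test))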

ARIMA Model Diagnostics

# sw_glance for model accuracy
sw_glance(fit_arima)
## # A tibble: 1 x 12
##   model.desc    sigma logLik   AIC   BIC      ME   RMSE    MAE   MPE  MAPE
##   <chr>         <dbl>  <dbl> <dbl> <dbl>   <dbl>  <dbl>  <dbl> <dbl> <dbl>
## 1 ARIMA(0,1,1) 0.0978   95.6  -187  -182 0.00904 0.0969 0.0745 0.200  2.18
## # ... with 2 more variables: MASE <dbl>, ACF1 <dbl>
sw_tidy(fit_arima2)
## # A tibble: 10 x 2
##    term       estimate
##    <chr>         <dbl>
##  1 ar1          0.717 
##  2 ar2          0.173 
##  3 intercept -555     
##  4 mweek     -  0.0178
##  5 week         0.116 
##  6 day       -  0.0137
##  7 month     -  0.492 
##  8 quarter      0.0470
##  9 half      -  0.0245
## 10 year         0.277
sw_glance(fit_arima2)
## # A tibble: 1 x 12
##   model.desc  sigma logLik   AIC   BIC       ME   RMSE    MAE    MPE  MAPE
##   <chr>       <dbl>  <dbl> <dbl> <dbl>    <dbl>  <dbl>  <dbl>  <dbl> <dbl>
## 1 Regressio… 0.0970    101  -181  -152 -0.00119 0.0923 0.0734 -0.111  2.15
## # ... with 2 more variables: MASE <dbl>, ACF1 <dbl>
fitdata=cbind.data.frame(Date=Data$Date[1:index]  ,sweep::sw_augment(fit_arima))%>%mutate(fitted=exp(.fitted))

Dynamic Regression Models

Regression with ARIMA Errors Modelling

Stochastic and deterministic trends

#### make prediction dates

idx_future<-train%>%tk_zoo() %>% tk_index()%>%
    tk_make_future_timeseries(n_future = n)


#The deterministic trend model is obtained as follows:
fit_arima3 <- auto.arima(y=train,xreg =Xregg,d=0 )
fit_arima3
## Series: train 
## Regression with ARIMA(1,0,0) errors 
## 
## Coefficients:
##          ar1   intercept    week      day     month  quarter     half
##       0.8991  -20853.435  4.4557  -0.6071  -18.5153   0.8672  -1.2304
## s.e.  0.0426    8617.621  6.2057   0.8861   26.9836   1.8018   2.7778
##          year
##       10.3634
## s.e.   4.2691
## 
## sigma^2 estimated as 10.28:  log likelihood=-270.56
## AIC=559.12   AICc=561   BIC=583.09
#Alternatively, the stochastic trend model can be estimated.

(fit_stochastic<- auto.arima(y=train, d=1))
## Series: train 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.1839
## s.e.   0.0974
## 
## sigma^2 estimated as 9.915:  log likelihood=-268.94
## AIC=541.88   AICc=542   BIC=547.19
fitdat=cbind.data.frame(Date=Data$Date[1:index]  ,sweep::sw_augment(fit_arima3))


# Twelve period forecast 

test_augmented <- Data[(1+index):dim(Data)[1],] %>%
    tk_augment_timeseries_signature()

Xreg2<-test_augmented[,c("week","day","month","quarter","half","year")]
fcast3 <- forecast(fit_arima3, h = n,xreg=Xreg2)%>%data.frame()
fcast3 
##     Point.Forecast    Lo.80    Hi.80    Lo.95    Hi.95
## 107       54.21182 50.10339 58.32025 47.92852 60.49512
## 108       53.52395 47.99901 59.04889 45.07428 61.97362
## 109       52.92626 46.47981 59.37271 43.06726 62.78525
## 110       52.71341 45.60882 59.81800 41.84787 63.57894
## 111       52.26971 44.67463 59.86479 40.65404 63.88539
## 112       51.89158 43.92200 59.86115 39.70317 64.07998
## 113       51.57238 43.31245 59.83230 38.93991 64.20484
## 114       49.78873 41.30133 58.27613 36.80837 62.76908
## 115       49.57018 40.90325 58.23711 36.31525 62.82511
## 116       49.39448 40.58508 58.20388 35.92166 62.86729
## 117       49.25730 40.33438 58.18021 35.61088 62.90372
## 118       50.32567 41.31204 59.33931 36.54050 64.11084
dfuture2=data.frame(Date=idx_future,fcast3)

Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
  
  geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdat)+
  
  labs(y="Number of Vehicles",x="",title="Regression with ARIMA Errors")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+

 # scale_fill_viridis(discrete = TRUE,option = "D")+
 #scale_fill_manual(values = palette_light()[1:3])+
scale_color_manual(values=mycolors)+

geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+

  geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture2)+
  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Point.Forecast,color="12 Week Forecast"), data =dfuture2, size=1.5, linetype=1)+
  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
  
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-03-15"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A7.png")

ARIMA Forecast

#### make prediction dates

idx_future<-train%>%tk_zoo() %>% tk_index()%>%
    tk_make_future_timeseries(n_future = n)

# Twelve period forecast
fcast <- forecast(fit_arima, h = n)%>%data.frame()

# back-transform the predictions with exp() since the model was fit on the log scale

fcast<- apply(fcast,2, function(x) exp(x))

test_augmented <- Data[(1+index):dim(Data)[1],] %>%
    tk_augment_timeseries_signature()

Xreg2<-test_augmented[,c("mweek","week","day","month","quarter","half","year")]


Xreg3=rbind(rep( apply(Xreg2,2,mean),8))

#fcast <- forecast(fit_arima4, h = n,Xreg2)

#predict(fit_arima2,Xreg)
as.matrix(rep( apply(Xreg2,2,mean),8),7,8)
##                [,1]
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
## mweek      3.416667
## week       8.500000
## day       16.000000
## month      2.250000
## quarter    1.083333
## half       1.000000
## year    2018.000000
dfuture=data.frame(Date=idx_future,fcast)

#as.data.frame.list(rep( apply(Xreg2,2,mean)))
#as.data.frame.matrix(rep( apply(Xreg2,2,mean)),7,8)


Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
  
  geom_line(aes(y= fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
  
  labs(y="Number of Vehicles",x="",title="ARIMA(0,1,1)  Model")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+

 # scale_fill_viridis(discrete = TRUE,option = "D")+
 #scale_fill_manual(values = palette_light()[1:3])+
scale_color_manual(values=mycolors)+

geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+

  geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Point.Forecast,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
  
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-03-21"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A2.png")

Holt-Winters Exponential Smoothing Modelling

# l.start    start value for the level (a[0])
# b.start    start value for the trend (b[0])
# s.start    vector of start values for the seasonal component (s_1[0] … s_p[0])
# a non-seasonal fit implies gamma = FALSE
HW=HoltWinters(train, gamma=FALSE, l.start=mean(modelseries), b.start=1)

sweep::sw_augment(HW)%>%head()
## # A tibble: 6 x 4
##   index .actual .fitted .resid
##   <int>   <dbl>   <dbl>  <dbl>
## 1     1    26.0    NA    NA   
## 2     2    26.0    NA    NA   
## 3     3    27.0    35.1 - 8.14
## 4     4    28.0    29.2 - 1.16
## 5     5    22.0    29.0 - 6.99
## 6     6    27.0    23.8   3.21
fc=forecast(HW,h=n)

fitdata=cbind.data.frame(Date=Data$Date[1:index]  ,sweep::sw_augment(HW))


#### make prediction dates

idx_future<-train%>%tk_zoo() %>% tk_index()%>%
    tk_make_future_timeseries(n_future = n)


dfuture=data.frame(Date=idx_future,fc)


dfuture
##           Date Point.Forecast    Lo.80    Hi.80    Lo.95    Hi.95
## 107 2018-01-15       55.11551 50.89825 59.33278 48.66576 61.56527
## 108 2018-01-22       55.63069 50.07383 61.18755 47.13221 64.12917
## 109 2018-01-29       56.14587 49.46520 62.82654 45.92866 66.36308
## 110 2018-02-05       56.66105 48.97602 64.34607 44.90781 68.41428
## 111 2018-02-12       57.17622 48.56346 65.78898 44.00414 70.34831
## 112 2018-02-19       57.69140 48.20417 67.17864 43.18193 72.20087
## 113 2018-02-26       58.20658 47.88380 68.52936 42.41925 73.99390
## 114 2018-03-05       58.72176 47.59287 69.85065 41.70159 75.74192
## 115 2018-03-12       59.23693 47.32470 71.14916 41.01875 77.45511
## 116 2018-03-19       59.75211 47.07446 72.42976 40.36331 79.14091
## 117 2018-03-26       60.26729 46.83845 73.69612 39.72966 80.80492
## 118 2018-04-02       60.78246 46.61386 74.95107 39.11345 82.45148
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
   geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
  
  labs(y="Number of Vehicles",x="",title="Holt-Winters Exponential Smoothing Model")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+
  scale_color_manual(values=mycolors)+
 #scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+

  geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Point.Forecast,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
  
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-02-26"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A3.png")




forecast::ggtsdisplay(train)

forecast::ggtsdisplay(fit_stochastic$residuals)

#checkresiduals(HW)

The trouble with using the ACF is that there can be other reasons for significant spikes, not just seasonality, so it is indicative but not conclusive.

If the data had a small seasonal period (such as 4 for quarterly data or 12 for monthly data) then a simple approach is to use the ets function in the forecast package for R. If there is a seasonal pattern, it will choose a seasonal model.

The non-integer period will be a problem for any solution that assumes period=52, because the difference between 52 and 365/7 will become apparent with long series.

One approach is to use the tbats model, also in the forecast package in R. It will handle weekly seasonality and will automatically determine if a seasonal pattern is present. For example:

The autocorrelations die down and do not cross 0.2, which suggests the absence of seasonality.

#check seasonality/periodicity of time series
# frequency for weekly data is 365/7
x <- ts(modelseries, frequency=365/7)
fit <- tbats(modelseries)
seasonal <- !is.null(fit$seasonal)
seasonal
## [1] FALSE
forecast::ggtsdisplay(residuals(HW), lag.max=80, main='Holt-Winters Exponential Smoothing Residuals')

#find frequency
forecast::findfrequency(modelseries)
## [1] 1
#beer2 <- window(ausbeer, start=1992)
#fit.beer <- tslm(ts(train) ~ trend + season)
fit <- tslm(ts(train) ~ trend )
fcast <- forecast(fit)
autoplot(fcast) +
  ggtitle("Forecasts  using regression")

checkresiduals(fit, lag.max=80)

## 
##  Breusch-Godfrey test for serial correlation of order up to 10
## 
## data:  Residuals from Linear regression model
## LM test = 86.817, df = 10, p-value = 2.286e-14
#stl(modelseries)

forecast::tsoutliers(modelseries)
## $index
## integer(0)
## 
## $replacements
## numeric(0)
#forecast::stlm(modelseries)

#forecast::ggseasonplot(modelseries)

#forecast::stlf(modelseries)
#stlRes <- stl(ts(na.omit(Data[,2])), s.window = "periodic")

#modelseries$Actual

#decomposedRes <- decompose(na.omit(Data[,2])), type="additive") # use type = "additive/mult" for additive components
#plot (decomposedRes)

adf.test(modelseries)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  modelseries
## Dickey-Fuller = -1.9833, Lag order = 4, p-value = 0.5835
## alternative hypothesis: stationary
tseries::adf.test(diff(log(modelseries)), alternative="stationary", k=0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(log(modelseries))
## Dickey-Fuller = -12.878, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary

Exponential Smoothing Modelling

# Automated forecasting using an exponential model
fit_ets <- ets(modelseries)
fit_ets
## ETS(A,N,N) 
## 
## Call:
##  ets(y = modelseries) 
## 
##   Smoothing parameters:
##     alpha = 0.8393 
## 
##   Initial states:
##     l = 26.0702 
## 
##   sigma:  3.0461
## 
##      AIC     AICc      BIC 
## 831.8146 832.0251 840.1267
fc=forecast(fit_ets,h=12)
fc%>%data.frame()%>%head(4)
##     Point.Forecast    Lo.80    Hi.80    Lo.95    Hi.95
## 119       41.65585 37.75208 45.55963 35.68555 47.62616
## 120       41.65585 36.55939 46.75232 33.86148 49.45023
## 121       41.65585 35.59710 47.71461 32.38979 50.92192
## 122       41.65585 34.76796 48.54375 31.12173 52.18998
#Model <- auto.ssarima(modelseries,lags=c(1,12),
#                         h=12,holdout=TRUE,intervals="np")
    
#Model=auto.ces(modelseries,h=12,holdout=FALSE)


#Model%>%summary()

#forecast(Model)%>%tk_tbl()

Linear Regression Modelling

# Add time series signature
train_augmented <- Data[1:index,] %>%
    tk_augment_timeseries_signature()

train_augmented["Actual"]<-as.numeric(train)

train_augmented%>%head(4)
##         Date   Actual  index.num   diff year half quarter month month.xts
## 1 2016-01-04 26.04576 1451865600     NA 2016    1       1     1         0
## 2 2016-01-11 26.00000 1452470400 604800 2016    1       1     1         0
## 3 2016-01-18 27.00000 1453075200 604800 2016    1       1     1         0
## 4 2016-01-25 28.00000 1453680000 604800 2016    1       1     1         0
##   month.lbl day hour minute second hour12 am.pm wday wday.xts wday.lbl
## 1   January   4    0      0      0      0     1    2        1   Monday
## 2   January  11    0      0      0      0     1    2        1   Monday
## 3   January  18    0      0      0      0     1    2        1   Monday
## 4   January  25    0      0      0      0     1    2        1   Monday
##   mday qday yday mweek week week.iso week2 week3 week4 mday7
## 1    4    4    4     2    1        1     1     1     1     1
## 2   11   11   11     3    2        2     0     2     2     2
## 3   18   18   18     4    3        3     1     0     3     3
## 4   25   25   25     5    4        4     0     1     0     4
# Model using the augmented features
fit_lm<-lm(Actual~ year+half+month+quarter+day+mweek+week, data = train_augmented)

broom::glance(fit_lm)
##   r.squared adj.r.squared    sigma statistic      p.value df    logLik
## 1 0.5012841     0.4656616 6.527044   14.0721 1.616699e-12  8 -345.0996
##        AIC      BIC deviance df.residual
## 1 708.1993 732.1702 4175.026          98
broom::tidy(fit_lm)
##          term      estimate    std.error  statistic      p.value
## 1 (Intercept) -1.371847e+04 3600.7292615 -3.8099141 0.0002426712
## 2        year  6.821454e+00    1.7748385  3.8434224 0.0002156030
## 3        half -4.537905e+00    2.9285478 -1.5495411 0.1244751747
## 4       month -1.609079e+01   32.7436512 -0.4914171 0.6242302509
## 5     quarter  7.148341e+00    2.7007311  2.6468168 0.0094680925
## 6         day -6.038622e-01    1.0812576 -0.5584814 0.5777895211
## 7       mweek  5.003200e-01    0.7198003  0.6950817 0.4886489145
## 8        week  3.631563e+00    7.5328878  0.4820944 0.6308138722
broom::augment_columns(fit_lm,data = train_augmented)%>%head()
##         Date   Actual  index.num   diff year half quarter month month.xts
## 1 2016-01-04 26.04576 1451865600     NA 2016    1       1     1         0
## 2 2016-01-11 26.00000 1452470400 604800 2016    1       1     1         0
## 3 2016-01-18 27.00000 1453075200 604800 2016    1       1     1         0
## 4 2016-01-25 28.00000 1453680000 604800 2016    1       1     1         0
## 5 2016-02-01 22.00000 1454284800 604800 2016    1       1     2         1
## 6 2016-02-08 27.00000 1454889600 604800 2016    1       1     2         1
##   month.lbl day hour minute second hour12 am.pm wday wday.xts wday.lbl
## 1   January   4    0      0      0      0     1    2        1   Monday
## 2   January  11    0      0      0      0     1    2        1   Monday
## 3   January  18    0      0      0      0     1    2        1   Monday
## 4   January  25    0      0      0      0     1    2        1   Monday
## 5  February   1    0      0      0      0     1    2        1   Monday
## 6  February   8    0      0      0      0     1    2        1   Monday
##   mday qday yday mweek week week.iso week2 week3 week4 mday7  .fitted
## 1    4    4    4     2    1        1     1     1     1     1 22.31875
## 2   11   11   11     3    2        2     0     2     2     2 22.22360
## 3   18   18   18     4    3        3     1     0     3     3 22.12845
## 4   25   25   25     5    4        4     0     1     0     4 22.03330
## 5    1   32   32     6    5        5     1     2     1     1 24.56708
## 6    8   39   39     2    6        6     0     0     2     2 21.97033
##    .se.fit    .resid       .hat   .sigma     .cooksd .std.resid
## 1 1.730559  3.727008 0.07029746 6.548853 0.003314738  0.5922046
## 2 1.537008  3.776400 0.05545222 6.548729 0.002600781  0.5953185
## 3 1.517496  4.871552 0.05405330 6.540861 0.004206303  0.7673924
## 4 1.678107  5.966705 0.06610072 6.530582 0.007916845  0.9459496
## 5 3.389952 -2.567080 0.26974541 6.553509 0.009780500 -0.4602414
## 6 1.638020  5.029673 0.06298041 6.539356 0.005324325  0.7960656
fitdata=cbind.data.frame(Date=Data$Date[1:index]  ,broom::augment(fit_lm))


library(ggfortify)


ggplot2::autoplot(fit_lm, which = 1:6, nrow=2, label.size = F,na.rm=TRUE)

broom::tidy(fit_lm) %>%
  gather(x, y, estimate:p.value) %>%
  ggplot(aes(x = term, y = y, color = x, fill = x)) +
    facet_wrap(~ x, scales = "free", ncol = 2) +
    geom_bar(stat = "identity", alpha = 0.8) +
    scale_color_manual(values = palette_light()) +
    scale_fill_manual(values = palette_light()) +
    theme_tq() +
    theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

checkresiduals(fit_lm)

## 
##  Breusch-Godfrey test for serial correlation of order up to 11
## 
## data:  Residuals
## LM test = 80.171, df = 11, p-value = 1.368e-12

Prediction interval at level \(1-\alpha\) with \(n-2\) degrees of freedom: \(\hat{y}(x^{*}) \pm t_{1-\alpha/2,\,n-2}\,SE_{\hat{y}(x^{*})}\)

\(\text{prediction} \pm t_{1-\alpha/2,\,n-2}\sqrt{(\text{prediction}-\text{Actual})^{2}}\)
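The first interval can also be computed by hand from predict(); a minimal sketch (my own illustration, using fit_lm and the augmented test set from above). se.fit gives the standard error of the fitted mean, so the prediction standard error adds the residual variance back in:

pred_man <- predict(fit_lm, test_augmented, se.fit = TRUE)
se_pred  <- sqrt(pred_man$se.fit^2 + pred_man$residual.scale^2)
t_crit   <- qt(0.975, df = pred_man$df)
data.frame(fit = pred_man$fit,
           lwr = pred_man$fit - t_crit * se_pred,
           upr = pred_man$fit + t_crit * se_pred) %>% head()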

# Prediction intervals for the test-set predictions

# Calculate standard deviation of residuals
#test_resid_sd <- sd(test_residuals)



# Add time series signature
test_augmented <- Data[(1+index):dim(Data)[1],] %>%
    tk_augment_timeseries_signature()

predlm<- predict(fit_lm,test_augmented,interval="prediction",se.fit=T)%>%data.frame()


dfuture<-cbind.data.frame(test_augmented,predlm)%>%rename(Predicted=fit.fit)

dfuture3<-
dfuture%>%
     #add_column(Actual=as.numeric(test)) %>%
    mutate(residual = Actual- Predicted)%>%
   mutate(
        lo.95 = Predicted - 1.96 * residual,
        lo_95 = Predicted + qt(.025,n-2)* residual,
        hi_95 = Predicted + qt(.975,n-2)* residual,
        hi.95 = Predicted + 1.96 *residual
        )


Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
   geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
  
  labs(y="Number of Vehicles",x="",title="Linear Regression Model")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+
  scale_color_manual(values=mycolors)+
 #scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-12"), linetype=2)+

 # geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Predicted,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
 
  geom_ribbon(aes(ymin=fit.lwr, ymax=fit.upr), fill = "grey", alpha=0.5,data=dfuture3)+ 
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-03-01"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A4.png")

LOESS Regression Modelling

fit_lOESS<-gam(Actual~lo(quarter)+lo(month)+lo(day)+lo(mweek), data = train_augmented)

predicted<-predict(fit_lOESS,test_augmented)

dfuture<-cbind.data.frame(test_augmented,Predicted=predicted)

fitdata=cbind.data.frame(Date=Data$Date[1:index]  ,broom::augment(fit_lOESS))

Generalized Additive Model

fit_gam<-gam::gam(Actual~ s(Date)+s(quarter)+s(month)+s(day)+s(day)+s(mweek)+s(week)
, data =train_augmented )

fitdata=cbind.data.frame(Date=Data$Date[1:index]  ,broom::augment(fit_gam))

# NOTE: prediction intervals are taken from the linear model fit_lm, since gam::predict.Gam does not return interval estimates
predgam<- predict(fit_lm,test_augmented,interval="prediction",se.fit=TRUE)%>%data.frame()


dfuture<-cbind.data.frame(test_augmented,predgam)%>%rename(Predicted=fit.fit)

dfuture3<-
dfuture%>%
     #add_column(Actual=as.numeric(test)) %>%
    mutate(residual = Actual - Predicted)%>%
   mutate(
        lo.95 = Predicted - 1.96 * residual,
        lo.80 = Predicted - 1.28 * residual,
        hi.80 = Predicted + 1.28 * residual,
        hi.95 = Predicted + 1.96 *residual
        )
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
   geom_line(aes(y= .fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
  
  labs(y="Number of Vehicles",x="",title="Generalized Additive Model")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+
  scale_color_manual(values=mycolors)+
 #scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+

 # geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Predicted,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
 
  geom_ribbon(aes(ymin=fit.lwr, ymax=fit.upr  ), fill = "grey", alpha=0.5,data=dfuture3)+  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
  
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-02-28"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A5.png")

Bayesian Structural Model

# the AddNamedHolidays requires a zoo object input
train<-Data[1:index,]%>%tk_zoo()%>%na.kalman()


# fitting weekly data. number of seasons is 52 
train_augmented["Actual"]<-as.numeric(train)

ss<-AddSemilocalLinearTrend(list(),  train)

ss<-AddSeasonal(ss,  train, nseasons = 52)

#Add a random-walk holiday model to a state specification.

ss <- AddNamedHolidays(ss, named.holidays = NamedHolidays(), train)

timestamps=as.Date(Data[1:index,1])

modelbsts<-bsts( Actual~ year+half+month+quarter+day+mweek+week, state.specification = ss, niter = 1000,timestamps = timestamps,data = train_augmented)
## =-=-=-=-= Iteration 0 Wed Apr 11 18:45:49 2018 =-=-=-=-=
## =-=-=-=-= Iteration 100 Wed Apr 11 18:45:51 2018 =-=-=-=-=
## =-=-=-=-= Iteration 200 Wed Apr 11 18:45:53 2018 =-=-=-=-=
## =-=-=-=-= Iteration 300 Wed Apr 11 18:45:54 2018 =-=-=-=-=
## =-=-=-=-= Iteration 400 Wed Apr 11 18:45:56 2018 =-=-=-=-=
## =-=-=-=-= Iteration 500 Wed Apr 11 18:45:58 2018 =-=-=-=-=
## =-=-=-=-= Iteration 600 Wed Apr 11 18:46:00 2018 =-=-=-=-=
## =-=-=-=-= Iteration 700 Wed Apr 11 18:46:01 2018 =-=-=-=-=
## =-=-=-=-= Iteration 800 Wed Apr 11 18:46:03 2018 =-=-=-=-=
## =-=-=-=-= Iteration 900 Wed Apr 11 18:46:05 2018 =-=-=-=-=
burn<-SuggestBurn(0.1, modelbsts)
Xreg2<-test_augmented[,c("mweek","week","day","month","quarter","half","year")]
# horizon specifies the number of points to predict into the future
pred<-predict(modelbsts, horizon = n, burn = burn, quantiles = c(.025, .975),newdata =Xreg2)


dfuture=data.frame(idx_future,pred$mean,t(pred$interval),pred$median)
names(dfuture) <- c("Date", "Mean" , "lower95", "upper95","median")



fitdata=cbind.data.frame(Actual=pred$original.series, fitted=-colMeans(modelbsts$one.step.prediction.errors[-(1:burn),])+as.numeric(train),Date=index(train))

#fitdata=cbind.data.frame(Date=index(train),fitted=pred$mean)
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
    geom_line(aes(y= fitted,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
  
  labs(y="Number of Vehicles",x="",title="Bayesian Structural Time Series Model")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+
  scale_color_manual(values=mycolors)+
 #scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-08"), linetype=2)+


  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Mean,color="12 Week Forecast"), data =dfuture, size=1.5, linetype=1)+
  
   geom_ribbon(aes(ymin=lower95, ymax=upper95), fill = "grey", alpha=0.5,data=dfuture)+
  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
  
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-02-28"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/A6.png")
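Additional posterior diagnostics are available from bsts itself; a short sketch of the built-in plots:

# posterior decomposition into state components, and regression coefficient
# inclusion probabilities for the spike-and-slab prior
plot(modelbsts, "components")
plot(modelbsts, "coefficients")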

Deep Neural Network Model

#Single Layer Neural Network

train<-Data[1:index,]%>%tk_ts()%>%na.kalman()


Xreg1<-train_augmented[,c("mweek","week","day","month","quarter","half","year")]

fit_singlelayer <- nnetar(train, lambda=0.5 )


# simulate 12 possible future sample paths from the fitted network for the
# training data; each path covers the next 1000 periods after the observed data


sim <- ts(matrix(0, nrow=1000L, ncol=12L),
  start=end(train)[1L]+1L)
for(i in seq(n))
  sim[,i] <- simulate(fit_singlelayer, nsim=1000L)


library(ggplot2)
autoplot(train) + forecast::autolayer(sim)+

 theme(legend.position="none")

fit <- nnetar(train, lambda=0)
autoplot(forecast(fit,h=n))

cbind.data.frame(Data[(index+1):dim(Data)[1],],nnet=apply(sim, 2,mean))
##           Date Actual     nnet
## 107 2018-01-15     55 52.42865
## 108 2018-01-22     53 43.24573
## 109 2018-01-29     51 49.74955
## 110 2018-02-05     49 46.23346
## 111 2018-02-12     47 50.90919
## 112 2018-02-19     47 47.44900
## 113 2018-02-26     44 52.03859
## 114 2018-03-05     42 51.99048
## 115 2018-03-12     43 51.36121
## 116 2018-03-19     46 49.68019
## 117 2018-03-26     45 50.84598
## 118 2018-04-02     41 50.29773
apply(sim, 1, mean)[1:5]
## [1] 53.67398 53.06146 51.05385 51.91090 49.79054
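Prediction intervals for the network can also be simulated directly; a sketch (PI = TRUE bootstraps future sample paths and can be slow):

fc_nn <- forecast(fit_singlelayer, h = n, PI = TRUE)
autoplot(fc_nn)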

Bootstrapping and bagging


rwf(y=as.numeric(train), h=10, drift=TRUE)
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 107       55.27575 51.18995 59.36156 49.02706 61.52445
## 108       55.55151 49.74586 61.35715 46.67254 64.43048
## 109       55.82726 48.68337 62.97116 44.90162 66.75291
## 110       56.10302 47.81550 64.39054 43.42835 68.77768
## 111       56.37877 47.07025 65.68730 42.14261 70.61493
## 112       56.65453 46.41088 66.89817 40.98822 72.32083
## 113       56.93028 45.81570 68.04486 39.93200 73.92857
## 114       57.20604 45.27065 69.14143 38.95243 75.45964
## 115       57.48179 44.76601 70.19757 38.03468 76.92890
## 116       57.75755 44.29476 71.22033 37.16799 78.34710
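The heading above also promises bagging; a minimal sketch with the forecast package (my own illustration: bld.mbb.bootstrap() creates block-bootstrapped copies of the series and baggedETS() averages ETS forecasts across them):

boot_series <- bld.mbb.bootstrap(ts(as.numeric(train)), num = 20)
fit_bagged  <- baggedETS(ts(as.numeric(train)), bootstrapped_series = boot_series)
fc_bagged   <- forecast(fit_bagged, h = n)
autoplot(fc_bagged)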

Gradient Boosting Machines


library(caret)
library(corrplot)           # plot correlations
library(doParallel)     # parallel processing
library(dplyr)        # Used by caret
library(gbm)                  # GBM Models
library(pROC)                 # plot the ROC curve
library(xgboost)

ctrl <- trainControl(method = "repeatedcv",   # repeated k-fold cross-validation
                     number = 5,              # 5 folds
                     allowParallel = TRUE)


grid <- expand.grid(interaction.depth=c(1,2), # Depth of variable interactions
                    n.trees=c(10,20),           # Num trees to fit
                    shrinkage=c(0.01,0.1),      # Try 2 values for learning rate 
                    n.minobsinnode = 20)

gbm.tune=train(x=Xreg,y=as.numeric(train),
                              method = "gbm",
                              
                              trControl = ctrl,
                              tuneGrid=grid,
                              verbose=FALSE)



summary(gbm.tune)

##             var   rel.inf
## week       week 60.524389
## year       year 35.565282
## day         day  2.576255
## half       half  1.334075
## mweek     mweek  0.000000
## month     month  0.000000
## quarter quarter  0.000000
predgbm<- predict(gbm.tune,test_augmented)%>%data.frame()

names(predgbm)<-"predgbm"


fitdata<-predict(gbm.tune,train_augmented)%>%data.frame()

names(fitdata)<-"fit"

fitdata<-cbind.data.frame(fitdata,Data[1:index,])





dfuture<-cbind.data.frame(Data[(index+1):dim(Data)[1],],predgbm)%>%
  rename(Predicted=predgbm)

dfuture3<-
dfuture%>%
     #add_column(Actual=as.numeric(test)) %>%
    mutate(residual= Actual- Predicted)%>%
   mutate(
        lo.95 = Predicted - 1.96 * residual,
        lo_95 = Predicted + qt(.025,n-2)* residual,
        hi_95 = Predicted + qt(.975,n-2)* residual,
        hi.95 = Predicted + 1.96 *residual
        )
Data%>%ggplot(aes(x=as.Date(Date)))+geom_line(aes(y=Actual,color="Actual Data"), size=1.5, linetype=1)+
  
   geom_line(aes(y= fit,x=as.Date(Date),color="fitted"),size=1.5,data=fitdata)+
  
  labs(y="Number of Vehicles",x="",title="Linear Regression Model")+
   theme(axis.text.x=element_text(angle=50, size=10, vjust=0.5))+
    (scale_x_date(breaks=date_breaks("2 month"),
      labels=date_format("%b %d  %y")))+
  scale_color_manual(values=mycolors)+
 #scale_fill_manual(values = palette_light()[1:2])
#scale_fill_manual(values=viridis_pal(option = "D")(2))
geom_vline(xintercept=as.Date("2018-01-12"), linetype=2)+

 # geom_ribbon(aes(ymin=Lo.95, ymax=Hi.95), fill = "grey", alpha=0.5,data=dfuture)+
  
#geom_line(aes(x =as.Date(Date), y = Actual,color="Actual after #09-25-2017"),data = Data2, size=1.5)+
geom_line(aes(x =as.Date(Date), y = Predicted,color="12 Week Forecast"), data =dfuture3, size=1.5, linetype=1)+
  
    
      theme(
    legend.position="top",
    legend.direction="horizontal",
    #legend.title = element_text("Above or below IQR"), # remove element_blank() x axis ticks and labels
    legend.title = element_blank(),
    text=element_text(size=8,  family="Comic Sans MS"),
    #axis.ticks.x = element_blank(),
    #axis.text.x = element_blank(), #remove x-qxis text
    #axis.title.x = element_blank(),
    legend.text = element_text(size = 7),# legend title size
    #legend.text = element_blank(),# remove legend text
    plot.title = element_text(hjust = 0.5), #position legend in the middle
    axis.text.x = element_text(angle = 30, hjust = 1)
)+
 
  geom_ribbon(aes(ymin=lo.95, ymax=  hi.95), fill = "grey", alpha=0.5,data=dfuture3)+ 
 # geom_rect(xmin = as.numeric(ymd("2017-09-25")),
#              xmax = as.numeric(ymd("2018-01-15")),
#              ymin = 20, ymax = 65,
#              alpha = 0.01 ,fill =  "white"  ) +
  
  annotate("text", x = ymd("2016-03-17"), y = 90,
             label = "Train Region",color="red",size=2.5,angle=0) +
   annotate("text", x = ymd("2018-03-01"), y = 90,
              label = "Test Region",color="red",size=2.5,angle=0)

ggsave("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/Agbm.png")
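A quick numeric check of the GBM's out-of-sample accuracy (a sketch using caret::postResample() on the held-out weeks):

postResample(pred = predict(gbm.tune, test_augmented),
             obs  = Data$Actual[(index + 1):nrow(Data)])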

Non-Linear Regression Models


h <- 12

ts_train<-tk_ts(train)
fit.lin <- tslm(ts_train ~ trend)
fcasts.lin <- forecast(fit.lin, h = h)
fit.exp <- tslm(ts_train ~ trend, lambda = 0)
fcasts.exp <- forecast(fit.exp, h = h)

t <- time(ts_train)
# break points as positions on the weekly time index (2017-06-19 and 2017-10-16)
t.break1 <- which(Data$Date == as.Date("2017-06-19"))
t.break2 <- which(Data$Date == as.Date("2017-10-16"))
tb1 <- ts(pmax(0, t - t.break1), start = start(ts_train))
tb2 <- ts(pmax(0, t - t.break2), start = start(ts_train))

fit.pw <- tslm(ts_train ~ t + tb1 + tb2)
t.new <- t[length(t)] + seq(h)
tb1.new <- tb1[length(tb1)] + seq(h)
tb2.new <- tb2[length(tb2)] + seq(h)

newdata <- cbind(t=t.new, tb1=tb1.new, tb2=tb2.new) %>%
  as.data.frame()
fcasts.pw <- forecast(fit.pw, newdata = newdata)

fit.spline <- tslm(ts_train ~ t + I(t^2) + I(t^3) +
  I(tb1^3) + I(tb2^3))
fcasts.spl <- forecast(fit.spline, newdata = newdata)

autoplot(ts_train) +
  autolayer(fitted(fit.lin), series = "Linear") +
  autolayer(fitted(fit.exp), series = "Exponential") +
  autolayer(fitted(fit.pw), series = "Piecewise") +
  autolayer(fitted(fit.spline), series = "Cubic Spline") +
  autolayer(fcasts.pw, series="Piecewise") +
  autolayer(fcasts.lin, series="Linear", PI=FALSE) +
  autolayer(fcasts.exp, series="Exponential", PI=FALSE) +
  autolayer(fcasts.spl, series="Cubic Spline", PI=FALSE) +
  xlab("Time") + ylab("Number of Vehicles") +
  ggtitle("Vehicle Forecast") +
  guides(colour = guide_legend(title = " "))

ts_train %>%
  splinef(lambda=0) %>%
  autoplot()

ts_train %>%
  splinef() %>%
  autoplot()+geom_line()

f=ts_train %>%
  splinef(lambda=0) 


checkresiduals(as.vector(f$residuals))

ggtsdisplay(as.vector(f$residuals))

Dynamic harmonic regression

Harmonic regression allows the seasonal pattern to be modelled using Fourier terms with short-term time series dynamics handled by an ARMA error.
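A minimal sketch of this approach (my own illustration: the weekly training series is re-cast with an approximate annual frequency of 365.25/7, and K, the number of Fourier pairs, would normally be chosen by minimizing AICc):

y_weekly <- ts(as.numeric(train), frequency = 365.25/7)
fit_dhr  <- auto.arima(y_weekly, xreg = fourier(y_weekly, K = 2),
                       seasonal = FALSE, lambda = 0)
fc_dhr   <- forecast(fit_dhr, xreg = fourier(y_weekly, K = 2, h = n))
autoplot(fc_dhr)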

# plots <- list()
# for (i in seq(6)) {
#   fit <- auto.arima(train, xreg = fourier(train, K = i),
#     seasonal = FALSE, lambda = 0)
#   plots[[i]] <- autoplot(forecast(fit,xreg=fourier(train, K=i, h=24))) +
#     xlab(paste("K=",i,"   AICC=",round(fit$aicc,2))) +
#     ylab("") + ylim(1.5,4.7)
# }
# 
# gridExtra::grid.arrange(plots[[1]],plots[[2]],plots[[3]],
#                         plots[[4]],plots[[5]],plots[[6]], nrow=3)
# n=12
# 
# 
# NewData<- rio::import("/Users/nanaakwasiabayieboateng/Documents/memphisclassesbooks/Time series/Manpower/actvsfcdatajuly311.xlsx",sheet="Sheet5")
# 
# NewData[,"Date"]<-as.Date(NewData[,"Date"])
# 
# NewData%>%head()
# 
# 
# train_aug<-tk_augment_timeseries_signature(NewData)
# 
# 
# Xreg4<-train_aug[,c("mweek","week","day","month","quarter","half","year")]
# 
# Xreg4
# 
# #### make prediction dates
# 
# Dates_future<-NewData%>%tk_zoo() %>% tk_index()%>%
#     tk_make_future_timeseries(n_future = n)%>%data.frame()
# 
# Dates_future
# 
# test_aug<-tk_augment_timeseries_signature(Dates_future)
# 
# test_aug
# 
# Xreg5<-test_aug[,c("mweek")]
# 
# Xreg5
# 
# #The deterministic trend model is obtained as follows:
# 
# 
# Train<-NewData%>%tk_ts()%>%na.kalman()
# 
# fit_arima6 <- auto.arima(y=Train,d=0 )
# fit_arima6
# Add time series signature
#train_augmented <- NewData[,"Date"] %>%
#    tk_augment_timeseries_signature()


#idx_future<-train%>%tk_zoo() %>% tk_index()%>%
#    tk_make_future_timeseries(n_future = n)



# # Add time series signature
# future_aug <- Dates_future %>%
#     tk_augment_timeseries_signature()
# 
# 
# 
# Train<-NewData%>%tk_ts()%>%na.kalman()
# 
# #The deterministic trend model is obtained as follows:
# fit_arima6 <- auto.arima(y=Train,xreg =Xreg4,d=0 )
# fit_arima6
# 
# 
# # Add time series signature
# train_aug <- NewData[,"Date"]%>%tk_ts()%>%
#     tk_augment_timeseries_signature()
# 
# idx=Train%>%tk_index()%>%tk_get_timeseries_signature() 
# idx
# 
# #### make prediction dates
# 
# Dates_future<-NewData%>%tk_zoo() %>% tk_index()%>%
#     tk_make_future_timeseries(n_future = n)
# 
# 
# 
# 
# fcast6 <- forecast(fit_arima6, h = n,xreg=future_aug)%>%data.frame()
# fcast6
# 
# 
# #seq.POSIXt(start=as.Date("2017-10-30"))
# Season start dates for reference: spring Mar 20, summer Jun 21, fall Sep 23, winter Dec 21
# shut down the parallel cluster created above
stopCluster(cl)