full_data <- readxl::read_excel('Data Set for Class.xls')
head(full_data)
## # A tibble: 6 x 7
##   SeriesInd group Var01     Var02 Var03 Var05 Var07
##       <dbl> <chr> <dbl>     <dbl> <dbl> <dbl> <dbl>
## 1     40669 S03    30.6 123432400  30.3  30.5  30.6
## 2     40669 S02    10.3  60855800  10.0  10.2  10.3
## 3     40669 S01    26.6  10369300  25.9  26.2  26.0
## 4     40669 S06    27.5  39335700  26.8  27.0  27.3
## 5     40669 S05    69.3  27809100  68.2  68.7  69.2
## 6     40669 S04    17.2  16587400  16.9  16.9  17.1
# For simplicity lets convert SeriesInd to date. by setting Origin of date to 1900 Jan 1st.
print(paste("Start Of Data:",as.Date(min(full_data$SeriesInd) , origin = "1900-01-01")))
## [1] "Start Of Data: 2011-05-08"
print(paste("End Of Data:",as.Date(max(full_data$SeriesInd) , origin = "1900-01-01")))
## [1] "End Of Data: 2018-05-03"
print(paste("Forecast from :",as.Date(max(full_data$SeriesInd)+1 , origin = "1900-01-01")," TO ",as.Date(max(full_data$SeriesInd)+140 , origin = "1900-01-01")))
## [1] "Forecast from : 2018-05-04  TO  2018-09-20"
full_data$SeriesInd <- as.Date(full_data$SeriesInd, origin = "1900-01-01")
head(full_data)
## # A tibble: 6 x 7
##   SeriesInd  group Var01     Var02 Var03 Var05 Var07
##   <date>     <chr> <dbl>     <dbl> <dbl> <dbl> <dbl>
## 1 2011-05-08 S03    30.6 123432400  30.3  30.5  30.6
## 2 2011-05-08 S02    10.3  60855800  10.0  10.2  10.3
## 3 2011-05-08 S01    26.6  10369300  25.9  26.2  26.0
## 4 2011-05-08 S06    27.5  39335700  26.8  27.0  27.3
## 5 2011-05-08 S05    69.3  27809100  68.2  68.7  69.2
## 6 2011-05-08 S04    17.2  16587400  16.9  16.9  17.1
# Get the full date range for the data 
allDates <- seq.Date(
  min(full_data$SeriesInd),
  max(full_data$SeriesInd),
  "day")
# creating NA for missing data entry in full data.
full_data_na <- left_join(as.data.frame(allDates),full_data,by=c("allDates"= "SeriesInd"))
# subset(full_data[,c(1,2,3)], group == "S01")%>% .[,c(1,3)]  %>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
# 
# 
# subset(full_data[,c(1,2,3)], group == "S02")%>% .[,c(1,3)]  %>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
# 
# full_data_na %>% filter(.,group=="S01") %>% .[,c(1,3)] %>% plot()
# 
#   
# #+geom_col(position="dodge", alpha=0.5) 
# 
# 
# dt_s06_v1 %>% ggplot(aes(x=allDates, y= Var01))+  geom_line()
# 
# 
# 
# full_data_na[,c(1,3)]plot()
# Creating a subset of date for each Group and Varible. 
dt_s01_v1 = subset(full_data[,c(1,2,3)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v2 = subset(full_data[,c(1,2,4)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v3 = subset(full_data[,c(1,2,5)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v5 = subset(full_data[,c(1,2,6)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v7 = subset(full_data[,c(1,2,7)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v1 = subset(full_data[,c(1,2,3)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v2= subset(full_data[,c(1,2,4)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v3= subset(full_data[,c(1,2,5)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v5= subset(full_data[,c(1,2,6)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v7= subset(full_data[,c(1,2,7)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v1 = subset(full_data[,c(1,2,3)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v2 = subset(full_data[,c(1,2,4)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v3 = subset(full_data[,c(1,2,5)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v5 = subset(full_data[,c(1,2,6)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v7 = subset(full_data[,c(1,2,7)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v1 = subset(full_data[,c(1,2,3)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v2= subset(full_data[,c(1,2,4)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v3= subset(full_data[,c(1,2,5)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v5= subset(full_data[,c(1,2,6)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v7= subset(full_data[,c(1,2,7)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v1 = subset(full_data[,c(1,2,3)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v2 = subset(full_data[,c(1,2,4)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v3 = subset(full_data[,c(1,2,5)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v5 = subset(full_data[,c(1,2,6)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v7 = subset(full_data[,c(1,2,7)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v1 = subset(full_data[,c(1,2,3)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v2 = subset(full_data[,c(1,2,4)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v3 = subset(full_data[,c(1,2,5)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v5 = subset(full_data[,c(1,2,6)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v7 = subset(full_data[,c(1,2,7)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))

S04 - Forecast Var01, Var02

We usually see consecutive sequences of five SeriesInd entries followed by a two-integer (-days, based on our assumption) break, which might be indicative of weekdays. There are some additional breaks in the data beyond the five days on, two days off pattern.

Note that if it were confirmed that the SeriesInd values represented weekdays, it might be prudent to remove the weekend days from the sequence for forecasting purposes.

Exploratory Data Analysis

summary(dt_s04_v1)
##     allDates              Var01      
##  Min.   :2011-05-08   Min.   :11.80  
##  1st Qu.:2013-02-04   1st Qu.:15.99  
##  Median :2014-11-04   Median :23.34  
##  Mean   :2014-11-04   Mean   :26.46  
##  3rd Qu.:2016-08-03   3rd Qu.:36.42  
##  Max.   :2018-05-03   Max.   :52.62  
##                       NA's   :933
summary(dt_s04_v2)
##     allDates              Var02          
##  Min.   :2011-05-08   Min.   :  3468900  
##  1st Qu.:2013-02-04   1st Qu.: 12918725  
##  Median :2014-11-04   Median : 17000800  
##  Mean   :2014-11-04   Mean   : 20757818  
##  3rd Qu.:2016-08-03   3rd Qu.: 24015700  
##  Max.   :2018-05-03   Max.   :233872100  
##                       NA's   :931

We see NA’s as expected. The maximum value of Var02 looks like it could be a substantial outlier.

Let’s start with S04 Var01.

plot(dt_s04_v1,main="S04 Var01")

There might be a seasonal drop near the end of each year, but it’s not definitive. Business cycles could be present.

plot(dt_s04_v2,main="S04 Var02")

There is no obvious trend or seasonality, but there are some outlier values - one clear outlier value is apparent in late 2015. It appears relatively stationary.

Because we haven’t confirmed the weekend hypothesis, we will go ahead and fill the missing SeriesInd values for both Var01 and Var02 with “N/A” entries for now. We won’t remove the presumed SeriesInd weekend values from the dataset.

Let’s review some different methods of handling NA values in the data.

# View of some differnet imputation methods and their values.  
cbind("Value"=dt_s01_v1$Var01,
      "na.interp"=na.interp(dt_s01_v1$Var01),
      "na.approx"=na_seadec(dt_s01_v1$Var01),
      "na.kalman"=na.kalman(dt_s01_v1$Var01),
      "na.interpolation"=na_interpolation(dt_s01_v1$Var01)
      
      ) %>% 
  head(.,20)
## Warning in na_seadec(dt_s01_v1$Var01): No seasonality information for dataset could be found, going on without decomposition.
##               Setting find_frequency=TRUE might be an option.
## Warning: na.kalman will replaced by na_kalman.
##     Functionality stays the same.
##     The new function name better fits modern R code style guidelines.
##     Please adjust your code accordingly.
## Time Series:
## Start = 1 
## End = 20 
## Frequency = 1 
##    Value na.interp na.approx na.kalman na.interpolation
##  1 26.61  26.61000  26.61000  26.61000         26.61000
##  2 26.30  26.30000  26.30000  26.30000         26.30000
##  3 26.03  26.03000  26.03000  26.03000         26.03000
##  4 25.84  25.84000  25.84000  25.84000         25.84000
##  5 26.34  26.34000  26.34000  26.34000         26.34000
##  6    NA  26.39000  26.39000  26.37014         26.39000
##  7    NA  26.44000  26.44000  26.41866         26.44000
##  8 26.49  26.49000  26.49000  26.49000         26.49000
##  9 26.03  26.03000  26.03000  26.03000         26.03000
## 10 25.16  25.16000  25.16000  25.16000         25.16000
## 11 25.00  25.00000  25.00000  25.00000         25.00000
## 12 24.77  24.77000  24.77000  24.77000         24.77000
## 13    NA  24.77250  24.77250  24.77825         24.77250
## 14    NA  24.77500  24.77500  24.77663         24.77500
## 15    NA  24.77750  24.77750  24.77502         24.77750
## 16 24.78  24.78000  24.78000  24.78000         24.78000
## 17 24.61  24.61000  24.61000  24.61000         24.61000
## 18 24.88  24.88000  24.88000  24.88000         24.88000
## 19 24.17  24.17000  24.17000  24.17000         24.17000
## 20    NA  24.05333  24.05333  24.07196         24.05333

We will use the approx function.

dt_s04_v1_xts <- xts(c(dt_s04_v1$Var01),  order.by=as.Date(dt_s04_v1$allDates))%>% na.approx()
dt_s04_v2_xts <- xts(c(dt_s04_v2$Var02),  order.by=as.Date(dt_s04_v2$allDates))%>% na.approx()
dt_s04_v1_stl <- c(dt_s04_v1$Var01,  order.by=as.Date(dt_s04_v1$allDates))%>% na.approx()
dt_s04_v2_stl <- c(dt_s04_v2$Var02,  order.by=as.Date(dt_s04_v2$allDates))%>% na.approx()

We appear to have two disparate datasets, so we will address them separately. Let’s find the right model to forecast S04 Var01.

ggtsdisplay(dt_s04_v1_xts,main="S04 Var01",xlab="Date",ylab="Value")

dt_s04_v1_xts %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 21.2264 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

We see clear autocorrelation between variables in the ACF. Let’s take the first-order difference to see if our data can be made stationary for modeling.

ggtsdisplay(diff(dt_s04_v1_xts),main="S04 Diff Var01",xlab="Date",ylab="Diff Value")
## Warning: Removed 1 rows containing missing values (geom_point).

dt_s04_v1_xts %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.1268 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

This test statistic looks better, so we conclude that the differences data are stationary. We verify with ndiffs.

ndiffs(dt_s04_v1_xts)
## [1] 1

The S04 Var01 plots seem to indicate that ARIMA would be a good fit for this model.

aa_fit_dt_s04_v1 <- auto.arima(coredata(dt_s04_v1_xts,stepwise=FALSE,approximation = FALSE))
summary(aa_fit_dt_s04_v1)
## Series: coredata(dt_s04_v1_xts, stepwise = FALSE, approximation = FALSE) 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.5637  -0.5158
## s.e.  0.1730   0.1793
## 
## sigma^2 estimated as 0.1517:  log likelihood=-1118.44
## AIC=2242.88   AICc=2242.89   BIC=2260.17
## 
## Training set error measures:
##                       ME      RMSE       MAE       MPE      MAPE      MASE
## Training set 0.007547223 0.3892143 0.2209094 0.0210539 0.8355055 0.9902587
##                      ACF1
## Training set -0.004536873
coef(aa_fit_dt_s04_v1)
##        ar1        ma1 
##  0.5637108 -0.5157887

Auto arima did not give us drift.

checkresiduals(aa_fit_dt_s04_v1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)
## Q* = 4.6289, df = 8, p-value = 0.7964
## 
## Model df: 2.   Total lags used: 10

The residuals are independent, which is what we want.

forecast(aa_fit_dt_s04_v1,h=140) %>% autoplot()

Fore_dt_s04_v1 <- forecast(aa_fit_dt_s04_v1,h=140)

autoplot(aa_fit_dt_s04_v1)

Fore_dt_s04_v1
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2354       36.90138 36.40226 37.40050 36.13805 37.66471
## 2355       36.89652 36.17355 37.61949 35.79084 38.00221
## 2356       36.89378 35.99349 37.79408 35.51690 38.27067
## 2357       36.89224 35.84029 37.94419 35.28342 38.50106
## 2358       36.89137 35.70505 38.07769 35.07706 38.70569
## 2359       36.89088 35.58292 38.19884 34.89053 38.89123
## 2360       36.89060 35.47087 38.31034 34.71930 39.06190
## 2361       36.89045 35.36683 38.41406 34.56028 39.22062
## 2362       36.89036 35.26936 38.51136 34.41126 39.36946
## 2363       36.89031 35.17738 38.60324 34.27061 39.51001
## 2364       36.89028 35.09006 38.69050 34.13708 39.64348
## 2365       36.89027 35.00678 38.77375 34.00972 39.77081
## 2366       36.89026 34.92702 38.85350 33.88774 39.89277
## 2367       36.89025 34.85037 38.93014 33.77052 40.00999
## 2368       36.89025 34.77650 39.00400 33.65754 40.12296
## 2369       36.89025 34.70512 39.07538 33.54838 40.23212
## 2370       36.89025 34.63600 39.14450 33.44267 40.33782
## 2371       36.89025 34.56894 39.21156 33.34011 40.44038
## 2372       36.89025 34.50376 39.27673 33.24043 40.54007
## 2373       36.89025 34.44031 39.34018 33.14340 40.63710
## 2374       36.89025 34.37847 39.40202 33.04881 40.73168
## 2375       36.89025 34.31811 39.46238 32.95651 40.82399
## 2376       36.89025 34.25914 39.52135 32.86631 40.91418
## 2377       36.89025 34.20146 39.57903 32.77810 41.00239
## 2378       36.89025 34.14499 39.63550 32.69174 41.08875
## 2379       36.89025 34.08966 39.69083 32.60712 41.17337
## 2380       36.89025 34.03540 39.74509 32.52414 41.25635
## 2381       36.89025 33.98216 39.79833 32.44271 41.33778
## 2382       36.89025 33.92987 39.85062 32.36274 41.41775
## 2383       36.89025 33.87849 39.90200 32.28416 41.49633
## 2384       36.89025 33.82797 39.95252 32.20690 41.57359
## 2385       36.89025 33.77827 40.00222 32.13090 41.64960
## 2386       36.89025 33.72936 40.05113 32.05608 41.72441
## 2387       36.89025 33.68119 40.09931 31.98241 41.79808
## 2388       36.89025 33.63373 40.14676 31.90983 41.87066
## 2389       36.89025 33.58695 40.19354 31.83829 41.94220
## 2390       36.89025 33.54083 40.23967 31.76775 42.01274
## 2391       36.89025 33.49533 40.28516 31.69817 42.08232
## 2392       36.89025 33.45043 40.33006 31.62951 42.15098
## 2393       36.89025 33.40612 40.37438 31.56173 42.21876
## 2394       36.89025 33.36236 40.41814 31.49480 42.28569
## 2395       36.89025 33.31913 40.46136 31.42870 42.35179
## 2396       36.89025 33.27642 40.50407 31.36338 42.41711
## 2397       36.89025 33.23422 40.54628 31.29883 42.48166
## 2398       36.89025 33.19249 40.58800 31.23501 42.54548
## 2399       36.89025 33.15123 40.62926 31.17191 42.60858
## 2400       36.89025 33.11042 40.67007 31.10950 42.67099
## 2401       36.89025 33.07004 40.71045 31.04775 42.73274
## 2402       36.89025 33.03009 40.75040 30.98665 42.79384
## 2403       36.89025 32.99055 40.78994 30.92617 42.85432
## 2404       36.89025 32.95140 40.82909 30.86630 42.91419
## 2405       36.89025 32.91264 40.86785 30.80702 42.97347
## 2406       36.89025 32.87425 40.90624 30.74831 43.03218
## 2407       36.89025 32.83623 40.94426 30.69016 43.09033
## 2408       36.89025 32.79856 40.98193 30.63255 43.14794
## 2409       36.89025 32.76123 41.01926 30.57547 43.20503
## 2410       36.89025 32.72424 41.05625 30.51889 43.26160
## 2411       36.89025 32.68757 41.09292 30.46281 43.31768
## 2412       36.89025 32.65123 41.12927 30.40722 43.37327
## 2413       36.89025 32.61519 41.16531 30.35210 43.42839
## 2414       36.89025 32.57945 41.20105 30.29745 43.48304
## 2415       36.89025 32.54400 41.23649 30.24324 43.53725
## 2416       36.89025 32.50884 41.27165 30.18947 43.59102
## 2417       36.89025 32.47396 41.30653 30.13613 43.64436
## 2418       36.89025 32.43936 41.34113 30.08320 43.69729
## 2419       36.89025 32.40502 41.37547 30.03069 43.74980
## 2420       36.89025 32.37094 41.40955 29.97857 43.80192
## 2421       36.89025 32.33712 41.44337 29.92684 43.85365
## 2422       36.89025 32.30355 41.47694 29.87550 43.90499
## 2423       36.89025 32.27022 41.51027 29.82453 43.95596
## 2424       36.89025 32.23713 41.54336 29.77392 44.00657
## 2425       36.89025 32.20427 41.57622 29.72367 44.05682
## 2426       36.89025 32.17165 41.60885 29.67377 44.10672
## 2427       36.89025 32.13924 41.64125 29.62421 44.15628
## 2428       36.89025 32.10706 41.67343 29.57499 44.20550
## 2429       36.89025 32.07509 41.70540 29.52610 44.25439
## 2430       36.89025 32.04333 41.73716 29.47753 44.30296
## 2431       36.89025 32.01178 41.76871 29.42928 44.35122
## 2432       36.89025 31.98043 41.80006 29.38133 44.39916
## 2433       36.89025 31.94928 41.83121 29.33369 44.44680
## 2434       36.89025 31.91833 41.86216 29.28635 44.49414
## 2435       36.89025 31.88756 41.89293 29.23930 44.54119
## 2436       36.89025 31.85699 41.92350 29.19254 44.58795
## 2437       36.89025 31.82660 41.95389 29.14607 44.63443
## 2438       36.89025 31.79639 41.98410 29.09986 44.68063
## 2439       36.89025 31.76636 42.01413 29.05394 44.72656
## 2440       36.89025 31.73650 42.04399 29.00828 44.77222
## 2441       36.89025 31.70682 42.07367 28.96288 44.81761
## 2442       36.89025 31.67730 42.10319 28.91774 44.86275
## 2443       36.89025 31.64795 42.13254 28.87285 44.90764
## 2444       36.89025 31.61877 42.16172 28.82822 44.95227
## 2445       36.89025 31.58974 42.19075 28.78383 44.99666
## 2446       36.89025 31.56088 42.21961 28.73968 45.04081
## 2447       36.89025 31.53217 42.24833 28.69577 45.08472
## 2448       36.89025 31.50361 42.27688 28.65209 45.12840
## 2449       36.89025 31.47520 42.30529 28.60865 45.17184
## 2450       36.89025 31.44694 42.33355 28.56543 45.21506
## 2451       36.89025 31.41883 42.36166 28.52243 45.25806
## 2452       36.89025 31.39086 42.38963 28.47966 45.30083
## 2453       36.89025 31.36303 42.41746 28.43710 45.34339
## 2454       36.89025 31.33534 42.44515 28.39475 45.38574
## 2455       36.89025 31.30779 42.47270 28.35262 45.42787
## 2456       36.89025 31.28037 42.50012 28.31069 45.46980
## 2457       36.89025 31.25309 42.52740 28.26896 45.51153
## 2458       36.89025 31.22594 42.55455 28.22744 45.55305
## 2459       36.89025 31.19892 42.58157 28.18611 45.59438
## 2460       36.89025 31.17202 42.60847 28.14498 45.63551
## 2461       36.89025 31.14526 42.63524 28.10404 45.67645
## 2462       36.89025 31.11861 42.66188 28.06329 45.71720
## 2463       36.89025 31.09209 42.68840 28.02273 45.75776
## 2464       36.89025 31.06569 42.71480 27.98236 45.79814
## 2465       36.89025 31.03941 42.74108 27.94216 45.83833
## 2466       36.89025 31.01324 42.76725 27.90215 45.87834
## 2467       36.89025 30.98720 42.79330 27.86231 45.91818
## 2468       36.89025 30.96126 42.81923 27.82265 45.95784
## 2469       36.89025 30.93544 42.84505 27.78316 45.99733
## 2470       36.89025 30.90973 42.87076 27.74384 46.03665
## 2471       36.89025 30.88413 42.89636 27.70469 46.07580
## 2472       36.89025 30.85864 42.92185 27.66570 46.11479
## 2473       36.89025 30.83326 42.94723 27.62688 46.15361
## 2474       36.89025 30.80798 42.97251 27.58822 46.19227
## 2475       36.89025 30.78281 42.99768 27.54973 46.23077
## 2476       36.89025 30.75774 43.02275 27.51139 46.26911
## 2477       36.89025 30.73277 43.04772 27.47320 46.30729
## 2478       36.89025 30.70790 43.07259 27.43517 46.34532
## 2479       36.89025 30.68314 43.09735 27.39729 46.38320
## 2480       36.89025 30.65847 43.12202 27.35957 46.42093
## 2481       36.89025 30.63390 43.14659 27.32199 46.45850
## 2482       36.89025 30.60942 43.17107 27.28456 46.49594
## 2483       36.89025 30.58504 43.19545 27.24727 46.53322
## 2484       36.89025 30.56076 43.21974 27.21013 46.57036
## 2485       36.89025 30.53656 43.24393 27.17313 46.60736
## 2486       36.89025 30.51246 43.26803 27.13627 46.64423
## 2487       36.89025 30.48845 43.29204 27.09954 46.68095
## 2488       36.89025 30.46453 43.31596 27.06296 46.71753
## 2489       36.89025 30.44070 43.33979 27.02651 46.75398
## 2490       36.89025 30.41695 43.36354 26.99020 46.79029
## 2491       36.89025 30.39329 43.38720 26.95402 46.82648
## 2492       36.89025 30.36972 43.41077 26.91797 46.86253
## 2493       36.89025 30.34623 43.43426 26.88204 46.89845

Now, let’s find the right model to forecast S04 Var02.

ggtsdisplay(dt_s04_v2_xts,main="S04 Var02",xlab="Date",ylab="Value")

dt_s04_v2_xts %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 1.3452 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

We see clear autocorrelation between variables in the ACF. Let’s take the first-order difference to see if our data can be made stationary for modeling.

ggtsdisplay(diff(dt_s04_v2_xts),main="S04 Diff Var02",xlab="Date",ylab="Diff Value")
## Warning: Removed 1 rows containing missing values (geom_point).

dt_s04_v2_xts %>% diff() %>% ur.kpss() %>% summary()
## 
## ####################### 
## # KPSS Unit Root Test # 
## ####################### 
## 
## Test is of type: mu with 8 lags. 
## 
## Value of test-statistic is: 0.0035 
## 
## Critical value for a significance level of: 
##                 10pct  5pct 2.5pct  1pct
## critical values 0.347 0.463  0.574 0.739

This test statistic looks better, so we conclude that the differences data are stationary. We verify with ndiffs.

ndiffs(dt_s04_v2_xts)
## [1] 1

The S04 Var02 plots seem to indicate that ARIMA would be a good fit for this model.

aa_fit_dt_s04_v2 <- auto.arima(log(dt_s04_v2_xts))
summary(aa_fit_dt_s04_v2)
## Series: log(dt_s04_v2_xts) 
## ARIMA(2,1,2) 
## 
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
##          ar1     ar2      ma1      ma2
##       0.4107  0.1706  -0.7038  -0.2506
## s.e.     NaN     NaN      NaN      NaN
## 
## sigma^2 estimated as 0.09486:  log likelihood=-566.04
## AIC=1142.09   AICc=1142.11   BIC=1170.9
## 
## Training set error measures:
##                        ME      RMSE       MAE         MPE    MAPE      MASE
## Training set -0.001647831 0.3076678 0.2142562 -0.04139464 1.27564 0.9477997
##                      ACF1
## Training set -0.004640337
coef(aa_fit_dt_s04_v2)
##        ar1        ar2        ma1        ma2 
##  0.4107044  0.1705645 -0.7037685 -0.2506051

Auto arima did not give us drift.

checkresiduals(aa_fit_dt_s04_v2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,2)
## Q* = 11.363, df = 6, p-value = 0.07779
## 
## Model df: 4.   Total lags used: 10

The residuals are independent, which is what we want.

forecast(aa_fit_dt_s04_v2,h=140) %>% autoplot()

Fore_dt_s04_v2 <- forecast(aa_fit_dt_s04_v2,h=140)

autoplot(aa_fit_dt_s04_v2)

fc <- forecast(aa_fit_dt_s04_v2,h=140)
fc$mean<-exp(fc$mean)
fc$upper<-exp(fc$upper)
fc$lower<-exp(fc$lower)
fc$x<-exp(fc$x)
fc
##      Point Forecast   Lo 80    Hi 80   Lo 95    Hi 95
## 2354        7900872 5324194 11724549 4320252 14449107
## 2355        8993461 5546214 14583344 4294042 18835948
## 2356        9800806 5808754 16536387 4403741 21812319
## 2357       10379823 6027357 17875288 4520252 23835118
## 2358       10784402 6190209 18788272 4614021 25206501
## 2359       11062879 6304673 19412154 4681532 26142575
## 2360       11252471 6381694 19840830 4726573 26788568
## 2361       11380657 6431434 20138485 4754435 27241793
## 2362       11466912 6461943 20348379 4769868 27566814
## 2363       11524771 6479175 20499576 4776592 27806510
## 2364       11563501 6487364 20611538 4777336 27989357
## 2365       11589390 6489449 20697282 4774029 28134299
## 2366       11606679 6487441 20765507 4768006 28253952
## 2367       11618219 6482703 20822024 4760177 28356721
## 2368       11625917 6476154 20870712 4751157 28448214
## 2369       11631051 6468409 20914162 4741362 28532172
## 2370       11634475 6459880 20954105 4731067 28611094
## 2371       11636757 6450843 20991694 4720458 28686647
## 2372       11638279 6441481 21027701 4709660 28759944
## 2373       11639294 6431917 21062640 4698753 28831730
## 2374       11639970 6422232 21096856 4687792 28902500
## 2375       11640421 6412480 21130576 4676814 28972588
## 2376       11640722 6402697 21163955 4665842 29042215
## 2377       11640922 6392907 21197096 4654893 29111529
## 2378       11641056 6383126 21230065 4643977 29180630
## 2379       11641145 6373363 21262911 4633100 29249584
## 2380       11641204 6363626 21295662 4622267 29318436
## 2381       11641243 6353919 21328340 4611480 29387215
## 2382       11641270 6344246 21360958 4600741 29455942
## 2383       11641287 6334607 21393526 4590051 29524630
## 2384       11641299 6325004 21426050 4579411 29593289
## 2385       11641307 6315437 21458535 4568821 29661925
## 2386       11641312 6305907 21490983 4558281 29730541
## 2387       11641316 6296415 21523397 4547790 29799142
## 2388       11641318 6286959 21555777 4537348 29867729
## 2389       11641319 6277540 21588126 4526955 29936305
## 2390       11641320 6268157 21620444 4516612 30004869
## 2391       11641321 6258811 21652731 4506316 30073424
## 2392       11641322 6249502 21684988 4496069 30141969
## 2393       11641322 6240228 21717216 4485869 30210506
## 2394       11641322 6230990 21749415 4475717 30279036
## 2395       11641322 6221787 21781586 4465611 30347558
## 2396       11641322 6212619 21813727 4455552 30416073
## 2397       11641322 6203487 21845841 4445539 30484581
## 2398       11641322 6194389 21877927 4435572 30553084
## 2399       11641322 6185325 21909986 4425650 30621581
## 2400       11641323 6176296 21942017 4415773 30690072
## 2401       11641323 6167301 21974021 4405941 30758559
## 2402       11641323 6158339 22005998 4396153 30827041
## 2403       11641323 6149410 22037949 4386409 30895520
## 2404       11641323 6140515 22069874 4376709 30963994
## 2405       11641323 6131653 22101772 4367052 31032465
## 2406       11641323 6122823 22133645 4357438 31100933
## 2407       11641323 6114026 22165493 4347867 31169399
## 2408       11641323 6105261 22197315 4338338 31237862
## 2409       11641323 6096527 22229112 4328850 31306323
## 2410       11641323 6087826 22260884 4319405 31374783
## 2411       11641323 6079156 22292632 4310001 31443241
## 2412       11641323 6070518 22324355 4300637 31511698
## 2413       11641323 6061910 22356054 4291315 31580155
## 2414       11641323 6053333 22387729 4282033 31648611
## 2415       11641323 6044787 22419381 4272791 31717068
## 2416       11641323 6036272 22451009 4263588 31785525
## 2417       11641323 6027786 22482614 4254425 31853982
## 2418       11641323 6019331 22514196 4245302 31922440
## 2419       11641323 6010905 22545755 4236217 31990900
## 2420       11641323 6002509 22577291 4227171 32059361
## 2421       11641323 5994142 22608805 4218163 32127824
## 2422       11641323 5985805 22640296 4209193 32196289
## 2423       11641323 5977496 22671766 4200261 32264757
## 2424       11641323 5969216 22703213 4191366 32333228
## 2425       11641323 5960965 22734640 4182508 32401701
## 2426       11641323 5952742 22766044 4173688 32470178
## 2427       11641323 5944547 22797428 4164904 32538659
## 2428       11641323 5936381 22828790 4156157 32607143
## 2429       11641323 5928242 22860131 4147445 32675632
## 2430       11641323 5920131 22891452 4138770 32744125
## 2431       11641323 5912047 22922752 4130130 32812623
## 2432       11641323 5903991 22954032 4121525 32881126
## 2433       11641323 5895961 22985292 4112956 32949634
## 2434       11641323 5887959 23016532 4104421 33018147
## 2435       11641323 5879983 23047752 4095922 33086667
## 2436       11641323 5872034 23078952 4087456 33155192
## 2437       11641323 5864111 23110133 4079025 33223724
## 2438       11641323 5856215 23141295 4070627 33292262
## 2439       11641323 5848344 23172437 4062264 33360807
## 2440       11641323 5840500 23203561 4053933 33429360
## 2441       11641323 5832681 23234666 4045636 33497919
## 2442       11641323 5824888 23265752 4037372 33566486
## 2443       11641323 5817120 23296820 4029141 33635061
## 2444       11641323 5809377 23327870 4020942 33703644
## 2445       11641323 5801660 23358901 4012775 33772235
## 2446       11641323 5793967 23389914 4004641 33840835
## 2447       11641323 5786299 23420910 3996538 33909444
## 2448       11641323 5778656 23451888 3988467 33978061
## 2449       11641323 5771037 23482849 3980428 34046688
## 2450       11641323 5763443 23513792 3972420 34115324
## 2451       11641323 5755872 23544718 3964443 34183970
## 2452       11641323 5748326 23575627 3956496 34252625
## 2453       11641323 5740804 23606519 3948581 34321291
## 2454       11641323 5733305 23637394 3940696 34389967
## 2455       11641323 5725830 23668253 3932841 34458654
## 2456       11641323 5718378 23699095 3925016 34527351
## 2457       11641323 5710950 23729921 3917220 34596059
## 2458       11641323 5703545 23760731 3909455 34664779
## 2459       11641323 5696163 23791525 3901719 34733510
## 2460       11641323 5688803 23822302 3894012 34802252
## 2461       11641323 5681467 23853065 3886334 34871006
## 2462       11641323 5674153 23883811 3878686 34939773
## 2463       11641323 5666861 23914542 3871065 35008551
## 2464       11641323 5659592 23945258 3863474 35077342
## 2465       11641323 5652345 23975958 3855910 35146145
## 2466       11641323 5645120 24006643 3848375 35214962
## 2467       11641323 5637918 24037314 3840868 35283791
## 2468       11641323 5630736 24067969 3833389 35352633
## 2469       11641323 5623577 24098610 3825937 35421489
## 2470       11641323 5616439 24129236 3818513 35490359
## 2471       11641323 5609323 24159848 3811116 35559242
## 2472       11641323 5602228 24190445 3803746 35628139
## 2473       11641323 5595154 24221028 3796403 35697050
## 2474       11641323 5588102 24251598 3789087 35765975
## 2475       11641323 5581070 24282153 3781797 35834915
## 2476       11641323 5574059 24312694 3774534 35903870
## 2477       11641323 5567069 24343222 3767298 35972840
## 2478       11641323 5560099 24373736 3760087 36041824
## 2479       11641323 5553150 24404236 3752902 36110824
## 2480       11641323 5546222 24434723 3745743 36179839
## 2481       11641323 5539313 24465197 3738610 36248870
## 2482       11641323 5532425 24495658 3731502 36317916
## 2483       11641323 5525557 24526106 3724420 36386978
## 2484       11641323 5518709 24556540 3717363 36456057
## 2485       11641323 5511880 24586962 3710331 36525151
## 2486       11641323 5505071 24617372 3703323 36594262
## 2487       11641323 5498282 24647768 3696341 36663390
## 2488       11641323 5491513 24678152 3689383 36732534
## 2489       11641323 5484763 24708524 3682450 36801695
## 2490       11641323 5478032 24738884 3675541 36870874
## 2491       11641323 5471320 24769231 3668656 36940069
## 2492       11641323 5464627 24799567 3661795 37009282
## 2493       11641323 5457954 24829890 3654958 37078512
print(paste0("MAPE for S04 Var01 is ", MLmetrics::MAPE(Fore_dt_s04_v1$fitted,dt_s04_v1_xts)))
## [1] "MAPE for S04 Var01 is 0.00835505464453924"
print(paste0("MAPE for S04 Var02 is ", MLmetrics::MAPE(exp(Fore_dt_s04_v2$fitted),dt_s04_v2_xts)))
## [1] "MAPE for S04 Var02 is 0.21408917391016"
write.csv(fc,"s04v02.csv")