full_data <- readxl::read_excel('Data Set for Class.xls')
head(full_data)
## # A tibble: 6 x 7
## SeriesInd group Var01 Var02 Var03 Var05 Var07
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 40669 S03 30.6 123432400 30.3 30.5 30.6
## 2 40669 S02 10.3 60855800 10.0 10.2 10.3
## 3 40669 S01 26.6 10369300 25.9 26.2 26.0
## 4 40669 S06 27.5 39335700 26.8 27.0 27.3
## 5 40669 S05 69.3 27809100 68.2 68.7 69.2
## 6 40669 S04 17.2 16587400 16.9 16.9 17.1
# For simplicity lets convert SeriesInd to date. by setting Origin of date to 1900 Jan 1st.
print(paste("Start Of Data:",as.Date(min(full_data$SeriesInd) , origin = "1900-01-01")))
## [1] "Start Of Data: 2011-05-08"
print(paste("End Of Data:",as.Date(max(full_data$SeriesInd) , origin = "1900-01-01")))
## [1] "End Of Data: 2018-05-03"
print(paste("Forecast from :",as.Date(max(full_data$SeriesInd)+1 , origin = "1900-01-01")," TO ",as.Date(max(full_data$SeriesInd)+140 , origin = "1900-01-01")))
## [1] "Forecast from : 2018-05-04 TO 2018-09-20"
full_data$SeriesInd <- as.Date(full_data$SeriesInd, origin = "1900-01-01")
head(full_data)
## # A tibble: 6 x 7
## SeriesInd group Var01 Var02 Var03 Var05 Var07
## <date> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2011-05-08 S03 30.6 123432400 30.3 30.5 30.6
## 2 2011-05-08 S02 10.3 60855800 10.0 10.2 10.3
## 3 2011-05-08 S01 26.6 10369300 25.9 26.2 26.0
## 4 2011-05-08 S06 27.5 39335700 26.8 27.0 27.3
## 5 2011-05-08 S05 69.3 27809100 68.2 68.7 69.2
## 6 2011-05-08 S04 17.2 16587400 16.9 16.9 17.1
# Get the full date range for the data
allDates <- seq.Date(
min(full_data$SeriesInd),
max(full_data$SeriesInd),
"day")
# creating NA for missing data entry in full data.
full_data_na <- left_join(as.data.frame(allDates),full_data,by=c("allDates"= "SeriesInd"))
# subset(full_data[,c(1,2,3)], group == "S01")%>% .[,c(1,3)] %>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
#
#
# subset(full_data[,c(1,2,3)], group == "S02")%>% .[,c(1,3)] %>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
#
# full_data_na %>% filter(.,group=="S01") %>% .[,c(1,3)] %>% plot()
#
#
# #+geom_col(position="dodge", alpha=0.5)
#
#
# dt_s06_v1 %>% ggplot(aes(x=allDates, y= Var01))+ geom_line()
#
#
#
# full_data_na[,c(1,3)]plot()
# Creating a subset of date for each Group and Varible.
dt_s01_v1 = subset(full_data[,c(1,2,3)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v2 = subset(full_data[,c(1,2,4)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v3 = subset(full_data[,c(1,2,5)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v5 = subset(full_data[,c(1,2,6)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s01_v7 = subset(full_data[,c(1,2,7)], group == "S01")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v1 = subset(full_data[,c(1,2,3)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v2= subset(full_data[,c(1,2,4)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v3= subset(full_data[,c(1,2,5)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v5= subset(full_data[,c(1,2,6)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s02_v7= subset(full_data[,c(1,2,7)], group == "S02")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v1 = subset(full_data[,c(1,2,3)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v2 = subset(full_data[,c(1,2,4)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v3 = subset(full_data[,c(1,2,5)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v5 = subset(full_data[,c(1,2,6)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s03_v7 = subset(full_data[,c(1,2,7)], group == "S03")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v1 = subset(full_data[,c(1,2,3)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v2= subset(full_data[,c(1,2,4)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v3= subset(full_data[,c(1,2,5)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v5= subset(full_data[,c(1,2,6)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s04_v7= subset(full_data[,c(1,2,7)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v1 = subset(full_data[,c(1,2,3)], group == "S04")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v2 = subset(full_data[,c(1,2,4)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v3 = subset(full_data[,c(1,2,5)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v5 = subset(full_data[,c(1,2,6)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s05_v7 = subset(full_data[,c(1,2,7)], group == "S05")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v1 = subset(full_data[,c(1,2,3)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v2 = subset(full_data[,c(1,2,4)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v3 = subset(full_data[,c(1,2,5)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v5 = subset(full_data[,c(1,2,6)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
dt_s06_v7 = subset(full_data[,c(1,2,7)], group == "S06")%>% .[,c(1,3)]%>% left_join(as.data.frame(allDates),.,by=c("allDates"= "SeriesInd"))
We usually see consecutive sequences of five SeriesInd entries followed by a two-integer (-days, based on our assumption) break, which might be indicative of weekdays. There are some additional breaks in the data beyond the five days on, two days off pattern.
Note that if it were confirmed that the SeriesInd values represented weekdays, it might be prudent to remove the weekend days from the sequence for forecasting purposes.
summary(dt_s04_v1)
## allDates Var01
## Min. :2011-05-08 Min. :11.80
## 1st Qu.:2013-02-04 1st Qu.:15.99
## Median :2014-11-04 Median :23.34
## Mean :2014-11-04 Mean :26.46
## 3rd Qu.:2016-08-03 3rd Qu.:36.42
## Max. :2018-05-03 Max. :52.62
## NA's :933
summary(dt_s04_v2)
## allDates Var02
## Min. :2011-05-08 Min. : 3468900
## 1st Qu.:2013-02-04 1st Qu.: 12918725
## Median :2014-11-04 Median : 17000800
## Mean :2014-11-04 Mean : 20757818
## 3rd Qu.:2016-08-03 3rd Qu.: 24015700
## Max. :2018-05-03 Max. :233872100
## NA's :931
We see NA’s as expected. The maximum value of Var02 looks like it could be a substantial outlier.
Let’s start with S04 Var01.
plot(dt_s04_v1,main="S04 Var01")
There might be a seasonal drop near the end of each year, but it’s not definitive. Business cycles could be present.
plot(dt_s04_v2,main="S04 Var02")
There is no obvious trend or seasonality, but there are some outlier values - one clear outlier value is apparent in late 2015. It appears relatively stationary.
Because we haven’t confirmed the weekend hypothesis, we will go ahead and fill the missing SeriesInd values for both Var01 and Var02 with “N/A” entries for now. We won’t remove the presumed SeriesInd weekend values from the dataset.
Let’s review some different methods of handling NA values in the data.
# View of some differnet imputation methods and their values.
cbind("Value"=dt_s01_v1$Var01,
"na.interp"=na.interp(dt_s01_v1$Var01),
"na.approx"=na_seadec(dt_s01_v1$Var01),
"na.kalman"=na.kalman(dt_s01_v1$Var01),
"na.interpolation"=na_interpolation(dt_s01_v1$Var01)
) %>%
head(.,20)
## Warning in na_seadec(dt_s01_v1$Var01): No seasonality information for dataset could be found, going on without decomposition.
## Setting find_frequency=TRUE might be an option.
## Warning: na.kalman will replaced by na_kalman.
## Functionality stays the same.
## The new function name better fits modern R code style guidelines.
## Please adjust your code accordingly.
## Time Series:
## Start = 1
## End = 20
## Frequency = 1
## Value na.interp na.approx na.kalman na.interpolation
## 1 26.61 26.61000 26.61000 26.61000 26.61000
## 2 26.30 26.30000 26.30000 26.30000 26.30000
## 3 26.03 26.03000 26.03000 26.03000 26.03000
## 4 25.84 25.84000 25.84000 25.84000 25.84000
## 5 26.34 26.34000 26.34000 26.34000 26.34000
## 6 NA 26.39000 26.39000 26.37014 26.39000
## 7 NA 26.44000 26.44000 26.41866 26.44000
## 8 26.49 26.49000 26.49000 26.49000 26.49000
## 9 26.03 26.03000 26.03000 26.03000 26.03000
## 10 25.16 25.16000 25.16000 25.16000 25.16000
## 11 25.00 25.00000 25.00000 25.00000 25.00000
## 12 24.77 24.77000 24.77000 24.77000 24.77000
## 13 NA 24.77250 24.77250 24.77825 24.77250
## 14 NA 24.77500 24.77500 24.77663 24.77500
## 15 NA 24.77750 24.77750 24.77502 24.77750
## 16 24.78 24.78000 24.78000 24.78000 24.78000
## 17 24.61 24.61000 24.61000 24.61000 24.61000
## 18 24.88 24.88000 24.88000 24.88000 24.88000
## 19 24.17 24.17000 24.17000 24.17000 24.17000
## 20 NA 24.05333 24.05333 24.07196 24.05333
We will use the approx function.
dt_s04_v1_xts <- xts(c(dt_s04_v1$Var01), order.by=as.Date(dt_s04_v1$allDates))%>% na.approx()
dt_s04_v2_xts <- xts(c(dt_s04_v2$Var02), order.by=as.Date(dt_s04_v2$allDates))%>% na.approx()
dt_s04_v1_stl <- c(dt_s04_v1$Var01, order.by=as.Date(dt_s04_v1$allDates))%>% na.approx()
dt_s04_v2_stl <- c(dt_s04_v2$Var02, order.by=as.Date(dt_s04_v2$allDates))%>% na.approx()
We appear to have two disparate datasets, so we will address them separately. Let’s find the right model to forecast S04 Var01.
ggtsdisplay(dt_s04_v1_xts,main="S04 Var01",xlab="Date",ylab="Value")
dt_s04_v1_xts %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 21.2264
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
We see clear autocorrelation between variables in the ACF. Let’s take the first-order difference to see if our data can be made stationary for modeling.
ggtsdisplay(diff(dt_s04_v1_xts),main="S04 Diff Var01",xlab="Date",ylab="Diff Value")
## Warning: Removed 1 rows containing missing values (geom_point).
dt_s04_v1_xts %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.1268
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
This test statistic looks better, so we conclude that the differences data are stationary. We verify with ndiffs.
ndiffs(dt_s04_v1_xts)
## [1] 1
The S04 Var01 plots seem to indicate that ARIMA would be a good fit for this model.
aa_fit_dt_s04_v1 <- auto.arima(coredata(dt_s04_v1_xts,stepwise=FALSE,approximation = FALSE))
summary(aa_fit_dt_s04_v1)
## Series: coredata(dt_s04_v1_xts, stepwise = FALSE, approximation = FALSE)
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.5637 -0.5158
## s.e. 0.1730 0.1793
##
## sigma^2 estimated as 0.1517: log likelihood=-1118.44
## AIC=2242.88 AICc=2242.89 BIC=2260.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.007547223 0.3892143 0.2209094 0.0210539 0.8355055 0.9902587
## ACF1
## Training set -0.004536873
coef(aa_fit_dt_s04_v1)
## ar1 ma1
## 0.5637108 -0.5157887
Auto arima did not give us drift.
checkresiduals(aa_fit_dt_s04_v1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)
## Q* = 4.6289, df = 8, p-value = 0.7964
##
## Model df: 2. Total lags used: 10
The residuals are independent, which is what we want.
forecast(aa_fit_dt_s04_v1,h=140) %>% autoplot()
Fore_dt_s04_v1 <- forecast(aa_fit_dt_s04_v1,h=140)
autoplot(aa_fit_dt_s04_v1)
Fore_dt_s04_v1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2354 36.90138 36.40226 37.40050 36.13805 37.66471
## 2355 36.89652 36.17355 37.61949 35.79084 38.00221
## 2356 36.89378 35.99349 37.79408 35.51690 38.27067
## 2357 36.89224 35.84029 37.94419 35.28342 38.50106
## 2358 36.89137 35.70505 38.07769 35.07706 38.70569
## 2359 36.89088 35.58292 38.19884 34.89053 38.89123
## 2360 36.89060 35.47087 38.31034 34.71930 39.06190
## 2361 36.89045 35.36683 38.41406 34.56028 39.22062
## 2362 36.89036 35.26936 38.51136 34.41126 39.36946
## 2363 36.89031 35.17738 38.60324 34.27061 39.51001
## 2364 36.89028 35.09006 38.69050 34.13708 39.64348
## 2365 36.89027 35.00678 38.77375 34.00972 39.77081
## 2366 36.89026 34.92702 38.85350 33.88774 39.89277
## 2367 36.89025 34.85037 38.93014 33.77052 40.00999
## 2368 36.89025 34.77650 39.00400 33.65754 40.12296
## 2369 36.89025 34.70512 39.07538 33.54838 40.23212
## 2370 36.89025 34.63600 39.14450 33.44267 40.33782
## 2371 36.89025 34.56894 39.21156 33.34011 40.44038
## 2372 36.89025 34.50376 39.27673 33.24043 40.54007
## 2373 36.89025 34.44031 39.34018 33.14340 40.63710
## 2374 36.89025 34.37847 39.40202 33.04881 40.73168
## 2375 36.89025 34.31811 39.46238 32.95651 40.82399
## 2376 36.89025 34.25914 39.52135 32.86631 40.91418
## 2377 36.89025 34.20146 39.57903 32.77810 41.00239
## 2378 36.89025 34.14499 39.63550 32.69174 41.08875
## 2379 36.89025 34.08966 39.69083 32.60712 41.17337
## 2380 36.89025 34.03540 39.74509 32.52414 41.25635
## 2381 36.89025 33.98216 39.79833 32.44271 41.33778
## 2382 36.89025 33.92987 39.85062 32.36274 41.41775
## 2383 36.89025 33.87849 39.90200 32.28416 41.49633
## 2384 36.89025 33.82797 39.95252 32.20690 41.57359
## 2385 36.89025 33.77827 40.00222 32.13090 41.64960
## 2386 36.89025 33.72936 40.05113 32.05608 41.72441
## 2387 36.89025 33.68119 40.09931 31.98241 41.79808
## 2388 36.89025 33.63373 40.14676 31.90983 41.87066
## 2389 36.89025 33.58695 40.19354 31.83829 41.94220
## 2390 36.89025 33.54083 40.23967 31.76775 42.01274
## 2391 36.89025 33.49533 40.28516 31.69817 42.08232
## 2392 36.89025 33.45043 40.33006 31.62951 42.15098
## 2393 36.89025 33.40612 40.37438 31.56173 42.21876
## 2394 36.89025 33.36236 40.41814 31.49480 42.28569
## 2395 36.89025 33.31913 40.46136 31.42870 42.35179
## 2396 36.89025 33.27642 40.50407 31.36338 42.41711
## 2397 36.89025 33.23422 40.54628 31.29883 42.48166
## 2398 36.89025 33.19249 40.58800 31.23501 42.54548
## 2399 36.89025 33.15123 40.62926 31.17191 42.60858
## 2400 36.89025 33.11042 40.67007 31.10950 42.67099
## 2401 36.89025 33.07004 40.71045 31.04775 42.73274
## 2402 36.89025 33.03009 40.75040 30.98665 42.79384
## 2403 36.89025 32.99055 40.78994 30.92617 42.85432
## 2404 36.89025 32.95140 40.82909 30.86630 42.91419
## 2405 36.89025 32.91264 40.86785 30.80702 42.97347
## 2406 36.89025 32.87425 40.90624 30.74831 43.03218
## 2407 36.89025 32.83623 40.94426 30.69016 43.09033
## 2408 36.89025 32.79856 40.98193 30.63255 43.14794
## 2409 36.89025 32.76123 41.01926 30.57547 43.20503
## 2410 36.89025 32.72424 41.05625 30.51889 43.26160
## 2411 36.89025 32.68757 41.09292 30.46281 43.31768
## 2412 36.89025 32.65123 41.12927 30.40722 43.37327
## 2413 36.89025 32.61519 41.16531 30.35210 43.42839
## 2414 36.89025 32.57945 41.20105 30.29745 43.48304
## 2415 36.89025 32.54400 41.23649 30.24324 43.53725
## 2416 36.89025 32.50884 41.27165 30.18947 43.59102
## 2417 36.89025 32.47396 41.30653 30.13613 43.64436
## 2418 36.89025 32.43936 41.34113 30.08320 43.69729
## 2419 36.89025 32.40502 41.37547 30.03069 43.74980
## 2420 36.89025 32.37094 41.40955 29.97857 43.80192
## 2421 36.89025 32.33712 41.44337 29.92684 43.85365
## 2422 36.89025 32.30355 41.47694 29.87550 43.90499
## 2423 36.89025 32.27022 41.51027 29.82453 43.95596
## 2424 36.89025 32.23713 41.54336 29.77392 44.00657
## 2425 36.89025 32.20427 41.57622 29.72367 44.05682
## 2426 36.89025 32.17165 41.60885 29.67377 44.10672
## 2427 36.89025 32.13924 41.64125 29.62421 44.15628
## 2428 36.89025 32.10706 41.67343 29.57499 44.20550
## 2429 36.89025 32.07509 41.70540 29.52610 44.25439
## 2430 36.89025 32.04333 41.73716 29.47753 44.30296
## 2431 36.89025 32.01178 41.76871 29.42928 44.35122
## 2432 36.89025 31.98043 41.80006 29.38133 44.39916
## 2433 36.89025 31.94928 41.83121 29.33369 44.44680
## 2434 36.89025 31.91833 41.86216 29.28635 44.49414
## 2435 36.89025 31.88756 41.89293 29.23930 44.54119
## 2436 36.89025 31.85699 41.92350 29.19254 44.58795
## 2437 36.89025 31.82660 41.95389 29.14607 44.63443
## 2438 36.89025 31.79639 41.98410 29.09986 44.68063
## 2439 36.89025 31.76636 42.01413 29.05394 44.72656
## 2440 36.89025 31.73650 42.04399 29.00828 44.77222
## 2441 36.89025 31.70682 42.07367 28.96288 44.81761
## 2442 36.89025 31.67730 42.10319 28.91774 44.86275
## 2443 36.89025 31.64795 42.13254 28.87285 44.90764
## 2444 36.89025 31.61877 42.16172 28.82822 44.95227
## 2445 36.89025 31.58974 42.19075 28.78383 44.99666
## 2446 36.89025 31.56088 42.21961 28.73968 45.04081
## 2447 36.89025 31.53217 42.24833 28.69577 45.08472
## 2448 36.89025 31.50361 42.27688 28.65209 45.12840
## 2449 36.89025 31.47520 42.30529 28.60865 45.17184
## 2450 36.89025 31.44694 42.33355 28.56543 45.21506
## 2451 36.89025 31.41883 42.36166 28.52243 45.25806
## 2452 36.89025 31.39086 42.38963 28.47966 45.30083
## 2453 36.89025 31.36303 42.41746 28.43710 45.34339
## 2454 36.89025 31.33534 42.44515 28.39475 45.38574
## 2455 36.89025 31.30779 42.47270 28.35262 45.42787
## 2456 36.89025 31.28037 42.50012 28.31069 45.46980
## 2457 36.89025 31.25309 42.52740 28.26896 45.51153
## 2458 36.89025 31.22594 42.55455 28.22744 45.55305
## 2459 36.89025 31.19892 42.58157 28.18611 45.59438
## 2460 36.89025 31.17202 42.60847 28.14498 45.63551
## 2461 36.89025 31.14526 42.63524 28.10404 45.67645
## 2462 36.89025 31.11861 42.66188 28.06329 45.71720
## 2463 36.89025 31.09209 42.68840 28.02273 45.75776
## 2464 36.89025 31.06569 42.71480 27.98236 45.79814
## 2465 36.89025 31.03941 42.74108 27.94216 45.83833
## 2466 36.89025 31.01324 42.76725 27.90215 45.87834
## 2467 36.89025 30.98720 42.79330 27.86231 45.91818
## 2468 36.89025 30.96126 42.81923 27.82265 45.95784
## 2469 36.89025 30.93544 42.84505 27.78316 45.99733
## 2470 36.89025 30.90973 42.87076 27.74384 46.03665
## 2471 36.89025 30.88413 42.89636 27.70469 46.07580
## 2472 36.89025 30.85864 42.92185 27.66570 46.11479
## 2473 36.89025 30.83326 42.94723 27.62688 46.15361
## 2474 36.89025 30.80798 42.97251 27.58822 46.19227
## 2475 36.89025 30.78281 42.99768 27.54973 46.23077
## 2476 36.89025 30.75774 43.02275 27.51139 46.26911
## 2477 36.89025 30.73277 43.04772 27.47320 46.30729
## 2478 36.89025 30.70790 43.07259 27.43517 46.34532
## 2479 36.89025 30.68314 43.09735 27.39729 46.38320
## 2480 36.89025 30.65847 43.12202 27.35957 46.42093
## 2481 36.89025 30.63390 43.14659 27.32199 46.45850
## 2482 36.89025 30.60942 43.17107 27.28456 46.49594
## 2483 36.89025 30.58504 43.19545 27.24727 46.53322
## 2484 36.89025 30.56076 43.21974 27.21013 46.57036
## 2485 36.89025 30.53656 43.24393 27.17313 46.60736
## 2486 36.89025 30.51246 43.26803 27.13627 46.64423
## 2487 36.89025 30.48845 43.29204 27.09954 46.68095
## 2488 36.89025 30.46453 43.31596 27.06296 46.71753
## 2489 36.89025 30.44070 43.33979 27.02651 46.75398
## 2490 36.89025 30.41695 43.36354 26.99020 46.79029
## 2491 36.89025 30.39329 43.38720 26.95402 46.82648
## 2492 36.89025 30.36972 43.41077 26.91797 46.86253
## 2493 36.89025 30.34623 43.43426 26.88204 46.89845
Now, let’s find the right model to forecast S04 Var02.
ggtsdisplay(dt_s04_v2_xts,main="S04 Var02",xlab="Date",ylab="Value")
dt_s04_v2_xts %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 1.3452
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
We see clear autocorrelation between variables in the ACF. Let’s take the first-order difference to see if our data can be made stationary for modeling.
ggtsdisplay(diff(dt_s04_v2_xts),main="S04 Diff Var02",xlab="Date",ylab="Diff Value")
## Warning: Removed 1 rows containing missing values (geom_point).
dt_s04_v2_xts %>% diff() %>% ur.kpss() %>% summary()
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 8 lags.
##
## Value of test-statistic is: 0.0035
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
This test statistic looks better, so we conclude that the differences data are stationary. We verify with ndiffs.
ndiffs(dt_s04_v2_xts)
## [1] 1
The S04 Var02 plots seem to indicate that ARIMA would be a good fit for this model.
aa_fit_dt_s04_v2 <- auto.arima(log(dt_s04_v2_xts))
summary(aa_fit_dt_s04_v2)
## Series: log(dt_s04_v2_xts)
## ARIMA(2,1,2)
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ma1 ma2
## 0.4107 0.1706 -0.7038 -0.2506
## s.e. NaN NaN NaN NaN
##
## sigma^2 estimated as 0.09486: log likelihood=-566.04
## AIC=1142.09 AICc=1142.11 BIC=1170.9
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001647831 0.3076678 0.2142562 -0.04139464 1.27564 0.9477997
## ACF1
## Training set -0.004640337
coef(aa_fit_dt_s04_v2)
## ar1 ar2 ma1 ma2
## 0.4107044 0.1705645 -0.7037685 -0.2506051
Auto arima did not give us drift.
checkresiduals(aa_fit_dt_s04_v2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,2)
## Q* = 11.363, df = 6, p-value = 0.07779
##
## Model df: 4. Total lags used: 10
The residuals are independent, which is what we want.
forecast(aa_fit_dt_s04_v2,h=140) %>% autoplot()
Fore_dt_s04_v2 <- forecast(aa_fit_dt_s04_v2,h=140)
autoplot(aa_fit_dt_s04_v2)
fc <- forecast(aa_fit_dt_s04_v2,h=140)
fc$mean<-exp(fc$mean)
fc$upper<-exp(fc$upper)
fc$lower<-exp(fc$lower)
fc$x<-exp(fc$x)
fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2354 7900872 5324194 11724549 4320252 14449107
## 2355 8993461 5546214 14583344 4294042 18835948
## 2356 9800806 5808754 16536387 4403741 21812319
## 2357 10379823 6027357 17875288 4520252 23835118
## 2358 10784402 6190209 18788272 4614021 25206501
## 2359 11062879 6304673 19412154 4681532 26142575
## 2360 11252471 6381694 19840830 4726573 26788568
## 2361 11380657 6431434 20138485 4754435 27241793
## 2362 11466912 6461943 20348379 4769868 27566814
## 2363 11524771 6479175 20499576 4776592 27806510
## 2364 11563501 6487364 20611538 4777336 27989357
## 2365 11589390 6489449 20697282 4774029 28134299
## 2366 11606679 6487441 20765507 4768006 28253952
## 2367 11618219 6482703 20822024 4760177 28356721
## 2368 11625917 6476154 20870712 4751157 28448214
## 2369 11631051 6468409 20914162 4741362 28532172
## 2370 11634475 6459880 20954105 4731067 28611094
## 2371 11636757 6450843 20991694 4720458 28686647
## 2372 11638279 6441481 21027701 4709660 28759944
## 2373 11639294 6431917 21062640 4698753 28831730
## 2374 11639970 6422232 21096856 4687792 28902500
## 2375 11640421 6412480 21130576 4676814 28972588
## 2376 11640722 6402697 21163955 4665842 29042215
## 2377 11640922 6392907 21197096 4654893 29111529
## 2378 11641056 6383126 21230065 4643977 29180630
## 2379 11641145 6373363 21262911 4633100 29249584
## 2380 11641204 6363626 21295662 4622267 29318436
## 2381 11641243 6353919 21328340 4611480 29387215
## 2382 11641270 6344246 21360958 4600741 29455942
## 2383 11641287 6334607 21393526 4590051 29524630
## 2384 11641299 6325004 21426050 4579411 29593289
## 2385 11641307 6315437 21458535 4568821 29661925
## 2386 11641312 6305907 21490983 4558281 29730541
## 2387 11641316 6296415 21523397 4547790 29799142
## 2388 11641318 6286959 21555777 4537348 29867729
## 2389 11641319 6277540 21588126 4526955 29936305
## 2390 11641320 6268157 21620444 4516612 30004869
## 2391 11641321 6258811 21652731 4506316 30073424
## 2392 11641322 6249502 21684988 4496069 30141969
## 2393 11641322 6240228 21717216 4485869 30210506
## 2394 11641322 6230990 21749415 4475717 30279036
## 2395 11641322 6221787 21781586 4465611 30347558
## 2396 11641322 6212619 21813727 4455552 30416073
## 2397 11641322 6203487 21845841 4445539 30484581
## 2398 11641322 6194389 21877927 4435572 30553084
## 2399 11641322 6185325 21909986 4425650 30621581
## 2400 11641323 6176296 21942017 4415773 30690072
## 2401 11641323 6167301 21974021 4405941 30758559
## 2402 11641323 6158339 22005998 4396153 30827041
## 2403 11641323 6149410 22037949 4386409 30895520
## 2404 11641323 6140515 22069874 4376709 30963994
## 2405 11641323 6131653 22101772 4367052 31032465
## 2406 11641323 6122823 22133645 4357438 31100933
## 2407 11641323 6114026 22165493 4347867 31169399
## 2408 11641323 6105261 22197315 4338338 31237862
## 2409 11641323 6096527 22229112 4328850 31306323
## 2410 11641323 6087826 22260884 4319405 31374783
## 2411 11641323 6079156 22292632 4310001 31443241
## 2412 11641323 6070518 22324355 4300637 31511698
## 2413 11641323 6061910 22356054 4291315 31580155
## 2414 11641323 6053333 22387729 4282033 31648611
## 2415 11641323 6044787 22419381 4272791 31717068
## 2416 11641323 6036272 22451009 4263588 31785525
## 2417 11641323 6027786 22482614 4254425 31853982
## 2418 11641323 6019331 22514196 4245302 31922440
## 2419 11641323 6010905 22545755 4236217 31990900
## 2420 11641323 6002509 22577291 4227171 32059361
## 2421 11641323 5994142 22608805 4218163 32127824
## 2422 11641323 5985805 22640296 4209193 32196289
## 2423 11641323 5977496 22671766 4200261 32264757
## 2424 11641323 5969216 22703213 4191366 32333228
## 2425 11641323 5960965 22734640 4182508 32401701
## 2426 11641323 5952742 22766044 4173688 32470178
## 2427 11641323 5944547 22797428 4164904 32538659
## 2428 11641323 5936381 22828790 4156157 32607143
## 2429 11641323 5928242 22860131 4147445 32675632
## 2430 11641323 5920131 22891452 4138770 32744125
## 2431 11641323 5912047 22922752 4130130 32812623
## 2432 11641323 5903991 22954032 4121525 32881126
## 2433 11641323 5895961 22985292 4112956 32949634
## 2434 11641323 5887959 23016532 4104421 33018147
## 2435 11641323 5879983 23047752 4095922 33086667
## 2436 11641323 5872034 23078952 4087456 33155192
## 2437 11641323 5864111 23110133 4079025 33223724
## 2438 11641323 5856215 23141295 4070627 33292262
## 2439 11641323 5848344 23172437 4062264 33360807
## 2440 11641323 5840500 23203561 4053933 33429360
## 2441 11641323 5832681 23234666 4045636 33497919
## 2442 11641323 5824888 23265752 4037372 33566486
## 2443 11641323 5817120 23296820 4029141 33635061
## 2444 11641323 5809377 23327870 4020942 33703644
## 2445 11641323 5801660 23358901 4012775 33772235
## 2446 11641323 5793967 23389914 4004641 33840835
## 2447 11641323 5786299 23420910 3996538 33909444
## 2448 11641323 5778656 23451888 3988467 33978061
## 2449 11641323 5771037 23482849 3980428 34046688
## 2450 11641323 5763443 23513792 3972420 34115324
## 2451 11641323 5755872 23544718 3964443 34183970
## 2452 11641323 5748326 23575627 3956496 34252625
## 2453 11641323 5740804 23606519 3948581 34321291
## 2454 11641323 5733305 23637394 3940696 34389967
## 2455 11641323 5725830 23668253 3932841 34458654
## 2456 11641323 5718378 23699095 3925016 34527351
## 2457 11641323 5710950 23729921 3917220 34596059
## 2458 11641323 5703545 23760731 3909455 34664779
## 2459 11641323 5696163 23791525 3901719 34733510
## 2460 11641323 5688803 23822302 3894012 34802252
## 2461 11641323 5681467 23853065 3886334 34871006
## 2462 11641323 5674153 23883811 3878686 34939773
## 2463 11641323 5666861 23914542 3871065 35008551
## 2464 11641323 5659592 23945258 3863474 35077342
## 2465 11641323 5652345 23975958 3855910 35146145
## 2466 11641323 5645120 24006643 3848375 35214962
## 2467 11641323 5637918 24037314 3840868 35283791
## 2468 11641323 5630736 24067969 3833389 35352633
## 2469 11641323 5623577 24098610 3825937 35421489
## 2470 11641323 5616439 24129236 3818513 35490359
## 2471 11641323 5609323 24159848 3811116 35559242
## 2472 11641323 5602228 24190445 3803746 35628139
## 2473 11641323 5595154 24221028 3796403 35697050
## 2474 11641323 5588102 24251598 3789087 35765975
## 2475 11641323 5581070 24282153 3781797 35834915
## 2476 11641323 5574059 24312694 3774534 35903870
## 2477 11641323 5567069 24343222 3767298 35972840
## 2478 11641323 5560099 24373736 3760087 36041824
## 2479 11641323 5553150 24404236 3752902 36110824
## 2480 11641323 5546222 24434723 3745743 36179839
## 2481 11641323 5539313 24465197 3738610 36248870
## 2482 11641323 5532425 24495658 3731502 36317916
## 2483 11641323 5525557 24526106 3724420 36386978
## 2484 11641323 5518709 24556540 3717363 36456057
## 2485 11641323 5511880 24586962 3710331 36525151
## 2486 11641323 5505071 24617372 3703323 36594262
## 2487 11641323 5498282 24647768 3696341 36663390
## 2488 11641323 5491513 24678152 3689383 36732534
## 2489 11641323 5484763 24708524 3682450 36801695
## 2490 11641323 5478032 24738884 3675541 36870874
## 2491 11641323 5471320 24769231 3668656 36940069
## 2492 11641323 5464627 24799567 3661795 37009282
## 2493 11641323 5457954 24829890 3654958 37078512
print(paste0("MAPE for S04 Var01 is ", MLmetrics::MAPE(Fore_dt_s04_v1$fitted,dt_s04_v1_xts)))
## [1] "MAPE for S04 Var01 is 0.00835505464453924"
print(paste0("MAPE for S04 Var02 is ", MLmetrics::MAPE(exp(Fore_dt_s04_v2$fitted),dt_s04_v2_xts)))
## [1] "MAPE for S04 Var02 is 0.21408917391016"
write.csv(fc,"s04v02.csv")