knitr::opts_chunk$set(echo = TRUE)
library(fpp2)
## Warning: package 'fpp2' was built under R version 3.5.3
## Loading required package: ggplot2
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.5.3
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.3
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(imputeTS)
## Warning: package 'imputeTS' was built under R version 3.5.3
library(urca)
## Warning: package 'urca' was built under R version 3.5.3
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 3.5.3
##
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
##
## Recall
data_project <- readxl::read_excel("./project1data/Data Set for class.xls")
head(data_project)
S02 <- subset(data_project, group == 'S02', select = c(SeriesInd, Var02, Var03))
head(S02)
predictobs <- 1623:1762
S2 <- ts(S02[-predictobs, 2:3])
S2v1 <- ts(S02[-predictobs,2])
S2v2 <- ts(S02[-predictobs,3])
summary(S2v1)
## Var02
## Min. : 7128800
## 1st Qu.: 27880300
## Median : 39767500
## Mean : 50633098
## 3rd Qu.: 59050900
## Max. :480879500
summary(S2v2)
## Var03
## Min. : 8.82
## 1st Qu.:11.82
## Median :13.76
## Mean :13.68
## 3rd Qu.:15.52
## Max. :38.28
## NA's :4
Var02 has no missing values and Var03 has 4 missing values.
autoplot(S2v1)
hist(scale(S2v1),br=100,xlim=c(0,100) )
boxplot(S2v1)
Looking at above plots fixed distribution assumption do not hold true, as histogram is not be bell-shaped, and the normal probability plot is not linear. it is right skewed. Box plot show many outliers. May be after suppressing outlier’s distribution plot will improve and series smoothens.
# interpreting NA values in our variable,
S2v2 <- na_interpolation(S2v2)
autoplot(S2v2)
hist(scale(S2v2),br=100,xlim=c(0,100) )
boxplot(S2v2)
Looking at above plots fixed distribution assumption holds good for V03, as histogram is bell-shaped, and the normal probability plot is linear. It has one outliers.
S02v2C <- tsclean(S2v1)
S02v3C <- S2v2
ggAcf(S02v2C)
ggAcf(S02v3C)
ACF plots for var02 & var03 shows Trend with insignificant seasonality.
fit_ets <- ets(S02v2C)
autoplot(fit_ets)
checkresiduals(fit_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,A,N)
## Q* = 75.32, df = 6, p-value = 3.297e-14
##
## Model df: 4. Total lags used: 10
fit_ets %>% forecast(h=140) %>%
autoplot() +
ylab("Forecast for Var02")
#Var 03
fit_ets_V03 <- ets(S02v3C)
autoplot(fit_ets_V03)
checkresiduals(fit_ets_V03)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,N,N)
## Q* = 83.799, df = 8, p-value = 8.327e-15
##
## Model df: 2. Total lags used: 10
fit_ets_V03 %>% forecast(h=140) %>%
autoplot() +
ylab("Forecast for Var03")
arima_fit_v02 <- auto.arima(S02v2C)
summary(arima_fit_v02)
## Series: S02v2C
## ARIMA(1,1,1)
##
## Coefficients:
## ar1 ma1
## 0.5075 -0.9502
## s.e. 0.0283 0.0128
##
## sigma^2 estimated as 2.166e+14: log likelihood=-29053.65
## AIC=58113.31 AICc=58113.32 BIC=58129.48
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -421309.4 14704115 10777336 -8.965164 24.71449 0.9189302
## ACF1
## Training set -0.004111647
autoplot(forecast(arima_fit_v02, h=140))
checkresiduals(arima_fit_v02)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)
## Q* = 10.552, df = 8, p-value = 0.2284
##
## Model df: 2. Total lags used: 10
The auto.arima results with ARIMA(1,0,1) model with no drift ,
arima_fit_v03 <- auto.arima(S02v3C)
summary(arima_fit_v03)
## Series: S02v3C
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## -0.6687
## s.e. 0.0191
##
## sigma^2 estimated as 0.6075: log likelihood=-1895.94
## AIC=3795.87 AICc=3795.88 BIC=3806.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.005200808 0.7789411 0.2901101 -0.102729 2.13786 1.383264
## ACF1
## Training set 0.006273859
autoplot(forecast(arima_fit_v03, h=140))
checkresiduals(arima_fit_v03)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 1.8347, df = 9, p-value = 0.9938
##
## Model df: 1. Total lags used: 10
A portmanteau test returns a large p-value 0.9938, also suggesting that the residuals are white noise. The ACF plot of the residuals from the ARIMA(0,1,1) model shows that all autocorrelations are within the threshold limits, indicating that the residuals are behaving like white noise
print(paste0("Accuracy for Var 02"))
## [1] "Accuracy for Var 02"
print(paste0("MAPE for S02 Var02 using ETS model ::: ", MLmetrics::MAPE(fit_ets$fitted,S02v2C)))
## [1] "MAPE for S02 Var02 using ETS model ::: 0.279104207788701"
print(paste0("MAPE for S02 Var02 using Auto ARIMA model ::: ", MLmetrics::MAPE(arima_fit_v02$fitted,S02v2C)))
## [1] "MAPE for S02 Var02 using Auto ARIMA model ::: 0.247144896628619"
print(paste0("Accuracy for Var 03"))
## [1] "Accuracy for Var 03"
print(paste0("MAPE for S02 Var03 using ETS model ::: ", MLmetrics::MAPE(fit_ets_V03$fitted,S02v3C)))
## [1] "MAPE for S02 Var03 using ETS model ::: 0.0156105029139132"
print(paste0("MAPE for S02 Var03 using Auto ARIMA model ::: ", MLmetrics::MAPE(arima_fit_v03$fitted,S02v3C)))
## [1] "MAPE for S02 Var03 using Auto ARIMA model ::: 0.0213786043674216"
Looking at MAPE we are using ARIMA for forcast of Var03.
Also looking at the residuals for both models variables is having constant variance and normal distrubution and also residuals are uncorrelated with nearly zero mean.The mean of the residuals is close to zero and there is no significant correlation in the residuals series.
fc <- forecast(arima_fit_v03, h=140)
fc$mean<-fc$mean
fc$upper<-fc$upper
fc$lower<-fc$lower
fc$x<-fc$x
#fc
write.csv(fc,"s02v03.csv")
fc_V02 <- forecast(arima_fit_v02, h=140)
fc_V02$mean<-fc_V02$mean
fc_V02$upper<-fc_V02$upper
fc_V02$lower<-fc_V02$lower
fc_V02$x<-fc_V02$x
#fc_V02
write.csv(fc_V02,"s02v02.csv")