Series S02

Import the necessary libraries

knitr::opts_chunk$set(echo = TRUE)

library(fpp2)
## Warning: package 'fpp2' was built under R version 3.5.3
## Loading required package: ggplot2
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.5.3
## Loading required package: fma
## Warning: package 'fma' was built under R version 3.5.3
## Loading required package: expsmooth
## Warning: package 'expsmooth' was built under R version 3.5.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(imputeTS)
## Warning: package 'imputeTS' was built under R version 3.5.3
library(urca)
## Warning: package 'urca' was built under R version 3.5.3
library(MLmetrics)
## Warning: package 'MLmetrics' was built under R version 3.5.3
## 
## Attaching package: 'MLmetrics'
## The following object is masked from 'package:base':
## 
##     Recall

Load the dataset

data_project <- readxl::read_excel("./project1data/Data Set for class.xls")
head(data_project)

Series 02 Subset

S02 <- subset(data_project, group == 'S02', select = c(SeriesInd, Var02, Var03))
head(S02)

Exploratory Analysis of variables in Series S02

predictobs <- 1623:1762
S2 <- ts(S02[-predictobs, 2:3])

S2v1 <- ts(S02[-predictobs,2])
S2v2 <- ts(S02[-predictobs,3])


summary(S2v1)
##      Var02          
##  Min.   :  7128800  
##  1st Qu.: 27880300  
##  Median : 39767500  
##  Mean   : 50633098  
##  3rd Qu.: 59050900  
##  Max.   :480879500
summary(S2v2)
##      Var03      
##  Min.   : 8.82  
##  1st Qu.:11.82  
##  Median :13.76  
##  Mean   :13.68  
##  3rd Qu.:15.52  
##  Max.   :38.28  
##  NA's   :4

Var02 has no missing values and Var03 has 4 missing values.

Exploratory Analysis of Var02 in series 2

autoplot(S2v1)

hist(scale(S2v1),br=100,xlim=c(0,100) )

boxplot(S2v1)

Looking at above plots fixed distribution assumption do not hold true, as histogram is not be bell-shaped, and the normal probability plot is not linear. it is right skewed. Box plot show many outliers. May be after suppressing outlier’s distribution plot will improve and series smoothens.

Exploratory Analysis of Var03 in series 2

# interpreting NA values in our variable,
S2v2 <- na_interpolation(S2v2)  


autoplot(S2v2)

hist(scale(S2v2),br=100,xlim=c(0,100) )

boxplot(S2v2)

Looking at above plots fixed distribution assumption holds good for V03, as histogram is bell-shaped, and the normal probability plot is linear. It has one outliers.

Suppressing Outlier for var02

S02v2C <- tsclean(S2v1)

S02v3C <- S2v2

ACF of Var02 and Var03

ggAcf(S02v2C)

ggAcf(S02v3C)

ACF plots for var02 & var03 shows Trend with insignificant seasonality.

Applying Models

ETS model to Var02

fit_ets <- ets(S02v2C)
autoplot(fit_ets)

checkresiduals(fit_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,A,N)
## Q* = 75.32, df = 6, p-value = 3.297e-14
## 
## Model df: 4.   Total lags used: 10
fit_ets %>% forecast(h=140) %>%
  autoplot() +
  ylab("Forecast for Var02")

ETS model to Var03

#Var 03
fit_ets_V03 <- ets(S02v3C)
autoplot(fit_ets_V03)

checkresiduals(fit_ets_V03)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,N,N)
## Q* = 83.799, df = 8, p-value = 8.327e-15
## 
## Model df: 2.   Total lags used: 10
fit_ets_V03 %>% forecast(h=140) %>%
  autoplot() +
  ylab("Forecast for Var03")

Applying ARIMA Model to Var02.

arima_fit_v02 <- auto.arima(S02v2C)

summary(arima_fit_v02)
## Series: S02v2C 
## ARIMA(1,1,1) 
## 
## Coefficients:
##          ar1      ma1
##       0.5075  -0.9502
## s.e.  0.0283   0.0128
## 
## sigma^2 estimated as 2.166e+14:  log likelihood=-29053.65
## AIC=58113.31   AICc=58113.32   BIC=58129.48
## 
## Training set error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -421309.4 14704115 10777336 -8.965164 24.71449 0.9189302
##                      ACF1
## Training set -0.004111647
autoplot(forecast(arima_fit_v02, h=140))

checkresiduals(arima_fit_v02)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1)
## Q* = 10.552, df = 8, p-value = 0.2284
## 
## Model df: 2.   Total lags used: 10

The auto.arima results with ARIMA(1,0,1) model with no drift ,

Arima model for Var 03

arima_fit_v03 <- auto.arima(S02v3C)

summary(arima_fit_v03)
## Series: S02v3C 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.6687
## s.e.   0.0191
## 
## sigma^2 estimated as 0.6075:  log likelihood=-1895.94
## AIC=3795.87   AICc=3795.88   BIC=3806.66
## 
## Training set error measures:
##                       ME      RMSE       MAE       MPE    MAPE     MASE
## Training set 0.005200808 0.7789411 0.2901101 -0.102729 2.13786 1.383264
##                     ACF1
## Training set 0.006273859
autoplot(forecast(arima_fit_v03, h=140))

checkresiduals(arima_fit_v03)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)
## Q* = 1.8347, df = 9, p-value = 0.9938
## 
## Model df: 1.   Total lags used: 10

A portmanteau test returns a large p-value 0.9938, also suggesting that the residuals are white noise. The ACF plot of the residuals from the ARIMA(0,1,1) model shows that all autocorrelations are within the threshold limits, indicating that the residuals are behaving like white noise

MAPE Calculation:

print(paste0("Accuracy for Var 02"))
## [1] "Accuracy for Var 02"
print(paste0("MAPE for S02 Var02 using ETS model           :::   ",  MLmetrics::MAPE(fit_ets$fitted,S02v2C)))
## [1] "MAPE for S02 Var02 using ETS model           :::   0.279104207788701"
print(paste0("MAPE for S02 Var02 using Auto ARIMA model    :::   ",  MLmetrics::MAPE(arima_fit_v02$fitted,S02v2C)))
## [1] "MAPE for S02 Var02 using Auto ARIMA model    :::   0.247144896628619"
print(paste0("Accuracy for Var 03"))
## [1] "Accuracy for Var 03"
print(paste0("MAPE for S02 Var03 using ETS model           :::   ", MLmetrics::MAPE(fit_ets_V03$fitted,S02v3C)))
## [1] "MAPE for S02 Var03 using ETS model           :::   0.0156105029139132"
print(paste0("MAPE for S02 Var03 using Auto ARIMA model    :::   ", MLmetrics::MAPE(arima_fit_v03$fitted,S02v3C)))
## [1] "MAPE for S02 Var03 using Auto ARIMA model    :::   0.0213786043674216"

Looking at MAPE we are using ARIMA for forcast of Var03.

Also looking at the residuals for both models variables is having constant variance and normal distrubution and also residuals are uncorrelated with nearly zero mean.The mean of the residuals is close to zero and there is no significant correlation in the residuals series.

Writing forcast of V03 to csv

fc <- forecast(arima_fit_v03, h=140)
fc$mean<-fc$mean
fc$upper<-fc$upper
fc$lower<-fc$lower
fc$x<-fc$x

#fc

write.csv(fc,"s02v03.csv")

fc_V02 <- forecast(arima_fit_v02, h=140)
fc_V02$mean<-fc_V02$mean
fc_V02$upper<-fc_V02$upper
fc_V02$lower<-fc_V02$lower
fc_V02$x<-fc_V02$x

#fc_V02

write.csv(fc_V02,"s02v02.csv")