library(rdatamarket)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(forecast)
## Warning: package 'forecast' was built under R version 3.4.4
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.4
a. IMPORT AND CLEAN THE DATA SET
x <- ts(rdatamarket::dmseries("https://datamarket.com/data/set/24kw/monthly-average-residential-electricity-price#!ds=24kw!2r02=2&display=line")[, 1],
        start = c(1976, 7), frequency = 12)
# 90% of the data in train, the remaining 10% in test
train <- ts(x[1:427], frequency = 12)
test <- ts(x[428:474], frequency = 12)
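# The split above drops the calendar index, because x[1:427] returns a bare
# numeric vector. A minimal alternative sketch that keeps the dates via
# window(), assuming x is the ts object built above:
train2 <- window(x, end = time(x)[floor(0.9 * length(x))])        # first ~90%
test2 <- window(x, start = time(x)[floor(0.9 * length(x)) + 1])   # last ~10%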
# plot the data set
# There is a downward movement around year 26 of the series (2002), and the plot shows clear seasonality.
plot(train) # a seasonal series

# check stationarity and autocorrelation
# Start with a stationarity test: the ADF test below indicates the data are non-stationary, and there is autocorrelation in the data.
adf.test(train, alternative="stationary",k=12)
##
## Augmented Dickey-Fuller Test
##
## data: train
## Dickey-Fuller = -1.0106, Lag order = 12, p-value = 0.9365
## alternative hypothesis: stationary
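# The ADF and KPSS tests have opposite null hypotheses, so running both is a
# useful cross-check; a sketch with the already-loaded tseries package (here
# the null is level-stationarity, so a small p-value also points to a
# non-stationary series):
kpss.test(train, null = "Level")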
# first look at the ACF and PACF
acf(train) # the slowly decaying ACF confirms the series is non-stationary

pacf(train)

b. Identify my ARIMA MODEL
# I take the seasonal difference first to make the data stationary.
#first seasonal difference
diffx=diff(train,12)
plot(diffx)

acf(diffx)

pacf(diffx)

#first difference
diffx1=diff(diffx)
plot(diffx1)

acf(diffx1)

pacf(diffx1)
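
# Sanity check on the differencing orders chosen by eye: forecast's nsdiffs()
# estimates the number of seasonal differences and ndiffs() the number of
# regular differences; if both return 1, that supports the choice D = 1, d = 1.
nsdiffs(train)
ndiffs(diff(train, 12))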

# At this point the data look stationary after one regular difference (on top of the seasonal one), so d = 1.
# In the ACF and PACF there is a single significant spike at lag 1, and the ACF cuts off after it: an MA(1) signature, so p = 0, q = 1.
# For the seasonal terms, there is still a spike at the first seasonal lag after differencing, so P = 1, Q = 1.
fit1<-arima(train, order=c(0,1,1), seasonal =c(1,1,1))
fit1
##
## Call:
## arima(x = train, order = c(0, 1, 1), seasonal = c(1, 1, 1))
##
## Coefficients:
## ma1 sar1 sma1
## -0.2054 -0.0362 -0.7701
## s.e. 0.0563 0.0697 0.0466
##
## sigma^2 estimated as 0.03625: log likelihood = 93.51, aic = -179.02
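# As a cross-check on the hand-picked order, forecast's auto.arima() can run
# an automated search (a sketch; it may select a different model, and fit1
# above remains the model used for forecasting):
fit_auto <- auto.arima(train, seasonal = TRUE)
summary(fit_auto)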
tsdisplay(residuals(fit1))

c. check the residuals: white noise?
Box.test(fit1$residuals, type="Ljung-Box")
##
## Box-Ljung test
##
## data: fit1$residuals
## X-squared = 0.2912, df = 1, p-value = 0.5895
acf(fit1$residuals)

pacf(fit1$residuals)

checkresiduals(fit1)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(1,1,1)[12]
## Q* = 41.917, df = 21, p-value = 0.00431
##
## Model df: 3. Total lags used: 24
# In the ACF plot the majority of lags stay inside the blue significance bounds, which is consistent with white noise.
# The lag-1 Ljung-Box test gives p = 0.5895 > 0.05, so we fail to reject independence at short lags; note, however, that checkresiduals() tests 24 lags and does reject (p = 0.00431), so some autocorrelation remains at the longer (seasonal) lags.
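# The default Box.test() only uses lag = 1; repeating it at 24 lags with
# fitdf = 3 (the three estimated coefficients) reproduces the Q* statistic
# that checkresiduals() reported above:
Box.test(residuals(fit1), lag = 24, type = "Ljung-Box", fitdf = 3)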
d. forecasting using ARIMA for the next four years
fcastmyarima=forecast(fit1, h=48)
plot(fcastmyarima)

# the forecast looks reasonable: it follows both the trend and the seasonal pattern.
e. Identify my ETS model
# I first fit a "ZZZ" (fully automatic) model, then an "MAN" model, and compare their AICs; the "ZZZ" model has the lower AIC (1141.4 vs 1665.4), so I choose it.
ets1=ets(train,model="ZZZ")
ets1 # AIC = 1141.389
## ETS(M,Ad,M)
##
## Call:
## ets(y = train, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.7288
## beta = 0.0082
## gamma = 1e-04
## phi = 0.9765
##
## Initial states:
## l = 15.5366
## b = 0.0663
## s = 1.0415 1.0218 0.9928 0.9691 0.9458 0.9351
## 0.9537 0.9897 1.024 1.0373 1.0446 1.0447
##
## sigma: 0.0132
##
## AIC AICc BIC
## 1141.389 1143.065 1214.411
ets2=ets(train,model= "MAN")
ets2 # AIC = 1665.419
## ETS(M,A,N)
##
## Call:
## ets(y = train, model = "MAN")
##
## Smoothing parameters:
## alpha = 0.9618
## beta = 0.8467
##
## Initial states:
## l = 16.9091
## b = -0.6871
##
## sigma: 0.0247
##
## AIC AICc BIC
## 1665.419 1665.562 1685.703
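# The information criteria can also be pulled straight from the fitted
# objects (ets() stores them in the $aic component), avoiding transcription
# errors from the printouts:
c(ZZZ = ets1$aic, MAN = ets2$aic)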
f. check the residuals: white noise?
Box.test(ets1$residuals, type="Ljung-Box")
##
## Box-Ljung test
##
## data: ets1$residuals
## X-squared = 0.50233, df = 1, p-value = 0.4785
acf(ets1$residuals)

pacf(ets1$residuals)

checkresiduals(ets1)

##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 31.654, df = 7, p-value = 4.707e-05
##
## Model df: 17. Total lags used: 24
# Looking at the ACF, most lags are within the confidence bounds, which is consistent with white noise.
# The lag-1 Ljung-Box test gives p = 0.4785 > 0.05, so we fail to reject independence; as with the ARIMA fit, though, checkresiduals() at 24 lags does reject (p = 4.707e-05), so some autocorrelation remains.
g. ets forecasting
fcastmyets=forecast(ets1, h=48)
plot(fcastmyets)

# the forecast graph looks reasonable as well.
h. compare ets and arima
accarima <- accuracy(fcastmyarima, test[1:12]) # evaluate against the first 12 test months only (one year)
accets <- accuracy(fcastmyets, test[1:12])
# compare the two accuracy tables
accarima
## ME RMSE MAE MPE MAPE
## Training set -0.001199607 0.1874845 0.1362785 -0.0001672381 0.993814
## Test set -0.386116524 0.4564924 0.3861165 -3.1154627917 3.115463
## MASE ACF1
## Training set 0.4763421 0.02602294
## Test set 1.3496153 NA
accets
## ME RMSE MAE MPE MAPE
## Training set -0.01208133 0.1812985 0.1331199 -0.08749404 0.9644755
## Test set -0.40651829 0.5013798 0.4184103 -3.27018885 3.3695008
## MASE ACF1
## Training set 0.4653016 0.05261198
## Test set 1.4624937 NA
# Comparing ME, RMSE, MAE, MPE, MAPE and MASE on the test set, the ARIMA model has smaller errors on every measure, so the ARIMA model is better than the ETS model here.
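# A compact side-by-side comparison of the test-set errors; accuracy()
# returns a matrix with "Training set" and "Test set" rows, so the relevant
# row can be extracted directly:
rbind(ARIMA = accarima["Test set", c("RMSE", "MAE", "MAPE", "MASE")],
      ETS = accets["Test set", c("RMSE", "MAE", "MAPE", "MASE")])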