library(rdatamarket)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(forecast)
## Warning: package 'forecast' was built under R version 3.4.4
library(tseries)
## Warning: package 'tseries' was built under R version 3.4.4

a. IMPORT AND CLEAN THE DATA SET

x<-ts(rdatamarket::dmseries("https://datamarket.com/data/set/24kw/monthly-average-residential-electricity-price#!ds=24kw!2r02=2&display=line
")[,1],start(1976,07), frequency=12)
#90% of the data go in the training set, the remaining 10% in the test set
train <- ts(x[1:427], frequency = 12)
test <- ts(x[428:474], frequency = 12)
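An equivalent split can be written with window(), which keeps the original calendar dates on both pieces. This is only a sketch and assumes the series really does start in July 1976 with 474 monthly observations, so that observation 427 falls in January 2012.

train2 <- window(x, end = c(2012, 1))    #observations 1..427 (through Jan 2012)
test2 <- window(x, start = c(2012, 2))   #observations 428..474 (Feb 2012 onward)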

plot the data set

#There is a downward stretch around the 26th year of the series (calendar year 2002), and the plot shows a clear seasonal pattern.
plot(train) #the seasonal pattern repeats every 12 months
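To see the seasonality more directly, the observations can be grouped by calendar month; a minimal sketch using base R's monthplot() and seasonplot() from the forecast package:

monthplot(train)   #one sub-series per month; a stable month-to-month profile indicates seasonality
seasonplot(train)  #year-on-year overlay of the 12-month pattern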

#check stationarity and autocorrelation
#the ADF test below indicates the data are non-stationary, and the ACF shows strong autocorrelation
adf.test(train, alternative="stationary",k=12)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  train
## Dickey-Fuller = -1.0106, Lag order = 12, p-value = 0.9365
## alternative hypothesis: stationary
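As a cross-check on the ADF result, the KPSS test (also in tseries) reverses the hypotheses: its null is stationarity, so a small p-value would again point to a non-stationary series. A minimal sketch:

kpss.test(train, null = "Level")   #small p-value => reject stationarity, agreeing with the ADF conclusion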
#first look at the ACF and PACF
acf(train) #the slowly decaying ACF confirms the series is non-stationary

pacf(train)

b. Identify my ARIMA MODEL

#take a seasonal difference to move the data toward stationarity
#first seasonal difference (lag 12)
diffx = diff(train, lag = 12)
plot(diffx)

acf(diffx)

pacf(diffx)

#first difference 
diffx1=diff(diffx)
plot(diffx1)

acf(diffx1)

pacf(diffx1)
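The differencing choices can be sanity-checked with the forecast package: nsdiffs() estimates how many seasonal differences are needed and ndiffs() how many regular differences. A sketch (the exact answers depend on the unit-root and seasonal-strength tests these functions use):

nsdiffs(train)                 #expected to suggest D = 1 seasonal difference
ndiffs(diff(train, lag = 12))  #regular differences needed after the seasonal one; expected d = 1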

#after one regular difference on top of the seasonal difference the data look stationary, so d = 1
#the ACF and PACF each show a single significant spike at lag 1, an MA(1) signature, so p = 0 and q = 1
#for the seasonal part, after differencing there is a spike at the first seasonal lag in both plots, so P = 1 and Q = 1
fit1<-arima(train, order=c(0,1,1), seasonal =c(1,1,1))
fit1
## 
## Call:
## arima(x = train, order = c(0, 1, 1), seasonal = c(1, 1, 1))
## 
## Coefficients:
##           ma1     sar1     sma1
##       -0.2054  -0.0362  -0.7701
## s.e.   0.0563   0.0697   0.0466
## 
## sigma^2 estimated as 0.03625:  log likelihood = 93.51,  aic = -179.02
tsdisplay(residuals(fit1))
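As a sanity check on the hand-picked order, auto.arima() from the forecast package searches over orders automatically. It will not necessarily return ARIMA(0,1,1)(1,1,1)[12], but a model in the same neighbourhood would support the identification above; a minimal sketch:

fitauto <- auto.arima(train)   #automatic order selection (stepwise search by default)
fitauto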

c. check residuals, white noise?
Box.test(fit1$residuals, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  fit1$residuals
## X-squared = 0.2912, df = 1, p-value = 0.5895
acf(fit1$residuals)

pacf(fit1$residuals)

checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)(1,1,1)[12]
## Q* = 41.917, df = 21, p-value = 0.00431
## 
## Model df: 3.   Total lags used: 24
#in the ACF plot most lags stay inside the blue confidence bounds, which is consistent with white noise
#the Ljung-Box test at the default lag gives p = 0.5895 > 0.05, so we fail to reject the null of no autocorrelation; at short lags the residuals look like white noise
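Note that Box.test() with its default lag = 1 only examines the first autocorrelation, while checkresiduals() above used 24 lags (removing 3 degrees of freedom for the estimated coefficients) and rejected with p = 0.00431, so some autocorrelation remains at longer, seasonal lags. That longer test can be reproduced directly:

Box.test(residuals(fit1), lag = 24, type = "Ljung-Box", fitdf = 3)  #should match the Q* = 41.9, p = 0.004 result above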

d. forecasting with ARIMA for the next four years

fcastmyarima=forecast(fit1, h=48)
plot(fcastmyarima)

#the forecast looks reasonable: it follows the trend and the seasonal pattern of the series
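To judge the forecasts against the held-out data, the test observations can be overlaid on the forecast plot. A sketch, assuming the split above (train was built with the default time index, so its 427 observations end at period 7 of "year" 36 and the test set starts at period 8):

plot(fcastmyarima)
lines(ts(as.numeric(test), start = c(36, 8), frequency = 12), col = "red")  #held-out observations in red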

e. Identify my ETS model

#first I try "ZZZ" model, then, I try "MAN"model, to compare their aic, and model"ZZZ"has lower aic, so I choose to use "ZZZ" model
ets1=ets(train,model="ZZZ")
ets1#aic=1141.389
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = train, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.7288 
##     beta  = 0.0082 
##     gamma = 1e-04 
##     phi   = 0.9765 
## 
##   Initial states:
##     l = 15.5366 
##     b = 0.0663 
##     s = 1.0415 1.0218 0.9928 0.9691 0.9458 0.9351
##            0.9537 0.9897 1.024 1.0373 1.0446 1.0447
## 
##   sigma:  0.0132
## 
##      AIC     AICc      BIC 
## 1141.389 1143.065 1214.411
ets2=ets(train,model= "MAN")
ets2 #aic=1665.419
## ETS(M,A,N) 
## 
## Call:
##  ets(y = train, model = "MAN") 
## 
##   Smoothing parameters:
##     alpha = 0.9618 
##     beta  = 0.8467 
## 
##   Initial states:
##     l = 16.9091 
##     b = -0.6871 
## 
##   sigma:  0.0247
## 
##      AIC     AICc      BIC 
## 1665.419 1665.562 1685.703
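The AIC comparison can be made explicit by pulling the criterion out of each fitted object (ets objects store it in the aic component):

c(ZZZ = ets1$aic, MAN = ets2$aic)   #the automatic ETS(M,Ad,M) fit has the lower AIC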

f. check residuals white noise?

Box.test(ets1$residuals, type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  ets1$residuals
## X-squared = 0.50233, df = 1, p-value = 0.4785
acf(ets1$residuals)

pacf(ets1$residuals)

checkresiduals(ets1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,M)
## Q* = 31.654, df = 7, p-value = 4.707e-05
## 
## Model df: 17.   Total lags used: 24
#in the ACF plot most lags fall within the confidence bounds, so the residuals look like white noise
#the Ljung-Box test at the default lag gives p = 0.4785 > 0.05, so again we fail to reject; at short lags the residuals are consistent with white noise
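As with the ARIMA model, the 24-lag Ljung-Box test used by checkresiduals() (here removing 17 degrees of freedom for the fitted ETS parameters) rejects with p = 4.707e-05, so some autocorrelation remains at longer lags. That test can be reproduced as:

Box.test(residuals(ets1), lag = 24, type = "Ljung-Box", fitdf = 17)  #should match the Q* = 31.7 result above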

g. ets forecasting

fcastmyets=forecast(ets1, h=48)
plot(fcastmyets)

#the ETS forecast also tracks the trend and seasonality of the series

h. compare ets and arima

accarima=accuracy(fcastmyarima,test[1:12])
accets=accuracy(fcastmyets,test[1:12])
#compare the two models' accuracy; note that only the first 12 months of the test set are used here
accarima
##                        ME      RMSE       MAE           MPE     MAPE
## Training set -0.001199607 0.1874845 0.1362785 -0.0001672381 0.993814
## Test set     -0.386116524 0.4564924 0.3861165 -3.1154627917 3.115463
##                   MASE       ACF1
## Training set 0.4763421 0.02602294
## Test set     1.3496153         NA
accets
##                       ME      RMSE       MAE         MPE      MAPE
## Training set -0.01208133 0.1812985 0.1331199 -0.08749404 0.9644755
## Test set     -0.40651829 0.5013798 0.4184103 -3.27018885 3.3695008
##                   MASE       ACF1
## Training set 0.4653016 0.05261198
## Test set     1.4624937         NA
#comparing ME, RMSE, MAE, MPE, MAPE, and MASE on the test set, the ARIMA model has the smaller errors, so the ARIMA model forecasts better than the ETS model on this hold-out period
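A compact side-by-side view of the test-set errors makes the comparison explicit (accuracy() returns a matrix indexed by row and column names):

rbind(ARIMA = accarima["Test set", c("RMSE", "MAE", "MAPE", "MASE")],
      ETS = accets["Test set", c("RMSE", "MAE", "MAPE", "MASE")])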