library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(forecast)
y <- read.csv('https://raw.githubusercontent.com/mariocastro73/ML2020-2021/master/datasets/elec-demand.csv')
y <- ts(y$x,start=1975,frequency = 12)
head(y,30)
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 1975  7621.422  7715.866  8767.733  7917.311  9046.280  9253.413  9875.043
## 1976  8376.159  7889.076  8612.186  8252.772  8849.933  9697.925 10411.592
## 1977  7843.597  8089.115  8452.316  8398.840  8950.342  9768.560          
##            Aug       Sep       Oct       Nov       Dec
## 1975  9580.643  8889.871  8486.199  8433.107  8211.550
## 1976  9618.656  9424.734  8729.287  8676.467  8359.750
## 1977
ggtsdisplay(y) # Seasonal differencing is mandatory

# ggtsdisplay(y,lag=200)# Let's do a zoom
BoxCox.lambda(y) # Close to 1, so do nothing
## [1] 0.9760307
library(tseries)
adf.test(y)
## Warning in adf.test(y): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y
## Dickey-Fuller = -6.1676, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
ndiffs(y)
## [1] 1
y.sdiff <- diff(y, lag = 12, differences = 1)
ggtsdisplay(y.sdiff)# Requires regular differencing

ndiffs(y.sdiff)
## [1] 1
y.rdiff <- diff(y, lag = 1, differences = 1)
ggtsdisplay(y.rdiff,lag=48) # Requires seasonal differencing

y.rdiff.sdiff <- diff(y.rdiff, lag = 12, differences = 1) 
ggtsdisplay(y.rdiff.sdiff) # Sweet!

### Part II

# Fitting
arima.fit <- Arima(y, order=c(0,1,0),
                   seasonal = list(order=c(0,1,1),period=12),
                   lambda = NULL,
                   include.constant = TRUE)
arima.fit
## Series: y 
## ARIMA(0,1,0)(0,1,1)[12] 
## 
## Coefficients:
##          sma1
##       -0.2272
## s.e.   0.0974
## 
## sigma^2 estimated as 165911:  log likelihood=-809.52
## AIC=1623.05   AICc=1623.16   BIC=1628.43
ggtsdisplay(arima.fit$residuals)

ggtsdisplay(arima.fit$residuals)

ggtsdisplay(arima.fit$residuals,lag=36)

arima.fit2 <- Arima(y, order=c(0,1,1),
                   seasonal = list(order=c(0,1,1),period=12),
                   lambda = NULL, 
                   include.constant = TRUE)
AIC(arima.fit,arima.fit2)
ggtsdisplay(arima.fit2$residuals)

ggtsdisplay(arima.fit2$residuals,lag=80)

autoplot(arima.fit)

autoplot(arima.fit2)

summary(arima.fit)
## Series: y 
## ARIMA(0,1,0)(0,1,1)[12] 
## 
## Coefficients:
##          sma1
##       -0.2272
## s.e.   0.0974
## 
## sigma^2 estimated as 165911:  log likelihood=-809.52
## AIC=1623.05   AICc=1623.16   BIC=1628.43
## 
## Training set error measures:
##                     ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -4.850103 383.2384 291.5163 -0.1083654 2.780272 0.5715697
##                    ACF1
## Training set -0.5854456
summary(arima.fit2)
## Series: y 
## ARIMA(0,1,1)(0,1,1)[12] 
## 
## Coefficients:
##           ma1     sma1
##       -0.7636  -0.2656
## s.e.   0.0705   0.0955
## 
## sigma^2 estimated as 98515:  log likelihood=-781.18
## AIC=1568.35   AICc=1568.58   BIC=1576.43
## 
## Training set error measures:
##                     ME     RMSE      MAE         MPE     MAPE      MASE
## Training set 0.7373329 293.9437 218.1653 -0.04179531 2.063443 0.4277518
##                     ACF1
## Training set -0.06099511
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
coeftest(arima.fit)
## 
## z test of coefficients:
## 
##       Estimate Std. Error z value Pr(>|z|)  
## sma1 -0.227228   0.097426 -2.3323  0.01968 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
coeftest(arima.fit2)
## 
## z test of coefficients:
## 
##       Estimate Std. Error  z value  Pr(>|z|)    
## ma1  -0.763564   0.070543 -10.8240 < 2.2e-16 ***
## sma1 -0.265649   0.095497  -2.7818  0.005407 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
autoplot(y) +
  autolayer(arima.fit2$fitted,series="Fit")

library(ggplot2)
df <- data.frame(y=y,x=arima.fit2$fitted)
ggplot(df,aes(x=x,y=y)) + geom_point() + 
  geom_smooth(method='lm',formula=y~x)
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.
## Don't know how to automatically pick scale for object of type ts. Defaulting to continuous.

Box.test(arima.fit2$residuals)
## 
##  Box-Pierce test
## 
## data:  arima.fit2$residuals
## X-squared = 0.45389, df = 1, p-value = 0.5005
forecast(arima.fit2,h=12)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Mar 1985       12628.04 12225.80 13030.28 12012.86 13243.22
## Apr 1985       12835.13 12421.80 13248.46 12202.99 13467.27
## May 1985       12836.46 12412.33 13260.60 12187.81 13485.12
## Jun 1985       13277.09 12842.42 13711.75 12612.32 13941.85
## Jul 1985       13856.32 13411.37 14301.26 13175.83 14536.81
## Aug 1985       14433.55 13978.55 14888.54 13737.69 15129.41
## Sep 1985       14640.12 14175.29 15104.95 13929.22 15351.02
## Oct 1985       14083.95 13609.49 14558.41 13358.32 14809.57
## Nov 1985       13047.59 12563.69 13531.49 12307.53 13787.65
## Dec 1985       13121.34 12628.18 13614.49 12367.12 13875.55
## Jan 1986       12812.97 12310.72 13315.21 12044.85 13581.08
## Feb 1986       13040.43 12529.26 13551.60 12258.66 13822.19