PROBLEM - GAS SERIES

library(car)
library(urca)
library(forecast)
library(tseries)
#library(FitAR)
library(ggfortify)
library(TSstudio)

Monthly gas production in Australia, 1956-1995 (the gas dataset from the forecast package).

data(gas)
ts_plot(gas)
ts_decompose(gas, type = "both")
#autoplot(gas)
#autoplot(stl(gas, s.window = 12))
summary(gas)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1646    2675   16788   21415   38628   66600

Data Transformation

z1 <- forecast::BoxCox.lambda(ts(gas))   # estimate the Box-Cox lambda
z1
## [1] -0.1204643
#FitAR::BoxCox.ts(gas)  ### lambda = 0.12
z1 <- forecast::BoxCox(gas, -0.1204643)  # z1 is reused: from here on it holds the transformed series
ts_plot(z1)
ts_decompose(z1)
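
For reference, the Box-Cox transformation applied by forecast::BoxCox to the strictly positive gas series, with the estimated \(\lambda \approx -0.12\), is

\[
z_t = \begin{cases} \dfrac{y_t^{\lambda}-1}{\lambda}, & \lambda \neq 0,\\[4pt] \log y_t, & \lambda = 0, \end{cases}
\]

and the small negative \(\lambda\) stabilizes the strong growth in variance visible in the raw series.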

Identification

par(mfrow=c(3,3))
plot(z1,type="o")                 # transformed series
acf(z1, lag.max=40)
pacf(z1, lag.max=40)
plot(diff(z1),type="o")           # one regular difference
abline(h=2*sqrt(var(diff(z1))),col="red",lty=2)
abline(h=-2*sqrt(var(diff(z1))),col="red",lty=2)
acf(diff(z1), lag.max=40)
pacf(diff(z1), lag.max=40)
plot(diff(z1,12),type="o")        # one seasonal difference (lag 12)
acf(diff(z1,12), lag.max=40)
pacf(diff(z1,12), lag.max=40)

DF and PP Tests on the Transformed Series (in Levels)

Dickey-Fuller

adf.test(z1) 
## Warning in adf.test(z1): p-value greater than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  z1
## Dickey-Fuller = -0.2816, Lag order = 7, p-value = 0.99
## alternative hypothesis: stationary
pp.test(z1)
## 
##  Phillips-Perron Unit Root Test
## 
## data:  z1
## Dickey-Fuller Z(alpha) = -24.673, Truncation lag parameter = 5, p-value
## = 0.02566
## alternative hypothesis: stationary
  • According to the augmented Dickey-Fuller test, the p-value is 0.99 (reported as greater than the printed value), so the null hypothesis of a unit root cannot be rejected and we conclude the series is not stationary. The Phillips-Perron test, in contrast, rejects the unit root at the 5% level (p = 0.026); a cross-check with urca::ur.df is sketched below.
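
Since urca is already loaded, the same conclusion can be cross-checked with ur.df, which reports the ADF statistic against tabulated critical values instead of an interpolated p-value. A minimal sketch; the deterministic term ("drift") and the BIC-based lag selection are illustrative choices, not part of the original analysis:

df1 <- urca::ur.df(z1, type = "drift", lags = 12, selectlags = "BIC")
summary(df1)  # compare the test statistic with the 1%/5%/10% critical values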

Dickey-Fuller with One Difference

adf.test(diff(z1))  # stationary
## Warning in adf.test(diff(z1)): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(z1)
## Dickey-Fuller = -17.266, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
pp.test(diff(z1))
## Warning in pp.test(diff(z1)): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff(z1)
## Dickey-Fuller Z(alpha) = -284.81, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

The results above lead us to conclude that the series with one regular (non-seasonal) difference is stationary.
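
As a quick cross-check, forecast::ndiffs estimates how many regular differences are needed; a minimal sketch (it uses a KPSS-type test by default, so its answer is indicative rather than definitive):

forecast::ndiffs(z1)  # estimated number of regular (non-seasonal) differences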

DF and PP on the Seasonally Differenced Series

adf.test(diff(z1,12)) 
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(z1, 12)
## Dickey-Fuller = -3.4699, Lag order = 7, p-value = 0.04535
## alternative hypothesis: stationary
pp.test(diff(z1,12))
## Warning in pp.test(diff(z1, 12)): p-value smaller than printed p-value
## 
##  Phillips-Perron Unit Root Test
## 
## data:  diff(z1, 12)
## Dickey-Fuller Z(alpha) = -37.479, Truncation lag parameter = 5, p-value
## = 0.01
## alternative hypothesis: stationary

The Dickey-Fuller and Phillips-Perron tests indicate that the series z1 with one seasonal difference is stationary.
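
Analogously, forecast::nsdiffs estimates the number of seasonal differences required; a minimal sketch (the default seasonal test of the installed forecast version is assumed):

forecast::nsdiffs(z1)  # estimated number of seasonal differences (period 12)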

adf.test(diff(diff(z1,12)))
## Warning in adf.test(diff(diff(z1, 12))): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  diff(diff(z1, 12))
## Dickey-Fuller = -7.7054, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
par(mfrow=c(4,3))
plot(z1,type="o")
acf(z1, lag.max=40)
pacf(z1, lag.max=40)
plot(diff(z1),type="o")
abline(h=2*sqrt(var(diff(z1))),col="red",lty=2)
abline(h=-2*sqrt(var(diff(z1))),col="red",lty=2)
acf(diff(z1), lag.max=40)
pacf(diff(z1), lag.max=40)
plot(diff(z1,12),type="o")
acf(diff(z1,12), lag.max=40)
pacf(diff(z1,12), lag.max=40)
plot(diff(diff(z1,12)),type="o")
abline(h=2*sqrt(var(diff(diff(z1,12)))),col="red",lty=2)
abline(h=-2*sqrt(var(diff(diff(z1,12)))),col="red",lty=2)
acf(diff(diff(z1,12)), lag.max=40)
pacf(diff(diff(z1,12)), lag.max=40)

ts_plot(diff(diff(z1,12)))
ts_cor(diff(diff(z1,12)), lag.max = 40)

Candidate models, all with one regular and one seasonal difference (period 12). BIC values and the lags left free in each subset fit are listed below; an automatic search is sketched after the table.

  Model                       BIC        Non-zero lags
  SARIMA(0,1,1)(0,1,1)[12]    -347.7133  MA: 1; SMA: 1
  SARIMA(0,1,5)(0,1,1)[12]    -329.9128  MA: 1-5; SMA: 1
  SARIMA(0,1,5)(0,1,1)[12]    -341.6644  MA: 1, 3, 5; SMA: 1
  SARIMA(0,1,5)(0,1,1)[12]    -345.9592  MA: 1, 5; SMA: 1
  SARIMA(0,1,11)(0,1,1)[12]   -344.8457  MA: 1, 5, 10, 11; SMA: 1
  SARIMA(0,1,11)(0,1,1)[12]   -349.3178  MA: 1, 5, 11; SMA: 1
  SARIMA(0,1,16)(0,1,1)[12]   -344.7575  MA: 1, 5, 11, 16; SMA: 1
  SARIMA(16,1,11)(0,1,1)[12]  -347.3572  AR: 16; MA: 1, 5, 11; SMA: 1
  SARIMA(16,1,19)(0,1,1)[12]  -342.4163  AR: 16; MA: 1, 5, 11, 18, 19; SMA: 1
  SARIMA(19,1,18)(0,1,1)[12]  -342.1835  AR: 16, 19; MA: 1, 5, 11, 18; SMA: 1
  SARIMA(16,1,18)(0,1,1)[12]  -347.9725  AR: 16; MA: 1, 5, 11, 18; SMA: 1
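
Before fitting by hand, forecast::auto.arima gives a useful baseline; a sketch only, since it searches full (non-subset) orders and will not reproduce the fixed-lag models in the table:

auto1 <- forecast::auto.arima(z1, d = 1, D = 1, seasonal = TRUE, stepwise = TRUE)
summary(auto1)  # compare its AIC/BIC against the candidates above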

Model Fitting

#modelo1<-stats::arima(z1,
                      #order=c(16,1,18), 
                      #seasonal=list(order=c(0,1,1),
                      #              period=12), 
                      #fixed=c(rep(0,15),NA,NA,0,0,0,NA,0,0,0,0,0,NA,0,0,0,0,0,0,NA,NA)) 
#modelo1

#tt <- modelo1$coef[which(modelo1$coef!=0)]/sqrt(diag(modelo1$var.coef))
#1 - pt(abs(tt),(modelo1$nobs - length(modelo1$coef[which(modelo1$coef!=0)])))
#BIC(modelo1)
modelo1<-stats::arima(z1,
                      order=c(11,1,0), 
                      seasonal=list(order=c(0,1,1),
                                    period=12), 
                      fixed=c(NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA))  # all 12 coefficients left free (equivalent to omitting fixed)
modelo1
## 
## Call:
## stats::arima(x = z1, order = c(11, 1, 0), seasonal = list(order = c(0, 1, 1), 
##     period = 12), fixed = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA))
## 
## Coefficients:
##           ar1      ar2      ar3      ar4     ar5      ar6      ar7      ar8
##       -0.2815  -0.0154  -0.0378  -0.0476  0.0422  -0.0408  -0.0227  -0.0684
## s.e.   0.0489   0.0495   0.0483   0.0474  0.0477   0.0500   0.0489   0.0480
##          ar9    ar10    ar11     sma1
##       0.0084  0.0976  0.1627  -0.7305
## s.e.  0.0478  0.0479  0.0462   0.0507
## 
## sigma^2 estimated as 0.0002642:  log likelihood = 1245.16,  aic = -2464.32
tt <- modelo1$coef[which(modelo1$coef!=0)]/sqrt(diag(modelo1$var.coef))  # approximate t statistics
1 - pt(abs(tt),(modelo1$nobs - length(modelo1$coef[which(modelo1$coef!=0)])))  # one-sided p-values
##          ar1          ar2          ar3          ar4          ar5          ar6 
## 7.901425e-09 3.776986e-01 2.169212e-01 1.578721e-01 1.884207e-01 2.071925e-01 
##          ar7          ar8          ar9         ar10         ar11         sma1 
## 3.210333e-01 7.741953e-02 4.303488e-01 2.096597e-02 2.390366e-04 0.000000e+00
BIC(modelo1)
## [1] -2410.531
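
Note that 1 - pt(abs(tt), df) yields one-sided p-values; a two-sided version, under the same approximate-normality assumption for the ML estimates, simply doubles them:

2*(1 - pt(abs(tt), modelo1$nobs - length(modelo1$coef[which(modelo1$coef != 0)])))  # two-sided p-values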
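
In the sign convention used by stats::arima, the fitted SARIMA(11,1,0)(0,1,1)[12] model for the transformed series \(z_t\) is

\[
\left(1 - \sum_{i=1}^{11} \phi_i B^{i}\right)(1 - B)(1 - B^{12})\, z_t = \left(1 + \Theta_1 B^{12}\right)\varepsilon_t,
\]

with \(\hat{\Theta}_1 = -0.7305\) and the \(\hat{\phi}_i\) as reported in the output above.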

Diagnostics

et<-residuals(modelo1)
x1.fit <- fitted(modelo1)
par(mfrow=c(3,2))
plot(z1,type="l",lty=2)                  # transformed series (dashed) with fitted values overlaid in red
lines(x1.fit,type="l",col="red")
plot(scale(et),type="l",main="Residuals")
abline(h=2*sqrt(var(scale(et))),col="red",lty=2)
abline(h=-2*sqrt(var(scale(et))),col="red",lty=2)
acf(et)
pacf(et)
qqPlot(scale(et))
## [1] 170 447
acf(abs(et))  # checks for heteroskedastic structure
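
Besides the ACF of the absolute residuals, a simple formal check for ARCH-type structure is a Ljung-Box test on the squared residuals (the McLeod-Li idea); a sketch, with the lag chosen to match the test below:

Box.test(et^2, lag = 20, type = "Ljung-Box")  # significant autocorrelation here would suggest conditional heteroskedasticity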

Ljung-Box Autocorrelation Test

\(H_0\): \(\rho_1=\rho_2=\cdots=\rho_{lag}=0\) (no residual autocorrelation up to the chosen lag)

\(H_a\): at least one \(\rho_k\) is different from zero

#autoplot(modelo1)

?tsdiag
tsdiag(modelo1, gof.lag=20)

#LBQPlot(et)

Box.test(et,lag=20,type="Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  et
## X-squared = 17.802, df = 20, p-value = 0.6005

With p = 0.6005 we do not reject \(H_0\): there is no evidence of residual autocorrelation up to lag 20.

Normality Test Based on Skewness and Kurtosis

\(H_0\): the data come from a normal distribution

\(H_a\): the data do not come from a normal distribution

jarque.bera.test(et)
## 
##  Jarque Bera Test
## 
## data:  et
## X-squared = 382.66, df = 2, p-value < 2.2e-16

With p < 2.2e-16, normality of the residuals is rejected, consistent with the extreme observations (170 and 447) flagged in the Q-Q plot.
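
As a complementary check, the Shapiro-Wilk test can be applied to the same residuals (a sketch; it applies here because the series has fewer than 5000 observations):

shapiro.test(as.numeric(et))  # Shapiro-Wilk normality test on the residuals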

Randomness Test (Runs Test)

\(H_0\): the residuals behave randomly

\(H_a\): the residuals exhibit structure (a trend or any other predictable behavior)

runs.test(as.factor(sign(et)), alternative="two.sided")
## 
##  Runs Test
## 
## data:  as.factor(sign(et))
## Standard Normal = 1.0195, p-value = 0.308
## alternative hypothesis: two.sided

With p = 0.308 we do not reject \(H_0\): the signs of the residuals show no evidence of non-random structure.

Out-of-Sample Forecast

plot(forecast(modelo1,h=12, fan=T))
lines(fitted(modelo1), col="red")

Inverse Box-Cox Transformation

lambda <- -0.1204643  # same lambda used for the forward Box-Cox transformation
predt <- forecast(modelo1,h=12)
z2inv <- forecast::InvBoxCox(predt$mean,lambda)
z2inv.li <- forecast::InvBoxCox(predt$lower[,2],lambda)  # lower bound, 95% CI
z2inv.ls <- forecast::InvBoxCox(predt$upper[,2],lambda)  # upper bound, 95% CI
z1.fit <- forecast::InvBoxCox(x1.fit,lambda)

plot(ts(c(gas,z2inv),start=c(1956,1),freq=12), type="l", col="blue", lwd=2, 
     main="Pronóstico h=12 Pasos al Frente Gas",
     xlab="Anual",
     ylab="", 
     ylim=c(min(gas,z2inv.li,z2inv.ls),max(gas,z2inv.li,z2inv.ls)))
polygon(c(time(z2inv.li),rev(time(z2inv.ls))),
        c(z2inv.li,rev(z2inv.ls)),
        col="gray", border=NA)
lines(z2inv, type="b", col="blue", lwd=2) 
lines(z1.fit, type="l", col="red", lty=2, lwd=3) 
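
An alternative to back-transforming by hand is to refit with forecast::Arima, which stores lambda so that forecast() returns point forecasts and intervals already on the original scale; a sketch (modelo1b is a new name introduced here, and biasadj = TRUE requests mean rather than median forecasts):

modelo1b <- forecast::Arima(gas, order = c(11, 1, 0),
                            seasonal = list(order = c(0, 1, 1), period = 12),
                            lambda = -0.1204643, biasadj = TRUE)
plot(forecast(modelo1b, h = 12))  # forecasts shown directly on the scale of gas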

z3<-ts(tail(z1.fit,n=10))  # last 10 fitted values on the original scale
plot(z3)

predt
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       6.071976 6.051145 6.092807 6.040117 6.103834
## Oct 1995       6.045016 6.019366 6.070666 6.005787 6.084244
## Nov 1995       6.022036 5.991647 6.052425 5.975560 6.068512
## Dec 1995       5.995130 5.961141 6.029119 5.943148 6.047112
## Jan 1996       5.988190 5.951144 6.025235 5.931533 6.044846
## Feb 1996       5.995504 5.955152 6.035856 5.933791 6.057218
## Mar 1996       6.009886 5.966998 6.052775 5.944294 6.075478
## Apr 1996       6.027967 5.982592 6.073342 5.958572 6.097362
## May 1996       6.074618 6.027322 6.121913 6.002285 6.146950
## Jun 1996       6.094886 6.045471 6.144301 6.019312 6.170460
## Jul 1996       6.107882 6.055905 6.159858 6.028390 6.187373
## Aug 1996       6.099517 6.044368 6.154666 6.015174 6.183860