library(readxl)
library(tibble)
library(tsibble)
library(zoo)
library(timetk)
library(ggplot2)
library(kableExtra)
library(DT)
library(forecast)
library(MASS)
library(lubridate)
library(VGAM)
library(car)
library(tidyverse)
library(feasts)
library(fable)
library(astsa)
library(nonlinearTseries)
library(tseriesChaos)
library(fabletools)
library(TSA)
library(plotly)
library(greybox)
library(readr)
library(TSstudio)
library(dplyr)
library(knitr)
library(lme4)
library(urca)
library(tseries)
library(lmtest)
library(uroot)
library(fUnitRoots)
library(aTSA)
library(stringr)
library(tsoutliers)
tsdf <- ts(ivan$KMS, start = c(2018, 52.18), frequency = 52.18)  # weekly series, 52.18 weeks per year

We begin by exploring the autocorrelation structure to check for independence, via the simple and partial ACF.

ancho= sqrt(268)  # maximum lag, ~ sqrt(n) with n = 268 observations

acf(tsdf, lag.max = ancho)                  # simple ACF

acf(tsdf, lag.max = ancho, ci.type = 'ma')  # ACF with MA-adjusted confidence bands

pacf(tsdf, lag.max = ancho)                 # partial ACF

ar(tsdf)
## 
## Call:
## ar(x = tsdf)
## 
## Coefficients:
##      1       2  
## 0.3189  0.1103  
## 
## Order selected 2  sigma^2 estimated as  2113

The first plot shows that the observations are not independent; the second suggests MA(2) as a candidate model, and the third an AR(1). However, ar(tsdf) selects AR(2) as a candidate, so a maximal candidate model is ARMA(2,2).
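
As a compact alternative (a minimal sketch using the already loaded astsa package), acf2 draws both correlograms in a single panel, which makes reading p and q jointly easier:

astsa::acf2(tsdf, max.lag = floor(ancho))  # ACF and PACF in one figure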

Dickey-Fuller test

# Stationarity check
# Run the augmented Dickey-Fuller (ADF) test to verify whether the series is stationary
aTSA::adf.test(tsdf, nlag=3)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 -3.33  0.0100
## [2,]   1 -2.17  0.0306
## [3,]   2 -1.86  0.0643
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -11.31    0.01
## [2,]   1  -8.29    0.01
## [3,]   2  -7.74    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -11.46    0.01
## [2,]   1  -8.44    0.01
## [3,]   2  -7.92    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01

The test indicates the presence of a unit root in the specification with no drift and no trend (at lag 2, p = 0.064, we cannot reject the null at the 5% level).

summary(ur.df(tsdf,type ="none", lags=2))
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -120.148  -33.336    9.745   42.027  154.758 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -0.04329    0.02333  -1.856   0.0646 .  
## z.diff.lag1 -0.43002    0.06244  -6.887 4.22e-11 ***
## z.diff.lag2 -0.14079    0.06103  -2.307   0.0218 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 50.3 on 262 degrees of freedom
## Multiple R-squared:  0.1888, Adjusted R-squared:  0.1795 
## F-statistic: 20.33 on 3 and 262 DF,  p-value: 7.125e-12
## 
## 
## Value of test-statistic is: -1.8555 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62
summary(ur.df(tsdf,type ="drift", lags=2))
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression drift 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -116.24  -33.58    3.11   36.34  126.03 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 76.74498   10.27513   7.469 1.21e-12 ***
## z.lag.1     -0.60072    0.07759  -7.742 2.16e-13 ***
## z.diff.lag1 -0.07548    0.07401  -1.020    0.309    
## z.diff.lag2  0.05345    0.06129   0.872    0.384    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 45.74 on 261 degrees of freedom
## Multiple R-squared:  0.3317, Adjusted R-squared:  0.324 
## F-statistic: 43.17 on 3 and 261 DF,  p-value: < 2.2e-16
## 
## 
## Value of test-statistic is: -7.7423 29.9745 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1  6.47  4.61  3.79

From this test we note that the order-2 lag is significant.

Since a unit root was detected, we difference the series and check whether further differencing is needed.

divan <- diff(tsdf)  # first regular difference
plot(tsdf)

plot(divan)

The series does not need any further differencing.
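
To corroborate this (a minimal sketch; forecast is already loaded), ndiffs estimates the number of regular differences required:

forecast::ndiffs(tsdf)   # should suggest 1 difference for the original series
forecast::ndiffs(divan)  # and 0 for the differenced series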

pacf(divan,lag.max =  ancho)

acf(divan,lag.max =  ancho)

ar(divan)
## 
## Call:
## ar(x = divan)
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
## -0.5481  -0.3506  -0.3549  -0.2439  -0.2265  -0.2054  -0.1649  -0.1563  
## 
## Order selected 8  sigma^2 estimated as  2366
aTSA::adf.test(divan, nlag=9)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##       lag    ADF p.value
##  [1,]   0 -24.72    0.01
##  [2,]   1 -15.93    0.01
##  [3,]   2 -14.17    0.01
##  [4,]   3 -11.98    0.01
##  [5,]   4 -10.57    0.01
##  [6,]   5  -9.98    0.01
##  [7,]   6  -9.39    0.01
##  [8,]   7  -9.44    0.01
##  [9,]   8  -9.02    0.01
## Type 2: with drift no trend 
##       lag    ADF p.value
##  [1,]   0 -24.68    0.01
##  [2,]   1 -15.90    0.01
##  [3,]   2 -14.15    0.01
##  [4,]   3 -11.96    0.01
##  [5,]   4 -10.55    0.01
##  [6,]   5  -9.96    0.01
##  [7,]   6  -9.37    0.01
##  [8,]   7  -9.42    0.01
##  [9,]   8  -9.00    0.01
## Type 3: with drift and trend 
##       lag    ADF p.value
##  [1,]   0 -24.63    0.01
##  [2,]   1 -15.87    0.01
##  [3,]   2 -14.12    0.01
##  [4,]   3 -11.93    0.01
##  [5,]   4 -10.53    0.01
##  [6,]   5  -9.95    0.01
##  [7,]   6  -9.35    0.01
##  [8,]   7  -9.41    0.01
##  [9,]   8  -8.99    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
summary(ur.df(tsdf,type ="none", lags=8))
## 
## ############################################### 
## # Augmented Dickey-Fuller Test Unit Root Test # 
## ############################################### 
## 
## Test regression none 
## 
## 
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -110.693  -32.679    8.702   36.552  142.982 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## z.lag.1     -0.01506    0.02324  -0.648 0.517563    
## z.diff.lag1 -0.56692    0.06534  -8.677 5.30e-16 ***
## z.diff.lag2 -0.38311    0.07356  -5.208 3.99e-07 ***
## z.diff.lag3 -0.39379    0.07600  -5.182 4.53e-07 ***
## z.diff.lag4 -0.28348    0.07805  -3.632 0.000341 ***
## z.diff.lag5 -0.25985    0.07776  -3.342 0.000960 ***
## z.diff.lag6 -0.23163    0.07468  -3.102 0.002144 ** 
## z.diff.lag7 -0.17793    0.07148  -2.489 0.013446 *  
## z.diff.lag8 -0.16652    0.06220  -2.677 0.007919 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 48.15 on 250 degrees of freedom
## Multiple R-squared:  0.2811, Adjusted R-squared:  0.2552 
## F-statistic: 10.86 on 9 and 250 DF,  p-value: 3.089e-14
## 
## 
## Value of test-statistic is: -0.648 
## 
## Critical values for test statistics: 
##       1pct  5pct 10pct
## tau1 -2.58 -1.95 -1.62

We see that the lag-8 term is significant and, from the Dickey-Fuller test on the differenced series, that there is no unit root. Hence no further differencing is needed and the trend has been stabilized.

monthplot(divan)

spectrum(divan)

The corresponding period is approximately 16.875 weeks. This potentially indicates a seasonal component, which could be modeled via Fourier terms or dummies.
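
The dominant period can also be read off the periodogram object directly (a minimal sketch; for this ts the freq axis is in cycles per year, so dividing the sampling frequency by the peak frequency gives the period in weeks):

sp <- spectrum(divan, plot = FALSE)             # periodogram, without the plot
frequency(divan) / sp$freq[which.max(sp$spec)]  # dominant period in weeks (~17)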

Let us now examine the orders p and q.

acf(divan,lag.max =  ancho,ci.type="ma")

pacf(divan,lag.max =  ancho)

This suggests lag 1 for the MA part and order 8 for the AR part; as a broad candidate we take ARMA(1,8) for the differenced series.

modelo.automatico1 = auto.arima(divan, d = 0, D = 0, max.p = 2, max.q = 16,
                                start.p = 0, start.q = 0, seasonal = FALSE,
                                max.order = 12, stationary = TRUE, ic = "aicc",
                                stepwise = FALSE, allowmean = TRUE)
modelo.automatico1
## Series: divan 
## ARIMA(0,0,3) with zero mean 
## 
## Coefficients:
##           ma1      ma2      ma3
##       -0.6610  -0.1144  -0.2025
## s.e.   0.0609   0.0712   0.0563
## 
## sigma^2 = 2124:  log likelihood = -1401.55
## AIC=2811.11   AICc=2811.26   BIC=2825.46

The automatic search suggests an ARMA(0,3) with zero mean for the differenced series.

Model Fitting

### a model fitted following the automatic search
AjusteArimaAut=forecast::Arima(tsdf,order = c(0,1,3),lambda=NULL,include.drift = FALSE)
summary(AjusteArimaAut)
## Series: tsdf 
## ARIMA(0,1,3) 
## 
## Coefficients:
##           ma1      ma2      ma3
##       -0.6610  -0.1144  -0.2025
## s.e.   0.0609   0.0712   0.0563
## 
## sigma^2 = 2124:  log likelihood = -1401.55
## AIC=2811.11   AICc=2811.26   BIC=2825.46
## 
## Training set error measures:
##                    ME     RMSE     MAE       MPE     MAPE      MASE        ACF1
## Training set 4.197052 45.74429 38.6338 -81393.94 81419.37 0.7517525 0.001854678
coeftest(AjusteArimaAut)
## 
## z test of coefficients:
## 
##      Estimate Std. Error  z value  Pr(>|z|)    
## ma1 -0.661000   0.060866 -10.8599 < 2.2e-16 ***
## ma2 -0.114407   0.071200  -1.6068 0.1080912    
## ma3 -0.202549   0.056265  -3.5999 0.0003183 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

We can see that ma2 is not significant, so we correct for this:

AjusteArimaAut=forecast::Arima(tsdf,order = c(0,1,3),lambda=NULL,include.drift = FALSE, fixed=c(NA,0,NA))
summary(AjusteArimaAut)
## Series: tsdf 
## ARIMA(0,1,3) 
## 
## Coefficients:
##           ma1  ma2      ma3
##       -0.7256    0  -0.2508
## s.e.   0.0456    0   0.0445
## 
## sigma^2 = 2136:  log likelihood = -1402.76
## AIC=2811.51   AICc=2811.6   BIC=2822.27
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 4.126033 45.95397 38.76943 -83155.48 83180.89 0.7543917 0.06114132
coeftest(AjusteArimaAut)
## 
## z test of coefficients:
## 
##      Estimate Std. Error  z value  Pr(>|z|)    
## ma1 -0.725638   0.045590 -15.9166 < 2.2e-16 ***
## ma3 -0.250775   0.044547  -5.6295 1.808e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### a model with the hyperparameters we identified
AjusteArima=forecast::Arima(tsdf,order = c(1,1,8),lambda = NULL,include.drift = FALSE)
summary(AjusteArima)
## Series: tsdf 
## ARIMA(1,1,8) 
## 
## Coefficients:
##          ar1      ma1     ma2      ma3     ma4      ma5     ma6     ma7     ma8
##       0.5837  -1.2458  0.2951  -0.1187  0.1314  -0.1054  0.0192  0.0027  0.0303
## s.e.  1.0640   1.0550  0.6976   0.1365  0.2132   0.1129  0.1369  0.1079  0.0813
## 
## sigma^2 = 2156:  log likelihood = -1400.49
## AIC=2820.98   AICc=2821.84   BIC=2856.85
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 4.252487 45.55696 38.23065 -78281.34 78306.61 0.7439079
##                      ACF1
## Training set -0.002018937
coeftest(AjusteArima)
## 
## z test of coefficients:
## 
##      Estimate Std. Error z value Pr(>|z|)
## ar1  0.583728   1.063962  0.5486   0.5833
## ma1 -1.245758   1.055046 -1.1808   0.2377
## ma2  0.295124   0.697609  0.4231   0.6723
## ma3 -0.118693   0.136497 -0.8696   0.3845
## ma4  0.131431   0.213223  0.6164   0.5376
## ma5 -0.105363   0.112854 -0.9336   0.3505
## ma6  0.019163   0.136861  0.1400   0.8886
## ma7  0.002744   0.107941  0.0254   0.9797
## ma8  0.030286   0.081298  0.3725   0.7095

We see that several coefficients are not significant, so after constraining them we have:

### the previous model with the non-significant coefficients fixed at zero
AjusteArima=forecast::Arima(tsdf,order = c(1,1,8),lambda = NULL,include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))
summary(AjusteArima)
## Series: tsdf 
## ARIMA(1,1,8) 
## 
## Coefficients:
##          ar1      ma1  ma2  ma3  ma4  ma5  ma6  ma7  ma8
##       0.3572  -0.9862    0    0    0    0    0    0    0
## s.e.  0.0592   0.0131    0    0    0    0    0    0    0
## 
## sigma^2 = 2142:  log likelihood = -1403.17
## AIC=2812.34   AICc=2812.43   BIC=2823.1
## 
## Training set error measures:
##                    ME     RMSE     MAE       MPE    MAPE      MASE        ACF1
## Training set 4.241335 46.02293 38.7308 -83192.89 83218.4 0.7536401 -0.04148467
coeftest(AjusteArima)
## 
## z test of coefficients:
## 
##      Estimate Std. Error  z value  Pr(>|z|)    
## ar1  0.357177   0.059221   6.0313 1.627e-09 ***
## ma1 -0.986217   0.013054 -75.5480 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

And since the AR(1) component appears not to be significant, another candidate model is:

### a pure MA alternative with the non-significant coefficients fixed at zero
AjusteArima0=forecast::Arima(tsdf,order = c(0,1,8),lambda = NULL,include.drift = FALSE,fixed=c(NA,0,NA,0,0,0,0,0))
summary(AjusteArima0)
## Series: tsdf 
## ARIMA(0,1,8) 
## 
## Coefficients:
##           ma1  ma2      ma3  ma4  ma5  ma6  ma7  ma8
##       -0.7256    0  -0.2508    0    0    0    0    0
## s.e.   0.0456    0   0.0445    0    0    0    0    0
## 
## sigma^2 = 2136:  log likelihood = -1402.76
## AIC=2811.51   AICc=2811.6   BIC=2822.27
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
## Training set 4.126033 45.95397 38.76943 -83155.48 83180.89 0.7543917 0.06114132
coeftest(AjusteArima0)
## 
## z test of coefficients:
## 
##      Estimate Std. Error  z value  Pr(>|z|)    
## ma1 -0.725638   0.045590 -15.9166 < 2.2e-16 ***
## ma3 -0.250775   0.044547  -5.6295 1.808e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

In summary, we select these two candidate models (a sketch comparing their information criteria follows the list):

  1. Arima(tsdf, order = c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))

  2. Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))
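
As announced above, here is a quick side-by-side of the two candidates' information criteria (a minimal sketch reusing the AjusteArimaAut and AjusteArima objects fitted earlier):

sapply(list(M1 = AjusteArimaAut, M2 = AjusteArima),
       function(m) c(AIC = m$aic, AICc = m$aicc, BIC = m$bic))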

Checking for seasonal roots

In this case the test says no seasonal differencing is necessary, since no seasonal unit roots are detected; however, we did find a period of roughly 17 weeks. To visualize this, we will probe a SARIMA model and see whether it improves on the ARIMA models chosen above.

nsdiffs(divan)
## [1] 0
nsdiffs(tsdf)
## [1] 0
Ddivan <- diff(divan, lag = 52)  # seasonal difference at lag 52 (one year of weeks)
par(mfrow=c(2,1))
plot(divan)
plot(Ddivan)

par(mfrow=c(1,1))
monthplot(Ddivan)

acf(Ddivan,lag.max = ancho)

spectrum(Ddivan)

nsdiffs(Ddivan)
## [1] 0

But the group means in the monthplot do not move around zero, so we end our search for a SARIMA model here.
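
For reference, the SARIMA probe mentioned above could be written as follows (a minimal sketch; the 17-week seasonal MA(1) is an assumption motivated by the spectral peak, not a model we retain):

sar <- forecast::Arima(tsdf, order = c(0, 1, 3),
                       seasonal = list(order = c(0, 0, 1), period = 17))
summary(sar)  # compare its AICc/BIC against M1 and M2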

Residual Analysis

M1 <- Arima(tsdf, order = c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))

M2 <- Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))

RM1 <- M1$residuals

RM2 <- M2$residuals

plot(RM1)

plot(RM2)

We see no pattern in either residual series.

acf(RM1, lag.max = ancho)

acf(RM2, lag.max = ancho)

pacf(RM1, lag.max = ancho)

pacf(RM2, lag.max = ancho)

Nothing exceeds the confidence bands in the ACF or PACF, i.e., the model captures all of the autocorrelation structure.
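
These diagnostics can also be bundled into a single call with forecast::checkresiduals, which shows the residual plot, its ACF, and a Ljung-Box test at once (a minimal sketch):

forecast::checkresiduals(M1)  # residual plot + ACF + Ljung-Box for M1
forecast::checkresiduals(M2)  # and for M2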

Normality test

jarque.bera.test(RM1)
## 
##  Jarque Bera Test
## 
## data:  RM1
## X-squared = 4.275, df = 2, p-value = 0.1179
jarque.bera.test(RM2)
## 
##  Jarque Bera Test
## 
## data:  RM2
## X-squared = 4.5848, df = 2, p-value = 0.101

The residuals appear to be normal (neither test rejects), so the fit looks good so far.
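
A normal QQ plot complements the Jarque-Bera statistic visually (a minimal sketch):

qqnorm(RM1); qqline(RM1, col = "red")  # points near the line support normality
qqnorm(RM2); qqline(RM2, col = "red")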

Autocorrelation test

length(RM1)/4
## [1] 67
Box.test(RM1, lag = 67, type = "Ljung-Box", fitdf = 2) # 2 parameters were estimated
## 
##  Box-Ljung test
## 
## data:  RM1
## X-squared = 73.59, df = 65, p-value = 0.2176
Box.test(RM2, lag = 67, type = "Ljung-Box", fitdf = 2) # 2 parameters were estimated
## 
##  Box-Ljung test
## 
## data:  RM2
## X-squared = 81.925, df = 65, p-value = 0.07643

Neither test is significant, i.e., there is no remaining autocorrelation to explain.

spectrum(RM1)

spectrum(RM2)

We see some peaks, which is why we check for the presence of outliers further below.

CUSUM and CUSUMSQ

For the first model, M1

# cusum
cum=cumsum(RM1)/sd(RM1)
N=length(RM1)
cumq=cumsum(RM1^2)/sum(RM1^2)

Af=0.948 ### 95% quantile for the CUSUM statistic
co=0.10169 ### critical value for the CUSUMSQ bands

LS=Af*sqrt(N)+2*Af*c(1:length(RM1))/sqrt(N)
LI=-LS
LQS=co+(1:length(RM1))/N
LQI=-co+(1:length(RM1))/N
{plot(cum,type="l",ylim=c(min(LI),max(LS)),xlab="t",ylab="",main="CUSUM")
  lines(LS,type="S",col="red")
  lines(LI,type="S",col="red")}

# cusumsq
{plot(cumq,type="l",xlab="t",ylab="",main="CUSUMSQ")                      
  lines(LQS,type="S",col="red")                                                                           
  lines(LQI,type="S",col="red")}

For the second model, M2

# cusum
cum=cumsum(RM2)/sd(RM2)
N=length(RM2)
cumq=cumsum(RM2^2)/sum(RM2^2)

Af=0.948 ### 95% quantile for the CUSUM statistic
co=0.10169 ### critical value for the CUSUMSQ bands

LS=Af*sqrt(N)+2*Af*c(1:length(RM2))/sqrt(N)
LI=-LS
LQS=co+(1:length(RM2))/N
LQI=-co+(1:length(RM2))/N
{plot(cum,type="l",ylim=c(min(LI),max(LS)),xlab="t",ylab="",main="CUSUM")
  lines(LS,type="S",col="red")
  lines(LI,type="S",col="red")}

# cusumsq
{plot(cumq,type="l",xlab="t",ylab="",main="CUSUMSQ")                      
  lines(LQS,type="S",col="red")                                                                           
  lines(LQI,type="S",col="red")}
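
Since the same CUSUM/CUSUMSQ computation is repeated for both models, a small helper (a sketch using the same critical values as above) avoids the duplication:

plot_cusum <- function(res, Af = 0.948, co = 0.10169) {
  N    <- length(res)
  cum  <- cumsum(res) / sd(res)        # CUSUM statistic
  cumq <- cumsum(res^2) / sum(res^2)   # CUSUMSQ statistic
  LS   <- Af * sqrt(N) + 2 * Af * (1:N) / sqrt(N)
  plot(cum, type = "l", ylim = c(-max(LS), max(LS)), xlab = "t", ylab = "", main = "CUSUM")
  lines(LS, type = "S", col = "red"); lines(-LS, type = "S", col = "red")
  plot(cumq, type = "l", xlab = "t", ylab = "", main = "CUSUMSQ")
  lines(co + (1:N) / N, type = "S", col = "red")
  lines(-co + (1:N) / N, type = "S", col = "red")
}
plot_cusum(RM1)  # M1
plot_cusum(RM2)  # M2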

Outlier analysis

For M1

coef_M1= coefs2poly(M1)                    # ARMA coefficients in polynomial form
outliers_M1= locate.outliers(RM1,coef_M1)  # search the residuals for outliers
outliers_M1
## [1] type    ind     coefhat tstat  
## <0 rows> (or 0-length row.names)

We see that with the first model we obtain no outliers.

For M2

coef_M2= coefs2poly(M2)                    # ARMA coefficients in polynomial form
outliers_M2= locate.outliers(RM2,coef_M2)  # search the residuals for outliers
outliers_M2
## [1] type    ind     coefhat tstat  
## <0 rows> (or 0-length row.names)

We see that with the second model we likewise obtain no outliers.
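
As an independent cross-check (a minimal sketch; tso can be slow on long series), tsoutliers::tso estimates the ARIMA model and searches jointly for additive outliers, level shifts, and temporary changes:

tsoutliers::tso(tsdf, types = c("AO", "LS", "TC"))  # joint automatic outlier search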

Fourier and Dummy Components

tsibble_ivan=as_tsibble(tsdf)
ajuste_final<-tsibble_ivan%>%model(
  `M1F21`=ARIMA(value~1+fourier(K=21)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0)),
  `M1F4` =ARIMA(value~1+fourier(K=4)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,0)),
  `M1F3` =ARIMA(value~1+fourier(K=3)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,NA,NA,0)),
  `M1F2` =ARIMA(value~1+fourier(K=2)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,0)),
  `M1F1` =ARIMA(value~1+fourier(K=1)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,0)),
  `M1S`  =ARIMA(value~1+season()+pdq(0,1,3)+PDQ(0,0,0)),
  `M1`   =ARIMA(value~1+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,0)),
  `M2F21`=ARIMA(value~1+fourier(K=21)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0)),
  `M2F4` =ARIMA(value~1+fourier(K=4)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,0)),
  `M2F3` =ARIMA(value~1+fourier(K=3)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,NA,NA,0)),
  `M2F2` =ARIMA(value~1+fourier(K=2)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,0)),
  `M2F1` =ARIMA(value~1+fourier(K=1)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,0)),
  `M2S`  =ARIMA(value~1+season()+pdq(1,1,8)+PDQ(0,0,0)), 
  `M2`   =ARIMA(value~1+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,0))
  )

glance(ajuste_final)
## # A tibble: 12 × 8
##    .model sigma2 log_lik   AIC  AICc   BIC ar_roots  ma_roots 
##    <chr>   <dbl>   <dbl> <dbl> <dbl> <dbl> <list>    <list>   
##  1 M1F21   1909.  -1365. 2819. 2838. 2981. <cpl [0]> <cpl [3]>
##  2 M1F4    2129.  -1398. 2818. 2820. 2858. <cpl [0]> <cpl [3]>
##  3 M1F3    2121.  -1399. 2816. 2816. 2848. <cpl [0]> <cpl [3]>
##  4 M1F2    2130.  -1400. 2815. 2815. 2840. <cpl [0]> <cpl [3]>
##  5 M1F1    2146.  -1402. 2815. 2815. 2833. <cpl [0]> <cpl [3]>
##  6 M1S     1914.  -1359. 2831. 2861. 3032. <cpl [0]> <cpl [3]>
##  7 M1      2136.  -1403. 2812. 2812. 2822. <cpl [0]> <cpl [3]>
##  8 M2F21   1998.  -1371. 2832. 2850. 2993. <cpl [1]> <cpl [1]>
##  9 M2F4    2136.  -1399. 2819. 2820. 2859. <cpl [1]> <cpl [1]>
## 10 M2F3    2153.  -1403. 2816. 2816. 2834. <cpl [1]> <cpl [1]>
## 11 M2S     1932.  -1357. 2838. 2876. 3060. <cpl [1]> <cpl [8]>
## 12 M2      2142.  -1403. 2812. 2812. 2823. <cpl [1]> <cpl [1]>

From this we conclude that the best model is

Arima(tsdf, order = c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))

with respect to AIC, AICc, and BIC against the other models. Moreover, the second-best model is:

Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))

Forecasts

With M1

# Rolling one-step-ahead evaluation
h=1
lserie=length(tsdf)
ntrain=trunc(length(tsdf)*0.8)   # 80% training split
ntrain
## [1] 214
time(tsdf)[ntrain] ### returns the date at position ntrain (end of training)
## [1] 2023.063
train=window(tsdf,end=c(2023.063))
test=window(tsdf,start=c(2023.082))

ntest=length(test)

length(train)+length(test) ## check that all observations are accounted for
## [1] 268
fcmat=matrix(0,nrow=ntest,ncol=h)

fitmodelo <- Arima(tsdf, order=c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))
for(i in 1:ntest){
  x=window(tsdf,end=c(2023.063)+(i-1)/52.18)
  refit=Arima(x, model=fitmodelo)
  fcmat[i,]=test[i]-forecast::forecast(refit,h=1)$mean
}
ECM=mean(fcmat^2)
RECM=sqrt(mean(fcmat^2))
print("La raiz del error cuadratico medio es:")
## [1] "La raiz del error cuadratico medio es:"
RECM
## [1] 47.31168

With M2

fcmat=matrix(0,nrow=ntest,ncol=h)

fitmodelo <- Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))
for(i in 1:ntest){
  x=window(tsdf,end=c(2023.063)+(i-1)/52.18)
  refit=Arima(x, model=fitmodelo)
  fcmat[i,]=test[i]-forecast::forecast(refit,h=1)$mean
}
ECM=mean(fcmat^2)
RECM=sqrt(mean(fcmat^2))
print("La raiz del error cuadratico medio es:")
## [1] "La raiz del error cuadratico medio es:"
RECM
## [1] 50.43648

Finally, we find that the best model is M1, whose rolling RMSE (47.31) is lower than that of M2 (50.44).
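
With M1 selected, the final out-of-sample forecast is immediate (a minimal sketch; the 12-week horizon is an arbitrary choice):

pron <- forecast::forecast(M1, h = 12)  # point forecasts and intervals, 12 weeks ahead
autoplot(pron)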