# Time-series infrastructure
library(tsibble)
library(zoo)
library(timetk)
library(TSstudio)

# Modeling and forecasting
library(MASS)
library(VGAM)
library(car)
library(lme4)
library(forecast)
library(astsa)
library(TSA)
library(greybox)
library(fabletools)
library(fable)
library(feasts)

# Unit-root tests and diagnostics
library(urca)
library(tseries)
library(lmtest)
library(uroot)
library(fUnitRoots)
library(aTSA)
library(tsoutliers)

# Nonlinear time series
library(nonlinearTseries)
library(tseriesChaos)

# Data import, wrangling, and reporting (loaded last so dplyr and ggplot2 are not masked)
library(readxl)
library(lubridate)
library(tidyverse)  # attaches ggplot2, dplyr, tidyr, readr, purrr, stringr, tibble
library(plotly)
library(kableExtra)
library(DT)
library(knitr)
tsdf <- ts(ivan$KMS, start = c(2018, 52.18), frequency = 52.18)  # weekly series: 52.18 weeks per year on average
We begin by exploring the autocorrelation structure, simple and partial, for evidence against independence.
ancho = sqrt(268)  # lag window ~ sqrt(n); acf() truncates this to 16 lags
acf(tsdf, lag.max = ancho)
acf(tsdf, lag.max = ancho, ci.type = "ma")  # confidence bands appropriate for an MA process
pacf(tsdf, lag.max = ancho)
ar(tsdf)
##
## Call:
## ar(x = tsdf)
##
## Coefficients:
## 1 2
## 0.3189 0.1103
##
## Order selected 2 sigma^2 estimated as 2113
In the first plot we see that the observations are not independent; the second (with MA-type bands) suggests a possible MA(2); and the third suggests an AR(1). However, ar(tsdf) selects an AR(2) as candidate. A maximal candidate model is therefore ARMA(2,2).
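As a quick check, this maximal candidate can be fitted directly on the level series; a minimal sketch (an addition, not part of the original run):
m_max <- forecast::Arima(tsdf, order = c(2, 0, 2))  # maximal ARMA(2,2) candidate
lmtest::coeftest(m_max)                             # see which coefficients are significant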
# Stationarity test
# Augmented Dickey-Fuller (ADF) test to check whether the series is stationary
aTSA::adf.test(tsdf, nlag = 3)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 -3.33 0.0100
## [2,] 1 -2.17 0.0306
## [3,] 2 -1.86 0.0643
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -11.31 0.01
## [2,] 1 -8.29 0.01
## [3,] 2 -7.74 0.01
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -11.46 0.01
## [2,] 1 -8.44 0.01
## [3,] 2 -7.92 0.01
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
This indicates the presence of a unit root under the no-drift, no-trend specification (the null is not rejected at lag 2, p = 0.064).
summary(ur.df(tsdf,type ="none", lags=2))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -120.148 -33.336 9.745 42.027 154.758
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.04329 0.02333 -1.856 0.0646 .
## z.diff.lag1 -0.43002 0.06244 -6.887 4.22e-11 ***
## z.diff.lag2 -0.14079 0.06103 -2.307 0.0218 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 50.3 on 262 degrees of freedom
## Multiple R-squared: 0.1888, Adjusted R-squared: 0.1795
## F-statistic: 20.33 on 3 and 262 DF, p-value: 7.125e-12
##
##
## Value of test-statistic is: -1.8555
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
summary(ur.df(tsdf,type ="drift", lags=2))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.24 -33.58 3.11 36.34 126.03
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.74498 10.27513 7.469 1.21e-12 ***
## z.lag.1 -0.60072 0.07759 -7.742 2.16e-13 ***
## z.diff.lag1 -0.07548 0.07401 -1.020 0.309
## z.diff.lag2 0.05345 0.06129 0.872 0.384
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.74 on 261 degrees of freedom
## Multiple R-squared: 0.3317, Adjusted R-squared: 0.324
## F-statistic: 43.17 on 3 and 261 DF, p-value: < 2.2e-16
##
##
## Value of test-statistic is: -7.7423 29.9745
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.44 -2.87 -2.57
## phi1 6.47 4.61 3.79
From this test we note that the lag-2 term is significant.
Since a unit root was detected, we difference the series once and check whether further differencing is needed.
divan <- diff(tsdf)
plot(tsdf)
plot(divan)
The plots show that the series should not be differenced any further.
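As a quick corroboration (an addition to the original analysis), forecast::ndiffs estimates the number of differences required via unit-root tests:
forecast::ndiffs(tsdf)   # number of differences suggested for the level series (1 if a single difference suffices)
forecast::ndiffs(divan)  # should be 0 if the differenced series is already stationary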
pacf(divan,lag.max = ancho)
acf(divan,lag.max = ancho)
ar(divan)
##
## Call:
## ar(x = divan)
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## -0.5481 -0.3506 -0.3549 -0.2439 -0.2265 -0.2054 -0.1649 -0.1563
##
## Order selected 8 sigma^2 estimated as 2366
aTSA::adf.test(divan, nlag=9)
## Augmented Dickey-Fuller Test
## alternative: stationary
##
## Type 1: no drift no trend
## lag ADF p.value
## [1,] 0 -24.72 0.01
## [2,] 1 -15.93 0.01
## [3,] 2 -14.17 0.01
## [4,] 3 -11.98 0.01
## [5,] 4 -10.57 0.01
## [6,] 5 -9.98 0.01
## [7,] 6 -9.39 0.01
## [8,] 7 -9.44 0.01
## [9,] 8 -9.02 0.01
## Type 2: with drift no trend
## lag ADF p.value
## [1,] 0 -24.68 0.01
## [2,] 1 -15.90 0.01
## [3,] 2 -14.15 0.01
## [4,] 3 -11.96 0.01
## [5,] 4 -10.55 0.01
## [6,] 5 -9.96 0.01
## [7,] 6 -9.37 0.01
## [8,] 7 -9.42 0.01
## [9,] 8 -9.00 0.01
## Type 3: with drift and trend
## lag ADF p.value
## [1,] 0 -24.63 0.01
## [2,] 1 -15.87 0.01
## [3,] 2 -14.12 0.01
## [4,] 3 -11.93 0.01
## [5,] 4 -10.53 0.01
## [6,] 5 -9.95 0.01
## [7,] 6 -9.35 0.01
## [8,] 7 -9.41 0.01
## [9,] 8 -8.99 0.01
## ----
## Note: in fact, p.value = 0.01 means p.value <= 0.01
summary(ur.df(tsdf,type ="none", lags=8))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -110.693 -32.679 8.702 36.552 142.982
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.01506 0.02324 -0.648 0.517563
## z.diff.lag1 -0.56692 0.06534 -8.677 5.30e-16 ***
## z.diff.lag2 -0.38311 0.07356 -5.208 3.99e-07 ***
## z.diff.lag3 -0.39379 0.07600 -5.182 4.53e-07 ***
## z.diff.lag4 -0.28348 0.07805 -3.632 0.000341 ***
## z.diff.lag5 -0.25985 0.07776 -3.342 0.000960 ***
## z.diff.lag6 -0.23163 0.07468 -3.102 0.002144 **
## z.diff.lag7 -0.17793 0.07148 -2.489 0.013446 *
## z.diff.lag8 -0.16652 0.06220 -2.677 0.007919 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 48.15 on 250 degrees of freedom
## Multiple R-squared: 0.2811, Adjusted R-squared: 0.2552
## F-statistic: 10.86 on 9 and 250 DF, p-value: 3.089e-14
##
##
## Value of test-statistic is: -0.648
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.58 -1.95 -1.62
We see that lag 8 is significant in the test regression, and the ADF test on the differenced series rejects the unit root in every specification. Thus no further differencing is applied, and the trend has been stabilized.
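As a complementary check (an addition, not in the original analysis), the KPSS test reverses the null: stationarity is the null hypothesis, so failing to reject agrees with the ADF conclusion:
tseries::kpss.test(divan, null = "Level")  # H0: the differenced series is level-stationary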
monthplot(divan)
spectrum(divan)
The dominant period is approximately 16.875 weeks. This potentially reveals a seasonal component, which could be modeled via Fourier terms or seasonal dummies.
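The period is read off as the reciprocal of the frequency at the spectral peak; a minimal sketch (the exact peak location depends on the smoothing spectrum() applies):
sp <- spectrum(divan, plot = FALSE)
peak <- sp$freq[which.max(sp$spec)]  # frequency (cycles per year) with the most power
frequency(divan) / peak              # corresponding period in weeks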
acf(divan,lag.max = ancho,ci.type="ma")
pacf(divan,lag.max = ancho)
These plots suggest lag 1 for the MA component and order 8 for the AR component, i.e., an ARMA(1,8) candidate for the differenced series.
modelo.automatico1 = auto.arima(divan, d = 0, D = 0, max.p = 2, max.q = 16,
                                start.p = 0, start.q = 0, seasonal = FALSE,
                                max.order = 12, stationary = TRUE, ic = "aicc",
                                stepwise = FALSE, allowmean = TRUE)
modelo.automatico1
## Series: divan
## ARIMA(0,0,3) with zero mean
##
## Coefficients:
## ma1 ma2 ma3
## -0.6610 -0.1144 -0.2025
## s.e. 0.0609 0.0712 0.0563
##
## sigma^2 = 2124: log likelihood = -1401.55
## AIC=2811.11 AICc=2811.26 BIC=2825.46
The automatic search suggests an ARMA(0,3) with zero mean for the differenced series.
### A model fitted from the automatic selection
AjusteArimaAut = forecast::Arima(tsdf, order = c(0,1,3), lambda = NULL, include.drift = FALSE)
summary(AjusteArimaAut)
## Series: tsdf
## ARIMA(0,1,3)
##
## Coefficients:
## ma1 ma2 ma3
## -0.6610 -0.1144 -0.2025
## s.e. 0.0609 0.0712 0.0563
##
## sigma^2 = 2124: log likelihood = -1401.55
## AIC=2811.11 AICc=2811.26 BIC=2825.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.197052 45.74429 38.6338 -81393.94 81419.37 0.7517525 0.001854678
coeftest(AjusteArimaAut)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.661000 0.060866 -10.8599 < 2.2e-16 ***
## ma2 -0.114407 0.071200 -1.6068 0.1080912
## ma3 -0.202549 0.056265 -3.5999 0.0003183 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
We can see that ma2 is not significant; we correct this by fixing it at zero:
AjusteArimaAut = forecast::Arima(tsdf, order = c(0,1,3), lambda = NULL, include.drift = FALSE, fixed = c(NA, 0, NA))  # ma1 and ma3 free, ma2 = 0
summary(AjusteArimaAut)
## Series: tsdf
## ARIMA(0,1,3)
##
## Coefficients:
## ma1 ma2 ma3
## -0.7256 0 -0.2508
## s.e. 0.0456 0 0.0445
##
## sigma^2 = 2136: log likelihood = -1402.76
## AIC=2811.51 AICc=2811.6 BIC=2822.27
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.126033 45.95397 38.76943 -83155.48 83180.89 0.7543917 0.06114132
coeftest(AjusteArimaAut)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.725638 0.045590 -15.9166 < 2.2e-16 ***
## ma3 -0.250775 0.044547 -5.6295 1.808e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### A model with the hyperparameters we identified
AjusteArima = forecast::Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE)
summary(AjusteArima)
## Series: tsdf
## ARIMA(1,1,8)
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8
## 0.5837 -1.2458 0.2951 -0.1187 0.1314 -0.1054 0.0192 0.0027 0.0303
## s.e. 1.0640 1.0550 0.6976 0.1365 0.2132 0.1129 0.1369 0.1079 0.0813
##
## sigma^2 = 2156: log likelihood = -1400.49
## AIC=2820.98 AICc=2821.84 BIC=2856.85
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.252487 45.55696 38.23065 -78281.34 78306.61 0.7439079
## ACF1
## Training set -0.002018937
coeftest(AjusteArima)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.583728 1.063962 0.5486 0.5833
## ma1 -1.245758 1.055046 -1.1808 0.2377
## ma2 0.295124 0.697609 0.4231 0.6723
## ma3 -0.118693 0.136497 -0.8696 0.3845
## ma4 0.131431 0.213223 0.6164 0.5376
## ma5 -0.105363 0.112854 -0.9336 0.3505
## ma6 0.019163 0.136861 0.1400 0.8886
## ma7 0.002744 0.107941 0.0254 0.9797
## ma8 0.030286 0.081298 0.3725 0.7095
Several coefficients are not significant, so after fixing them to zero we have:
### A model with the hyperparameters we identified, restricted
AjusteArima = forecast::Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed = c(NA, NA, 0, 0, 0, 0, 0, 0, 0))  # only ar1 and ma1 free
summary(AjusteArima)
## Series: tsdf
## ARIMA(1,1,8)
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8
## 0.3572 -0.9862 0 0 0 0 0 0 0
## s.e. 0.0592 0.0131 0 0 0 0 0 0 0
##
## sigma^2 = 2142: log likelihood = -1403.17
## AIC=2812.34 AICc=2812.43 BIC=2823.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.241335 46.02293 38.7308 -83192.89 83218.4 0.7536401 -0.04148467
coeftest(AjusteArima)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ar1 0.357177 0.059221 6.0313 1.627e-09 ***
## ma1 -0.986217 0.013054 -75.5480 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Since in the unrestricted fit the AR(1) component was possibly not significant, another candidate model is:
### A pure moving-average candidate
AjusteArima0 = forecast::Arima(tsdf, order = c(0,1,8), lambda = NULL, include.drift = FALSE, fixed = c(NA, 0, NA, 0, 0, 0, 0, 0))  # only ma1 and ma3 free
summary(AjusteArima0)
## Series: tsdf
## ARIMA(0,1,8)
##
## Coefficients:
## ma1 ma2 ma3 ma4 ma5 ma6 ma7 ma8
## -0.7256 0 -0.2508 0 0 0 0 0
## s.e. 0.0456 0 0.0445 0 0 0 0 0
##
## sigma^2 = 2136: log likelihood = -1402.76
## AIC=2811.51 AICc=2811.6 BIC=2822.27
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.126033 45.95397 38.76943 -83155.48 83180.89 0.7543917 0.06114132
coeftest(AjusteArima0)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## ma1 -0.725638 0.045590 -15.9166 < 2.2e-16 ***
## ma3 -0.250775 0.044547 -5.6295 1.808e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In summary, we select these two candidate models:
Arima(tsdf, order = c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))
Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))
The nsdiffs test below says that seasonal differencing is not necessary, since no seasonal unit roots are detected; however, we found a period of around 17 weeks. To visualize this, we will apply a seasonal difference and see whether a SARIMA is competitive with the ARIMA models chosen above.
nsdiffs(divan)
## [1] 0
nsdiffs(tsdf)
## [1] 0
Ddivan <- diff(divan, lag = 52)  # seasonal difference at lag 52 (roughly one year of weeks)
par(mfrow=c(2,1))
plot(divan)
plot(Ddivan)
par(mfrow=c(1,1))
monthplot(Ddivan)
acf(Ddivan,lag.max = ancho)
spectrum(Ddivan)
nsdiffs(Ddivan)
## [1] 0
But the group means in the monthplot do not move around zero, so we end our search for a SARIMA model here.
M1 <- Arima(tsdf, order = c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))
M2 <- Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))
RM1 <- M1$residuals
RM2 <- M2$residuals
plot(RM1)
plot(RM2)
We see no pattern in either residual series.
acf(RM1, lag.max = ancho)
acf(RM2, lag.max = ancho)
pacf(RM1, lag.max = ancho)
pacf(RM2, lag.max = ancho)
Nothing exceeds the confidence bands in the ACF or PACF, i.e., the autocorrelation structure is fully explained by each model.
jarque.bera.test(RM1)
##
## Jarque Bera Test
##
## data: RM1
## X-squared = 4.275, df = 2, p-value = 0.1179
jarque.bera.test(RM2)
##
## Jarque Bera Test
##
## data: RM2
## X-squared = 4.5848, df = 2, p-value = 0.101
The residuals are consistent with normality (Jarque-Bera fails to reject in both cases), so the fit looks good so far.
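As a complementary visual check (an addition, not in the original run), a normal Q-Q plot of each residual series:
qqnorm(RM1); qqline(RM1)  # M1 residuals: points close to the line support normality
qqnorm(RM2); qqline(RM2)  # M2 residuals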
length(RM1)/4  # rule of thumb: test up to about n/4 lags
## [1] 67
Box.test(RM1, lag = 67, type = "Ljung-Box", fitdf = 2)  # 2 parameters were estimated
##
## Box-Ljung test
##
## data: RM1
## X-squared = 73.59, df = 65, p-value = 0.2176
Box.test(RM2, lag = 67, type = "Ljung-Box", fitdf = 2)  # 2 parameters were estimated
##
## Box-Ljung test
##
## data: RM2
## X-squared = 81.925, df = 65, p-value = 0.07643
The test is not significant, i.e., there is no remaining autocorrelation to explain.
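These diagnostics can also be obtained in a single call; forecast::checkresiduals plots the residuals, their ACF and histogram, and reports a Ljung-Box test (a convenience sketch, not in the original run):
forecast::checkresiduals(M1)  # combined residual diagnostics for M1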
spectrum(RM1)
spectrum(RM2)
We see some peaks in the residual spectra, which is why we go on to check for the presence of outliers further below.
For the first model, M1:
# CUSUM
cum = cumsum(RM1)/sd(RM1)        # cumulative sum of standardized residuals
N = length(RM1)
cumq = cumsum(RM1^2)/sum(RM1^2)  # cumulative sum of squared residuals (CUSUMSQ)
Af = 0.948    # 95% quantile for the CUSUM statistic
co = 0.10169  # 95% critical value for the CUSUMSQ statistic
LS = Af*sqrt(N) + 2*Af*c(1:length(RM1))/sqrt(N)  # upper CUSUM bound
LI = -LS                                         # lower CUSUM bound
LQS = co + (1:length(RM1))/N   # upper CUSUMSQ bound
LQI = -co + (1:length(RM1))/N  # lower CUSUMSQ bound
{plot(cum,type="l",ylim=c(min(LI),max(LS)),xlab="t",ylab="",main="CUSUM")
lines(LS,type="S",col="red")
lines(LI,type="S",col="red")}
# CUSUMSQ
{plot(cumq,type="l",xlab="t",ylab="",main="CUSUMSQ")
lines(LQS,type="S",col="red")
lines(LQI,type="S",col="red")}
For the second model, M2:
# CUSUM
cum = cumsum(RM2)/sd(RM2)        # cumulative sum of standardized residuals
N = length(RM2)
cumq = cumsum(RM2^2)/sum(RM2^2)  # cumulative sum of squared residuals (CUSUMSQ)
Af = 0.948    # 95% quantile for the CUSUM statistic
co = 0.10169  # 95% critical value for the CUSUMSQ statistic
LS = Af*sqrt(N) + 2*Af*c(1:length(RM2))/sqrt(N)  # upper CUSUM bound
LI = -LS                                         # lower CUSUM bound
LQS = co + (1:length(RM2))/N   # upper CUSUMSQ bound
LQI = -co + (1:length(RM2))/N  # lower CUSUMSQ bound
{plot(cum,type="l",ylim=c(min(LI),max(LS)),xlab="t",ylab="",main="CUSUM")
lines(LS,type="S",col="red")
lines(LI,type="S",col="red")}
# CUSUMSQ
{plot(cumq,type="l",xlab="t",ylab="",main="CUSUMSQ")
lines(LQS,type="S",col="red")
lines(LQI,type="S",col="red")}
For M1:
coef_M1 = coefs2poly(M1)                     # ARMA coefficients as polynomials, as tsoutliers expects
outliers_M1 = locate.outliers(RM1, coef_M1)  # search the residuals for AO/LS/TC outliers
outliers_M1
## [1] type ind coefhat tstat
## <0 rows> (or 0-length row.names)
With the first model, no outliers are detected.
For M2:
coef_M2 = coefs2poly(M2)
outliers_M2 = locate.outliers(RM2, coef_M2)
outliers_M2
## [1] type ind coefhat tstat
## <0 rows> (or 0-length row.names)
With the second model, no outliers are detected either.
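For completeness, tsoutliers can also run detection and adjustment jointly on the original series; a minimal sketch (an addition, not part of the original analysis):
fit_tso <- tsoutliers::tso(tsdf, types = c("AO", "LS", "TC"))  # additive outliers, level shifts, temporary changes
fit_tso$outliers  # empty when none are detected, consistent with the residual-based search above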
tsibble_ivan = as_tsibble(tsdf)
ajuste_final <- tsibble_ivan %>% model(
`M1F21`=ARIMA(value~1+fourier(K=21)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0)),
`M1F4` =ARIMA(value~1+fourier(K=4)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,0)),
`M1F3` =ARIMA(value~1+fourier(K=3)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,NA,NA,0)),
`M1F2` =ARIMA(value~1+fourier(K=2)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,NA,NA,0)),
`M1F1` =ARIMA(value~1+fourier(K=1)+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,NA,NA,0)),
`M1S` =ARIMA(value~1+season()+pdq(0,1,3)+PDQ(0,0,0)),
`M1` =ARIMA(value~1+pdq(0,1,3)+PDQ(0,0,0), fixed=c(NA,0,NA,0)),
`M2F21`=ARIMA(value~1+fourier(K=21)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,NA,0)),
`M2F4` =ARIMA(value~1+fourier(K=4)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,NA,NA,NA,NA,0)),
`M2F3` =ARIMA(value~1+fourier(K=3)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,NA,NA,0)),
`M2F2` =ARIMA(value~1+fourier(K=2)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,NA,NA,0)),
`M2F1` =ARIMA(value~1+fourier(K=1)+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,NA,NA,0)),
`M2S` =ARIMA(value~1+season()+pdq(1,1,8)+PDQ(0,0,0)),
`M2`   =ARIMA(value~1+pdq(1,1,8)+PDQ(0,0,0), fixed=c(NA,NA,0,0,0,0,0,0,0,0))
)
glance(ajuste_final)
## # A tibble: 12 × 8
## .model sigma2 log_lik AIC AICc BIC ar_roots ma_roots
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <list> <list>
## 1 M1F21 1909. -1365. 2819. 2838. 2981. <cpl [0]> <cpl [3]>
## 2 M1F4 2129. -1398. 2818. 2820. 2858. <cpl [0]> <cpl [3]>
## 3 M1F3 2121. -1399. 2816. 2816. 2848. <cpl [0]> <cpl [3]>
## 4 M1F2 2130. -1400. 2815. 2815. 2840. <cpl [0]> <cpl [3]>
## 5 M1F1 2146. -1402. 2815. 2815. 2833. <cpl [0]> <cpl [3]>
## 6 M1S 1914. -1359. 2831. 2861. 3032. <cpl [0]> <cpl [3]>
## 7 M1 2136. -1403. 2812. 2812. 2822. <cpl [0]> <cpl [3]>
## 8 M2F21 1998. -1371. 2832. 2850. 2993. <cpl [1]> <cpl [1]>
## 9 M2F4 2136. -1399. 2819. 2820. 2859. <cpl [1]> <cpl [1]>
## 10 M2F3 2153. -1403. 2816. 2816. 2834. <cpl [1]> <cpl [1]>
## 11 M2S 1932. -1357. 2838. 2876. 3060. <cpl [1]> <cpl [8]>
## 12 M2 2142. -1403. 2812. 2812. 2823. <cpl [1]> <cpl [1]>
From this we conclude that, with respect to AIC, AICc, and BIC, the best model is
Arima(tsdf, order = c(0,1,3), lambda=NULL, include.drift = FALSE, fixed=c(NA,0,NA))
and the second-best model is
Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed=c(NA,NA,0,0,0,0,0,0,0))
With M1:
# Rolling one-step-ahead forecast evaluation
h = 1
lserie = length(tsdf)
ntrain = trunc(length(tsdf)*0.8)  # 80% of the series for training
ntrain
## [1] 214
time(tsdf)[ntrain]  # returns the date at position ntrain, the end of the training window
## [1] 2023.063
train = window(tsdf, end = c(2023.063))
test = window(tsdf, start = c(2023.082))
ntest = length(test)
length(train) + length(test)  # check that all observations are accounted for
## [1] 268
fcmat = matrix(0, nrow = ntest, ncol = h)  # stores one-step-ahead forecast errors
fitmodelo <- Arima(tsdf, order = c(0,1,3), lambda = NULL, include.drift = FALSE, fixed = c(NA,0,NA))
for(i in 1:ntest){
  x = window(tsdf, end = c(2023.063) + (i-1)/52.18)  # expand the window one week at a time
  refit = Arima(x, model = fitmodelo)                # reuse the fitted coefficients without re-estimating
  fcmat[i,] = test[i] - forecast::forecast(refit, h = 1)$mean  # one-step forecast error
}
ECM = mean(fcmat^2)         # mean squared error
RECM = sqrt(mean(fcmat^2))  # root mean squared error
print("The root mean squared error is:")
## [1] "The root mean squared error is:"
RECM
## [1] 47.31168
With M2:
fcmat = matrix(0, nrow = ntest, ncol = h)
fitmodelo <- Arima(tsdf, order = c(1,1,8), lambda = NULL, include.drift = FALSE, fixed = c(NA,NA,0,0,0,0,0,0,0))
for(i in 1:ntest){
  x = window(tsdf, end = c(2023.063) + (i-1)/52.18)
  refit = Arima(x, model = fitmodelo)
  fcmat[i,] = test[i] - forecast::forecast(refit, h = 1)$mean
}
ECM = mean(fcmat^2)
RECM = sqrt(mean(fcmat^2))
print("The root mean squared error is:")
## [1] "The root mean squared error is:"
RECM
## [1] 50.43648
Finally, we find that the best model is M1, with a rolling one-step-ahead RMSE of 47.31 versus 50.44 for M2.
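With the chosen model, out-of-sample forecasts follow directly; a minimal sketch (the 8-week horizon is an illustrative choice):
fc <- forecast::forecast(M1, h = 8)  # point forecasts with prediction intervals
autoplot(fc)                         # series plus forecast fan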