library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
getSymbols.yahoo("NFLX", env = globalenv(), from = as.Date("2016-01-01"), to = as.Date("2020-01-01"), periodicity = 'monthly')
## [1] "NFLX"
head(NFLX, 5) # Mostrar los primeros datos de la serie descargada
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2016-01-01 109.00 122.18 90.11 91.84 488193200 91.84
## 2016-02-01 91.79 97.48 79.95 93.41 389268900 93.41
## 2016-03-01 94.58 104.91 93.61 102.23 311333700 102.23
## 2016-04-01 102.93 111.85 88.21 90.03 340174300 90.03
## 2016-05-01 90.41 104.00 85.74 102.57 264997900 102.57
tail(NFLX, 5) # Mostrar los últimos datos de la serie descargada
## NFLX.Open NFLX.High NFLX.Low NFLX.Close NFLX.Volume NFLX.Adjusted
## 2019-08-01 324.25 328.58 287.20 293.75 137076700 293.75
## 2019-09-01 290.82 301.55 252.28 267.62 175411300 267.62
## 2019-10-01 267.35 308.75 257.01 287.41 231556400 287.41
## 2019-11-01 288.70 316.82 281.14 314.66 113645900 314.66
## 2019-12-01 314.39 338.00 292.02 323.57 124723600 323.57
length(NFLX) # Mostrar el número de elementos de la serie.
## [1] 288
#Gráfico de cotización
#--------------------------------
chartSeries(NFLX, theme = "white", name = "Cotización de Netflix")
#Gráfico de cotización
#--------------------------------
chartSeries(NFLX$NFLX.Adjusted, theme = "white", name = "Cotización de Netflix")
Vemos que no es estacionaria, sino que tiene una tendencia alcista o
creciente.
#Seleccionamos precio ajustado
#-----------------------------------
pre.net <- NFLX$NFLX.Adjusted
head(pre.net)
## NFLX.Adjusted
## 2016-01-01 91.84
## 2016-02-01 93.41
## 2016-03-01 102.23
## 2016-04-01 90.03
## 2016-05-01 102.57
## 2016-06-01 91.48
rendnet <- diff(log(pre.net)) #Rentabilidad
head(rendnet)
## NFLX.Adjusted
## 2016-01-01 NA
## 2016-02-01 0.01695055
## 2016-03-01 0.09022676
## 2016-04-01 -0.12708228
## 2016-05-01 0.13040255
## 2016-06-01 -0.11442508
rendnet <- rendnet[-1]
head(rendnet)
## NFLX.Adjusted
## 2016-02-01 0.016950552
## 2016-03-01 0.090226765
## 2016-04-01 -0.127082275
## 2016-05-01 0.130402555
## 2016-06-01 -0.114425083
## 2016-07-01 -0.002517413
#Gráfico de rentabilidades
#---------------------------------
chartSeries(rendnet, theme = "white", name = "Rendimientos de Netflix")
Método Box Jenkins
En esta sección se aplicará la metodología de Box-Jenkis para identificar los posibles modelos ARIMA a partir de los datos obtenidos.
Por esa razón, primero se analizarán los componentes de la serie temporal, segundo se estudiará el orden de integración que presenta la serie, tercero se estima un modelo ARIMA o ARMA y se transformará a la serie estacionaria, finalmente se graficará y analizará las funciones de autocorrelación simple y parcial con el fin de elegir posibles modelos ARIMA.
Componentes de la serie temporal
#Convertir la serie TS
#--------------------------------
pre.ts <- ts(rendnet, start = c(2016,1), frequency = 12)
head(pre.ts)
## NFLX.Adjusted
## [1,] 0.016950552
## [2,] 0.090226765
## [3,] -0.127082275
## [4,] 0.130402555
## [5,] -0.114425083
## [6,] -0.002517413
pre.ts
## Jan Feb Mar Apr May
## 2016 0.016950552 0.090226765 -0.127082275 0.130402555 -0.114425083
## 2017 0.010041084 0.039185483 0.029267777 0.068984176 -0.087485372
## 2018 0.075095866 0.013532840 0.056315319 0.118017750 0.107312498
## 2019 0.053338347 -0.004309772 0.038458902 -0.076414987 0.067686986
## Jun Jul Aug Sep Oct
## 2016 -0.002517413 0.065736402 0.011224670 0.236709154 -0.065099283
## 2017 0.195442599 -0.039009333 0.037301404 0.079877197 -0.046100666
## 2018 -0.148389294 0.085795587 0.017390369 -0.214905073 -0.053252001
## 2019 -0.128612090 -0.094892267 -0.093161020 0.071341735 0.090582922
## Nov Dec
## 2016 0.056493450 0.128033698
## 2017 0.023081621 0.342245356
## 2018 -0.066728738 0.237756415
## 2019 0.027922794
Por otro lado, se aparta una porción de la serie para poder efectuar en lo que sigue del trabajo los modelos y las proyecciones de los retornos de las acciones de Netflix.
pre.ts1 = pre.ts[1:38]
#Descomponer la serie de rantabilidad
#-----------------------------------------
despre <- decompose(pre.ts)
plot(despre)
Integración y test de raíz unitaria
library(urca)
summary(ur.df(pre.ts,type = "none", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression none
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 - 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.21960 -0.05059 0.03112 0.07333 0.34686
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## z.lag.1 -0.99672 0.22209 -4.488 5.31e-05 ***
## z.diff.lag -0.06781 0.15225 -0.445 0.658
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1126 on 43 degrees of freedom
## Multiple R-squared: 0.5401, Adjusted R-squared: 0.5187
## F-statistic: 25.25 on 2 and 43 DF, p-value: 5.583e-08
##
##
## Value of test-statistic is: -4.4878
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau1 -2.62 -1.95 -1.61
-4.48 < -1.95. Se rechaza H0. La serie es estacionaria.
summary(ur.df(pre.ts,type = "drift", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression drift
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24193 -0.06472 0.00532 0.05580 0.31630
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.029123 0.017531 1.661 0.104
## z.lag.1 -1.130232 0.232049 -4.871 1.62e-05 ***
## z.diff.lag -0.002416 0.154328 -0.016 0.988
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1103 on 42 degrees of freedom
## Multiple R-squared: 0.5684, Adjusted R-squared: 0.5479
## F-statistic: 27.66 on 2 and 42 DF, p-value: 2.167e-08
##
##
## Value of test-statistic is: -4.8707 11.8621
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau2 -3.58 -2.93 -2.60
## phi1 7.06 4.86 3.94
-4.87 < -2.93. Se rechaza H0. La serie es estacionaria.
summary(ur.df(pre.ts,type = "trend", selectlags = "AIC"))
##
## ###############################################
## # Augmented Dickey-Fuller Test Unit Root Test #
## ###############################################
##
## Test regression trend
##
##
## Call:
## lm(formula = z.diff ~ z.lag.1 + 1 + tt + z.diff.lag)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.231703 -0.075554 0.003266 0.049986 0.313382
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.057266 0.037060 1.545 0.130
## z.lag.1 -1.175503 0.238601 -4.927 1.42e-05 ***
## tt -0.001123 0.001302 -0.863 0.393
## z.diff.lag 0.021473 0.157257 0.137 0.892
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1107 on 41 degrees of freedom
## Multiple R-squared: 0.5761, Adjusted R-squared: 0.5451
## F-statistic: 18.58 on 3 and 41 DF, p-value: 9.158e-08
##
##
## Value of test-statistic is: -4.9266 8.108 12.1616
##
## Critical values for test statistics:
## 1pct 5pct 10pct
## tau3 -4.15 -3.50 -3.18
## phi2 7.02 5.13 4.31
## phi3 9.31 6.73 5.61
-4.92 < -3.50. Se rechaza H0. La serie es estacionaria.
En las tres especificaciones se rechaza H0, indicando que la rentabilidad de la acción no presenta tendencia estocástica y es estacionaria.
Modelo ARIMA
library(forecast)
m1 = auto.arima(pre.ts1, d=1, D=0, max.p = 12, max.q = 12, max.P = 1, max.Q =1,
max.d = 0, max.D = 0, stationary = FALSE, seasonal = TRUE,ic = c("bic"))
summary(m1)
## Series: pre.ts1
## ARIMA(2,1,0)
##
## Coefficients:
## ar1 ar2
## -0.8325 -0.4636
## s.e. 0.1465 0.1484
##
## sigma^2 = 0.01633: log likelihood = 24.21
## AIC=-42.42 AICc=-41.69 BIC=-37.59
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002426832 0.1226472 0.08775853 46.23088 189.2541 0.6234767
## ACF1
## Training set -0.1176001
En este caso, vemos que el mejor modelo ARIMA es (2,1,0).
#Correlogramas
#---------------------------------
par(mfrow=c(1,2))
acf(pre.ts, 20)
pacf(pre.ts, 20)
Vemos que el modelo está bien construido, puesto que tiene ruido blanco
al estar los correlogramas dentro de las líneas discontinuas o
intervarlos de confianza.
par(mfrow=c(1,1))
Modelos seleccionados
m1 = auto.arima(pre.ts1, d=1, D=0, max.p = 12, max.q = 12, max.P = 1, max.Q =1,
max.d = 0, max.D = 0, stationary = FALSE, seasonal = TRUE,ic = c("bic"))
m2 = arima(pre.ts1 , order = c(0,1,1), seasonal=list(order=c(0,1,0), period=12))
m3 = arima(pre.ts1 , order = c(1,1,1), seasonal=list(order=c(0,1,0), period=12))
summary(m1)
## Series: pre.ts1
## ARIMA(2,1,0)
##
## Coefficients:
## ar1 ar2
## -0.8325 -0.4636
## s.e. 0.1465 0.1484
##
## sigma^2 = 0.01633: log likelihood = 24.21
## AIC=-42.42 AICc=-41.69 BIC=-37.59
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002426832 0.1226472 0.08775853 46.23088 189.2541 0.6234767
## ACF1
## Training set -0.1176001
summary(m2)
##
## Call:
## arima(x = pre.ts1, order = c(0, 1, 1), seasonal = list(order = c(0, 1, 0), period = 12))
##
## Coefficients:
## ma1
## -0.9470
## s.e. 0.1773
##
## sigma^2 estimated as 0.0183: log likelihood = 13.43, aic = -22.87
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01244407 0.1097143 0.06201652 19.05786 84.65909 0.4405937
## ACF1
## Training set -0.3091419
summary(m3)
##
## Call:
## arima(x = pre.ts1, order = c(1, 1, 1), seasonal = list(order = c(0, 1, 0), period = 12))
##
## Coefficients:
## ar1 ma1
## -0.3257 -0.8405
## s.e. 0.2027 0.1482
##
## sigma^2 estimated as 0.01696: log likelihood = 14.58, aic = -23.16
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.01157357 0.1056229 0.06006425 1.143709 102.9869 0.4267239
## ACF1
## Training set -0.08788431
El modelo 2 es el mejor modelo de acuerdo al MAPE.
library(PerformanceAnalytics)
##
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
##
## legend
dif2.pre1 = m1$fitted
dif2.pre1<-ts(dif2.pre1,start = c(2016,2),frequency=12)
dif2.pre2 = fitted(m2) #se usa el comando fitted porque a diferencia del auto.arima, no es automático
dif2.pre2<-ts(dif2.pre2,start = c(2016,2),frequency=12)
dif2.pre3 = fitted(m3)
dif2.pre3<-ts(dif2.pre3,start = c(2016,2),frequency=12)
Model.Series1 = cbind(pre.ts1, dif2.pre1)
Model.Series2 = cbind(pre.ts1, dif2.pre2)
Model.Series3 = cbind(pre.ts1, dif2.pre3)
names(Model.Series1) = c('Observado', 'Estimado m1')
names(Model.Series2) = c('Observado', 'Estimado m2')
names(Model.Series3) = c('Observado', 'Estimado m3')
par(mfrow=c(3,1))
chart.TimeSeries(Model.Series1, main = "auto.arima:ARIMA(2,1,0)(0,1,0)", legend.loc = "bottomleft")
chart.TimeSeries(Model.Series2, main = "arima(0,1,1)(0,1,0)", legend.loc = "bottomleft")
chart.TimeSeries(Model.Series3, main = "arima(1,1,1)(0,1,0)", legend.loc = "bottomleft")
par(mfrow=c(1,1))
Pruebas de diagnosis
checkresiduals(m1) #Análisis para el Modelo 1
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,0)
## Q* = 6.7255, df = 6, p-value = 0.347
##
## Model df: 2. Total lags used: 8
checkresiduals(m2) #Análisis para el Modelo 2
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)(0,1,0)[12]
## Q* = 11.267, df = 7, p-value = 0.1274
##
## Model df: 1. Total lags used: 8
checkresiduals(m3) #Análisis para el Modelo 3
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(0,1,0)[12]
## Q* = 6.0477, df = 6, p-value = 0.4179
##
## Model df: 2. Total lags used: 8
library(tseries)
library(forecast)
jarque.bera.test(m1$residuals)
##
## Jarque Bera Test
##
## data: m1$residuals
## X-squared = 7.5261, df = 2, p-value = 0.02321
El p valor es 0.023 < 0.05. Rechazo H0. No hay normalidad.
jarque.bera.test(m2$residuals)
##
## Jarque Bera Test
##
## data: m2$residuals
## X-squared = 19.97, df = 2, p-value = 4.608e-05
El p valor es 0.000 < 0.05. Rechazo H0. No hay normalidad.
jarque.bera.test(m3$residuals)
##
## Jarque Bera Test
##
## data: m3$residuals
## X-squared = 18.702, df = 2, p-value = 8.687e-05
El p valor es 0.000 < 0.05. Rechazo H0. No hay normalidad.
Predicciones
Para poder predecir con el modelo empleado, inicialmente se extrajeron 10 datos para poder analizar la capacidad predictiva, ejecutamos el siguiente código:
M1_F = forecast(m1, h = 4, level = c(30,60,90))
M2_F = forecast(m2, h = 4, level = c(30,60,90))
M3_F = forecast(m3, h = 4, level = c(30,60,90))
par(mfrow=c(1,3))
plot(M1_F, main = "Forecast arima(2,1,0)(0,1,0)")
plot(M2_F, main = "Forecast arima(0,1,0)(0,1,0)")
plot(M3_F, main = "Forecast arima(0,1,1)(0,1,0)")
par(mfrow=c(1,1))
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following objects are masked from 'package:Metrics':
##
## precision, recall
pre.ts.2 = pre.ts[38:47]
pre.ts.2 <-ts(pre.ts.2, start = c(2019,3),frequency=12)
M1_F$mean<-ts(M1_F$mean,start = c(2019,3),frequency=12)
M2_F$mean<-ts(M2_F$mean,start = c(2019,3),frequency=12)
M3_F$mean<-ts(M3_F$mean,start = c(2019,3),frequency=12)
#MCE
MSE1.F = mse(M1_F$mean, pre.ts.2)
MSE2.F = mse(M2_F$mean, pre.ts.2)
MSE3.F = mse(M3_F$mean, pre.ts.2)
MSE.F = cbind(MSE1.F, MSE2.F, MSE3.F )
MSE.F
## MSE1.F MSE2.F MSE3.F
## [1,] 0.008739162 0.02197279 0.02210596
El modelo de menor MSE (Error Cuadrático Medio) es M1. Es el mejor modelo.