remove(list = ls())SESIÓN 4. Función de Transferencia y ARIMAX en R
Módelo de Función de Transferencia
Funciones de transferencia utilizando el Índice Global de la Actividad Económica
\underline{Nota}. Con este código se pueden pronósticar series de tiempo multivariadas
Análisis Preliminar
Activar librerías
#install.packages("devtools")
#devtools::install_github("r-lib/conflicted")
library(conflicted)
library(readxl)
library(dplyr)
library(stats)
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats 1.0.0 ✔ readr 2.1.5
✔ ggplot2 3.5.1 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.1
library(tseries)Registered S3 method overwritten by 'quantmod':
method from
as.zoo.data.frame zoo
library(urca)
library(forecast)Importar datos
#getwd()
sales_x <- read_excel("dataseries.xlsx")
sales_x <- ts(sales_x, start = c(2014,01), frequency = 12)
head(sales_x) sales igae
Jan 2014 1290114 98.69242
Feb 2014 1035753 96.92693
Mar 2014 1239129 101.10788
Apr 2014 1264683 100.32346
May 2014 1133499 104.89845
Jun 2014 1215006 102.80824
ts.plot(sales_x, col = c(1, 2), lwd = c(1, 2)) # la roja IGAEGráfico de datos en niveles
ts.plot(scale(sales_x), col = c(1, 2), lwd = c(1, 2)) # la roja IGAEPrueba de estacionariedad para las ventas
adf.test(sales_x[,1])
Augmented Dickey-Fuller Test
data: sales_x[, 1]
Dickey-Fuller = -3.3512, Lag order = 4, p-value = 0.07011
alternative hypothesis: stationary
ur.kpss(sales_x[,1]) %>% summary()
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 3 lags.
Value of test-statistic is: 1.5454
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
- Los datos en niveles no son estacionarios
ur.kpss(diff(sales_x[,1])) %>% summary()
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 3 lags.
Value of test-statistic is: 0.0281
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
- Los datos diferenciados son estacionarios
Prueba de estacionariedad para el IGAE
ur.kpss(sales_x[,2]) %>% summary()
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 3 lags.
Value of test-statistic is: 1.7413
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
- Los datos en niveles no son estacionarios
ur.kpss(diff(sales_x[,2])) %>% summary()
#######################
# KPSS Unit Root Test #
#######################
Test is of type: mu with 3 lags.
Value of test-statistic is: 0.0377
Critical value for a significance level of:
10pct 5pct 2.5pct 1pct
critical values 0.347 0.463 0.574 0.739
- Los datos diferenciados son estacionarios
Modelo ARIMAX
NOTA.
Donde cada dato significa:
- AR representa una regresión de la variable con otros valores pasados de ella misma
- MA representa la media móvil ponderada de los errores pasados del pronóstico
- X representa una variable o un conjunto de variables explicativas
Combinando los dos modelos:
- p = parte autorregresiva
- d = grado de diferenciación
- q = parte media movil
library(forecast)
tsdisplay(sales_x[,1])tsdisplay(diff(sales_x[,1]))mod=arima(sales_x[,1],xreg=sales_x[,2],order = c(1,1,2),seasonal = list(order=c(1,0,1)))
mod
Call:
arima(x = sales_x[, 1], order = c(1, 1, 2), seasonal = list(order = c(1, 0,
1)), xreg = sales_x[, 2])
Coefficients:
ar1 ma1 ma2 sar1 sma1 sales_x[, 2]
-0.2104 -0.5070 -0.0664 0.6822 -0.3585 -12469.64
s.e. 0.7300 0.7263 0.5149 0.2644 0.3465 11414.03
sigma^2 estimated as 7.693e+10: log likelihood = -1033.97, aic = 2081.93
plot(mod$residuals)nsdiffs(sales_x[,1])[1] 0
tsdiag(mod)Box.test(mod$residuals)
Box-Pierce test
data: mod$residuals
X-squared = 0.0066918, df = 1, p-value = 0.9348
checkresiduals(mod)
Ljung-Box test
data: Residuals from ARIMA(1,1,2)(1,0,1)[12]
Q* = 6.3055, df = 10, p-value = 0.789
Model df: 5. Total lags used: 15
Selección automática del modelo ARIMA
modelo_x <- auto.arima(sales_x[,1], xreg = sales_x[,2], seasonal=T, stepwise=T, approximation=T)
summary(modelo_x)Series: sales_x[, 1]
Regression with ARIMA(0,1,1)(0,0,2)[12] errors
Coefficients:
ma1 sma1 sma2 xreg
-0.6906 0.3294 0.2178 -13458.06
s.e. 0.1030 0.1179 0.1284 12046.14
sigma^2 = 8.303e+10: log likelihood = -1034.58
AIC=2079.15 AICc=2080.03 BIC=2090.67
Training set error measures:
ME RMSE MAE MPE MAPE MASE
Training set 25449.17 278377.7 179495.1 -0.2161992 10.65046 0.666288
ACF1
Training set -0.04421421
checkresiduals(modelo_x)
Ljung-Box test
data: Residuals from Regression with ARIMA(0,1,1)(0,0,2)[12] errors
Q* = 6.6078, df = 12, p-value = 0.8824
Model df: 3. Total lags used: 15
- Los residuales no están correlacionados
Pronóstico
autoplot(forecast(modelo_x, xreg = rep(mean(sales_x[,2]),36), h=36))autoplot(forecast(modelo_x, xreg = rep(mean(sales_x[,2]),12), h=12))pron=forecast(modelo_x, xreg = rep(mean(sales_x[,2]),12), h=12)
pron Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Apr 2020 2099024 1729745 2468303 1534261 2663787
May 2020 1960469 1573919 2347019 1369292 2551646
Jun 2020 2021897 1618816 2424979 1405437 2638357
Jul 2020 2067965 1649004 2486927 1427219 2708711
Aug 2020 2090937 1656676 2525198 1426792 2755082
Sep 2020 2032742 1583703 2481781 1345996 2719489
Oct 2020 2218855 1755508 2682202 1510227 2927483
Nov 2020 2196921 1719696 2674147 1467068 2926775
Dec 2020 1797502 1306790 2288214 1047023 2547981
Jan 2021 2084239 1580401 2588076 1313686 2854792
Feb 2021 2014117 1497487 2530746 1224000 2804233
Mar 2021 2063566 1534454 2592679 1254359 2872774
Práctica. Análisis con módelos económicos
Activar librerías
library(readxl)
library(tidyverse)
library(tseries)
library(urca)
library(forecast)
library(zoo)Importando datos de precios de cierre de Starbucks y Microsoft
library(readr)
sbux.df <- read_csv("sbuxPrices.csv")Rows: 181 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Date, Adj.Close
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(sbux.df)sbux.ts = ts(data=sbux.df$Adj.Close, frequency = 12,
start=c(1993,3), end=c(2008,3))
class(sbux.ts) [1] "ts"
msft.df <- read_csv("msftPrices.csv")Rows: 181 Columns: 2
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): Date, Adj.Close
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(sbux.df)
msft.ts = ts(data=msft.df$Adj.Close, frequency = 12,
start=c(1993,3), end=c(2008,3))Fechas y frecuencia de la serie
sbux.ts Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
1993 1.13 1.15 1.43 1.46 1.41 1.44 1.63 1.59 1.32 1.32
1994 1.43 1.38 1.45 1.77 1.69 1.5 1.72 1.68 1.37 1.61 1.59 1.63
1995 1.43 1.42 1.43 1.4 1.73 2.12 2.22 2.38 2.25 2.33 2.51 2.5
1996 1.99 02.09 2.77 3.22 3.22 3.36 03.09 3.89 3.92 3.86 4.12 3.4
1997 04.07 4 3.52 3.55 3.74 4.63 4.87 4.87 4.97 3.92 4.15 4.56
1998 4.35 4.7 5.39 5.72 5.71 6.35 4.98 3.75 4.3 5.16 5.48 6.67
1999 6.19 6.29 6.67 8.78 8.77 8.93 5.53 5.44 5.89 6.46 6.31 5.76
2000 7.61 8.35 10.65 7.19 08.08 09.08 8.91 8.71 9.52 10.62 10.83 10.52
2001 11.87 11.32 10.09 9.2 9.28 10.94 8.58 08.02 7.1 8.14 8.42 09.06
2002 11.3 10.94 11 10.85 11.54 11.81 9.33 9.56 9.82 11.33 10.34 9.69
2003 10.8 11.15 12.25 11.18 11.73 11.67 12.99 13.5 13.69 15.02 15.3 15.77
2004 17.41 17.78 18.01 18.5 19.3 20.68 22.34 20.56 21.61 25.14 26.75 29.65
2005 25.67 24.63 24.56 23.54 26.05 24.56 24.98 23.31 23.82 26.89 28.95 28.54
2006 30.14 34.54 35.78 35.44 33.9 35.91 32.55 29.49 32.38 35.9 33.56 33.68
2007 33.22 29.38 29.82 29.5 27.4 24.95 25.37 26.2 24.91 25.37 22.24 19.46
2008 17.98 17.1 16.64
start(sbux.ts) [1] 1993 3
end(sbux.ts) [1] 2008 3
frequency(sbux.ts) [1] 12
Subconjunto de la serie de tiempo
tmp = sbux.ts[1:5]
class(tmp)[1] "character"
tmp = window(sbux.ts, start=c(1993, 3), end=c(1993,8))
class(tmp)[1] "ts"
Combinando dos series (dos columnas)
sbuxmsft.ts = cbind(sbux.ts, msft.ts)
class(sbuxmsft.ts) [1] "mts" "ts" "matrix" "array"
- mts: multiple time series
Seleccionando las primeras 5 filas
window(sbuxmsft.ts, start=c(1993, 3), end=c(1993,7)) sbux.ts msft.ts
Mar 1993 1.13 1.849.489
Apr 1993 1.15 1.859.486
May 1993 1.43 1.794.505
Jun 1993 1.46 1.829.494
Jul 1993 1.41 1.794.505
Plot objeto ts
plot(sbux.ts, col="blue", lwd=2, ylab="Adjusted close",
main="Monthly closing price of SBUX") Dibujar un subconjunto (acercar)
library(TSstudio)
autoplot(window(sbux.ts, start=c(2000,3), end=c(2008,3)),
ylab="Adjusted close",col="blue", lwd=2,
main="Monthly closing price of SBUX") Plot para múltiples columnas
Caso 1. En gráficos diferentes
autoplot(sbuxmsft.ts) Warning in data.frame(y = as.numeric(c(object)), x =
rep(as.numeric(time(object)), : NAs introducidos por coerción
Caso 2. En el mismo gráfico
plot(sbuxmsft.ts, plot.type="single", lwd=2, ylim=c(0,180),
main="Monthly closing prices on SBUX and MSFT",
ylab="Adjusted close price",
col=c("blue", "red"), lty=1:2)Warning in xy.coords(x = matrix(rep.int(tx, k), ncol = k), y = x, log = log, :
NAs introducidos por coerción
Warning in xy.coords(x, y): NAs introducidos por coerción
legend(1994, 35, legend=c("SBUX","MSFT"), col=c("blue", "red"),
lty=1:2)- Activar librería zoo
library(zoo) Fecha
td = seq(as.Date("1993/3/1"), as.Date("2008/3/1"), "months")
class(td) [1] "Date"
head(td) [1] "1993-03-01" "1993-04-01" "1993-05-01" "1993-06-01" "1993-07-01"
[6] "1993-08-01"
Alternativa
td2 = as.Date(sbux.df$Date, format="%m/%d/%Y")
head(td2) [1] "1993-03-31" "1993-04-01" "1993-05-03" "1993-06-01" "1993-07-01"
[6] "1993-08-02"
Combinando el índice de tiempo a las dos series de precios
sbux.z = zoo(x=sbux.df$Adj.Close, order.by=td)
msft.z = zoo(x=msft.df$Adj.Close, order.by=td) class(sbux.z) [1] "zoo"
str(sbux.z) 'zoo' series from 1993-03-01 to 2008-03-01
Data: chr [1:181] "1.13" "1.15" "1.43" "1.46" "1.41" "1.44" "1.63" "1.59" "1.32" ...
Index: Date[1:181], format: "1993-03-01" "1993-04-01" "1993-05-01" "1993-06-01" "1993-07-01" ...
head(sbux.z) 1993-03-01 1993-04-01 1993-05-01 1993-06-01 1993-07-01 1993-08-01
1.13 1.15 1.43 1.46 1.41 1.44
Extrayendo el índice de tiempo y los datos
index(sbux.z) [1] "1993-03-01" "1993-04-01" "1993-05-01" "1993-06-01" "1993-07-01"
[6] "1993-08-01" "1993-09-01" "1993-10-01" "1993-11-01" "1993-12-01"
[11] "1994-01-01" "1994-02-01" "1994-03-01" "1994-04-01" "1994-05-01"
[16] "1994-06-01" "1994-07-01" "1994-08-01" "1994-09-01" "1994-10-01"
[21] "1994-11-01" "1994-12-01" "1995-01-01" "1995-02-01" "1995-03-01"
[26] "1995-04-01" "1995-05-01" "1995-06-01" "1995-07-01" "1995-08-01"
[31] "1995-09-01" "1995-10-01" "1995-11-01" "1995-12-01" "1996-01-01"
[36] "1996-02-01" "1996-03-01" "1996-04-01" "1996-05-01" "1996-06-01"
[41] "1996-07-01" "1996-08-01" "1996-09-01" "1996-10-01" "1996-11-01"
[46] "1996-12-01" "1997-01-01" "1997-02-01" "1997-03-01" "1997-04-01"
[51] "1997-05-01" "1997-06-01" "1997-07-01" "1997-08-01" "1997-09-01"
[56] "1997-10-01" "1997-11-01" "1997-12-01" "1998-01-01" "1998-02-01"
[61] "1998-03-01" "1998-04-01" "1998-05-01" "1998-06-01" "1998-07-01"
[66] "1998-08-01" "1998-09-01" "1998-10-01" "1998-11-01" "1998-12-01"
[71] "1999-01-01" "1999-02-01" "1999-03-01" "1999-04-01" "1999-05-01"
[76] "1999-06-01" "1999-07-01" "1999-08-01" "1999-09-01" "1999-10-01"
[81] "1999-11-01" "1999-12-01" "2000-01-01" "2000-02-01" "2000-03-01"
[86] "2000-04-01" "2000-05-01" "2000-06-01" "2000-07-01" "2000-08-01"
[91] "2000-09-01" "2000-10-01" "2000-11-01" "2000-12-01" "2001-01-01"
[96] "2001-02-01" "2001-03-01" "2001-04-01" "2001-05-01" "2001-06-01"
[101] "2001-07-01" "2001-08-01" "2001-09-01" "2001-10-01" "2001-11-01"
[106] "2001-12-01" "2002-01-01" "2002-02-01" "2002-03-01" "2002-04-01"
[111] "2002-05-01" "2002-06-01" "2002-07-01" "2002-08-01" "2002-09-01"
[116] "2002-10-01" "2002-11-01" "2002-12-01" "2003-01-01" "2003-02-01"
[121] "2003-03-01" "2003-04-01" "2003-05-01" "2003-06-01" "2003-07-01"
[126] "2003-08-01" "2003-09-01" "2003-10-01" "2003-11-01" "2003-12-01"
[131] "2004-01-01" "2004-02-01" "2004-03-01" "2004-04-01" "2004-05-01"
[136] "2004-06-01" "2004-07-01" "2004-08-01" "2004-09-01" "2004-10-01"
[141] "2004-11-01" "2004-12-01" "2005-01-01" "2005-02-01" "2005-03-01"
[146] "2005-04-01" "2005-05-01" "2005-06-01" "2005-07-01" "2005-08-01"
[151] "2005-09-01" "2005-10-01" "2005-11-01" "2005-12-01" "2006-01-01"
[156] "2006-02-01" "2006-03-01" "2006-04-01" "2006-05-01" "2006-06-01"
[161] "2006-07-01" "2006-08-01" "2006-09-01" "2006-10-01" "2006-11-01"
[166] "2006-12-01" "2007-01-01" "2007-02-01" "2007-03-01" "2007-04-01"
[171] "2007-05-01" "2007-06-01" "2007-07-01" "2007-08-01" "2007-09-01"
[176] "2007-10-01" "2007-11-01" "2007-12-01" "2008-01-01" "2008-02-01"
[181] "2008-03-01"
coredata(sbux.z) [1] "1.13" "1.15" "1.43" "1.46" "1.41" "1.44" "1.63" "1.59" "1.32"
[10] "1.32" "1.43" "1.38" "1.45" "1.77" "1.69" "1.5" "1.72" "1.68"
[19] "1.37" "1.61" "1.59" "1.63" "1.43" "1.42" "1.43" "1.4" "1.73"
[28] "2.12" "2.22" "2.38" "2.25" "2.33" "2.51" "2.5" "1.99" "02.09"
[37] "2.77" "3.22" "3.22" "3.36" "03.09" "3.89" "3.92" "3.86" "4.12"
[46] "3.4" "04.07" "4" "3.52" "3.55" "3.74" "4.63" "4.87" "4.87"
[55] "4.97" "3.92" "4.15" "4.56" "4.35" "4.7" "5.39" "5.72" "5.71"
[64] "6.35" "4.98" "3.75" "4.3" "5.16" "5.48" "6.67" "6.19" "6.29"
[73] "6.67" "8.78" "8.77" "8.93" "5.53" "5.44" "5.89" "6.46" "6.31"
[82] "5.76" "7.61" "8.35" "10.65" "7.19" "08.08" "09.08" "8.91" "8.71"
[91] "9.52" "10.62" "10.83" "10.52" "11.87" "11.32" "10.09" "9.2" "9.28"
[100] "10.94" "8.58" "08.02" "7.1" "8.14" "8.42" "09.06" "11.3" "10.94"
[109] "11" "10.85" "11.54" "11.81" "9.33" "9.56" "9.82" "11.33" "10.34"
[118] "9.69" "10.8" "11.15" "12.25" "11.18" "11.73" "11.67" "12.99" "13.5"
[127] "13.69" "15.02" "15.3" "15.77" "17.41" "17.78" "18.01" "18.5" "19.3"
[136] "20.68" "22.34" "20.56" "21.61" "25.14" "26.75" "29.65" "25.67" "24.63"
[145] "24.56" "23.54" "26.05" "24.56" "24.98" "23.31" "23.82" "26.89" "28.95"
[154] "28.54" "30.14" "34.54" "35.78" "35.44" "33.9" "35.91" "32.55" "29.49"
[163] "32.38" "35.9" "33.56" "33.68" "33.22" "29.38" "29.82" "29.5" "27.4"
[172] "24.95" "25.37" "26.2" "24.91" "25.37" "22.24" "19.46" "17.98" "17.1"
[181] "16.64"
Start and End
start(sbux.z) [1] "1993-03-01"
end(sbux.z)[1] "2008-03-01"
- Ventaja de zoo: extraer subconjunto indexando con las fechas
sbux.z[as.Date(c("2000/3/1", "2003/3/1"))]2000-03-01 2003-03-01
10.65 12.25
- window() es una alternativa funcional
window(sbux.z, start=as.Date("2000/3/1"), end=as.Date("2003/3/1")) 2000-03-01 2000-04-01 2000-05-01 2000-06-01 2000-07-01 2000-08-01 2000-09-01
10.65 7.19 08.08 09.08 8.91 8.71 9.52
2000-10-01 2000-11-01 2000-12-01 2001-01-01 2001-02-01 2001-03-01 2001-04-01
10.62 10.83 10.52 11.87 11.32 10.09 9.2
2001-05-01 2001-06-01 2001-07-01 2001-08-01 2001-09-01 2001-10-01 2001-11-01
9.28 10.94 8.58 08.02 7.1 8.14 8.42
2001-12-01 2002-01-01 2002-02-01 2002-03-01 2002-04-01 2002-05-01 2002-06-01
09.06 11.3 10.94 11 10.85 11.54 11.81
2002-07-01 2002-08-01 2002-09-01 2002-10-01 2002-11-01 2002-12-01 2003-01-01
9.33 9.56 9.82 11.33 10.34 9.69 10.8
2003-02-01 2003-03-01
11.15 12.25
Combinando dos series
sbuxmsft.z = cbind(sbux.z, msft.z)
class(sbuxmsft.z) [1] "zoo"
head(sbuxmsft.z) sbux.z msft.z
1993-03-01 1.13 1.849.489
1993-04-01 1.15 1.859.486
1993-05-01 1.43 1.794.505
1993-06-01 1.46 1.829.494
1993-07-01 1.41 1.794.505
1993-08-01 1.44 1.804.501
Plot
plot(sbux.z, col="blue", lty=1, lwd=2, ylim=c(0,50),main="Monthly closing prices of SBUX and MFST",
ylab="Adjusted close price")
lines(msft.z, col="red", lty=2, lwd=2)Warning in xy.coords(x, y): NAs introducidos por coerción
legend(x="topleft", legend=c("SBUX","MSFT"), col=c("blue","red"),
lty=1:2)Alternativa (Las dos a la vez)
plot(sbuxmsft.z, plot.type="single", col=c("blue","red"), lty=1:2,
lwd=2,ylim=c(0,50),main="Monthly closing prices of SBUX and MFST",
ylab="Adjusted close price")Warning in xy.coords(x, y): NAs introducidos por coerción
legend(x="topleft", legend=c("SBUX","MSFT"), col=c("blue","red"),
lty=1:2) Importar datos directamente como objeto zoo
sbux.z2 = read.zoo("sbuxPrices.csv",
format="%m/%d/%Y", sep=",", header=T) Importar datos de Yahoo Finance
library(tseries)
SBUX.z = get.hist.quote(instrument="sbux", start="1993-03-01",
end="2020-06-01", quote="AdjClose",
provider="yahoo", origin="1970-01-01",
compression="d", retclass="zoo") time series ends 2020-05-29
View(SBUX.z)
MSFT.z = get.hist.quote(instrument="msft", start="1993-03-01",
end="2020-06-01", quote="AdjClose",
provider="yahoo", origin="1970-01-01",
compression="d", retclass="zoo") time series ends 2020-05-29
Plot
plot(cbind(SBUX.z,MSFT.z), plot.type="single", col=c("blue","red"), lty=1:2,
lwd=2,main="Monthly closing prices of SBUX and MFST",
ylab="Adjusted close price")
legend(x="topleft", legend=c("SBUX","MSFT"), col=c("blue","red"),
lty=1:2)Librería dygraphs
library(dygraphs)
dygraph(SBUX.z, "Monthly closing prices of SBUX")dygraph(cbind(SBUX.z,MSFT.z), "Monthly closing prices of SBUX and MFST")- 3 Datos diarios
Generamos datos aleatorios
datos <- rnorm(78, 0, 10)
fechas <- seq(as.Date("2020-03-06"), as.Date("2020-05-22"), by = "day")
as.numeric(format(fechas[1], "%j"))[1] 66
miserie.ts<-ts(datos,start=c(2016,66), frequency=365)
plot(miserie.ts)library(zoo)
miserie.z=zoo(datos, fechas)
plot(miserie.z)dygraph(miserie.z)