#import library
library(TSstudio)
En este capitulo se habla de como a el pasar del tiempo hay una
diferencia de pasajeros , usando funciones como la familia de modelos
ARIMA, que es uno de los enfoques centrales
Pronósticos a partir de datos de series de tiempo. Está maneja series
temporales tanto estacionales como no estacionales agregar información o
cambiar los componentes del modelo. Además, hemos visto aplicaciones
Diagramas ACF y PACF para identificar el tipo de proceso (por ejemplo,
AR, MA, ARMA etc) y su orden. Aunque el conocimiento del proceso de
ajuste de los modelos ARIMA es esencial, en la práctica, p. A medida
que aumenta el número de conjuntos de pronósticos, es posible que desee
automatizar este proceso. Él La función auto.arima es uno de los
enfoques más comunes de R para pronosticar con ARIMA. modelos porque
puede escalar cuando se tienen que predecir decenas de series. Por
último, pero no menos importante, vimos aplicaciones de regresión lineal
con el modelo de error ARIMA
para extraer la información faltante de los residuos del modelo.
set.seed(12345)
stationary_ts <- arima.sim(model = list(order = c(1,0,0),
ar = 0.5 ),
n = 500)
ts_plot(stationary_ts,
title = "Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")
3.se ve un aumento en los valores a el pasar de los indices , ts_ plot significalas series pueden tener diferentes bases de tiempo, pero deben tener la misma frecuencia
set.seed(12345)
non_stationary_ts <- arima.sim(model = list(order = c(1,1,0),ar = 0.3 ),n = 500)
ts_plot(non_stationary_ts,
title = "Non-Stationary Time Series",
Ytitle = "Value",
Xtitle = "Index")
data(AirPassengers)
ts_plot(AirPassengers,
title = "Monthly Airline Passenger Numbers 1949-1960",
Ytitle = "Thousands of Passengers",
Xtitle = "Year")
. Diferencia de miles de pasajeros en los anos transcurridos primera diferenciacion. donde va aumentado su intensidad con el pasar del tiempo
ts_plot(diff(AirPassengers, lag = 1),
title = "AirPassengers Series - First Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")
6.Diferencia de miles de pasajeros en los anos transcurridos primera y temporada diferenciacion , va por el cero y en 1960 es cuando mas pasajeros hubo .
ts_plot(diff(diff(AirPassengers, lag = 1), 12),
title = "AirPassengers Series - First and Seasonal Differencing",
Xtitle = "Year",
Ytitle = "Differencing of Thousands of Passengers")
7.Primera diferencia con transformacion logarÃtmica ano 1958 hubo menos flujo de pasajeros
ts_plot(diff(log(AirPassengers), lag = 1),
title = "AirPassengers Series - First Differencing with Log Transformation",
Xtitle = "Year",
Ytitle = "Differencing/Log of Thousands of Passengers")
set.seed(12345)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
p1 <- plot_ly()
p2 <- plot_ly()
for(i in 1:20){
rm <- NULL
rw <- arima.sim(model = list(order = c(0, 1, 0)), n = 500)
p1 <- p1 %>% add_lines(x = time(rw), y = as.numeric(rw))
p2 <- p2 %>% add_lines(x = time(diff(rw)), y = as.numeric(diff(rw)))
}
p1 %>% layout(title = "Simulate Random Walk",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()
p2 %>% layout(title = "Simulate Random Walk with First-Order Differencing",
yaxis = list(title = "Value"),
xaxis = list(title = "Index")) %>%
hide_legend()
set.seed(12345)
ar2 <- arima.sim(model = list(order = c(2,0,0),
ar = c(0.9, -0.3)),
n = 500)
ts_plot(ar2,
title = "Simulate AR(2) Series",
Ytitle = "Value",
Xtitle = "Index")
13.La secuencia de entrada es un proceso AR de segundo orden y proporciona una estimación bastante cercana Valor del coeficiente real, #1 = 0-88, gis = -0.20 (diferente del coeficiente real Valor del coeficiente, #1 = 0,1, Udy,
md_ar <- ar(ar2)
md_ar
##
## Call:
## ar(x = ar2)
##
## Coefficients:
## 1 2
## 0.8851 -0.2900
##
## Order selected 2 sigma^2 estimated as 1.049
14.se usaron las funciones acf y pacf para crear las dos graficas de series temporales de autorregulacion y si la salida de la ACF disminute gradualmente y la salida de la Partial ACF se corta en la variable p nos indica que es un proceso AR(p)
par(mfrow=c(1,2))
acf(ar2)
pacf(ar2)
15.
set.seed(12345)
ma2 <- arima.sim(model = list(order = c(0, 0, 2),
ma = c(0.5, -0.3)),
n = 500)
ts_plot(ma2,
title = "Simulate MA(2) Series",
Ytitle = "Value",
Xtitle = "Index")
md_ma <- arima(ma2, order = c(0,0,2), method = "ML")
md_ma
##
## Call:
## arima(x = ma2, order = c(0, 0, 2), method = "ML")
##
## Coefficients:
## ma1 ma2 intercept
## 0.530 -0.3454 0.0875
## s.e. 0.041 0.0406 0.0525
##
## sigma^2 estimated as 0.9829: log likelihood = -705.81, aic = 1419.62
18.La serie ma2 es un proceso MAA(2) , ya que la grafica ACF se corta en el segundo tramo y el PACF disminuye
par(mfrow=c(1,2))
acf(ma2)
pacf(ma2)
set.seed(12345)
arma <- arima.sim(model = list(order(1,0,2), ar = c(0.7), ma = c(0.5,-0.3)), n = 500)
20.el indice mas el promedio es el indice 14 casi lleganso a 6 y el menor 482 con mas de -4
ts_plot(arma,
title = "Simulate ARMA(1,2) Series",
Ytitle = "Value",
Xtitle = "Index")
arma_md <- arima(arma, order = c(1,0,2))
arma_md
##
## Call:
## arima(x = arma, order = c(1, 0, 2))
##
## Coefficients:
## ar1 ma1 ma2 intercept
## 0.7439 0.4785 -0.3954 0.2853
## s.e. 0.0657 0.0878 0.0849 0.1891
##
## sigma^2 estimated as 1.01: log likelihood = -713.18, aic = 1436.36
par(mfrow=c(1,2))
acf(arma)
pacf(arma)
23.
arima(arma, order = c(1, 0, 1), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(1, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1
## 0.4144 0.8864
## s.e. 0.0432 0.0248
##
## sigma^2 estimated as 1.051: log likelihood = -723, aic = 1452
arima(arma, order = c(2, 0, 1), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(2, 0, 1), include.mean = FALSE)
##
## Coefficients:
## ar1 ar2 ma1
## 0.3136 0.1918 0.9227
## s.e. 0.0486 0.0484 0.0183
##
## sigma^2 estimated as 1.019: log likelihood = -715.26, aic = 1438.52
arima(arma, order = c(1, 0, 2), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(1, 0, 2), include.mean = FALSE)
##
## Coefficients:
## ar1 ma1 ma2
## 0.7602 0.4654 -0.4079
## s.e. 0.0626 0.0858 0.0832
##
## sigma^2 estimated as 1.014: log likelihood = -714.27, aic = 1436.54
arima(arma, order = c(2, 0, 2), include.mean = FALSE)
##
## Call:
## arima(x = arma, order = c(2, 0, 2), include.mean = FALSE)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 0.7239 0.0194 0.4997 -0.3783
## s.e. 0.2458 0.1257 0.2427 0.2134
##
## sigma^2 estimated as 1.014: log likelihood = -714.26, aic = 1438.51
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
best_arma <- arima(arma, order = c(1, 0, 2), include.mean = FALSE)
checkresiduals(best_arma)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with zero mean
## Q* = 5.3129, df = 7, p-value = 0.6218
##
## Model df: 3. Total lags used: 10
ar_fc <- forecast(md_ar, h = 100)
plot_forecast(ar_fc,
title = "Forecast AR(2) Model",
Ytitle = "Value",
Xtitle = "Year")
data(Coffee_Prices)
robusta_price <- window(Coffee_Prices[,1], start = c(2000, 1))
31.se puede decir que en el 2008 y en el 2011 el precio en estados unidos estuvo mas elevado
ts_plot(robusta_price,
title = "The Robusta Coffee Monthly Prices",
Ytitle = "Price in USD",
Xtitle = "Year")
acf(robusta_price)
33.
robusta_price_d1 <- diff(robusta_price)
par(mfrow=c(1,2))
acf(robusta_price_d1)
pacf(robusta_price_d1)
robusta_md <- arima(robusta_price, order = c(1, 1, 0))
robusta_md
##
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.2780
## s.e. 0.0647
##
## sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
summary(robusta_md)
##
## Call:
## arima(x = robusta_price, order = c(1, 1, 0))
##
## Coefficients:
## ar1
## 0.2780
## s.e. 0.0647
##
## sigma^2 estimated as 0.007142: log likelihood = 231.38, aic = -458.76
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.002595604 0.08432096 0.06494772 0.08104715 4.254984 1.001542
## ACF1
## Training set 0.001526295
checkresiduals(robusta_md)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 26.896, df = 23, p-value = 0.2604
##
## Model df: 1. Total lags used: 24
data(USgas)
ts_plot(USgas,
title = "US Monthly Natural Gas consumption",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
USgas_split <- ts_split(USgas, sample.out = 12)
train <- USgas_split$train
test <- USgas_split$test
41.el ACF es una funcion que se utiliza para medir la correlacion entre los valores de una serie de tiempo en diferentes momentos en este caso se usa para identificar patrones o tendencias ,
par(mfrow=c(1,2))
acf(train, lag.max = 60)
pacf(train, lag.max = 60)
42.en el 2002 disminuyo pero en el 2018 fue donde mas subio el gas
USgas_d12 <- diff(train, 12)
ts_plot(USgas_d12,
title = "US Monthly Natural Gas consumption - First Seasonal Difference",
Ytitle = "Billion Cubic Feet (First Difference)",
Xtitle = "Year")
USgas_d12_1 <- diff(diff(USgas_d12, 1))
ts_plot(USgas_d12_1,
title = "US Monthly Natural Gas consumption - First Seasonal and Non-Seasonal Differencing",
Ytitle = "Billion Cubic Feet (Difference)",
Xtitle = "Year")
par(mfrow=c(1,2))
acf(USgas_d12_1, lag.max = 60)
pacf(USgas_d12_1, lag.max = 60)
45.
p <- q <- P <- Q <- 0:2
arima_grid <- expand.grid(p,q,P,Q)
names(arima_grid) <- c("p", "q", "P", "Q")
arima_grid$d <- 1
arima_grid$D <- 1
arima_grid$k <- rowSums(arima_grid)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
arima_grid <- arima_grid %>% filter(k <= 7)
arima_search <- lapply(1:nrow(arima_grid), function(i){
md <- NULL
# md <- arima(train, order = c(arima_grid$p[i], 1, arima_grid$q[i]),
# seasonal = list(order = c(arima_grid$P[i], 1, arima_grid$Q[i])))
# results <- data.frame(p = arima_grid$p[i], d = 1, q = arima_grid$q[i],
# P = arima_grid$P[i], D = 1, Q = arima_grid$Q[i],
# AIC = md$aic)
})
# %>% bind_rows() %>% arrange(AIC)
arima_search <- data.frame(p=rep(0,81), d=rep(0,81), q=rep(0,81))
head(arima_search)
## p d q
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
USgas_best_md <- arima(train, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
USgas_best_md
##
## Call:
## arima(x = train, order = c(1, 1, 1), seasonal = list(order = c(2, 1, 1)))
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.4247 -0.9180 0.0132 -0.2639 -0.7449
## s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
##
## sigma^2 estimated as 10160: log likelihood = -1292.96, aic = 2597.91
library(forecast)
USgas_test_fc <- forecast(USgas_best_md, h = 12)
accuracy(USgas_test_fc, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 6.081099 97.85701 73.36854 0.1298714 3.517097 0.6371821
## Test set 42.211253 104.79281 83.09943 1.4913412 3.314280 0.7216918
## ACF1 Theil's U
## Training set 0.004565602 NA
## Test set -0.049999868 0.3469228
test_forecast(USgas,
forecast.obj = USgas_test_fc,
test = test)
final_md <- arima(USgas, order = c(1,1,1), seasonal = list(order = c(2,1,1)))
checkresiduals(final_md)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,1)(2,1,1)[12]
## Q* = 30.173, df = 19, p-value = 0.04964
##
## Model df: 5. Total lags used: 24
USgas_fc <- forecast(final_md, h = 12)
plot_forecast(USgas_fc,
title = "US Natural Gas Consumption - Forecast",
Ytitle = "Billion Cubic Feet",
Xtitle = "Year")
USgas_auto_md1 <- auto.arima(train)
USgas_auto_md1
## Series: train
## ARIMA(2,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ar2 ma1 sar1 sar2 sma1
## 0.4301 -0.0372 -0.9098 0.0117 -0.2673 -0.7431
## s.e. 0.0794 0.0741 0.0452 0.0887 0.0830 0.0751
##
## sigma^2 = 10446: log likelihood = -1292.83
## AIC=2599.67 AICc=2600.22 BIC=2623.2
USgas_auto_md2 <- auto.arima(train,
max.order = 5,
D = 1,
d = 1,
ic = "aic",
stepwise = FALSE,
approximation = FALSE)
USgas_auto_md2
## Series: train
## ARIMA(1,1,1)(2,1,1)[12]
##
## Coefficients:
## ar1 ma1 sar1 sar2 sma1
## 0.4247 -0.9180 0.0132 -0.2639 -0.7449
## s.e. 0.0770 0.0376 0.0894 0.0834 0.0753
##
## sigma^2 = 10405: log likelihood = -1292.96
## AIC=2597.91 AICc=2598.32 BIC=2618.08
df <- ts_to_prophet(AirPassengers) %>% setNames(c("date", "y"))
df$lag12 <- dplyr::lag(df$y, n = 12)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
df$month <- factor(month(df$date, label = TRUE), ordered = FALSE)
df$trend <- 1:nrow(df)
par <- ts_split(ts.obj = AirPassengers, sample.out = 12)
train <- par$train
test <- par$test
train_df <- df[1:(nrow(df) - 12), ]
test_df <- df[(nrow(df) - 12 + 1):nrow(df), ]
md1 <- tslm(train ~ season + trend + lag12, data = train_df)
checkresiduals(md1)
##
## Breusch-Godfrey test for serial correlation of order up to 24
##
## data: Residuals from Linear regression model
## LM test = 83.147, df = 24, p-value = 1.903e-08
md2 <- auto.arima(train,
xreg = cbind(model.matrix(~ month,train_df)[,-1],
train_df$trend,
train_df$lag12),
seasonal = TRUE,
stepwise = FALSE,
approximation = FALSE)
summary(md2)
## Series: train
## Regression with ARIMA(2,0,0)(2,0,0)[12] errors
##
## Coefficients:
## ar1 ar2 sar1 sar2 monthfeb monthmar monthabr monthmay
## 0.5849 0.3056 -0.4421 -0.2063 -2.7523 0.8231 0.2066 2.8279
## s.e. 0.0897 0.0903 0.1050 0.1060 1.8417 2.3248 2.4162 2.5560
## monthjun monthjul monthago monthsept monthoct monthnov monthdic
## 6.6057 11.2337 12.1909 3.8269 0.6350 -2.2723 -0.9918
## s.e. 3.2916 4.2324 4.1198 2.9243 2.3405 2.4211 1.9172
##
## 0.2726 1.0244
## s.e. 0.1367 0.0426
##
## sigma^2 = 72.82: log likelihood = -426.93
## AIC=889.86 AICc=896.63 BIC=940.04
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4117148 8.353629 6.397538 0.2571543 2.549682 0.2100998
## ACF1
## Training set 0.005966712
checkresiduals(md2)
##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(2,0,0)(2,0,0)[12] errors
## Q* = 25.816, df = 20, p-value = 0.172
##
## Model df: 4. Total lags used: 24
fc1 <- forecast(md1, newdata = test_df)
fc2 <- forecast(md2, xreg = cbind(model.matrix(~ month,test_df)[,-1],
test_df$trend,
test_df$lag12))
x <- fc2
accuracy(fc1, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.894492e-15 13.99206 11.47136 -0.1200023 4.472154 0.3541758
## Test set 1.079611e+01 20.82782 18.57391 1.9530865 3.828115 0.5734654
## ACF1 Theil's U
## Training set 0.7502578 NA
## Test set 0.1653119 0.4052175
accuracy(fc2, test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4117148 8.353629 6.397538 0.2571543 2.549682 0.2100998
## Test set -11.2123706 17.928195 13.353910 -2.4898843 2.924174 0.4385521
## ACF1 Theil's U
## Training set 0.005966712 NA
## Test set -0.325044929 0.3923795