library(tidyverse)
library(dplyr)
library(tidyquant)
library(ggplot2)
library(forecast)
library(TSstudio)
library(quantmod)
library(tsibble)
library(plotly)
library(tseries)Asignación 3
Series de Tiempo
Cargamos las librerías necesarias
Acciones de Tecnoglass
stock <- "TGLS"
end_date <- Sys.Date()
start_date <- end_date - 365
getSymbols(stock, src = "yahoo", from = start_date, to = end_date)[1] "TGLS"
tgls1 <- as.data.frame(TGLS)
head(tgls1) TGLS.Open TGLS.High TGLS.Low TGLS.Close TGLS.Volume TGLS.Adjusted
2023-04-19 44.46 44.72 43.65 44.58 231100 44.20089
2023-04-20 44.59 46.49 44.59 46.39 445500 45.99550
2023-04-21 46.44 46.79 45.63 46.30 282100 45.90626
2023-04-24 46.69 47.63 44.95 45.63 568700 45.24196
2023-04-25 45.19 45.66 43.76 44.67 385900 44.29012
2023-04-26 44.59 44.81 41.58 41.96 463300 41.60317
tgls1$Date <- time(TGLS)
colnames(tgls1) <- c("open", "high", "low", "close", "volume", "adjusted", "date")
rownames(tgls1) <- NULL
head(tgls1) open high low close volume adjusted date
1 44.46 44.72 43.65 44.58 231100 44.20089 2023-04-19
2 44.59 46.49 44.59 46.39 445500 45.99550 2023-04-20
3 46.44 46.79 45.63 46.30 282100 45.90626 2023-04-21
4 46.69 47.63 44.95 45.63 568700 45.24196 2023-04-24
5 45.19 45.66 43.76 44.67 385900 44.29012 2023-04-25
6 44.59 44.81 41.58 41.96 463300 41.60317 2023-04-26
plot_ly(tgls1, x = ~date, y = ~close, type = 'scatter', mode = 'lines') %>%
layout(title = "Precio de las acciones de Tecnoglass",
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precio en USD"),
hovermode = "x")De acuerdo con la gráfica, a pesar de la volatilidad en los precios, se puede observar una tendencia general en el alza de los precios de las acciones. Además, parece exhibir ciertos patrones estacionales, posiblemente asociados a eventos o noticias específicas relacionadas con la empresa o el mercado.
Prueba de estacionariedad
result <- adf.test(tgls1$close)
cat('ADF Statistic:', result$statistic, '\n')ADF Statistic: -1.108038
cat('p-value:', result$p.value, '\n')p-value: 0.9196713
De acuerdo con la prueba de estacionariedad de Dickey-Fuller, no hay evidencia suficiente para afirmar que la serie de tiempo es estacionaria.
par(mfrow=c(1,2))
acf(tgls1$close, lag.max = 60)
pacf(tgls1$close, lag.max = 60)Se observa que los valores de autocorrelación decaen lentamente hacia cero, lo que sugiere la presencia de una tendencia. También se observa que algunos valores de autocorrelación están fuera de los límites de confianza, indicando autocorrelación significativa en ciertos rezagos.
En la gráfica parcial de autocorrelación, se observa que los valores decaen rápidamente después del primer rezago, cuyo proceso autorregresivo puede ser de ese orden.
Entrenamiento y prueba
n_tgls <- length(tgls1$close)
n_test <- 28
ntrain <- n_tgls - n_test# Entrenamiento
train <- tgls1$close[1:ntrain]
dates_train <- tgls1$date[1:ntrain]
# Prueba
test<- tgls1$close[(ntrain + 1):(ntrain+ n_test)]
dates <- tgls1$date[(ntrain + 1):(ntrain + n_test)]
cat("train:", length(train), "\n")train: 223
cat("test:", length(test), "\n")test: 28
close_ts <- ts(train)
ctest_ts <- ts(test)Criterio AIC
best_aic <- Inf
best_order <- NULL
best_mdl <- NULL
pq_rng <- 0:4
d_rng <- 0:2
for (i in pq_rng) {
for (d in d_rng) {
for (j in pq_rng) {
tryCatch({
tmp_mdl <- tryCatch(Arima(train, order = c(i, d, j)), error = function(e) NULL)
if (!is.null(tmp_mdl)) {
tmp_aic <- AIC(tmp_mdl)
if (tmp_aic < best_aic) {
best_aic <- tmp_aic
best_order <- c(i, d, j)
best_mdl <- tmp_mdl
}
}
}, error = function(e) NULL)
}
}
}
cat("AIC:", best_aic, " | order:", best_order, "\n")AIC: 738.319 | order: 2 1 2
De acuerdo con la función, el mejor modelo ARIMA para este stock es el (1, 1, 1), donde se incluye 1 término autoregresivo, 1 diferenciación y 1 media móvil.
Rolling forecast
arima_rolling <- function(history, test, best_order) {
predictions <- numeric(length(test))
for (t in 1:length(test)) {
model <- Arima(history, order=best_order)
model_fit <- forecast(model)
yhat <- model_fit$mean[1]
predictions[t] <- yhat
obs <- test[t]
history <- c(history, obs)
cat("predicted=", yhat, ", expected=", obs, "\n")
}
return(predictions)
}
best_order= c(1, 1 ,1)pred <- arima_rolling(train, test, best_order)predicted= 45.20239 , expected= 44.88
predicted= 44.7239 , expected= 44.65
predicted= 44.73623 , expected= 45
predicted= 44.97916 , expected= 45.58
predicted= 45.66614 , expected= 45.46
predicted= 45.38237 , expected= 45.8
predicted= 45.89851 , expected= 45.42
predicted= 45.3017 , expected= 47.55
predicted= 47.89149 , expected= 50.43
predicted= 50.54942 , expected= 52.9
predicted= 53.15415 , expected= 52.26
predicted= 52.0004 , expected= 53.47
predicted= 53.80406 , expected= 53.6
predicted= 53.39487 , expected= 52.68
predicted= 52.69873 , expected= 52.03
predicted= 51.9321 , expected= 51.55
predicted= 51.55101 , expected= 50.75
predicted= 50.64011 , expected= 52.58
predicted= 52.90057 , expected= 52.46
predicted= 52.23494 , expected= 53.94
predicted= 54.27125 , expected= 55.44
predicted= 55.4146 , expected= 58.41
predicted= 58.82141 , expected= 59
predicted= 58.83031 , expected= 59
predicted= 59.10186 , expected= 57.67
predicted= 57.42979 , expected= 58.72
predicted= 58.99654 , expected= 57.8
predicted= 57.52836 , expected= 57.15
# Predicciones para 7 días
pred_7d <- head(pred, 7)
# Predicciones para 14 días
pred_14d <- head(pred, 14)
# Predicciones para 21 días
pred_21d <- head(pred, 21)
# Predicciones para 28 días
pred_28d <- predVentana de 7 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 7)
rf_7 <- data.frame(date = forecast_dates, prediction = as.numeric(pred_7d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf_7$date, y = rf_7$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pVentana de 14 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 14)
rf_14 <- data.frame(date = forecast_dates, prediction = as.numeric(pred_14d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf_14$date, y = rf_14$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pVentana de 21 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 21)
rf_21 <- data.frame(date = forecast_dates, prediction = as.numeric(pred_21d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf_21$date, y = rf_21$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pVentana de 28 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 28)
rf_28 <- data.frame(date = forecast_dates, prediction = as.numeric(pred_28d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf_28$date, y = rf_28$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pLa función de rolling forecast permite generar predicciones de precios futuros de una manera similar a los datos de test. Sugiere entonces una tendencia alcista en los precios de Tecnoglass para el período pronosticado, siguiendo un patrón similar al de los datos de prueba.
Forecast 7 días
model <- arima(train, order = c(1, 1, 1))
p7 <- forecast(model, h = 7)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 7)
f_7 <- data.frame(date = forecast_dates, prediction = as.numeric(p7$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = f_7$date, y = f_7$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 14 días
model <- arima(train, order = c(1, 1, 1))
p14 <- forecast(model, h = 14)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 14)
f_14 <- data.frame(date = forecast_dates, prediction = as.numeric(p14$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = f_14$date, y = f_14$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 21 días
model <- arima(train, order = c(1, 1, 1))
p21 <- forecast(model, h = 21)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 21)
f_21 <- data.frame(date = forecast_dates, prediction = as.numeric(p21$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = f_21$date, y = f_21$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 28 días
model <- arima(train, order = c(1, 1, 1))
p28 <- forecast(model, h = 28)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 28)
f_28 <- data.frame(date = forecast_dates, prediction = as.numeric(p28$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = f_28$date, y = f_28$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pSe observa una tendencia lineal en estos valores predichos. Por lo tanto, no arrojan predicciones precisas o similares a las de los datos de test.
Métricas - AIC
Rolling forecast
m7d <- accuracy(pred_7d, test[1:7])
m14d <- accuracy(pred_14d, test[1:14])
m21d <- accuracy(pred_21d, test[1:21])
m28d <- accuracy(pred_28d, test)metricas <- rbind( m7d,m14d,m21d,m28d)
rownames(metricas) <- c("RF 7 días", "RF 14 días", "RF 21 días", "RF 28 días")
metricas ME RMSE MAE MPE MAPE
RF 7 días 0.02875578 0.3752826 0.3375970 0.06076811 0.7438631
RF 14 días 0.49965723 1.2398698 0.9130892 0.99504384 1.8292595
RF 21 días 0.45316664 1.2147340 0.9471117 0.87831126 1.8577546
RF 28 días 0.39798651 1.2720903 0.9832144 0.75518742 1.8622734
Los valores de las métricas son cercanos a 0 en su mayoría, lo que sugiere que estos valores predichos se ajustan muy bien al modelo. Se evidencia también que la ventana de predicción de 21 días es más óptima que la de 14 y que la de 28 , mostrando que no siempre a mayor ventana, mejor es la predicción. En general, el modelo ARIMA (1, 1, 1) con rolling forecast se ajusta y predice muy bien los próximos datos.
Correlación
corr <- data.frame(predicted = c(44.3475, 45.20247, 44.72358, 44.73669, 44.97872, 45.66639, 45.38212, 45.8987, 45.30155, 47.89117, 50.54834, 53.15385, 52.00039, 53.80314, 53.39438, 52.69978, 51.93156, 51.55163, 50.64, 52.89992, 52.23478, 54.27109, 55.41371, 58.82054, 58.83012, 59.10253, 57.43018, 58.99618),
expected = c(45.01, 44.88, 44.65, 45, 45.58, 45.46, 45.8, 45.42, 47.55, 50.43, 52.9, 52.26, 53.47, 53.6, 52.68, 52.03, 51.55, 50.75, 52.58, 52.46, 53.94, 55.44, 58.41, 59, 59, 57.67, 58.72, 57.8))
correlation <- cor(corr$predicted, corr$expected)modelo <- lm(predicted ~ expected, data = corr)
corr_p <- plot_ly(corr, x = ~expected, y = ~predicted, type = "scatter", mode = "markers", name = "Predichos") %>%
add_lines(x = ~expected, y = modelo$fitted.values, line = list(color = 'red'), name = "Tendencia") %>%
layout(title = "Gráfico de Correlación entre Valor Esperado y Rolling Forecast",
xaxis = list(title = "Valor Esperado"),
yaxis = list(title = "Predicción"),
legend = list(title = "Leyendas",
traceorder = 'normal',
font = list(size = 12),
bgcolor = '#E2E2E2',
bordercolor = '#FFFFFF',
borderwidth = 2))
corr_pSe observa que la correlación es de 0.96, lo que demuestra que los datos predichos y los reales presentan una fuerte relación lineal entre ellas.
Forecast()
p7_m<- accuracy(p7, test[1:7])
p14_m <- accuracy(p14, test[1:14])
p21_m <- accuracy(p21, test[1:21])
p28_m <- accuracy(p28, test)
metricas2 <- rbind(
"p7" = p7_m,
"p14" = p14_m,
"p21" = p21_m,
"p28" = p28_m
)
metricas2 ME RMSE MAE MPE MAPE MASE
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 0.119575837 0.4103461 0.3807112 0.2568575 0.8398704 0.4150155
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 3.415874734 4.9619268 3.5464424 6.5335970 6.8251035 3.8659971
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 4.793616063 6.0091211 4.8806612 9.1113262 9.3056638 5.3204365
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 6.875345722 8.3803761 6.9406296 12.4620709 12.6078241 7.5660198
ACF1
Training set -0.01493599
Test set NA
Training set -0.01493599
Test set NA
Training set -0.01493599
Test set NA
Training set -0.01493599
Test set NA
Los valores de las métricas para el conjunto de prueba sugieren que el modelo no está generalizando bien y puede requerir ajustes o una mayor cantidad de datos de entrenamiento para mejorar su rendimiento predictivo con la función de forecast().
Criterio BIC
best_bic <- Inf
best_order <- NULL
best_mdl <- NULL
pq_rng <- 0:4
d_rng <- 0:2
for (i in pq_rng) {
for (d in d_rng) {
for (j in pq_rng) {
tryCatch({
tmp_mdl <- tryCatch(Arima(train, order = c(i, d, j)), error = function(e) NULL)
if (!is.null(tmp_mdl)) {
tmp_bic <- BIC(tmp_mdl) # Calcular el BIC del modelo
if (tmp_bic < best_bic) {
best_bic <- tmp_bic
best_order <- c(i, d, j)
best_mdl <- tmp_mdl
}
}
}, error = function(e) NULL)
}
}
}
cat("BIC:", best_bic, " | order:", best_order, "\n")BIC: 743.5439 | order: 0 1 0
De acuerdo con el criterio BIC, el mejor modelo ARIMA es el (0, 1, 0). Donde no se incluyen términos autoregresivos, 1 diferenciación y tampoco medias móviles.
Rolling forecast
arima_rolling2 <- function(history, test, best_order) {
predictions2 <- numeric(length(test))
for (t in 1:length(test)) {
model <- Arima(history, order=best_order)
model_fit <- forecast(model)
yhat <- model_fit$mean[1]
predictions2[t] <- yhat
obs <- test[t]
history <- c(history, obs)
cat("predicted=", yhat, ", expected=", obs, "\n")
}
return(predictions2)
}
best_order2= c(0, 1 ,0)pred2 <- arima_rolling2(train, test, best_order2)predicted= 45.01 , expected= 44.88
predicted= 44.88 , expected= 44.65
predicted= 44.65 , expected= 45
predicted= 45 , expected= 45.58
predicted= 45.58 , expected= 45.46
predicted= 45.46 , expected= 45.8
predicted= 45.8 , expected= 45.42
predicted= 45.42 , expected= 47.55
predicted= 47.55 , expected= 50.43
predicted= 50.43 , expected= 52.9
predicted= 52.9 , expected= 52.26
predicted= 52.26 , expected= 53.47
predicted= 53.47 , expected= 53.6
predicted= 53.6 , expected= 52.68
predicted= 52.68 , expected= 52.03
predicted= 52.03 , expected= 51.55
predicted= 51.55 , expected= 50.75
predicted= 50.75 , expected= 52.58
predicted= 52.58 , expected= 52.46
predicted= 52.46 , expected= 53.94
predicted= 53.94 , expected= 55.44
predicted= 55.44 , expected= 58.41
predicted= 58.41 , expected= 59
predicted= 59 , expected= 59
predicted= 59 , expected= 57.67
predicted= 57.67 , expected= 58.72
predicted= 58.72 , expected= 57.8
predicted= 57.8 , expected= 57.15
# Predicciones para 7 días
pred2_7d <- head(pred2, 7)
# Predicciones para 14 días
pred2_14d <- head(pred2, 14)
# Predicciones para 21 días
pred2_21d <- head(pred2, 21)
# Predicciones para 28 días
pred2_28d <- pred2Ventana de 7 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 7)
rf2_7 <- data.frame(date = forecast_dates, prediction = as.numeric(pred2_7d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf2_7$date, y = rf2_7$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pVentana de 14 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 14)
rf2_14 <- data.frame(date = forecast_dates, prediction = as.numeric(pred2_14d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf2_14$date, y = rf2_14$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pVentana de 21 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 21)
rf2_21 <- data.frame(date = forecast_dates, prediction = as.numeric(pred2_21d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf2_21$date, y = rf2_21$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pVentana de 28 días
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 28)
rf2_28 <- data.frame(date = forecast_dates, prediction = as.numeric(pred2_28d))p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = rf2_28$date, y = rf2_28$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 7 días
model <- arima(train, order = c(0, 1, 0))
pp7 <- forecast(model, h = 7)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 7)
ff_7 <- data.frame(date = forecast_dates, prediction = as.numeric(pp7$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = ff_7$date, y = ff_7$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 14 días
model <- arima(train, order = c(0, 1, 0))
pp14 <- forecast(model, h = 14)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 14)
ff_14 <- data.frame(date = forecast_dates, prediction = as.numeric(pp14$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = ff_14$date, y = ff_14$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 21 días
model <- arima(train, order = c(0, 1, 0))
pp21 <- forecast(model, h = 21)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 21)
ff_21 <- data.frame(date = forecast_dates, prediction = as.numeric(pp21$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = ff_21$date, y = ff_21$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pForecast 28 días
model <- arima(train, order = c(0, 1, 0))
pp28 <- forecast(model, h = 28)
last_date <- tail(dates_train, 1)
forecast_dates <- seq.Date(from = last_date + 1, by = "day", length.out = 28)
ff_28 <- data.frame(date = forecast_dates, prediction = as.numeric(pp28$mean))# Agregar datos de entrenamiento
p <- plot_ly(x = dates_train, y = close_ts, name = "Train", type = "scatter", mode = "lines", line = list(color = "blue"))
# Agregar datos de prueba
p <- add_trace(p, x = dates, y = ctest_ts, name = "Test", type = "scatter", mode = "lines", line = list(color = "green"))
# Agregar predictions
p <- add_trace(p, x = ff_28$date, y = ff_28$prediction, name = "Prediction", mode = "lines", line = list(color = "red"))
p <- layout(p,
xaxis = list(title = "Fecha", tickformat = "%Y-%m-%d"),
yaxis = list(title = "Precios"),
title = "Precios de Tecnoglass - Forecast")
pTodas las predicciones con el mejor modelo ARIMA calculado por BIC, tienen un comportamiento similar al mejor modelo ARIMA calculado por AIC. Así, estas predicciones siguen una tendencia lineal que no se asemeja a los datos de test.
Métricas - BIC
Rolling forecast
m7d2 <- accuracy(pred2_7d, test[1:7])
m14d2 <- accuracy(pred2_14d, test[1:14])
m21d2 <- accuracy(pred2_21d, test[1:21])
m28d2 <- accuracy(pred2_28d, test)metricas11 <- rbind( m7d2,m14d2,m21d2,m28d2)
# Asignar nombres a las filas
rownames(metricas11) <- c("RF 7 días", "RF 14 días", "RF 21 días", "RF 28 días")"Tabla ARIMA(1, 1, 1):"[1] "Tabla ARIMA(1, 1, 1):"
metricas ME RMSE MAE MPE MAPE
RF 7 días 0.02875578 0.3752826 0.3375970 0.06076811 0.7438631
RF 14 días 0.49965723 1.2398698 0.9130892 0.99504384 1.8292595
RF 21 días 0.45316664 1.2147340 0.9471117 0.87831126 1.8577546
RF 28 días 0.39798651 1.2720903 0.9832144 0.75518742 1.8622734
"Tabla ARIMA(0, 1, 0)"[1] "Tabla ARIMA(0, 1, 0)"
metricas11 ME RMSE MAE MPE MAPE
RF 7 días 0.05857141 0.3387164 0.3042859 0.1267486 0.6711441
RF 14 días 0.54785728 1.2669515 0.8935719 1.0915187 1.7881511
RF 21 días 0.49666668 1.2260842 0.9223814 0.9631225 1.8071174
RF 28 días 0.43357154 1.2660153 0.9600006 0.8236854 1.8163446
Comparando ambas tablas, se observan pequeñas diferencias en las métricas, sin embargo ambas indican que los modelos predicen muy bien los datos evaluados.
Forecast()
p72_m <- accuracy(pp7, test[1:7])
p142_m <- accuracy(pp14, test[1:14])
p212_m <- accuracy(pp21, test[1:21])
p282_m <- accuracy(pp28, test)
metricas22 <- rbind(p72_m,p142_m,p212_m, p282_m)"Tabla ARIMA (1, 1, 1):"[1] "Tabla ARIMA (1, 1, 1):"
metricas2 ME RMSE MAE MPE MAPE MASE
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 0.119575837 0.4103461 0.3807112 0.2568575 0.8398704 0.4150155
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 3.415874734 4.9619268 3.5464424 6.5335970 6.8251035 3.8659971
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 4.793616063 6.0091211 4.8806612 9.1113262 9.3056638 5.3204365
Training set 0.002475031 1.2571302 0.8934521 -0.0376349 2.1856215 0.9739572
Test set 6.875345722 8.3803761 6.9406296 12.4620709 12.6078241 7.5660198
ACF1
Training set -0.01493599
Test set NA
Training set -0.01493599
Test set NA
Training set -0.01493599
Test set NA
Training set -0.01493599
Test set NA
"Tabla ARIMA (0, 1, 0):"[1] "Tabla ARIMA (0, 1, 0):"
metricas22 ME RMSE MAE MPE MAPE MASE
Training set 0.002128146 1.2728994 0.9134285 -0.04308119 2.231896 0.9957336
Test set 0.245716095 0.4576653 0.3885711 0.53569717 0.855165 0.4235835
Training set 0.002128146 1.2728994 0.9134285 -0.04308119 2.231896 0.9957336
Test set 3.538573129 5.0447820 3.6100006 6.78819826 6.947932 3.9352822
Training set 0.002128146 1.2728994 0.9134285 -0.04308119 2.231896 0.9957336
Test set 4.915239607 6.1047775 4.9628579 9.35671588 9.463205 5.4100397
Training set 0.002128146 1.2728994 0.9134285 -0.04308119 2.231896 0.9957336
Test set 6.996430125 8.4786252 7.0321439 12.69739332 12.777260 7.6657800
ACF1
Training set 0.08953453
Test set NA
Training set 0.08953453
Test set NA
Training set 0.08953453
Test set NA
Training set 0.08953453
Test set NA
Los datos de prueba del modelo ARIMA(0,1,0) parecen comportarse mucho mejor que los datos de prueba del primer modelo. Sin embargo, este método de forecast no es el mejor para realizar predicciones ya que, de acuerdo con las métricas, no genera buenas predicciones de los datos.
Criterio HQIC
best_hqic <- Inf
best_order_hqic <- NULL
best_mdl_hqic <- NULL
for (i in pq_rng) {
for (d in d_rng) {
for (j in pq_rng) {
tryCatch({
tmp_mdl_hqic <- tryCatch(Arima(train, order = c(i, d, j)), error = function(e) NULL)
if (!is.null(tmp_mdl_hqic)) {
tmp_hqic <- HQIC(tmp_mdl_hqic)
if (tmp_hqic < best_hqic) {
best_hqic <- tmp_hqic
best_order_hqic <- c(i, d, j)
best_mdl_hqic <- tmp_mdl_hqic
}
}
}, error = function(e) NULL)
}
}
}
if (!is.infinite(best_hqic)) {
cat("HQIC:", best_hqic, " | order:", best_order, "\n")
} else {
cat("No se pudo calcular el HQIC para ningún modelo.\n")
}No se pudo calcular el HQIC para ningún modelo.
Pruebas
ARIMA(1, 1 , 1) - AIC
model <- arima(train, order = c(1, 1, 1))
tgls_pred <- forecast(model)plot_forecast(tgls_pred,
title = "Precio de las acciones de Tecnoglass - Forecast",
Ytitle = "Precio en USD",
Xtitle = "Días")checkresiduals(model)
Ljung-Box test
data: Residuals from ARIMA(1,1,1)
Q* = 2.5828, df = 8, p-value = 0.9578
Model df: 2. Total lags used: 10
Primeramente, el test de Ljung-Box arroja un p-value de 0.96, que no permite rechazar la hipótesis nula. Además, los residuos no siguen una distribución de ruido blanco y el modelo no logra predecir adecuadamente el comportamiento de los precios de Tecnoglass, tal como se evidencia en la gráfica.
ARIMA(0, 1 , 0) - BIC
model2 <- arima(train, order = c(0, 1, 0))
tgls_pred2 <- forecast(model2)plot_forecast(tgls_pred2,
title = "Precio de las acciones de Tecnoglass - Forecast",
Ytitle = "Precio en USD",
Xtitle = "Días")checkresiduals(model2)
Ljung-Box test
data: Residuals from ARIMA(0,1,0)
Q* = 6.9845, df = 10, p-value = 0.7269
Model df: 0. Total lags used: 10
El test de Ljung-Box arroja un p-value de 0.73, que no permite rechazar la hipótesis nula. Los residuos sugieren, además, que este modelo no parece ser el adecuado para predecir estos datos sin ventanas de predicción.
Conclusiones
Las pruebas y los gráficos de residuos sugieren que los modelos ARIMA al predecir con forecast(), podrían no ser completamente adecuados para estos datos. Parece haber algunos patrones estacionales que el modelo no está capturando. Además, la presencia de valores atípicos podría estar afectando el ajuste del modelo. Una mejor opción podría ser explorar modelos ARIMA más complejos o modelos estacionales para intentar capturar mejor la estructura de los datos.
En resumen, se destaca una notable diferencia en los resultados de predicción al utilizar ventanas de predicción en comparación con la generación de predicciones utilizando únicamente los datos de entrenamiento, sin ningún otro contexto. Cuando se emplean ventanas de predicción, como en el caso de rolling forecast, se observa un rendimiento notablemente superior y esto se refleja en métricas de evaluación más favorables y en gráficos que muestran una correspondencia más cercana entre las predicciones y los valores reales.