1. Librerías
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
2. Simulación de serie temporal
set.seed(123)
n <- 200
time <- seq.Date(from = as.Date("2020-01-01"), by = "month", length.out = n)
data <- tibble(
date = time,
value = 100 + seq(1:n) * 0.5 + 10*sin(2*pi*(1:n)/12) + rnorm(n,0,3)
)
3. Visualización inicial
ggplot(data, aes(date, value)) +
geom_line() +
theme_minimal()

4. Convertir a ts
ts_data <- ts(data$value, frequency = 12, start = c(2020,1))
ts_data
## Jan Feb Mar Apr May Jun Jul
## 2020 103.81857 108.96972 116.17612 110.87178 107.88786 108.14519 99.88275
## 2021 112.70231 115.99230 115.83248 122.02099 114.99355 103.10015 106.60407
## 2022 115.62488 116.60017 126.01336 123.12037 116.08559 118.76144 111.77939
## 2023 125.16175 127.47452 128.58211 127.51884 123.41588 120.37625 112.70381
## 2024 131.83990 133.41015 136.25996 134.57461 131.37139 131.10581 121.82269
## 2025 136.63892 138.15328 140.50038 137.60453 134.28463 133.91059 129.84463
## 2026 144.51722 143.53265 145.43597 149.73697 142.64568 135.33785 135.04391
## 2027 146.83854 152.65560 156.79052 153.96580 148.52221 148.44642 143.48051
## 2028 160.06200 162.25809 158.79290 155.58099 153.36878 151.77065 145.75992
## 2029 158.35932 166.41724 163.77396 166.48415 156.64635 156.83331 154.05822
## 2030 165.85294 166.81783 170.02833 169.89198 173.03159 161.04415 159.20616
## 2031 171.62370 174.39276 171.34026 180.05427 169.11808 171.21984 170.22731
## 2032 172.69539 180.06753 179.11473 184.72400 185.80033 171.13891 172.86322
## 2033 185.18897 186.54294 192.43092 187.53651 188.65813 177.85247 172.71953
## 2034 191.05059 194.76715 194.85386 194.85613 191.39780 193.38536 180.27599
## 2035 192.31002 203.44981 200.45105 198.06372 196.79116 192.40847 191.82976
## 2036 201.78375 202.97416 203.56760 212.65189 205.30213 195.24619 192.66650
## Aug Sep Oct Nov Dec
## 2020 91.54456 92.43944 95.00276 104.17225 107.07944
## 2021 99.92137 97.29653 101.68582 103.42199 109.81333
## 2022 106.45453 109.18538 110.97415 114.96474 120.06592
## 2023 119.84661 116.12389 110.97042 117.29135 122.60003
## 2024 123.88916 113.85374 122.09359 124.87156 130.64782
## 2025 125.49876 127.26680 132.49000 129.02691 129.07249
## 2026 130.92307 130.51729 133.49559 135.38802 143.93313
## 2027 138.98494 137.21620 136.45603 146.58196 146.19922
## 2028 142.29712 139.64514 144.20466 146.14529 148.99617
## 2029 150.24321 148.81703 148.41763 151.95089 156.92761
## 2030 155.57363 151.61443 156.12582 164.83365 167.35451
## 2031 157.00807 162.60535 161.55315 161.78357 167.45600
## 2032 169.64687 167.49661 165.31462 172.14164 177.15881
## 2033 183.06287 171.24943 175.23443 180.40971 182.54866
## 2034 176.05176 178.61337 181.27119 185.80957 188.62490
## 2035 185.59396 186.76216 184.84187 191.14334 195.02594
## 2036 187.78331
5. Descomposición
decomp <- decompose(ts_data)
plot(decomp)

6. STL
stl_model <- stl(ts_data, s.window = "periodic")
plot(stl_model)

7. Test de estacionariedad
adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ts_data
## Dickey-Fuller = -14.91, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
8. Diferenciación
ts_diff <- diff(ts_data)
adf.test(ts_diff)
## Warning in adf.test(ts_diff): p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: ts_diff
## Dickey-Fuller = -13.417, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary
9. ACF / PACF
acf(ts_diff)

pacf(ts_diff)

10. Split train/test
train <- window(ts_data, end = c(2032,12))
test <- window(ts_data, start = c(2033,1))
11. Modelo ARIMA automático
model_arima <- auto.arima(train)
summary(model_arima)
## Series: train
## ARIMA(1,0,0)(1,1,0)[12] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.0272 -0.6441 0.4943
## s.e. 0.0835 0.0652 0.0149
##
## sigma^2 = 11.21: log likelihood = -380.01
## AIC=768.03 AICc=768.31 BIC=779.9
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02746231 3.182664 2.418592 -0.04633364 1.748719 0.3910958
## ACF1
## Training set 0.0005627227
12. Forecast ARIMA
forecast_arima <- forecast(model_arima, h=12)
plot(forecast_arima)

13. Evaluación ARIMA
accuracy(forecast_arima, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02746231 3.182664 2.418592 -0.04633364 1.748719 0.3910958
## Test set 1.63519363 5.605551 4.647436 0.84476347 2.552538 0.7515087
## ACF1 Theil's U
## Training set 0.0005627227 NA
## Test set -0.4971940222 0.8702873
14. Modelo ETS
model_ets <- ets(train)
forecast_ets <- forecast(model_ets, h=12)
plot(forecast_ets)

15. Evaluación ETS
accuracy(forecast_ets, test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1703874 2.755783 2.185552 -0.2093589 1.625799 0.3534124
## Test set 1.6196110 3.925098 2.889394 0.8595473 1.578109 0.4672265
## ACF1 Theil's U
## Training set -0.004202457 NA
## Test set -0.507763469 0.6130883
16. Comparación modelos
acc_arima <- accuracy(forecast_arima, test)
acc_ets <- accuracy(forecast_ets, test)
acc_arima
## ME RMSE MAE MPE MAPE MASE
## Training set -0.02746231 3.182664 2.418592 -0.04633364 1.748719 0.3910958
## Test set 1.63519363 5.605551 4.647436 0.84476347 2.552538 0.7515087
## ACF1 Theil's U
## Training set 0.0005627227 NA
## Test set -0.4971940222 0.8702873
acc_ets
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1703874 2.755783 2.185552 -0.2093589 1.625799 0.3534124
## Test set 1.6196110 3.925098 2.889394 0.8595473 1.578109 0.4672265
## ACF1 Theil's U
## Training set -0.004202457 NA
## Test set -0.507763469 0.6130883
17. Modelo final
final_model <- model_arima
forecast_final <- forecast(final_model, h=24)
plot(forecast_final)

18. Diagnóstico de residuos
checkresiduals(final_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(1,1,0)[12] with drift
## Q* = 52.187, df = 22, p-value = 0.0002946
##
## Model df: 2. Total lags used: 24
19. Error analysis
residuals <- residuals(final_model)
summary(residuals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -8.42081 -2.06133 0.09758 -0.02746 1.95178 8.72475
20. Feature adicional (rolling mean)
data <- data %>%
mutate(roll_mean = zoo::rollmean(value, 3, fill = NA))
21. Visualización avanzada
ggplot(data, aes(date, value)) +
geom_line() +
geom_line(aes(y = roll_mean), color="red") +
theme_minimal()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

22. Forecast extendido
future <- forecast(final_model, h=36)
autoplot(future)

23. Insights
cat("La serie presenta tendencia creciente y estacionalidad clara.")
## La serie presenta tendencia creciente y estacionalidad clara.
24. Recomendaciones
cat("Se recomienda monitorear drift y recalibrar modelo periódicamente.")
## Se recomienda monitorear drift y recalibrar modelo periódicamente.