1. Librerías

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(forecast)
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(ggplot2)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor

2. Simulación de serie temporal

set.seed(123)
n <- 200
time <- seq.Date(from = as.Date("2020-01-01"), by = "month", length.out = n)

data <- tibble(
  date = time,
  value = 100 + seq(1:n) * 0.5 + 10*sin(2*pi*(1:n)/12) + rnorm(n,0,3)
)

3. Visualización inicial

ggplot(data, aes(date, value)) +
  geom_line() +
  theme_minimal()

4. Convertir a ts

ts_data <- ts(data$value, frequency = 12, start = c(2020,1))
ts_data
##            Jan       Feb       Mar       Apr       May       Jun       Jul
## 2020 103.81857 108.96972 116.17612 110.87178 107.88786 108.14519  99.88275
## 2021 112.70231 115.99230 115.83248 122.02099 114.99355 103.10015 106.60407
## 2022 115.62488 116.60017 126.01336 123.12037 116.08559 118.76144 111.77939
## 2023 125.16175 127.47452 128.58211 127.51884 123.41588 120.37625 112.70381
## 2024 131.83990 133.41015 136.25996 134.57461 131.37139 131.10581 121.82269
## 2025 136.63892 138.15328 140.50038 137.60453 134.28463 133.91059 129.84463
## 2026 144.51722 143.53265 145.43597 149.73697 142.64568 135.33785 135.04391
## 2027 146.83854 152.65560 156.79052 153.96580 148.52221 148.44642 143.48051
## 2028 160.06200 162.25809 158.79290 155.58099 153.36878 151.77065 145.75992
## 2029 158.35932 166.41724 163.77396 166.48415 156.64635 156.83331 154.05822
## 2030 165.85294 166.81783 170.02833 169.89198 173.03159 161.04415 159.20616
## 2031 171.62370 174.39276 171.34026 180.05427 169.11808 171.21984 170.22731
## 2032 172.69539 180.06753 179.11473 184.72400 185.80033 171.13891 172.86322
## 2033 185.18897 186.54294 192.43092 187.53651 188.65813 177.85247 172.71953
## 2034 191.05059 194.76715 194.85386 194.85613 191.39780 193.38536 180.27599
## 2035 192.31002 203.44981 200.45105 198.06372 196.79116 192.40847 191.82976
## 2036 201.78375 202.97416 203.56760 212.65189 205.30213 195.24619 192.66650
##            Aug       Sep       Oct       Nov       Dec
## 2020  91.54456  92.43944  95.00276 104.17225 107.07944
## 2021  99.92137  97.29653 101.68582 103.42199 109.81333
## 2022 106.45453 109.18538 110.97415 114.96474 120.06592
## 2023 119.84661 116.12389 110.97042 117.29135 122.60003
## 2024 123.88916 113.85374 122.09359 124.87156 130.64782
## 2025 125.49876 127.26680 132.49000 129.02691 129.07249
## 2026 130.92307 130.51729 133.49559 135.38802 143.93313
## 2027 138.98494 137.21620 136.45603 146.58196 146.19922
## 2028 142.29712 139.64514 144.20466 146.14529 148.99617
## 2029 150.24321 148.81703 148.41763 151.95089 156.92761
## 2030 155.57363 151.61443 156.12582 164.83365 167.35451
## 2031 157.00807 162.60535 161.55315 161.78357 167.45600
## 2032 169.64687 167.49661 165.31462 172.14164 177.15881
## 2033 183.06287 171.24943 175.23443 180.40971 182.54866
## 2034 176.05176 178.61337 181.27119 185.80957 188.62490
## 2035 185.59396 186.76216 184.84187 191.14334 195.02594
## 2036 187.78331

5. Descomposición

decomp <- decompose(ts_data)
plot(decomp)

6. STL

stl_model <- stl(ts_data, s.window = "periodic")
plot(stl_model)

7. Test de estacionariedad

adf.test(ts_data)
## Warning in adf.test(ts_data): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_data
## Dickey-Fuller = -14.91, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

8. Diferenciación

ts_diff <- diff(ts_data)
adf.test(ts_diff)
## Warning in adf.test(ts_diff): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_diff
## Dickey-Fuller = -13.417, Lag order = 5, p-value = 0.01
## alternative hypothesis: stationary

9. ACF / PACF

acf(ts_diff)

pacf(ts_diff)

10. Split train/test

train <- window(ts_data, end = c(2032,12))
test  <- window(ts_data, start = c(2033,1))

11. Modelo ARIMA automático

model_arima <- auto.arima(train)
summary(model_arima)
## Series: train 
## ARIMA(1,0,0)(1,1,0)[12] with drift 
## 
## Coefficients:
##          ar1     sar1   drift
##       0.0272  -0.6441  0.4943
## s.e.  0.0835   0.0652  0.0149
## 
## sigma^2 = 11.21:  log likelihood = -380.01
## AIC=768.03   AICc=768.31   BIC=779.9
## 
## Training set error measures:
##                       ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -0.02746231 3.182664 2.418592 -0.04633364 1.748719 0.3910958
##                      ACF1
## Training set 0.0005627227

12. Forecast ARIMA

forecast_arima <- forecast(model_arima, h=12)
plot(forecast_arima)

13. Evaluación ARIMA

accuracy(forecast_arima, test)
##                       ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -0.02746231 3.182664 2.418592 -0.04633364 1.748719 0.3910958
## Test set      1.63519363 5.605551 4.647436  0.84476347 2.552538 0.7515087
##                       ACF1 Theil's U
## Training set  0.0005627227        NA
## Test set     -0.4971940222 0.8702873

14. Modelo ETS

model_ets <- ets(train)
forecast_ets <- forecast(model_ets, h=12)
plot(forecast_ets)

15. Evaluación ETS

accuracy(forecast_ets, test)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.1703874 2.755783 2.185552 -0.2093589 1.625799 0.3534124
## Test set      1.6196110 3.925098 2.889394  0.8595473 1.578109 0.4672265
##                      ACF1 Theil's U
## Training set -0.004202457        NA
## Test set     -0.507763469 0.6130883

16. Comparación modelos

acc_arima <- accuracy(forecast_arima, test)
acc_ets   <- accuracy(forecast_ets, test)

acc_arima
##                       ME     RMSE      MAE         MPE     MAPE      MASE
## Training set -0.02746231 3.182664 2.418592 -0.04633364 1.748719 0.3910958
## Test set      1.63519363 5.605551 4.647436  0.84476347 2.552538 0.7515087
##                       ACF1 Theil's U
## Training set  0.0005627227        NA
## Test set     -0.4971940222 0.8702873
acc_ets
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.1703874 2.755783 2.185552 -0.2093589 1.625799 0.3534124
## Test set      1.6196110 3.925098 2.889394  0.8595473 1.578109 0.4672265
##                      ACF1 Theil's U
## Training set -0.004202457        NA
## Test set     -0.507763469 0.6130883

17. Modelo final

final_model <- model_arima
forecast_final <- forecast(final_model, h=24)
plot(forecast_final)

18. Diagnóstico de residuos

checkresiduals(final_model)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0)(1,1,0)[12] with drift
## Q* = 52.187, df = 22, p-value = 0.0002946
## 
## Model df: 2.   Total lags used: 24

19. Error analysis

residuals <- residuals(final_model)
summary(residuals)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -8.42081 -2.06133  0.09758 -0.02746  1.95178  8.72475

20. Feature adicional (rolling mean)

data <- data %>%
  mutate(roll_mean = zoo::rollmean(value, 3, fill = NA))

21. Visualización avanzada

ggplot(data, aes(date, value)) +
  geom_line() +
  geom_line(aes(y = roll_mean), color="red") +
  theme_minimal()
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_line()`).

22. Forecast extendido

future <- forecast(final_model, h=36)
autoplot(future)

23. Insights

cat("La serie presenta tendencia creciente y estacionalidad clara.")
## La serie presenta tendencia creciente y estacionalidad clara.

24. Recomendaciones

cat("Se recomienda monitorear drift y recalibrar modelo periódicamente.")
## Se recomienda monitorear drift y recalibrar modelo periódicamente.