Grafica y descomposición de la serie de tiempo no estacionaria

# Gráfico de la serie original con LOESS - CORRECTO
datos$observation_date <- as.Date(datos$observation_date)

puntos_extremos <- datos %>%
  filter(WM2NS == max(WM2NS, na.rm = TRUE) | WM2NS == min(WM2NS, na.rm = TRUE)) %>%
  mutate(Label = paste0(round(WM2NS, 1), "\n", format(observation_date, "%b %Y")))

ggplot(data = datos, aes(x = observation_date, y = WM2NS)) +
  geom_line(color = "grey50", alpha = 0.6, linewidth = 0.7) +
  geom_smooth(
    method = "loess", 
    se = TRUE,
    color = "#3366FF", 
    fill = "#99CCFF",
    alpha = 0.2,
    linewidth = 1.2
  ) +
  geom_label_repel(
    data = puntos_extremos,
    aes(label = Label),
    size = 3,
    box.padding = 0.7,
    min.segment.length = 0,
    segment.color = "grey30"
  ) +
  geom_hline(
    yintercept = c(max(datos$WM2NS, na.rm = TRUE), min(datos$WM2NS, na.rm = TRUE)),
    linetype = "dotted",
    color = "grey60",
    alpha = 0.7
  ) +
  labs(
    title = "Trayectoria de WM2NS",
    subtitle = "Serie temporal con tendencia suavizada (LOESS) y valores extremos",
    y = "Valor de WM2NS",
    x = "Fecha de Observación",
    caption = "Fuente: Federal Reserve Economic Data (FRED)"
  ) +
  scale_x_date(
    date_breaks = "1 year", 
    date_labels = "%Y",
    expand = expansion(mult = c(0.02, 0.02))
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, color = "gray30", size = 11),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.2),
    panel.grid.minor = element_blank(),
    panel.background = element_rect(fill = "white", color = NA),
    plot.background = element_rect(fill = "white", color = NA),
    axis.title = element_text(size = 12),
    axis.text = element_text(color = "grey40"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.caption = element_text(color = "grey50", hjust = 0)
  )
## `geom_smooth()` using formula = 'y ~ x'

# 2. Medias móviles - CORREGIDO (usar WM2NS en lugar de Valor)
# ------------------------------------------------------------
# Renombrar columnas para mantener consistencia
names(datos)[names(datos) == "WM2NS"] <- "Valor"
names(datos)[names(datos) == "observation_date"] <- "Fecha"

ts_original <- ts(
  na.omit(datos$Valor),
  frequency = 52,
  start = c(year(datos$Fecha[1]), week(datos$Fecha[1]))
)

decomp_stl         <- stl(ts_original,   s.window = "periodic")
decomp_classic_add <- decompose(ts_original, type     = "additive")
decomp_classic_mult<- decompose(ts_original, type     = "multiplicative")



# Calcular medias móviles
datos$Valor.ma <- forecast::ma(datos$Valor, order = 7)
datos$Valor30.ma <- forecast::ma(datos$Valor, order = 30)

# Gráfico de media moviles

datos_long <- datos %>%
  select(Fecha, 
         `Serie original` = Valor,
         `Media móvil semanal (7 días)` = Valor.ma,
         `Media móvil mensual (30 días)` = Valor30.ma) %>%
  pivot_longer(cols = -Fecha, 
               names_to = "Serie", 
               values_to = "Valor") %>%
  mutate(Serie = factor(Serie, levels = c("Serie original", 
                                          "Media móvil semanal (7 días)", 
                                          "Media móvil mensual (30 días)")))

# Gráfico con facetas verticales
ggplot(datos_long, aes(x = Fecha, y = Valor, color = Serie)) +
  geom_line(linewidth = 0.8) +
  scale_color_manual(
    values = c(
      "Serie original" = "grey60",
      "Media móvil semanal (7 días)" = "steelblue",
      "Media móvil mensual (30 días)" = "indianred"
    )
  ) +
  facet_wrap(~ Serie, ncol = 1, scales = "free_y") +  # Paneles verticales
  labs(
    title = "WM2NS con medias móviles",
    subtitle = "Comparación de la serie original y suavizada en paneles separados",
    y = "Valor de WM2NS",
    x = "Fecha",
    caption = "Fuente: Federal Reserve Economic Data (FRED)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, color = "gray40", size = 11),
    legend.position = "none",  
    panel.grid.major = element_line(color = "grey90", linewidth = 0.2),
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1),
    strip.text = element_text(face = "bold", size = 10),
    strip.background = element_rect(fill = "grey95", color = NA)
  )
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_line()`).

Descomposición de la serie de tiempo no estacionaria mediante STL

# Calcular la descomposición
count_ma <- ts(na.omit(datos$Valor30.ma), frequency = 52)  # Cambiado a frecuencia semanal (52 semanas/año)

# Verificar si tenemos suficientes datos
if (length(count_ma) > 0) {
  decomp <- stl(count_ma, s.window = "periodic")
  deseasonal_inf <- seasadj(decomp)
  
  # Crear un data frame con los componentes
  decomp_df <- data.frame(
    Fecha = time(decomp$time.series) %>% 
      as.numeric() %>% 
      as.Date(origin = "1970-01-01"),
    Original = as.numeric(decomp$time.series[, "trend"] + 
                            decomp$time.series[, "seasonal"] + 
                            decomp$time.series[, "remainder"]),
    Tendencia = as.numeric(decomp$time.series[, "trend"]),
    Estacionalidad = as.numeric(decomp$time.series[, "seasonal"]),
    Residual = as.numeric(decomp$time.series[, "remainder"])
  )
  
  # Función para crear gráficos consistentes
  crear_grafico_componente <- function(data, y_var, titulo, color) {
    ggplot(data, aes(x = Fecha, y = .data[[y_var]])) +
      geom_line(color = color, linewidth = 0.8) +
      labs(title = titulo, y = "Valor", x = "") +
      theme_minimal() +
      theme(
        plot.title = element_text(face = "bold", size = 12, hjust = 0.5),
        panel.grid.minor = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
      )
  }
  
  # Crear los gráficos individuales
  g1 <- crear_grafico_componente(decomp_df, "Original", "Serie Original", "black")
  g2 <- crear_grafico_componente(decomp_df, "Tendencia", "Componente de Tendencia", "blue")
  g3 <- crear_grafico_componente(decomp_df, "Estacionalidad", "Componente Estacional", "red")
  g4 <- crear_grafico_componente(decomp_df, "Residual", "Componente Residual", "darkgreen")
plot(g1)
plot(g2)  
plot(g3)
plot(g4)
  # Combinar en un solo gráfico
  grid.arrange(
    g1, g2, g3, g4,
    nrow = 2,
    top = textGrob("Descomposición STL de WM2NS", 
                   gp = gpar(fontface = "bold", fontsize = 16))
  )
} else {
  message("No hay suficientes datos para realizar la descomposición STL")
}

Descomposición de la serie de tiempo no estacionaria mediante la clásica

decomp_classic_add <- decompose(ts_original, type = "additive")
decomp_classic_mult <- decompose(ts_original, type = "multiplicative")

autoplot(decomp_classic_add) +
  labs(title = "Descomposición Clásica Aditiva de WM2NS",
       y = "Valor de WM2NS", x = "Fecha") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 16, hjust = 0.5))

autoplot(decomp_classic_mult) + 
  labs(title = "Descomposición Clásica Multiplicativa de WM2NS",
       y = "Valor de WM2NS", x = "Fecha") +
  theme_minimal() +
  theme(plot.title = element_text(face = "bold", size = 16, hjust = 0.5))

Medidas del error de pronosticos

# Función para calcular métricas de error
calcular_errores <- function(observado, pronostico) {
  error <- observado - pronostico
  mae <- mean(abs(error), na.rm = TRUE)
  rmse <- sqrt(mean(error^2, na.rm = TRUE))
  mape <- mean(abs(error/observado), na.rm = TRUE) * 100
  return(list(MAE = mae, RMSE = rmse, MAPE = mape))
}

# 1. STL - Error dentro de muestra
# ---------------------------------
# La serie ajustada = tendencia + estacionalidad
fc_stl <- decomp_stl$time.series[, "trend"] + decomp_stl$time.series[, "seasonal"]

# Calcular errores para toda la serie
errores_stl <- calcular_errores(ts_original, fc_stl)

# 2. Métodos Clásicos - Error dentro de muestra
# ---------------------------------------------
# Función para extraer componentes válidos (excluyendo NAs)
extraer_componentes_validos <- function(decomp) {
  # Encontrar índices válidos (donde trend no es NA)
  indices_validos <- which(!is.na(decomp$trend))
  
  return(list(
    x = decomp$x[indices_validos],
    trend = decomp$trend[indices_validos],
    seasonal = decomp$seasonal[indices_validos],
    random = decomp$random[indices_validos],
    fechas = time(decomp$trend)[indices_validos]
  ))
}

# Clásica Aditiva
comp_add <- extraer_componentes_validos(decomp_classic_add)
fc_classic_add <- comp_add$trend + comp_add$seasonal
errores_classic_add <- calcular_errores(comp_add$x, fc_classic_add)

# Clásica Multiplicativa
comp_mult <- extraer_componentes_validos(decomp_classic_mult)
fc_classic_mult <- comp_mult$trend * comp_mult$seasonal
errores_classic_mult <- calcular_errores(comp_mult$x, fc_classic_mult)

# 3. Tabla comparativa de resultados
# ----------------------------------
resultados <- data.frame(
  Método = c("STL", "Clásica Aditiva", "Clásica Multiplicativa"),
  MAE = c(
    errores_stl$MAE,
    errores_classic_add$MAE,
    errores_classic_mult$MAE
  ),
  RMSE = c(
    errores_stl$RMSE,
    errores_classic_add$RMSE,
    errores_classic_mult$RMSE
  ),
  MAPE = c(
    errores_stl$MAPE,
    errores_classic_add$MAPE,
    errores_classic_mult$MAPE
  ),
  Observaciones = c(
    length(ts_original),
    length(comp_add$x),
    length(comp_mult$x)
  )
)

# Mostrar tabla
print(resultados)
##                   Método      MAE     RMSE      MAPE Observaciones
## 1                    STL 47.51429 89.52646 0.6846828          2327
## 2        Clásica Aditiva 47.71678 93.74181 0.6546882          2275
## 3 Clásica Multiplicativa 44.56613 95.08870 0.5306338          2275
# 4. Gráfico comparativo de ajustes
# ---------------------------------
# Crear dataframe con ajustes
ajustes_df <- data.frame(
  Fecha = c(
    time(ts_original),
    comp_add$fechas,
    comp_mult$fechas
  ),
  Valor = c(
    as.numeric(ts_original),
    comp_add$x,
    comp_mult$x
  ),
  Ajuste = c(
    fc_stl,
    fc_classic_add,
    fc_classic_mult
  ),
  Método = c(
    rep("STL", length(ts_original)),
    rep("Clásica Aditiva", length(comp_add$x)),
    rep("Clásica Multiplicativa", length(comp_mult$x))
  )
)

# Gráfico comparativo
ggplot(ajustes_df, aes(x = Fecha)) +
  geom_line(aes(y = Valor, color = "Observado"), alpha = 0.7) +
  geom_line(aes(y = Ajuste, color = "Ajuste"), linewidth = 0.8) +
  scale_color_manual(values = c("Observado" = "grey50", "Ajuste" = "red")) +
  facet_wrap(~ Método, ncol = 1, scales = "free_y") +
  labs(
    title = "Comparación de Ajustes de los Modelos",
    subtitle = "STL vs Métodos Clásicos de Descomposición",
    y = "Valor de WM2NS",
    x = "Fecha",
    color = "Serie"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5, color = "gray40", size = 11),
    legend.position = "bottom",
    strip.text = element_text(face = "bold", size = 12),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.2),
    panel.grid.minor = element_blank()
  )

# 5. Gráfico de residuos comparativo
# ----------------------------------
residuos_df <- data.frame(
  Fecha = c(
    time(ts_original),
    comp_add$fechas,
    comp_mult$fechas
  ),
  Residuo = c(
    decomp_stl$time.series[, "remainder"],
    comp_add$random,
    comp_mult$random
  ),
  Método = c(
    rep("STL", length(ts_original)),
    rep("Clásica Aditiva", length(comp_add$random)),
    rep("Clásica Multiplicativa", length(comp_mult$random))
  )
)

# --- STL ---
ggplot(subset(residuos_df, Método == "STL"), aes(x = Fecha, y = Residuo)) +
  geom_line(color = "steelblue", alpha = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuos - STL",
    subtitle = "Componente aleatorio del método STL",
    y = "Residuo", x = "Fecha"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40", size = 11)
  )

# --- Clásica Aditiva ---
ggplot(subset(residuos_df, Método == "Clásica Aditiva"), aes(x = Fecha, y = Residuo)) +
  geom_line(color = "steelblue", alpha = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuos - Clásica Aditiva",
    subtitle = "Componente aleatorio del método aditivo",
    y = "Residuo", x = "Fecha"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40", size = 11)
  )

# --- Clásica Multiplicativa ---
ggplot(subset(residuos_df, Método == "Clásica Multiplicativa"), aes(x = Fecha, y = Residuo)) +
  geom_line(color = "steelblue", alpha = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuos - Clásica Multiplicativa",
    subtitle = "Componente aleatorio del método multiplicativo",
    y = "Residuo", x = "Fecha"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 14),
    plot.subtitle = element_text(color = "gray40", size = 11)
  )

# Agregar ranking por cada métrica
resultados$Rank_MAE <- rank(resultados$MAE)
resultados$Rank_RMSE <- rank(resultados$RMSE)
resultados$Rank_MAPE <- rank(resultados$MAPE)

# Puntaje total (el menor es mejor)
resultados$Puntaje_Total <- resultados$Rank_MAE + resultados$Rank_RMSE + resultados$Rank_MAPE

# Elegir el mejor
mejor <- resultados[which.min(resultados$Puntaje_Total), ]
cat("El mejor método es:", mejor$Método, "con puntaje total:", mejor$Puntaje_Total, "\n")
## El mejor método es: Clásica Multiplicativa con puntaje total: 5
residuo_multiplicativo_log <- log(comp_mult$random)
residuos_log <- residuo_multiplicativo_log

# Agregar a tu dataframe
residuos_df_log <- data.frame(
  Fecha = comp_mult$fechas,
  Residuo = log(comp_mult$random),
  Método = "Clásica Multiplicativa (Log)"
)

ggplot(residuos_df_log, aes(x = Fecha, y = Residuo)) +
  geom_line(color = "steelblue", alpha = 0.7) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(
    title = "Residuos Logarítmicos - Clásica Multiplicativa",
    subtitle = "Transformación logarítmica para evaluar ruido blanco",
    y = "Residuo (log)", x = "Fecha"
  ) +
  theme_minimal()

# Prueba de ruido blanco
Box.test(log(comp_mult$random), type = "Ljung-Box")
## 
##  Box-Ljung test
## 
## data:  log(comp_mult$random)
## X-squared = 1251.4, df = 1, p-value < 2.2e-16
# Prueba de Arch
ArchTest(residuos_log)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  residuos_log
## Chi-squared = 1914.2, df = 12, p-value < 2.2e-16
# Gráfico ACF/PACF
ggAcf(residuos_log) + 
  ggtitle("ACF de Residuos Log-Transformados")

ggPacf(residuos_log) +
  ggtitle("PACF de Residuos Log-Transformados")

Prueba de raiz unitaria

# Serie original
y <- count_ma                      
plot(y, main="Serie Original", ylab="Valor de WM2NS", xlab="Tiempo")

# ADF + KPSS en nivel
tseries::adf.test(count_ma, alternative="stationary")
## 
##  Augmented Dickey-Fuller Test
## 
## data:  count_ma
## Dickey-Fuller = -0.672, Lag order = 13, p-value = 0.9731
## alternative hypothesis: stationary
tseries::kpss.test(count_ma)
## Warning in tseries::kpss.test(count_ma): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  count_ma
## KPSS Level = 22.125, Truncation lag parameter = 8, p-value = 0.01
# Primera diferencia
count_ma_diff <- diff(count_ma, differences=1)

# ADF + KPSS en diferencia
tseries::adf.test(na.omit(count_ma_diff), alternative="stationary")
## Warning in tseries::adf.test(na.omit(count_ma_diff), alternative =
## "stationary"): p-value smaller than printed p-value
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(count_ma_diff)
## Dickey-Fuller = -6.4261, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
tseries::kpss.test(na.omit(count_ma_diff))
## Warning in tseries::kpss.test(na.omit(count_ma_diff)): p-value smaller than
## printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  na.omit(count_ma_diff)
## KPSS Level = 5.3197, Truncation lag parameter = 8, p-value = 0.01
# Primera diferencia
count_ma_diff <- diff(count_ma, differences = 1)
autoplot(ts(count_ma_diff)) +
  labs(title = "Serie después de primera diferenciación",
       y = "WM2NS (Diferenciado)", x = "observation_date")

# Pruebas en nivel
adf0  <- adf.test(y, alternative="stationary")
kpss0 <- kpss.test(y)
## Warning in kpss.test(y): p-value smaller than printed p-value
# Primera diferencia (no estacional)
y_diff1 <- diff(y, differences = 1)
adf1  <- adf.test(na.omit(y_diff1), alternative="stationary")
## Warning in adf.test(na.omit(y_diff1), alternative = "stationary"): p-value
## smaller than printed p-value
kpss1 <- kpss.test(na.omit(y_diff1))
## Warning in kpss.test(na.omit(y_diff1)): p-value smaller than printed p-value
# ¿Cuántas diferencias estacionales?
D <- nsdiffs(y)   # número de diferencias estacionales sugeridas

# Aplicar diferencia estacional si D > 0
if (D > 0) {
  y_diff_seas <- diff(y, lag = frequency(y), differences = D)
} else {
  y_diff_seas <- y
}

# Serie completamente diferenciada
if (D > 0) {
  # primero quitamos tendencia, luego estacionalidad
  y_final <- diff(y_diff1, lag = frequency(y), differences = D)
} else {
  # solo quitamos tendencia
  y_final <- y_diff1
}

# Pruebas finales en y_final
adf_final  <- adf.test(na.omit(y_final), alternative="stationary")
## Warning in adf.test(na.omit(y_final), alternative = "stationary"): p-value
## smaller than printed p-value
kpss_final <- kpss.test(na.omit(y_final))
## Warning in kpss.test(na.omit(y_final)): p-value smaller than printed p-value
# Mostrar resultados
cat("=== Pruebas de Raíz Unitaria y Estacionariedad ===\n")
## === Pruebas de Raíz Unitaria y Estacionariedad ===
print(adf0);  print(kpss0)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  y
## Dickey-Fuller = -0.672, Lag order = 13, p-value = 0.9731
## alternative hypothesis: stationary
## 
##  KPSS Test for Level Stationarity
## 
## data:  y
## KPSS Level = 22.125, Truncation lag parameter = 8, p-value = 0.01
print(adf1);  print(kpss1)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(y_diff1)
## Dickey-Fuller = -6.4261, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## 
##  KPSS Test for Level Stationarity
## 
## data:  na.omit(y_diff1)
## KPSS Level = 5.3197, Truncation lag parameter = 8, p-value = 0.01
cat("Diferencias estacionales sugeridas (D) =", D, "\n")
## Diferencias estacionales sugeridas (D) = 0
print(adf_final);  print(kpss_final)
## 
##  Augmented Dickey-Fuller Test
## 
## data:  na.omit(y_final)
## Dickey-Fuller = -6.4261, Lag order = 13, p-value = 0.01
## alternative hypothesis: stationary
## 
##  KPSS Test for Level Stationarity
## 
## data:  na.omit(y_final)
## KPSS Level = 5.3197, Truncation lag parameter = 8, p-value = 0.01
# Preparar datos para ggplot (eje X como tiempo numérico)
plot_df <- tibble(
  Fecha      = time(y),
  Original   = as.numeric(y),
  Diff1      = c(NA, as.numeric(y_diff1)),
  SeasDiff   = {
    tmp <- rep(NA, length(y))
    if (D > 0) tmp[(frequency(y)+1):length(y)] <- as.numeric(y_diff_seas)
    tmp
  },
  FinalDiff  = {
    tmp2 <- rep(NA, length(y))
    if (D > 0) {
      start_idx <- 1 + frequency(y) * D + 1
    } else {
      start_idx <- 2
    }
    tmp2[start_idx:length(y)] <- as.numeric(y_final)
    tmp2
  }
) %>%
  pivot_longer(-Fecha, names_to = "Tipo", values_to = "Valor")

# Gráfico con capas dinámicas y facetado
#  Lista de tipos
tipos <- unique(plot_df$Tipo)

#  Función que genera el plot para un tipo dado
crear_plot <- function(tipo_nombre) {
  df_sub <- filter(plot_df, Tipo == tipo_nombre)
  
  ggplot(df_sub, aes(x = Fecha, y = Valor)) +
    geom_line(color = "steelblue", size = 0.8, alpha = 0.8) +
    geom_point(data = df_sub %>% filter(!is.na(Valor) & Tipo != "Original"),
               size = 1.2, color = "darkred") +
    labs(
      title = paste0("Serie: ", tipo_nombre),
      x     = "Tiempo",
      y     = ifelse(tipo_nombre == "Original", "Valor", "Diferencia")
    ) +
    theme_minimal(base_size = 12) +
    theme(
      plot.title      = element_text(face="bold", size=14, hjust=0.5),
      panel.grid.minor= element_blank()
    )
}

# Crear cada gráfico
plots <- lapply(tipos, crear_plot)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Mostrar en un grid de 2 columnas x 2 filas
grid.arrange(grobs = plots, ncol = 2)
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 2297 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).