Estimación de Ventas con Procesos Gaussianos en Espacios de Hilbert

Author

Rafael Navarro

Published

February 11, 2027

Carga de librerías

Código

library(rprojroot)
root<-has_file(".Workflow-Examples-root")$make_fix_file()
library(tidyverse)
library(tictoc)
mytoc <- \() {
  toc(func.toc=\(tic, toc, msg) {
    sprintf("%s tomó %s seg", msg, as.character(signif(toc-tic, 2)))
  })}
library(cmdstanr)
CMDSTANR_OUTPUT_DIR <- root("stan_output")
library(posterior)
options(pillar.neg = FALSE, pillar.subtle=FALSE, pillar.sigfig=2)
library(tinytable)
options(tinytable_format_num_fmt = "significant_cell", tinytable_format_digits = 2, tinytable_tt_digits=2)
library(loo)
library(bayesplot)
theme_set(bayesplot::theme_default(base_family = "sans"))
library(patchwork)
library(ggrepel)
set1 <- RColorBrewer::brewer.pal(7, "Set1")
#setwd("~/HSGP Stan Euro")

Carga de datos

ventas <- read_csv(root("data", "data.csv")) #Cargar ventas desde 2022
ventas$venta[ventas$venta==0] <- 1
mventa <- mean(ventas$venta)

train <- ventas[ventas$nompuntodeventa == "MP ANGAMOS",]
test <- train[train$fecha >= as.Date('2025-11-01'),]
train <- train[train$fecha <= as.Date('2025-10-31'),]
mventa <- mean(train$venta)
train$ventas100 <- train$venta/mventa*100

xnew <- 1:365 + max(train$t)
fecha_new <- 1:365 + max(train$fecha) #solo de paso, no es necesario para modelo
day_of_week_new <- lubridate::wday(fecha_new, week_start = 1)
day_of_year_new <- lubridate::yday(fecha_new)

Flujo de trabajo iterativo

Modelo 1: Tendencia

El modelo 1 solo estima la tendencia a largo plazo sobre los años utilizando procesos Gaussianos aproximados por funciones base en espacios de Hilbert

\begin{align*} f(x) &= \text{Intercepto} + f_1(x) \\ \text{Intercepto} &= \beta_{trend} \\ f_1(x) &\sim \text{GP}(0, K_1{(x, x')} ) \\ \\ y(x) &= \text{Normal}(f(x), \sigma^2) \end{align*} donde GP tiene una función de covarianza cuadrática exponencial.

Compilar el modelo 1

modelo1 <- cmdstan_model(stan_file = root("gpbf1.stan"),
                        include_paths = root())

Data para Stan

standata1 <- list(x=train$t,
                  nlength = length(train$nompuntodeventa),
                  y=log(train$ventas100),
                  N=length(train$t),
                  c_f1=1.5,
                  M_f1=20,
                  x_new = xnew,
                  N_total = length(train$nompuntodeventa) + length(xnew),
                  N_new = length(xnew)
                  )

Ajuste del modelo1 con Pathfinder

tic('Estimando MAP para modelo 1 con Pathfinder')
pth1 <- modelo1$pathfinder(data = standata1, init=0.1,
                          num_paths=10, single_path_draws=40, draws=400,
                          history_size=100, max_lbfgs_iters=100,
                          refresh=0, output_dir=CMDSTANR_OUTPUT_DIR)
mytoc()

Parámetros estimados de Pathfinder

pdraws1 <- pth1$draws()
summarise_draws(subset(pdraws1, variable=c('beta_trend', 'sigma_f1','lengthscale_f1','sigma')),
                default_summary_measures()) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	q5	q95
beta_trend	-0.12	-0.12	0.018	-0.14	-0.1
sigma_f1	0.33	0.3	0.075	0.3	0.52
lengthscale_f1	0.068	0.056	0.023	0.056	0.12
sigma	0.98	0.98	0.01	0.96	0.99

Ajuste del modelo1 con MCMC incluyendo Pathfinder

tic('MCMC muestreo de modelo 1')
fit1 <- modelo1$sample(data=standata1, iter_warmup=100, iter_sampling=100,
                      chains=4, parallel_chains=4, seed=3896, output_dir=CMDSTANR_OUTPUT_DIR,
                      init = pth1)
mytoc()

Obtener muestras

draws1 <- fit1$draws()
summarise_draws(subset(draws1, variable=c('beta_trend', 'sigma_f1','lengthscale_f1','sigma'))) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95	rhat	ess_bulk	ess_tail
beta_trend	-0.13	-0.12	0.039	0.036	-0.19	-0.07	1	374	301
sigma_f1	0.2	0.19	0.099	0.061	0.11	0.34	1	168	147
lengthscale_f1	0.17	0.11	0.27	0.051	0.038	0.58	1.1	38	52
sigma	0.98	0.98	0.022	0.023	0.94	1	1	340	300

No se observa multimodalidad

mcmc_trace(draws1, regex_pars=c('beta_trend', 'sigma_f1','lengthscale_f1','sigma'))

Ajuste del modelo a los datos reales. La línea morada representa la estimación fuera de la muestra a 365 días.

Código

draws1 <- as_draws_matrix(draws1)
Ef <- exp(apply(subset(draws1, variable='f'), 2, median))
Ef_new <- exp(apply(subset(draws1, variable='f_new'), 2, median))

ymean100 <- mean( log(train$ventas100) )
f1 <- exp(apply(subset(draws1, variable='f1'), 2, median) + ymean100)
f1_new <- exp(apply(subset(draws1, variable='f1_new'), 2, median) + ymean100)

intercept <- exp(apply(subset(draws1, variable='intercept'), 2, median) + ymean100)
intercept_new <- exp(apply(subset(draws1, variable='intercept_new'), 2, median) + ymean100)

df_new <- data.frame(
  fecha_new = fecha_new, 
  Ef_new = Ef_new, 
  f1_new = f1_new, 
  intercept_new = intercept_new
)

# --- GRÁFICO 1: ESTIMACIÓN TOTAL ---
p1 <- train |>
  mutate(Ef = Ef) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.2) +
  geom_line(aes(y=Ef, color="Ajuste")) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=Ef_new, x = fecha_new, color="Predicción")) +
  labs(x="Fecha", y="Ventas") +
  scale_color_manual(values = c("Ajuste" = set1[1], "Predicción" = set1[4])) +
  ggtitle('Estimación Total') +
  theme(legend.position = "none") # <--- LEYENDA ELIMINADA

# --- GRÁFICO 2: COMPONENTE GP1 (Ciclo) ---
p2 <- train |>
  mutate(f1 = f1) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.2) +
  geom_line(aes(y=f1, color="Ajuste")) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=f1_new, x = fecha_new, color="Predicción")) +
  labs(x="Fecha", y="Ventas") +
  scale_color_manual(values = c("Ajuste" = set1[1], "Predicción" = set1[4])) +
  ggtitle('GP1: Ciclo suave') +
  theme(legend.position = "none") # <--- LEYENDA ELIMINADA

# --- GRÁFICO 3: TENDENCIA ---
p3 <- train |>
  mutate(intercept = intercept) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.2) +
  geom_line(aes(y=intercept, color="Ajuste")) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=intercept_new, x = fecha_new, color="Predicción")) +
  labs(x="Fecha", y="Ventas") +
  scale_color_manual(values = c("Ajuste" = set1[1], "Predicción" = set1[4])) +
  ggtitle('Tendencia') +
  theme(legend.position = "none") # 

# Composición final
p1 / (p3 + p2)

Modelo 2: Tendencia + Tendencia Estacional Anual

El modelo 2 estima la tendencia a largo plazo sobre los años utilizando procesos Gaussianos aproximados por funciones base en espacios de Hilbert. Mientras el primer proceso Gaussiano utiliza la función de covarianza exponencial cuadrática, el segundo utiliza una función de covarianza periódica para capturar efectos anuales.

\begin{align*} f(x) &= \text{Intercepto} + f_1(x) + f_2(x) \\ \text{Intercepto} &= 0.0 \\ f_1(x) &\sim \text{GP}(0, K_1{(x, x')} ) \\ f_2(x) &\sim \text{GP}(0, K_2{(x, x')} )\\ \\ y(x) &= \text{Normal}(f(x), \sigma^2) \end{align*}

Compilar el modelo 2

modelo2 <- cmdstan_model(stan_file = root("gpbf22.stan"),
                        include_paths = root())

Data para Stan

standata2 <- list(x=train$t,
                  nlength = length(train$nompuntodeventa),
                  y=log(train$ventas100),
                  N=length(train$t),
                  c_f1=1.5,
                  M_f1=20,
                  J_f2 = 20,
                  x_new = xnew,
                  N_total = length(train$nompuntodeventa) + length(xnew),
                  N_new = length(xnew)
                  )

Ajuste del modelo2 con Pathfinder

tic('Estimando MAP para modelo 2 con Pathfinder')
pth2 <- modelo2$pathfinder(data = standata2, init=0.1,
                          num_paths=10, single_path_draws=40, draws=400,
                          history_size=100, max_lbfgs_iters=100,
                          refresh=0, output_dir=CMDSTANR_OUTPUT_DIR, psis_resample = FALSE)
mytoc()

Parámetros estimados de Pathfinder

pdraws2 <- pth2$draws()
summarise_draws(subset(pdraws2, variable=c('beta_trend', 'sigma',
                                           'sigma_f1','lengthscale_f1',
                                           'sigma_f2','lengthscale_f2')),
                default_summary_measures()) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95
beta_trend	-0.11	-0.11	0.045	0.045	-0.18	-0.037
sigma	0.94	0.94	0.035	0.033	0.88	1
sigma_f1	0.71	0.65	0.35	0.28	0.3	1.3
lengthscale_f1	0.66	0.64	0.15	0.15	0.45	0.93
sigma_f2	1	1	0.21	0.21	0.73	1.4
lengthscale_f2	0.098	0.094	0.035	0.032	0.052	0.16

Ajuste del modelo1 con MCMC incluyendo Pathfinder

tic('MCMC muestreo de modelo 2')
fit2 <- modelo2$sample(data=standata2, iter_warmup=100, iter_sampling=100,
                      chains=4, parallel_chains=4, seed=3896, output_dir=CMDSTANR_OUTPUT_DIR,
                      init = pth2)
mytoc()

Obtener muestras

draws2 <- fit2$draws()
summarise_draws(subset(draws2, variable=c('beta_trend', 'sigma',
                                          'sigma_f1','lengthscale_f1',
                                          'sigma_f2','lengthscale_f2'))) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95	rhat	ess_bulk	ess_tail
beta_trend	-0.12	-0.12	0.037	0.036	-0.18	-0.061	1	532	311
sigma	0.96	0.96	0.02	0.022	0.93	0.99	1	411	359
sigma_f1	0.28	0.21	0.21	0.14	0.06	0.67	1	323	297
lengthscale_f1	1	0.91	0.55	0.47	0.42	2.2	1	149	274
sigma_f2	0.26	0.26	0.056	0.052	0.18	0.37	1	174	248
lengthscale_f2	0.15	0.15	0.05	0.045	0.071	0.24	1	249	278

No se observa multimodalidad

mcmc_trace(draws2, regex_pars=c('beta_trend', 'sigma',
                                'sigma_f1','lengthscale_f1',
                                'sigma_f2','lengthscale_f2'))

Ajuste del modelo a los datos reales. La línea morada representa la estimación fuera de la muestra a 365 días.

Código

draws2 <- as_draws_matrix(draws2)
Ef <- exp(apply(subset(draws2, variable='f'), 2, median))
Ef_new <- exp(apply(subset(draws2, variable='f_new'), 2, median))

ymean100 <- mean( log(train$ventas100) )
f1 <- exp(apply(subset(draws2, variable='f1'), 2, median) + ymean100)
f1_new <- exp(apply(subset(draws2, variable='f1_new'), 2, median) + ymean100)
f2 <- exp(apply(subset(draws2, variable='f2'), 2, median) + ymean100)
f2_new <- exp(apply(subset(draws2, variable='f2_new'), 2, median) + ymean100)


intercept <- exp(apply(subset(draws2, variable='intercept'), 2, median) + ymean100)
intercept_new <- exp(apply(subset(draws2, variable='intercept_new'), 2, median) + ymean100)




df_new <- data.frame(
  fecha_new = fecha_new, 
  Ef_new = Ef_new, 
  f1_new = f1_new,
  f2_new = f2_new,          
  intercept_new = intercept_new 
)


p1 <- train |>
  mutate(Ef = Ef) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.2) +
  geom_line(aes(y=Ef), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=Ef_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="", y="Ventas") + # Quito X para no repetir en el patchwork
  #ggtitle('Estimación Total') +
  theme(legend.position = "none")


p2 <- train |>
  mutate(f1 = f1) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.1) + # Puntos muy tenues de referencia
  geom_line(aes(y=f1), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=f1_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="", y="Ventas") +
  #ggtitle('GP1: Ciclo suave') +
  theme(legend.position = "none")

p3 <- train |>
  mutate(intercept = intercept) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.1) +
  geom_line(aes(y=intercept), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=intercept_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="", y="Ventas") +
  #ggtitle('Tendencia') +
  theme(legend.position = "none")

p4 <- train |>
  mutate(f2 = f2) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.1) +
  geom_line(aes(y=f2), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=f2_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="Fecha", y="Ventas") + # Aquí sí dejo la etiqueta X
  #ggtitle('GP2: Estacionalidad Anual') +
  theme(legend.position = "none")

p1 / (p3 + p2) / p4

Modelo 3: Tendencia + Tendencia Estacional Anual + Día de Semana

El modelo 3 añade el efecto del día de semana para la estimación de la venta. Los sábados y domingos la venta es significativamente mayor que los días de semana.

\begin{align*} f(x,d) &= \text{Intercepto} + f_1(x) + f_2(x) \\ \text{Intercepto} &= \beta_{d} \\ f_1(x) &\sim \text{GP}(0, K_1{(x, x')} ) \\ f_2(x) &\sim \text{GP}(0, K_2{(x, x')} )\\ \beta_{ 1 } &= 0 \quad \text{Si el día cae Lunes} \\ \beta_{ d } &\sim \text{Normal}(0,1), \quad d=2, ..., 7 \\ \\ y(x,d) &= \text{Normal}(f(x,d), \sigma^2) \end{align*}

En este punto se hizo una modificación importante en el cálculo del intercepto. Cuando el intercepto estimaba una tendencia lineal que bajaba de forma muy pronunciada, el proceso gaussiano intentaba corregir el efecto creando una fuerza opuesta, lo que resultaba en un incremento artificial de la predicción. El efecto que se desea capturar para esta serie de ventas consiste en un declive de ventas pronunciado, que se logra estabilizar en un determinado momento de la serie. Entonces, se sabe que la media de ventas debe tener un valor mínimo, de ninguna manera ser inferior 0, y debe estabilizarse en un momento específico de la serie. Para ello, se define una función sigmoidal:

\mu(t) = min \ + \frac{max - min }{1+exp(k * (t - t_0) )}

con 4 parámetros adicionales a estimar:

\begin{align} max&: \text{ Valor Máximo} \\ min&: \text{Valor Mínimo} \\ t_0&: \text{Punto de Inflexión} \\ k&: \text{Velocidad de cambio} \\ \end{align}

Compilar el modelo 3

modelo3 <- cmdstan_model(stan_file = root("gpbf34.stan"),
                        include_paths = root())

Data para Stan

standata3 <- list(x=train$t,
                  nlength = length(train$nompuntodeventa),
                  y=log(train$ventas100),
                  N=length(train$t),
                  c_f1=2,
                  M_f1=20,
                  J_f2 = 20,
                  x_new = xnew,
                  N_total = length(train$nompuntodeventa) + length(xnew),
                  N_new = length(xnew),
                  day_of_week= train$dia_sem,
                  day_of_week_new = day_of_week_new
                  )

Ajuste del modelo3 con Pathfinder

tic('Estimando MAP para modelo 3 con Pathfinder')
pth3 <- modelo3$pathfinder(data = standata3, init=0.1,
                          num_paths=10, single_path_draws=40, draws=400,
                          history_size=100, max_lbfgs_iters=100,
                          refresh=0, output_dir=CMDSTANR_OUTPUT_DIR, psis_resample = FALSE)
mytoc()

Parámetros estimados de Pathfinder

pdraws3 <- pth3$draws()
summarise_draws(subset(pdraws3, variable=c( 'sigma',
                                           'sigma_f1','lengthscale_f1',
                                           'sigma_f2','lengthscale_f2')),
                default_summary_measures()) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95
sigma	0.6	0.6	0.015	0.015	0.58	0.63
sigma_f1	0.21	0.21	0.036	0.035	0.15	0.27
lengthscale_f1	0.094	0.092	0.021	0.019	0.065	0.13
sigma_f2	1.1	1	0.17	0.16	0.81	1.3
lengthscale_f2	0.1	0.1	0.018	0.02	0.074	0.13

summarise_draws(subset(pdraws3, variable=c('beta_f3')),
                default_summary_measures()) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95
beta_f3[1]	0.092	0.092	0.083	0.089	-0.043	0.23
beta_f3[2]	0.23	0.23	0.086	0.078	0.09	0.36
beta_f3[3]	0.34	0.34	0.079	0.079	0.2	0.46
beta_f3[4]	0.96	0.96	0.076	0.082	0.84	1.1
beta_f3[5]	1.8	1.8	0.077	0.079	1.7	2
beta_f3[6]	1.7	1.7	0.089	0.082	1.5	1.8

Ajuste del modelo3 con MCMC incluyendo Pathfinder

tic('MCMC muestreo de modelo 3')
fit3 <- modelo3$sample(data=standata3, iter_warmup=100, iter_sampling=100,
                      chains=4, parallel_chains=4, seed=3896, output_dir=CMDSTANR_OUTPUT_DIR,
                      init = pth3)
mytoc()

Obtener muestras

draws3 <- fit3$draws()
summarise_draws(subset(draws3, variable=c('sigma',
                                          'sigma_f1','lengthscale_f1',
                                          'sigma_f2','lengthscale_f2'))) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95	rhat	ess_bulk	ess_tail
sigma	0.62	0.62	0.014	0.013	0.6	0.65	1	367	238
sigma_f1	0.14	0.14	0.037	0.035	0.085	0.2	1	415	323
lengthscale_f1	0.094	0.09	0.048	0.046	0.027	0.19	1	298	308
sigma_f2	0.28	0.28	0.047	0.047	0.22	0.37	1	185	276
lengthscale_f2	0.12	0.11	0.03	0.028	0.068	0.17	1	179	276

summarise_draws(subset(draws3, variable=c('beta_f3' ))) |>
  mutate(across(where(is.numeric), ~ round(.x, 3))) |>
  tt()

variable	mean	median	sd	mad	q5	q95	rhat	ess_bulk	ess_tail
beta_f3[1]	0.1	0.099	0.07	0.067	-0.003	0.22	1	381	206
beta_f3[2]	0.23	0.23	0.077	0.073	0.1	0.35	1	434	339
beta_f3[3]	0.34	0.34	0.07	0.067	0.22	0.46	1	460	370
beta_f3[4]	0.97	0.97	0.075	0.076	0.85	1.1	1	405	331
beta_f3[5]	1.8	1.8	0.074	0.075	1.7	2	1	452	305
beta_f3[6]	1.7	1.7	0.069	0.067	1.6	1.8	1	412	364

No se observa multimodalidad

mcmc_trace(draws3, pars  =c('sigma',
                            'floor', 'cap', 'k', 't0',
                            'sigma_f1','lengthscale_f1',
                            'sigma_f2','lengthscale_f2'))

Ajuste del modelo a los datos reales. La línea morada representa la estimación fuera de la muestra a 365 días.

Código

draws3 <- as_draws_matrix(draws3)
Ef <- exp(apply(subset(draws3, variable='f'), 2, mean))
#Ef_new <- exp(apply(subset(draws3, variable='f_new'), 2, mean))
Ef_new <- apply(subset(draws3, variable='f_new_correct'), 2, mean)

ymean100 <- mean( log(train$ventas100) )
f1 <- exp(apply(subset(draws3, variable='f1'), 2, median) + ymean100)
f1_new <- exp(apply(subset(draws3, variable='f1_new'), 2, median) + ymean100)
f2 <- exp(apply(subset(draws3, variable='f2'), 2, median) + ymean100)
f2_new <- exp(apply(subset(draws3, variable='f2_new'), 2, median) + ymean100)

Ef_day_of_week <- apply(subset(draws3, variable='f_day_of_week'), 2, median)
Ef_day_of_week <- exp(Ef_day_of_week - mean(Ef_day_of_week) + ymean100)

intercept <- exp(apply(subset(draws3, variable='intercept'), 2, median) + ymean100)
intercept_new <- exp(apply(subset(draws3, variable='intercept_new'), 2, median) + ymean100)




df_new <- data.frame(
  fecha_new = fecha_new, 
  Ef_new = Ef_new, 
  f1_new = f1_new,
  f2_new = f2_new,          
  intercept_new = intercept_new 
)


p1 <- train |>
  mutate(Ef = Ef) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.2) +
  geom_line(aes(y=Ef), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=Ef_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="", y="Ventas") + # Quito X para no repetir en el patchwork
  #ggtitle('Estimación Total') +
  theme(legend.position = "none")


p2 <- train |>
  mutate(f1 = f1) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.1) + # Puntos muy tenues de referencia
  geom_line(aes(y=f1), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=f1_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="", y="Ventas") +
  #ggtitle('GP1: Ciclo suave') +
  theme(legend.position = "none")

p3 <- train |>
  mutate(intercept = intercept) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.1) +
  geom_line(aes(y=intercept), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=intercept_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="", y="Ventas") +
  #ggtitle('Tendencia') +
  theme(legend.position = "none")

p4 <- train |>
  mutate(f2 = f2) |>
  ggplot(aes(x=fecha, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.1) +
  geom_line(aes(y=f2), color=set1[1], linewidth=0.8) + 
  geom_hline(yintercept=100, color='gray') +
  geom_line(data= df_new, aes(y=f2_new, x = fecha_new), color=set1[4], linewidth=0.8) +
  labs(x="Fecha", y="Ventas") +
  theme(legend.position = "none")

p5 <- train |>
  ggplot(aes(x=dia_sem, y=ventas100)) +
  geom_point(color=set1[2], alpha=0.2) +
  scale_x_continuous(breaks = 1:7, labels=c('Lun','Mar','Mier','Jue','Vie','Sab','Dom')) +
  geom_line(data=data.frame(x=1:7,y=Ef_day_of_week), aes(x=x, y=Ef_day_of_week), color=set1[1]) +
  geom_hline(yintercept=100, color='gray') +
  labs(x="Date", y="Ventas")

p1 / (p3 + p2) / (p4 + p5)

Predicción vs Real

test$Ef3 <- (Ef_new[1: length(test$fecha)])/100*mventa

test |>
  ggplot(aes(x=fecha, y = venta)) + 
  geom_point(alpha = 0.5) + geom_line(alpha = 0.2) + 
  geom_point(aes(y = Ef3), alpha = 0.5, color = 'purple') + geom_line(aes(y = Ef3), alpha = 0.2, color = 'purple') + 
  ylim(0, NA)

rmse <- sqrt(mean((test$venta - test$Ef3)^2))

sum(test$venta)

[1] 55627

sum(test$Ef3)

[1] 55152.51

rmse

[1] 703.7954