---
title: "Estimación de Ventas con Procesos Gaussianos en Espacios de Hilbert"
author: "Rafael Navarro"
date: "26/11/2025"
toc: true
format:
html:
html-math-method: katex
code-tools: true
self-contained: true
math: mathjax
execute:
warning: false
cache: false
freeze: auto
project:
type: website
output-dir: docs
---
## Carga de librerías
```{r}
#| code-fold: true
#| code-summary: "Código"
library (rprojroot)
root<- has_file (".Workflow-Examples-root" )$ make_fix_file ()
library (tidyverse)
library (tictoc)
mytoc <- \() {
toc (func.toc= \(tic, toc, msg) {
sprintf ("%s tomó %s seg" , msg, as.character (signif (toc- tic, 2 )))
})}
library (cmdstanr)
CMDSTANR_OUTPUT_DIR <- root ("stan_output" )
library (posterior)
options (pillar.neg = FALSE , pillar.subtle= FALSE , pillar.sigfig= 2 )
library (tinytable)
options (tinytable_format_num_fmt = "significant_cell" , tinytable_format_digits = 2 , tinytable_tt_digits= 2 )
library (loo)
library (bayesplot)
theme_set (bayesplot:: theme_default (base_family = "sans" ))
library (patchwork)
library (ggrepel)
set1 <- RColorBrewer:: brewer.pal (7 , "Set1" )
#setwd("~/HSGP Stan Euro")
```
# Carga de datos
```{r}
#| code-fold: false
#| code-summary: "Código"
ventas <- read_csv (root ("data" , "data.csv" )) #Cargar ventas desde 2022
ventas$ venta[ventas$ venta== 0 ] <- 1
mventa <- mean (ventas$ venta)
train <- ventas[ventas$ nompuntodeventa == "MP ANGAMOS" ,]
test <- train[train$ fecha >= as.Date ('2025-11-01' ),]
train <- train[train$ fecha <= as.Date ('2025-10-31' ),]
mventa <- mean (train$ venta)
train$ ventas100 <- train$ venta/ mventa* 100
xnew <- 1 : 365 + max (train$ t)
fecha_new <- 1 : 365 + max (train$ fecha) #solo de paso, no es necesario para modelo
day_of_week_new <- lubridate:: wday (fecha_new, week_start = 1 )
day_of_year_new <- lubridate:: yday (fecha_new)
```
# Flujo de trabajo iterativo
## Modelo 1: Tendencia
El modelo 1 solo estima la tendencia a largo plazo sobre los años utilizando procesos Gaussianos aproximados por funciones base en espacios de Hilbert
$$
\begin{align*}
f(x) &= \text{Intercepto} + f_1(x) \\
\text{Intercepto} &= \beta_{trend} \\
f_1(x) &\sim \text{GP}(0, K_1{(x, x')} ) \\
\\
y(x) &= \text{Normal}(f(x), \sigma^2)
\end{align*}
$$
donde GP tiene una función de covarianza cuadrática exponencial.
\
Compilar el modelo 1
```{r}
modelo1 <- cmdstan_model (stan_file = root ("gpbf1.stan" ),
include_paths = root ())
```
Data para Stan
```{r}
standata1 <- list (x= train$ t,
nlength = length (train$ nompuntodeventa),
y= log (train$ ventas100),
N= length (train$ t),
c_f1= 1.5 ,
M_f1= 20 ,
x_new = xnew,
N_total = length (train$ nompuntodeventa) + length (xnew),
N_new = length (xnew)
)
```
Ajuste del modelo1 con Pathfinder
```{r, output = FALSE}
tic ('Estimando MAP para modelo 1 con Pathfinder' )
pth1 <- modelo1$ pathfinder (data = standata1, init= 0.1 ,
num_paths= 10 , single_path_draws= 40 , draws= 400 ,
history_size= 100 , max_lbfgs_iters= 100 ,
refresh= 0 , output_dir= CMDSTANR_OUTPUT_DIR)
mytoc ()
```
Parámetros estimados de Pathfinder
```{r}
pdraws1 <- pth1$ draws ()
summarise_draws (subset (pdraws1, variable= c ('beta_trend' , 'sigma_f1' ,'lengthscale_f1' ,'sigma' )),
default_summary_measures ()) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
```
Ajuste del modelo1 con MCMC incluyendo Pathfinder
```{r, output = FALSE}
tic ('MCMC muestreo de modelo 1' )
fit1 <- modelo1$ sample (data= standata1, iter_warmup= 100 , iter_sampling= 100 ,
chains= 4 , parallel_chains= 4 , seed= 3896 , output_dir= CMDSTANR_OUTPUT_DIR,
init = pth1)
mytoc ()
```
Obtener muestras
```{r}
draws1 <- fit1$ draws ()
summarise_draws (subset (draws1, variable= c ('beta_trend' , 'sigma_f1' ,'lengthscale_f1' ,'sigma' ))) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
```
\
No se observa multimodalidad
```{r}
mcmc_trace (draws1, regex_pars= c ('beta_trend' , 'sigma_f1' ,'lengthscale_f1' ,'sigma' ))
```
Ajuste del modelo a los datos reales. La línea morada representa la estimación fuera de la muestra a 365 días.
```{r}
#| code-fold: true
#| code-summary: "Código"
draws1 <- as_draws_matrix (draws1)
Ef <- exp (apply (subset (draws1, variable= 'f' ), 2 , median))
Ef_new <- exp (apply (subset (draws1, variable= 'f_new' ), 2 , median))
ymean100 <- mean ( log (train$ ventas100) )
f1 <- exp (apply (subset (draws1, variable= 'f1' ), 2 , median) + ymean100)
f1_new <- exp (apply (subset (draws1, variable= 'f1_new' ), 2 , median) + ymean100)
intercept <- exp (apply (subset (draws1, variable= 'intercept' ), 2 , median) + ymean100)
intercept_new <- exp (apply (subset (draws1, variable= 'intercept_new' ), 2 , median) + ymean100)
df_new <- data.frame (
fecha_new = fecha_new,
Ef_new = Ef_new,
f1_new = f1_new,
intercept_new = intercept_new
)
# --- GRÁFICO 1: ESTIMACIÓN TOTAL ---
p1 <- train |>
mutate (Ef = Ef) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.2 ) +
geom_line (aes (y= Ef, color= "Ajuste" )) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= Ef_new, x = fecha_new, color= "Predicción" )) +
labs (x= "Fecha" , y= "Ventas" ) +
scale_color_manual (values = c ("Ajuste" = set1[1 ], "Predicción" = set1[4 ])) +
ggtitle ('Estimación Total' ) +
theme (legend.position = "none" ) # <--- LEYENDA ELIMINADA
# --- GRÁFICO 2: COMPONENTE GP1 (Ciclo) ---
p2 <- train |>
mutate (f1 = f1) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.2 ) +
geom_line (aes (y= f1, color= "Ajuste" )) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= f1_new, x = fecha_new, color= "Predicción" )) +
labs (x= "Fecha" , y= "Ventas" ) +
scale_color_manual (values = c ("Ajuste" = set1[1 ], "Predicción" = set1[4 ])) +
ggtitle ('GP1: Ciclo suave' ) +
theme (legend.position = "none" ) # <--- LEYENDA ELIMINADA
# --- GRÁFICO 3: TENDENCIA ---
p3 <- train |>
mutate (intercept = intercept) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.2 ) +
geom_line (aes (y= intercept, color= "Ajuste" )) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= intercept_new, x = fecha_new, color= "Predicción" )) +
labs (x= "Fecha" , y= "Ventas" ) +
scale_color_manual (values = c ("Ajuste" = set1[1 ], "Predicción" = set1[4 ])) +
ggtitle ('Tendencia' ) +
theme (legend.position = "none" ) #
# Composición final
p1 / (p3 + p2)
```
## Modelo 2: Tendencia + Tendencia Estacional Anual
El modelo 2 estima la tendencia a largo plazo sobre los años utilizando procesos Gaussianos aproximados por funciones base en espacios de Hilbert. Mientras el primer proceso Gaussiano utiliza la función de covarianza exponencial cuadrática, el segundo utiliza una función de covarianza periódica para capturar efectos anuales.
$$
\begin{align*}
f(x) &= \text{Intercepto} + f_1(x) + f_2(x) \\
\text{Intercepto} &= 0.0 \\
f_1(x) &\sim \text{GP}(0, K_1{(x, x')} ) \\
f_2(x) &\sim \text{GP}(0, K_2{(x, x')} )\\
\\
y(x) &= \text{Normal}(f(x), \sigma^2)
\end{align*}
$$
Compilar el modelo 2
```{r}
modelo2 <- cmdstan_model (stan_file = root ("gpbf22.stan" ),
include_paths = root ())
```
Data para Stan
```{r}
standata2 <- list (x= train$ t,
nlength = length (train$ nompuntodeventa),
y= log (train$ ventas100),
N= length (train$ t),
c_f1= 1.5 ,
M_f1= 20 ,
J_f2 = 20 ,
x_new = xnew,
N_total = length (train$ nompuntodeventa) + length (xnew),
N_new = length (xnew)
)
```
Ajuste del modelo2 con Pathfinder
```{r, output = FALSE}
tic ('Estimando MAP para modelo 2 con Pathfinder' )
pth2 <- modelo2$ pathfinder (data = standata2, init= 0.1 ,
num_paths= 10 , single_path_draws= 40 , draws= 400 ,
history_size= 100 , max_lbfgs_iters= 100 ,
refresh= 0 , output_dir= CMDSTANR_OUTPUT_DIR, psis_resample = FALSE )
mytoc ()
```
Parámetros estimados de Pathfinder
```{r}
pdraws2 <- pth2$ draws ()
summarise_draws (subset (pdraws2, variable= c ('beta_trend' , 'sigma' ,
'sigma_f1' ,'lengthscale_f1' ,
'sigma_f2' ,'lengthscale_f2' )),
default_summary_measures ()) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
```
Ajuste del modelo1 con MCMC incluyendo Pathfinder
```{r, output = FALSE}
tic ('MCMC muestreo de modelo 2' )
fit2 <- modelo2$ sample (data= standata2, iter_warmup= 100 , iter_sampling= 100 ,
chains= 4 , parallel_chains= 4 , seed= 3896 , output_dir= CMDSTANR_OUTPUT_DIR,
init = pth2)
mytoc ()
```
Obtener muestras
```{r}
draws2 <- fit2$ draws ()
summarise_draws (subset (draws2, variable= c ('beta_trend' , 'sigma' ,
'sigma_f1' ,'lengthscale_f1' ,
'sigma_f2' ,'lengthscale_f2' ))) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
```
\
No se observa multimodalidad
```{r}
mcmc_trace (draws2, regex_pars= c ('beta_trend' , 'sigma' ,
'sigma_f1' ,'lengthscale_f1' ,
'sigma_f2' ,'lengthscale_f2' ))
```
Ajuste del modelo a los datos reales. La línea morada representa la estimación fuera de la muestra a 365 días.
```{r}
#| code-fold: true
#| code-summary: "Código"
draws2 <- as_draws_matrix (draws2)
Ef <- exp (apply (subset (draws2, variable= 'f' ), 2 , median))
Ef_new <- exp (apply (subset (draws2, variable= 'f_new' ), 2 , median))
ymean100 <- mean ( log (train$ ventas100) )
f1 <- exp (apply (subset (draws2, variable= 'f1' ), 2 , median) + ymean100)
f1_new <- exp (apply (subset (draws2, variable= 'f1_new' ), 2 , median) + ymean100)
f2 <- exp (apply (subset (draws2, variable= 'f2' ), 2 , median) + ymean100)
f2_new <- exp (apply (subset (draws2, variable= 'f2_new' ), 2 , median) + ymean100)
intercept <- exp (apply (subset (draws2, variable= 'intercept' ), 2 , median) + ymean100)
intercept_new <- exp (apply (subset (draws2, variable= 'intercept_new' ), 2 , median) + ymean100)
df_new <- data.frame (
fecha_new = fecha_new,
Ef_new = Ef_new,
f1_new = f1_new,
f2_new = f2_new,
intercept_new = intercept_new
)
p1 <- train |>
mutate (Ef = Ef) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.2 ) +
geom_line (aes (y= Ef), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= Ef_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "" , y= "Ventas" ) + # Quito X para no repetir en el patchwork
#ggtitle('Estimación Total') +
theme (legend.position = "none" )
p2 <- train |>
mutate (f1 = f1) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.1 ) + # Puntos muy tenues de referencia
geom_line (aes (y= f1), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= f1_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "" , y= "Ventas" ) +
#ggtitle('GP1: Ciclo suave') +
theme (legend.position = "none" )
p3 <- train |>
mutate (intercept = intercept) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.1 ) +
geom_line (aes (y= intercept), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= intercept_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "" , y= "Ventas" ) +
#ggtitle('Tendencia') +
theme (legend.position = "none" )
p4 <- train |>
mutate (f2 = f2) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.1 ) +
geom_line (aes (y= f2), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= f2_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "Fecha" , y= "Ventas" ) + # Aquí sí dejo la etiqueta X
#ggtitle('GP2: Estacionalidad Anual') +
theme (legend.position = "none" )
p1 / (p3 + p2) / p4
```
## Modelo 3: Tendencia + Tendencia Estacional Anual + Día de Semana
El modelo 3 añade el efecto del día de semana para la estimación de la venta. Los sábados y domingos la venta es significativamente mayor que los días de semana.
$$
\begin{align*}
f(x,d) &= \text{Intercepto} + f_1(x) + f_2(x) \\
\text{Intercepto} &= \beta_{d} \\
f_1(x) &\sim \text{GP}(0, K_1{(x, x')} ) \\
f_2(x) &\sim \text{GP}(0, K_2{(x, x')} )\\
\beta_{ 1 } &= 0 \quad \text{Si el día cae Lunes} \\
\beta_{ d } &\sim \text{Normal}(0,1), \quad d=2, ..., 7 \\
\\
y(x,d) &= \text{Normal}(f(x,d), \sigma^2)
\end{align*}
$$
En este punto se hizo una modificación importante en el cálculo del intercepto. Cuando el intercepto estimaba una tendencia lineal que bajaba de forma muy pronunciada, el proceso gaussiano intentaba corregir el efecto creando una fuerza opuesta, lo que resultaba en un incremento artificial de la predicción. El efecto que se desea capturar para esta serie de ventas consiste en un declive de ventas pronunciado, que se logra estabilizar en un determinado momento de la serie. Entonces, se sabe que la media de ventas debe tener un valor mínimo, de ninguna manera ser inferior 0, y debe estabilizarse en un momento específico de la serie. Para ello, se define una función sigmoidal:
$$
\mu(t) = min \ + \frac{max - min }{1+exp(k * (t - t_0) )}
$$
con 4 parámetros adicionales a estimar:
$$
\begin{align}
max&: \text{ Valor Máximo} \\
min&: \text{Valor Mínimo} \\
t_0&: \text{Punto de Inflexión} \\
k&: \text{Velocidad de cambio} \\
\end{align}
$$
Compilar el modelo 3
```{r}
modelo3 <- cmdstan_model (stan_file = root ("gpbf34.stan" ),
include_paths = root ())
```
Data para Stan
```{r}
standata3 <- list (x= train$ t,
nlength = length (train$ nompuntodeventa),
y= log (train$ ventas100),
N= length (train$ t),
c_f1= 2 ,
M_f1= 20 ,
J_f2 = 20 ,
x_new = xnew,
N_total = length (train$ nompuntodeventa) + length (xnew),
N_new = length (xnew),
day_of_week= train$ dia_sem,
day_of_week_new = day_of_week_new
)
```
Ajuste del modelo3 con Pathfinder
```{r, output = FALSE}
tic ('Estimando MAP para modelo 3 con Pathfinder' )
pth3 <- modelo3$ pathfinder (data = standata3, init= 0.1 ,
num_paths= 10 , single_path_draws= 40 , draws= 400 ,
history_size= 100 , max_lbfgs_iters= 100 ,
refresh= 0 , output_dir= CMDSTANR_OUTPUT_DIR, psis_resample = FALSE )
mytoc ()
```
Parámetros estimados de Pathfinder
```{r}
pdraws3 <- pth3$ draws ()
summarise_draws (subset (pdraws3, variable= c ( 'sigma' ,
'sigma_f1' ,'lengthscale_f1' ,
'sigma_f2' ,'lengthscale_f2' )),
default_summary_measures ()) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
summarise_draws (subset (pdraws3, variable= c ('beta_f3' )),
default_summary_measures ()) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
```
Ajuste del modelo3 con MCMC incluyendo Pathfinder
```{r, output = FALSE}
tic ('MCMC muestreo de modelo 3' )
fit3 <- modelo3$ sample (data= standata3, iter_warmup= 100 , iter_sampling= 100 ,
chains= 4 , parallel_chains= 4 , seed= 3896 , output_dir= CMDSTANR_OUTPUT_DIR,
init = pth3)
mytoc ()
```
Obtener muestras
```{r}
draws3 <- fit3$ draws ()
summarise_draws (subset (draws3, variable= c ('sigma' ,
'sigma_f1' ,'lengthscale_f1' ,
'sigma_f2' ,'lengthscale_f2' ))) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
summarise_draws (subset (draws3, variable= c ('beta_f3' ))) |>
mutate (across (where (is.numeric), ~ round (.x, 3 ))) |>
tt ()
```
\
No se observa multimodalidad
```{r}
mcmc_trace (draws3, pars = c ('sigma' ,
'floor' , 'cap' , 'k' , 't0' ,
'sigma_f1' ,'lengthscale_f1' ,
'sigma_f2' ,'lengthscale_f2' ))
```
Ajuste del modelo a los datos reales. La línea morada representa la estimación fuera de la muestra a 365 días.
```{r}
#| code-fold: true
#| code-summary: "Código"
draws3 <- as_draws_matrix (draws3)
Ef <- exp (apply (subset (draws3, variable= 'f' ), 2 , mean))
#Ef_new <- exp(apply(subset(draws3, variable='f_new'), 2, mean))
Ef_new <- apply (subset (draws3, variable= 'f_new_correct' ), 2 , mean)
ymean100 <- mean ( log (train$ ventas100) )
f1 <- exp (apply (subset (draws3, variable= 'f1' ), 2 , median) + ymean100)
f1_new <- exp (apply (subset (draws3, variable= 'f1_new' ), 2 , median) + ymean100)
f2 <- exp (apply (subset (draws3, variable= 'f2' ), 2 , median) + ymean100)
f2_new <- exp (apply (subset (draws3, variable= 'f2_new' ), 2 , median) + ymean100)
Ef_day_of_week <- apply (subset (draws3, variable= 'f_day_of_week' ), 2 , median)
Ef_day_of_week <- exp (Ef_day_of_week - mean (Ef_day_of_week) + ymean100)
intercept <- exp (apply (subset (draws3, variable= 'intercept' ), 2 , median) + ymean100)
intercept_new <- exp (apply (subset (draws3, variable= 'intercept_new' ), 2 , median) + ymean100)
df_new <- data.frame (
fecha_new = fecha_new,
Ef_new = Ef_new,
f1_new = f1_new,
f2_new = f2_new,
intercept_new = intercept_new
)
p1 <- train |>
mutate (Ef = Ef) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.2 ) +
geom_line (aes (y= Ef), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= Ef_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "" , y= "Ventas" ) + # Quito X para no repetir en el patchwork
#ggtitle('Estimación Total') +
theme (legend.position = "none" )
p2 <- train |>
mutate (f1 = f1) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.1 ) + # Puntos muy tenues de referencia
geom_line (aes (y= f1), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= f1_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "" , y= "Ventas" ) +
#ggtitle('GP1: Ciclo suave') +
theme (legend.position = "none" )
p3 <- train |>
mutate (intercept = intercept) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.1 ) +
geom_line (aes (y= intercept), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= intercept_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "" , y= "Ventas" ) +
#ggtitle('Tendencia') +
theme (legend.position = "none" )
p4 <- train |>
mutate (f2 = f2) |>
ggplot (aes (x= fecha, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.1 ) +
geom_line (aes (y= f2), color= set1[1 ], linewidth= 0.8 ) +
geom_hline (yintercept= 100 , color= 'gray' ) +
geom_line (data= df_new, aes (y= f2_new, x = fecha_new), color= set1[4 ], linewidth= 0.8 ) +
labs (x= "Fecha" , y= "Ventas" ) +
theme (legend.position = "none" )
p5 <- train |>
ggplot (aes (x= dia_sem, y= ventas100)) +
geom_point (color= set1[2 ], alpha= 0.2 ) +
scale_x_continuous (breaks = 1 : 7 , labels= c ('Lun' ,'Mar' ,'Mier' ,'Jue' ,'Vie' ,'Sab' ,'Dom' )) +
geom_line (data= data.frame (x= 1 : 7 ,y= Ef_day_of_week), aes (x= x, y= Ef_day_of_week), color= set1[1 ]) +
geom_hline (yintercept= 100 , color= 'gray' ) +
labs (x= "Date" , y= "Ventas" )
p1 / (p3 + p2) / (p4 + p5)
```
Predicción vs Real
```{r}
test$ Ef3 <- (Ef_new[1 : length (test$ fecha)])/ 100 * mventa
test |>
ggplot (aes (x= fecha, y = venta)) +
geom_point (alpha = 0.5 ) + geom_line (alpha = 0.2 ) +
geom_point (aes (y = Ef3), alpha = 0.5 , color = 'purple' ) + geom_line (aes (y = Ef3), alpha = 0.2 , color = 'purple' ) +
ylim (0 , NA )
rmse <- sqrt (mean ((test$ venta - test$ Ef3)^ 2 ))
sum (test$ venta)
sum (test$ Ef3)
rmse
```