Bootstrapping doble

#### Doble bootstrap: impacto de la campaña ####
library(boot)
library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# 0) Load the sales data set.
datos <- read.csv("~/dataset_ventas.csv", stringsAsFactors = FALSE)

# Normalise column names to lower case so the lookups below do not depend on
# the capitalisation used in the CSV header.
names(datos) <- tolower(names(datos))

# Fail early, with a clear message, if a column used below is missing.
columnas_requeridas <- c("dia", "ventas", "periodo")
columnas_faltantes <- setdiff(columnas_requeridas, names(datos))
if (length(columnas_faltantes) > 0) {
  stop(
    "Faltan columnas en el archivo: ",
    paste(columnas_faltantes, collapse = ", "),
    call. = FALSE
  )
}

# Coerce column types and clean the period labels (trim + lower case).
datos <- datos %>%
  mutate(
    dia     = as.integer(dia),
    ventas  = as.numeric(ventas),
    periodo = tolower(trimws(periodo))  # e.g. "antes" / "despues"
  )

# factor() orders its levels by sorting the distinct labels, NOT by their
# order of appearance in the file. The rest of the script treats level 1 as
# the "before" period and level 2 as the "after" period, which holds for
# labels such as "antes" / "despues" because they sort in that order.
datos$periodo <- factor(datos$periodo)

# The analysis is a two-group comparison addressed by level position; any
# other number of levels (e.g. a stray or empty label) would silently shift
# which rows are compared, so stop instead.
if (nlevels(datos$periodo) != 2L) {
  stop(
    "Se esperaban exactamente 2 periodos y se encontraron ",
    nlevels(datos$periodo), ": ",
    paste(levels(datos$periodo), collapse = ", "),
    call. = FALSE
  )
}

# 1) Observed difference in mean sales: second period level minus first.
# The levels are addressed by position, so level 1 is taken as "before" and
# level 2 as "after".
lev <- levels(datos$periodo)
media_antes <- mean(
  datos$ventas[which(datos$periodo == lev[1])],
  na.rm = TRUE
)
media_despues <- mean(
  datos$ventas[which(datos$periodo == lev[2])],
  na.rm = TRUE
)
diff_obs <- media_despues - media_antes
cat("Diferencia observada (después - antes):", diff_obs, "\n")

## Diferencia observada (después - antes): 10.01246

# 2) FIRST bootstrap (stratified by period).
#
# Statistic passed to boot(): difference in mean sales between the second and
# the first level of `periodo`, computed on the resampled rows.
#
# data:    data frame with a numeric `ventas` column and a factor `periodo`.
# indices: row indices of the bootstrap resample.
# Returns a single numeric value (mean of level 2 minus mean of level 1).
diff_means_boot <- function(data, indices) {
  remuestra <- data[indices, , drop = FALSE]
  niveles <- levels(remuestra$periodo)

  # Mean of `ventas` for the k-th period level, ignoring missing sales.
  media_nivel <- function(k) {
    mean(remuestra$ventas[remuestra$periodo == niveles[k]], na.rm = TRUE)
  }

  media_nivel(2) - media_nivel(1)
}

# Fix the RNG seed, then draw 1000 replicates of the difference in means,
# passing the period factor as `strata`.
set.seed(616)
boot1 <- boot(
  data = datos,
  statistic = diff_means_boot,
  R = 1000,
  strata = datos$periodo
)
print(boot1)

## 
## STRATIFIED BOOTSTRAP
## 
## 
## Call:
## boot(data = datos, statistic = diff_means_boot, R = 1000, strata = datos$periodo)
## 
## 
## Bootstrap Statistics :
##     original      bias    std. error
## t1* 10.01246 -0.02854209    2.382898

# Percentile confidence interval from the first-level replicates
# (boot.ci's default confidence level is used).
ic1 <- boot.ci(boot.out = boot1, type = "perc")
print(ic1)

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot1, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 5.27, 14.78 )  
## Calculations and Intervals on Original Scale

# Histogram of the first-level bootstrap replicates of the difference in means.
hist(
  x = boot1$t,
  col = "lightblue",
  border = "black",
  main = "Bootstrap 1: Dif. de medias (después - antes)",
  xlab = "Diferencia remuestreada"
)

# 3) SECOND bootstrap (run on the replicates produced by the first one).
#
# Statistic passed to boot(): mean of the selected elements of `x`,
# ignoring missing values.
#
# x: numeric vector (or matrix, indexed linearly) of values to resample.
# i: indices of the bootstrap resample.
boot_mean <- function(x, i) {
  seleccion <- x[i]
  mean(seleccion, na.rm = TRUE)
}

# Fix the RNG seed, then resample the first-level replicates (boot1$t) 1000
# times, taking the mean of each resample.
# NOTE(review): the input here is boot1$t, not the original data, so the
# spread of boot2 describes the variability of the mean of the first-level
# replicates rather than a nested resampling of `datos` - confirm this is the
# intended "double bootstrap".
set.seed(838)
boot2 <- boot(
  data = boot1$t,
  statistic = boot_mean,
  R = 1000
)
print(boot2)

## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = boot1$t, statistic = boot_mean, R = 1000)
## 
## 
## Bootstrap Statistics :
##     original      bias    std. error
## t1* 9.983918 0.004834517   0.0718386

# Percentile confidence interval for the mean of the first-level replicates
# (boot.ci's default confidence level is used).
ic2 <- boot.ci(boot.out = boot2, type = "perc")
print(ic2)

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 1000 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = boot2, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 9.846, 10.139 )  
## Calculations and Intervals on Original Scale

# 4) 95% interval reported as the "double bootstrap" interval.
# It is the 2.5% and 97.5% quantiles of the first-level replicates (boot1$t);
# boot2 is not used in this calculation.
# NOTE(review): these limits can differ slightly from ic1 - presumably because
# quantile()'s default interpolation is not the one boot.ci() uses; confirm.
ic_db <- quantile(x = boot1$t, probs = c(0.025, 0.975), na.rm = TRUE)
cat(
  sprintf(
    "IC 95%% (doble bootstrap): [%.4f, %.4f]\n",
    ic_db[[1]],
    ic_db[[2]]
  )
)

## IC 95% (doble bootstrap): [5.2831, 14.6632]

# 5) Impact conclusion: the effect is reported as significant only when the
# interval excludes 0, i.e. when it is NOT the case that the lower limit is
# <= 0 and the upper limit is >= 0.
cat(
  if (ic_db[1] <= 0 && ic_db[2] >= 0) {
    "Conclusión: NO hay evidencia suficiente de impacto significativo (IC incluye 0).\n"
  } else {
    "Conclusión: La campaña tuvo un IMPACTO significativo en ventas.\n"
  }
)

## Conclusión: La campaña tuvo un IMPACTO significativo en ventas.

Bootstrapping doble

Luis Felipe Franco Rodríguez

2025-08-19