1 1. Ejemplo

Se simuló una población ficticia con el objetivo de analizar y establecer conclusiones sobre el comportamiento de estas técnicas cuando se aplican en situaciones reales.

La simulación se basó en el estudio presentado por Bethlehem (2010), incorporando varias modificaciones para ampliar el espectro de casos en los que pueden utilizarse métodos de ajuste.

En el estudio de simulación propuesto, se llevaría a cabo una encuesta para examinar la intención de voto de una población. La población tenía un tamaño fijo de ( N = 50000 ) y en el estudio se incluyeron seis variables: edad, nacionalidad (nativo/no nativo), género, nivel educativo (primario/secundario/terciario), acceso a internet (sí/no) y el partido al que tenían intención de votar, con cuatro opciones posibles: Partido 1, Partido 2, Partido 3 y Abstención. 1. Ejemplo usando MAS

1.1 2. Instalación y carga de librerías necesarias

#install.packages('NonProbEst')
library(NonProbEst)
library(survey)

1.2 3. Cargando los Datos

# datos ----
population  = NonProbEst::population
sampleNP = NonProbEst::sampleNP
sampleP  = NonProbEst::sampleP

head(sampleNP)

##   vote_gen vote_pens vote_pir education_primaria education_secundaria
## 1        0         1        0                  1                    0
## 2        0         0        1                  0                    0
## 3        1         0        0                  0                    1
## 4        0         0        1                  1                    0
## 5        0         0        1                  0                    1
## 6        0         0        0                  1                    0
##   education_terciaria age sex language
## 1                   0  66   1        1
## 2                   1  30   1        1
## 3                   0  62   0        1
## 4                   0  33   0        1
## 5                   0  30   0        1
## 6                   0  69   1        1

attach(sampleNP)
n = nrow(sampleNP)
N = nrow(population)

1.3 4. Estimación usando MAS Muestreo Aleatorio Simple

\[ \hat{t}_y = N \cdot \bar{y}_s \]

## estimacion usando mas
design  <- svydesign(id = ~1, data = sampleNP, fpc = ~rep(N,n))
totales <-  svytotal(design = design,~vote_gen+vote_pens)
totales

##           total     SE
## vote_gen   4800 461.34
## vote_pens 17300 744.95

1.3.1 4.1 Varianza

#varianza estimadores
var_MAS <- c(461.34, 744.95)**2
var_MAS

## [1] 212834.6 554950.5

1.3.1.1 4.2 Coeficiente de variación

#Coeficiente de variacion
cv_MAS = round(as.numeric(100*cbind(sqrt(var_MAS[1])/totales[1],sqrt(var_MAS[2])/totales[2])),5)

rbind( c("vote_gen","vote_pens"), cv_MAS)

##        [,1]       [,2]       
##        "vote_gen" "vote_pens"
## cv_MAS "9.61125"  "4.30607"

1.3.2 4.3 Intervalo

confint(totales)

##               2.5 %    97.5 %
## vote_gen   3895.792  5704.208
## vote_pens 15839.925 18760.075

1.4 5. Estimación usando PSA

1.4.1 5.1 Selección de variables auxiliares

covariates = c("education_primaria", "education_secundaria","age","sex")

1.4.2 5.2 Calculo de propensities

data_propensities = propensities(sampleNP, sampleP, covariates)

1.4.3 5.3 Pesos

psa_weights = sc_weights(data_propensities$convenience)

1.4.4 5.4 Estimación del total por PSA

#Estimacion del total mediande PSA
t_hat<-total_estimation(sampleNP, psa_weights, c("vote_gen","vote_pens"), N)
t_hat

##  vote_gen vote_pens 
##  5137.747 18256.192

1.4.5 5.5 Varianza

#Estiacion de la varianza mediante jacknife
var<-fast_jackknife_variance(sampleNP, psa_weights, c("vote_gen","vote_pens"), N = N)*(N**2)
var

##  vote_gen vote_pens 
##  245497.4  594729.4

1.4.5.1 5.6 CV

cv_PSA = 100*sqrt(var)/t_hat; cv_PSA

##  vote_gen vote_pens 
##  9.643856  4.224249

1.4.6 5.7 estimación por intervalo

confidence_interval(t_hat,sqrt(var*N^2), confidence=0.95 )

##  lower.vote_gen lower.vote_pens  upper.vote_gen upper.vote_pens 
##       -48550709       -75556681        48560984        75593193

1.5 6. Muestreo por calibracion

Pesos Calibracion

# Pesos de calibracion ----
totals = colSums(population[, covariates])

1.5.1 6.1 Con pesos iniciales = 1

## Con pesos iniciales = 1 ----
# Calibracion de pesos
cw1_weight = calib_weights(sampleNP[, covariates], totals, rep(1, n), N, method = "raking")

1.5.1.1 6.1.1 Estimacion del total

#Estimacion del total
Y_cw1_t = total_estimation(sampleNP, cw1_weight, c("vote_gen","vote_pens"), N)
Y_cw1_m = colSums(cw1_weight*sampleNP[, c("vote_gen","vote_pens")])

Y_cw1_t; Y_cw1_m

##  vote_gen vote_pens 
##  5044.451 17625.967

##  vote_gen vote_pens 
##  5226.617 18262.479

1.5.1.2 6.1.2 Varianza e Intervalo de confianza

#Estimacion de la varianza e intervalo de confianza para el total ponderado
VY_cw1_t = fast_jackknife_variance(sampleNP, cw1_weight, estimated_vars = c("vote_gen","vote_pens")) * N^2
ci_cw1_t = confidence_interval(Y_cw1_t, sqrt(VY_cw1_t), confidence = 0.95)

VY_cw1_t; ci_cw1_t

##  vote_gen vote_pens 
##  237669.7  578298.6

##  lower.vote_gen lower.vote_pens  upper.vote_gen upper.vote_pens 
##        4088.942       16135.494        5999.961       19116.440

1.5.1.3 6.1.3 CV

#Coeficiente de variacion para el total ponderado
cv_Cal = sqrt(VY_cw1_t)/Y_cw1_t*100; cv_Cal

##  vote_gen vote_pens 
##  9.664358  4.314427

#Estimacion de la varianza e intervalo de confianza para el estimador de regresion

reg_est_gen1 = function(sample){
  re_w = calib_weights(sample[, covariates], totals, rep(1, nrow(sample)), N, method = "raking")
  reg = sum(sample$vote_gen*re_w)
  return(reg)
  }

reg_est_pens1 = function(sample){
  re_w = calib_weights(sample[, covariates], totals, rep(1, nrow(sample)), N, method = "raking")
  reg = sum(sample$vote_pens*re_w)
  return(reg)
  }

VY_cw1_m = c(vote_gen = generic_jackknife_variance(sampleNP, reg_est_gen1, N),
             vote_pens = generic_jackknife_variance(sampleNP, reg_est_pens1, N))
ci_cw1_m = confidence_interval(Y_cw1_m, sqrt(VY_cw1_m), confidence = 0.95)

VY_cw1_m; ci_cw1_m

##  vote_gen vote_pens 
##  236565.6  534349.2

##  lower.vote_gen lower.vote_pens  upper.vote_gen upper.vote_pens 
##        4273.330       16829.761        6179.905       19695.197

#Coeficiente de variacion para el estimador de regresion
sqrt(VY_cw1_m)/Y_cw1_m*100

##  vote_gen vote_pens 
##  9.305830  4.002698

1.5.2 6.2 Pesos iniciales con pseudo probabilidades de inclusion

pi = propensities(sampleNP, sampleP, covariates, algorithm = "glm", smooth = FALSE)
wi = sc_weights(pi$convenience)

1.5.2.1 6.2.1 Estimacion del total

# Calibracion de pesos
cw2_weight = calib_weights(sampleNP[, covariates], totals, wi, N, method = "raking")

#Estimacion del total
Y_cw2_t  = total_estimation(sampleNP, cw2_weight, c("vote_gen","vote_pens"), N)
Y_cw2_ma = colSums(cw2_weight*sampleNP[ ,c("vote_gen","vote_pens")])

Y_cw2_t; Y_cw2_ma

##  vote_gen vote_pens 
##  4898.813 18255.550

##  vote_gen vote_pens 
##  4952.409 18455.280

1.5.2.2 6.2.2 Varianza e Intervalo de confianza

#Estimacion de la varianza e intervalo de confianza
VY_cw2_t = fast_jackknife_variance(sampleNP, cw2_weight, estimated_vars = c("vote_gen","vote_pens")) * N^2
ci_cw2_t = confidence_interval(Y_cw2_t, sqrt(VY_cw2_t), confidence = 0.95)

1.5.2.3 6.2.3 CV

#Resultados
VY_cw2_t

##  vote_gen vote_pens 
##  228650.7  600623.6

ci_cw2_t

##  lower.vote_gen lower.vote_pens  upper.vote_gen upper.vote_pens 
##        3961.608       16736.580        5836.017       19774.521

cv_CPSA = sqrt(VY_cw2_t)/Y_cw2_t*100; cv_CPSA

##  vote_gen vote_pens 
##  9.761026  4.245279

reg_est_gen2 = function(sample){
  pi = propensities(sample, sampleP, covariates, algorithm = "glm", smooth = FALSE)
  wi = sc_weights(pi$convenience)
  
  re_w = calib_weights(sample[, covariates], totals, wi, N, method = "raking")
  reg = sum(sample$vote_gen*re_w)
  return(reg)
}

reg_est_pens2 = function(sample){
  pi = propensities(sample, sampleP, covariates, algorithm = "glm", smooth = FALSE)
  wi = sc_weights(pi$convenience)
  
  re_w = calib_weights(sample[, covariates], totals, wi, N, method = "raking")
  reg = sum(sample$vote_pens*re_w)
  return(reg)
}

#VY_cw2_m = c(vote_gen = generic_jackknife_variance(sampleNP, reg_est_gen2, N),
#             vote_pens = generic_jackknife_variance(sampleNP, reg_est_pens2, N))
#ci_cw2_m = confidence_interval(Y_cw2_m, sqrt(VY_cw2_m), confidence = 0.95)
#print(paste("Con un coeficiente de variacion de: ", sqrt(VY_cw2_m)/Y_cw2_m*100, "%"))

1.6 7. Comparacion de resultados

Comparacion_gen = cbind(rbind(totales[1], cv_MAS[1]), rbind(t_hat[1], cv_PSA[1]), rbind(Y_cw1_t[1], cv_Cal[1]),rbind(Y_cw2_t[1], cv_CPSA[1]) )

colnames(Comparacion_gen) = c("MAS", "PSA", "Calibrated", "Calibrated_PSA")
rownames(Comparacion_gen) <- c("Estimación", "CV")

Comparacion_gen

##                   MAS         PSA  Calibrated Calibrated_PSA
## Estimación 4800.00000 5137.746809 5044.451240    4898.812630
## CV            9.61125    9.643856    9.664358       9.761026

Comparacion_pens = cbind(rbind(totales[2], cv_MAS[2]), rbind(t_hat[2], cv_PSA[2]), rbind(Y_cw1_t[2], cv_Cal[2]),rbind(Y_cw2_t[2], cv_CPSA[2]) )

colnames(Comparacion_pens) = c("MAS", "PSA", "Calibrated", "Calibrated_PSA")
rownames(Comparacion_pens) <- c("Estimación", "CV")

Comparacion_pens

##                    MAS          PSA   Calibrated Calibrated_PSA
## Estimación 17300.00000 18256.192422 17625.966795   18255.550201
## CV             4.30607     4.224249     4.314427       4.245279

Non Probabilistic Sampling

Juan Felipe Moreno, Luisa Fernanda Chaves, Sebastian Vela

2025-06-05