UTS PEMODELAN STATISTIKA DAN SIMULASI

TOPIK : SIMULASI ESTIMASI PARAMETER REGRESI LOGISTIK

SKENARIO:

1. Variasi Ukuran Sampel

2. Variasi Missing Value

1. PARAMETER AWAL

set.seed(467)

beta0 <- -5
beta1 <- 0.8
beta2 <- 0.05

rep <- 100

2. FUNGSI MEMBUAT DATA

buat_data <- function(n){

  x1 <- runif(n, 1, 10)      # jam belajar
  x2 <- runif(n, 50, 100)    # kehadiran

  eta <- beta0 + beta1*x1 + beta2*x2
  p   <- exp(eta)/(1 + exp(eta))

  y <- rbinom(n, 1, p)

  data.frame(y, x1, x2)
}

3. FUNGSI TAMBAH MISSING

tambah_missing <- function(data, prop){

  n <- nrow(data)

  idx <- sample(1:n, size = prop*n)

  data$x1[idx] <- NA

  return(data)
}

SKENARIO 1 : UKURAN SAMPEL

simulasi_sampel <- function(n){

  hasil <- matrix(NA, nrow = rep, ncol = 3)
  colnames(hasil) <- c("b0","b1","b2")

  for(i in 1:rep){

    data <- buat_data(n)

    model <- glm(y ~ x1 + x2,
                 family = binomial,
                 data = data)

    hasil[i,] <- coef(model)
  }

  rata2 <- colMeans(hasil)
  bias  <- rata2 - c(beta0, beta1, beta2)

  return(list(rata2 = rata2, bias = bias))
}

Jalankan simulasi ukuran sampel

hasil30  <- simulasi_sampel(30)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
hasil100 <- simulasi_sampel(100)
hasil500 <- simulasi_sampel(500)

Tabel hasil ukuran sampel

tabel_sampel <- data.frame(
  Sampel = c(30,100,500),

  Est_b0 = c(hasil30$rata2[1],
             hasil100$rata2[1],
             hasil500$rata2[1]),

  Est_b1 = c(hasil30$rata2[2],
             hasil100$rata2[2],
             hasil500$rata2[2]),

  Est_b2 = c(hasil30$rata2[3],
             hasil100$rata2[3],
             hasil500$rata2[3]),

  Bias_b0 = c(hasil30$bias[1],
              hasil100$bias[1],
              hasil500$bias[1]),

  Bias_b1 = c(hasil30$bias[2],
              hasil100$bias[2],
              hasil500$bias[2]),

  Bias_b2 = c(hasil30$bias[3],
              hasil100$bias[3],
              hasil500$bias[3])
)

print("TABEL HASIL UKURAN SAMPEL")
## [1] "TABEL HASIL UKURAN SAMPEL"
print(tabel_sampel)
##   Sampel     Est_b0     Est_b1     Est_b2     Bias_b0     Bias_b1     Bias_b2
## 1     30 -59.106454 14.3844520 0.44788816 -54.1064538 13.58445201 0.397888159
## 2    100  -5.990504  0.8956322 0.06007350  -0.9905045  0.09563219 0.010073497
## 3    500  -5.106834  0.8174598 0.05105153  -0.1068336  0.01745979 0.001051534

SKENARIO 2 : MISSING VALUE

simulasi_missing <- function(prop){

  hasil <- matrix(NA, nrow = rep, ncol = 3)
  colnames(hasil) <- c("b0","b1","b2")

  for(i in 1:rep){

    data <- buat_data(100)

    data <- tambah_missing(data, prop)

    model <- glm(y ~ x1 + x2,
                 family = binomial,
                 data = data)

    hasil[i,] <- coef(model)
  }

  rata2 <- colMeans(hasil, na.rm = TRUE)
  bias  <- rata2 - c(beta0, beta1, beta2)

  return(list(rata2 = rata2, bias = bias))
}

Jalankan simulasi missing

hasil0  <- simulasi_missing(0)
hasil10 <- simulasi_missing(0.10)
hasil20 <- simulasi_missing(0.20)

Tabel hasil missing

tabel_missing <- data.frame(
  Missing = c("0%","10%","20%"),

  Est_b0 = c(hasil0$rata2[1],
             hasil10$rata2[1],
             hasil20$rata2[1]),

  Est_b1 = c(hasil0$rata2[2],
             hasil10$rata2[2],
             hasil20$rata2[2]),

  Est_b2 = c(hasil0$rata2[3],
             hasil10$rata2[3],
             hasil20$rata2[3]),

  Bias_b0 = c(hasil0$bias[1],
              hasil10$bias[1],
              hasil20$bias[1]),

  Bias_b1 = c(hasil0$bias[2],
              hasil10$bias[2],
              hasil20$bias[2]),

  Bias_b2 = c(hasil0$bias[3],
              hasil10$bias[3],
              hasil20$bias[3])
)

print("TABEL HASIL MISSING VALUE")
## [1] "TABEL HASIL MISSING VALUE"
print(tabel_missing)
##   Missing    Est_b0    Est_b1     Est_b2    Bias_b0    Bias_b1     Bias_b2
## 1      0% -5.278445 0.8667814 0.05219779 -0.2784445 0.06678138 0.002197788
## 2     10% -6.055720 0.9708759 0.05870675 -1.0557196 0.17087592 0.008706749
## 3     20% -5.961315 0.9863870 0.05793124 -0.9613153 0.18638699 0.007931241