UTS PEMODELAN STATISTIKA DAN SIMULASI
TOPIK : SIMULASI ESTIMASI PARAMETER REGRESI LOGISTIK
SKENARIO:
1. Variasi Ukuran Sampel
2. Variasi Missing Value
1. PARAMETER AWAL
set.seed(467)
beta0 <- -5
beta1 <- 0.8
beta2 <- 0.05
rep <- 100
2. FUNGSI MEMBUAT DATA
buat_data <- function(n){
x1 <- runif(n, 1, 10) # jam belajar
x2 <- runif(n, 50, 100) # kehadiran
eta <- beta0 + beta1*x1 + beta2*x2
p <- exp(eta)/(1 + exp(eta))
y <- rbinom(n, 1, p)
data.frame(y, x1, x2)
}
3. FUNGSI TAMBAH MISSING
tambah_missing <- function(data, prop){
n <- nrow(data)
idx <- sample(1:n, size = prop*n)
data$x1[idx] <- NA
return(data)
}
SKENARIO 1 : UKURAN SAMPEL
simulasi_sampel <- function(n){
hasil <- matrix(NA, nrow = rep, ncol = 3)
colnames(hasil) <- c("b0","b1","b2")
for(i in 1:rep){
data <- buat_data(n)
model <- glm(y ~ x1 + x2,
family = binomial,
data = data)
hasil[i,] <- coef(model)
}
rata2 <- colMeans(hasil)
bias <- rata2 - c(beta0, beta1, beta2)
return(list(rata2 = rata2, bias = bias))
}
Jalankan simulasi ukuran sampel
hasil30 <- simulasi_sampel(30)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
hasil100 <- simulasi_sampel(100)
hasil500 <- simulasi_sampel(500)
Tabel hasil ukuran sampel
tabel_sampel <- data.frame(
Sampel = c(30,100,500),
Est_b0 = c(hasil30$rata2[1],
hasil100$rata2[1],
hasil500$rata2[1]),
Est_b1 = c(hasil30$rata2[2],
hasil100$rata2[2],
hasil500$rata2[2]),
Est_b2 = c(hasil30$rata2[3],
hasil100$rata2[3],
hasil500$rata2[3]),
Bias_b0 = c(hasil30$bias[1],
hasil100$bias[1],
hasil500$bias[1]),
Bias_b1 = c(hasil30$bias[2],
hasil100$bias[2],
hasil500$bias[2]),
Bias_b2 = c(hasil30$bias[3],
hasil100$bias[3],
hasil500$bias[3])
)
print("TABEL HASIL UKURAN SAMPEL")
## [1] "TABEL HASIL UKURAN SAMPEL"
print(tabel_sampel)
## Sampel Est_b0 Est_b1 Est_b2 Bias_b0 Bias_b1 Bias_b2
## 1 30 -59.106454 14.3844520 0.44788816 -54.1064538 13.58445201 0.397888159
## 2 100 -5.990504 0.8956322 0.06007350 -0.9905045 0.09563219 0.010073497
## 3 500 -5.106834 0.8174598 0.05105153 -0.1068336 0.01745979 0.001051534
SKENARIO 2 : MISSING VALUE
simulasi_missing <- function(prop){
hasil <- matrix(NA, nrow = rep, ncol = 3)
colnames(hasil) <- c("b0","b1","b2")
for(i in 1:rep){
data <- buat_data(100)
data <- tambah_missing(data, prop)
model <- glm(y ~ x1 + x2,
family = binomial,
data = data)
hasil[i,] <- coef(model)
}
rata2 <- colMeans(hasil, na.rm = TRUE)
bias <- rata2 - c(beta0, beta1, beta2)
return(list(rata2 = rata2, bias = bias))
}
Jalankan simulasi missing
hasil0 <- simulasi_missing(0)
hasil10 <- simulasi_missing(0.10)
hasil20 <- simulasi_missing(0.20)
Tabel hasil missing
tabel_missing <- data.frame(
Missing = c("0%","10%","20%"),
Est_b0 = c(hasil0$rata2[1],
hasil10$rata2[1],
hasil20$rata2[1]),
Est_b1 = c(hasil0$rata2[2],
hasil10$rata2[2],
hasil20$rata2[2]),
Est_b2 = c(hasil0$rata2[3],
hasil10$rata2[3],
hasil20$rata2[3]),
Bias_b0 = c(hasil0$bias[1],
hasil10$bias[1],
hasil20$bias[1]),
Bias_b1 = c(hasil0$bias[2],
hasil10$bias[2],
hasil20$bias[2]),
Bias_b2 = c(hasil0$bias[3],
hasil10$bias[3],
hasil20$bias[3])
)
print("TABEL HASIL MISSING VALUE")
## [1] "TABEL HASIL MISSING VALUE"
print(tabel_missing)
## Missing Est_b0 Est_b1 Est_b2 Bias_b0 Bias_b1 Bias_b2
## 1 0% -5.278445 0.8667814 0.05219779 -0.2784445 0.06678138 0.002197788
## 2 10% -6.055720 0.9708759 0.05870675 -1.0557196 0.17087592 0.008706749
## 3 20% -5.961315 0.9863870 0.05793124 -0.9613153 0.18638699 0.007931241