# PRAKTIKUM: ESTIMASI DISTRIBUSI DAN PARAMETER MODEL
library(probs)
## Warning: package 'probs' was built under R version 4.4.3
##
## Attaching package: 'probs'
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
# BAGIAN 1: TEOREMA LIMIT PUSAT (TLC)
# 1A. Hampiran Normal Terhadap Distribusi GEOMETRIK
par(mfrow = c(3, 1))
set.seed(123)
populasi_geo <- rgeom(20, 0.1)
# n = 2
contoh_geo1 <- urnsamples(populasi_geo, size = 2, replace = FALSE, ordered = FALSE)
mean_geo1 <- matrix(apply(contoh_geo1, 1, mean))
# n = 5
contoh_geo2 <- urnsamples(populasi_geo, size = 5, replace = FALSE, ordered = FALSE)
mean_geo2 <- matrix(apply(contoh_geo2, 1, mean))
# n = 10
contoh_geo3 <- urnsamples(populasi_geo, size = 10, replace = FALSE, ordered = FALSE)
mean_geo3 <- matrix(apply(contoh_geo3, 1, mean))
hist(mean_geo1, main = "Hampiran Normal Terhadap Geometrik (n = 2)", xlab = "xbar")
hist(mean_geo2, main = "Hampiran Normal Terhadap Geometrik (n = 5)", xlab = "xbar")
hist(mean_geo3, main = "Hampiran Normal Terhadap Geometrik (n = 10)", xlab = "xbar")

# Kesimpulan: semakin besar n, histogram makin mendekati kurva normal
# 1B. Hampiran Normal Terhadap Distribusi EKSPONENSIAL
par(mfrow = c(3, 1))
set.seed(123)
populasi_exp <- rexp(20)
contoh_exp1 <- urnsamples(populasi_exp, size = 2, replace = FALSE, ordered = FALSE)
mean_exp1 <- matrix(apply(contoh_exp1, 1, mean))
contoh_exp2 <- urnsamples(populasi_exp, size = 5, replace = FALSE, ordered = FALSE)
mean_exp2 <- matrix(apply(contoh_exp2, 1, mean))
contoh_exp3 <- urnsamples(populasi_exp, size = 10, replace = FALSE, ordered = FALSE)
mean_exp3 <- matrix(apply(contoh_exp3, 1, mean))
hist(mean_exp1, main = "Hampiran Normal Terhadap Eksponensial (n = 2)", xlab = "xbar")
hist(mean_exp2, main = "Hampiran Normal Terhadap Eksponensial (n = 5)", xlab = "xbar")
hist(mean_exp3, main = "Hampiran Normal Terhadap Eksponensial (n = 10)", xlab = "xbar")

# 1C. Hampiran Normal Terhadap Distribusi SERAGAM (Uniform)
par(mfrow = c(3, 1))
set.seed(123)
populasi_unif <- runif(20)
contoh_unif1 <- urnsamples(populasi_unif, size = 2, replace = FALSE, ordered = FALSE)
mean_unif1 <- matrix(apply(contoh_unif1, 1, mean))
contoh_unif2 <- urnsamples(populasi_unif, size = 5, replace = FALSE, ordered = FALSE)
mean_unif2 <- matrix(apply(contoh_unif2, 1, mean))
contoh_unif3 <- urnsamples(populasi_unif, size = 10, replace = FALSE, ordered = FALSE)
mean_unif3 <- matrix(apply(contoh_unif3, 1, mean))
hist(mean_unif1, main = "Hampiran Normal Terhadap Seragam (n = 2)", xlab = "xbar")
hist(mean_unif2, main = "Hampiran Normal Terhadap Seragam (n = 5)", xlab = "xbar")
hist(mean_unif3, main = "Hampiran Normal Terhadap Seragam (n = 10)", xlab = "xbar")

# 1D. Sebaran Percontohan dari Populasi NORMAL
par(mfrow = c(3, 1))
set.seed(1299)
populasi_norm <- rnorm(20, mean = 5, sd = sqrt(12)) # Normal(mu=5, sigma2=12)
# n = 3
contoh_norm1 <- urnsamples(populasi_norm, size = 3, replace = FALSE, ordered = FALSE)
mean_norm1 <- matrix(apply(contoh_norm1, 1, mean))
mean_xbar1 <- mean(mean_norm1)
var_xbar1 <- var(mean_norm1)
# n = 4
contoh_norm2 <- urnsamples(populasi_norm, size = 4, replace = FALSE, ordered = FALSE)
mean_norm2 <- matrix(apply(contoh_norm2, 1, mean))
mean_xbar2 <- mean(mean_norm2)
var_xbar2 <- var(mean_norm2)
# n = 15
contoh_norm3 <- urnsamples(populasi_norm, size = 15, replace = FALSE, ordered = FALSE)
mean_norm3 <- matrix(apply(contoh_norm3, 1, mean))
mean_xbar3 <- mean(mean_norm3)
var_xbar3 <- var(mean_norm3)
hist(mean_norm1, main = "Sebaran Percontohan Normal (n = 3)", xlab = "xbar")
hist(mean_norm2, main = "Sebaran Percontohan Normal (n = 4)", xlab = "xbar")
hist(mean_norm3, main = "Sebaran Percontohan Normal (n = 15)", xlab = "xbar")

# Tabel perbandingan mean dan varian
hasil_norm <- data.frame(
"." = c("mean", "varian"),
"Populasi" = c(5, 12),
"n=3" = c(mean_xbar1, var_xbar1),
"n=4" = c(mean_xbar2, var_xbar2),
"n=15" = c(mean_xbar3, var_xbar3)
)
print(hasil_norm)
## . Populasi n.3 n.4 n.15
## 1 mean 5 4.809415 4.809415 4.8094152
## 2 varian 12 4.547044 3.207524 0.2672558
# Kesimpulan: mean xbar mendekati mu=5, varian mendekati sigma2/n = 12/n
# BAGIAN 2: KETAKBIASAN PENDUGA PARAMETER
# 2A. Penduga Mean (x-bar vs median)
# Membuktikan bahwa E(x-bar) = mu, bukan E(median) = mu
set.seed(123)
n <- 10
# Sebaran Normal
populasi1 <- rnorm(20)
mean_pop1 <- mean(populasi1)
sampel_normal1 <- urnsamples(populasi1, size = 10, replace = FALSE, ordered = FALSE)
mean_normal1 <- matrix(apply(sampel_normal1, 1, mean))
median_normal1 <- matrix(apply(sampel_normal1, 1, median))
harapan_mean_norm <- mean(mean_normal1)
harapan_med_norm <- mean(median_normal1)
# Sebaran Eksponensial
populasi2 <- rexp(20)
mean_pop2 <- mean(populasi2)
sampel_exp2 <- urnsamples(populasi2, size = 10, replace = FALSE, ordered = FALSE)
mean_exp2b <- matrix(apply(sampel_exp2, 1, mean))
median_exp2 <- matrix(apply(sampel_exp2, 1, median))
harapan_mean_exp <- mean(mean_exp2b)
harapan_med_exp <- mean(median_exp2)
# Sebaran Uniform
populasi3 <- runif(20)
mean_pop3 <- mean(populasi3)
sampel_unif2 <- urnsamples(populasi3, size = 10, replace = FALSE, ordered = FALSE)
mean_unif2b <- matrix(apply(sampel_unif2, 1, mean))
median_unif2 <- matrix(apply(sampel_unif2, 1, median))
harapan_mean_unif <- mean(mean_unif2b)
harapan_med_unif <- mean(median_unif2)
# Tabel hasil
hasil_mean <- data.frame(
"Hasil" = c("mean_populasi", "harapan_mean_contoh", "harapan_median_contoh"),
"Sebaran Normal" = c(mean_pop1, harapan_mean_norm, harapan_med_norm),
"Sebaran Eksponensial"= c(mean_pop2, harapan_mean_exp, harapan_med_exp),
"Sebaran Seragam" = c(mean_pop3, harapan_mean_unif, harapan_med_unif)
)
print(hasil_mean)
## Hasil Sebaran.Normal Sebaran.Eksponensial Sebaran.Seragam
## 1 mean_populasi 0.1416238 1.113617 0.4436850
## 2 harapan_mean_contoh 0.1416238 1.113617 0.4436850
## 3 harapan_median_contoh 0.1174878 1.132277 0.4058455
# Kesimpulan: harapan x-bar = mu (penduga tak bias), harapan median ≠ mu
# 2B. Penduga Ragam (s² penyebut n-1 vs n)
# Membuktikan bahwa E(s²) = sigma² menggunakan penyebut n-1
set.seed(888)
n <- 10
# Sebaran Normal
populasi_r1 <- rnorm(20)
sigma2_norm <- var(populasi_r1) * (20 - 1) / 20 # ragam populasi (penyebut N)
sampel_r1 <- urnsamples(populasi_r1, size = 10, replace = FALSE, ordered = FALSE)
s2_n1_norm <- matrix(apply(sampel_r1, 1, var)) # penyebut n-1
E_s2_n1 <- mean(s2_n1_norm)
s2_n_norm <- s2_n1_norm * (10 - 1) / 10 # penyebut n
E_s2_n <- mean(s2_n_norm)
# Sebaran Eksponensial
populasi_r2 <- rexp(20)
sigma2_exp <- var(populasi_r2) * (20 - 1) / 20
sampel_r2 <- urnsamples(populasi_r2, size = 10, replace = FALSE, ordered = FALSE)
s2_n1_exp <- matrix(apply(sampel_r2, 1, var))
E_s2_n1_exp <- mean(s2_n1_exp)
s2_n_exp <- s2_n1_exp * (10 - 1) / 10
E_s2_n_exp <- mean(s2_n_exp)
# Tabel hasil
hasil_ragam <- data.frame(
"." = c("ragam populasi",
"nilai harapan ragam contoh (n-1)",
"nilai harapan ragam contoh (n)"),
"Sebaran Normal" = c(sigma2_norm, E_s2_n1, E_s2_n),
"Sebaran Eksponensial"= c(sigma2_exp, E_s2_n1_exp, E_s2_n_exp)
)
print(hasil_ragam)
## . Sebaran.Normal Sebaran.Eksponensial
## 1 ragam populasi 1.298573 0.8957634
## 2 nilai harapan ragam contoh (n-1) 1.366919 0.9429088
## 3 nilai harapan ragam contoh (n) 1.230227 0.8486180
# Kesimpulan: s² dengan penyebut n-1 lebih mendekati sigma² → penduga tak bias
# BAGIAN 3: SELANG KEPERCAYAAN (CONFIDENCE INTERVAL)
# 3A. Simulasi Proporsi SK yang Memuat mu
# Membandingkan ketepatan SK untuk berbagai ukuran contoh n
alpha <- 0.05
mu <- 50
std <- 10
k <- 100 # banyak ulangan
# n = 10
set.seed(123)
n1 <- 10
sampel_sk1 <- matrix(rnorm(n1 * k, mu, std), k)
xbar_sk1 <- apply(sampel_sk1, 1, mean)
s_sk1 <- apply(sampel_sk1, 1, sd)
SE_sk1 <- s_sk1 / sqrt(n1)
z_val <- qnorm(1 - alpha / 2)
SK1 <- (xbar_sk1 - z_val * SE_sk1 < mu) & (mu < xbar_sk1 + z_val * SE_sk1)
proporsi_sk1 <- sum(SK1) / k
# n = 30
set.seed(123)
n2 <- 30
sampel_sk2 <- matrix(rnorm(n2 * k, mu, std), k)
xbar_sk2 <- apply(sampel_sk2, 1, mean)
s_sk2 <- apply(sampel_sk2, 1, sd)
SE_sk2 <- s_sk2 / sqrt(n2)
SK2 <- (xbar_sk2 - z_val * SE_sk2 < mu) & (mu < xbar_sk2 + z_val * SE_sk2)
proporsi_sk2 <- sum(SK2) / k
# n = 100
set.seed(123)
n3 <- 100
sampel_sk3 <- matrix(rnorm(n3 * k, mu, std), k)
xbar_sk3 <- apply(sampel_sk3, 1, mean)
s_sk3 <- apply(sampel_sk3, 1, sd)
SE_sk3 <- s_sk3 / sqrt(n3)
SK3 <- (xbar_sk3 - z_val * SE_sk3 < mu) & (mu < xbar_sk3 + z_val * SE_sk3)
proporsi_sk3 <- sum(SK3) / k
# Tabel hasil
hasil_sk <- data.frame(
"n" = c(10, 30, 100),
"Ketepatan SK Sebaran Normal"= c(proporsi_sk1, proporsi_sk2, proporsi_sk3)
)
print(hasil_sk)
## n Ketepatan.SK.Sebaran.Normal
## 1 10 0.93
## 2 30 0.93
## 3 100 0.96
# Kesimpulan: semakin besar n, proporsi SK yang memuat mu mendekati 1-alpha = 0.95
# 3B. Visualisasi 100 Selang Kepercayaan
# Grafik garis horizontal: biru = SK memuat mu, merah = SK tidak memuat mu
par(mfrow = c(1, 1))
matplot(
rbind(xbar_sk2 - z_val * SE_sk2, xbar_sk2 + z_val * SE_sk2),
rbind(1:k, 1:k),
col = ifelse(SK2, "blue", "red"),
type = "l",
lty = 1,
main = "Selang Kepercayaan 95% (n = 30)",
xlab = "SK",
ylab = "Banyak ulangan"
)
abline(v = mu) # garis vertikal di mu = 50

# 3C. Interval Kepercayaan pada Data Nyata (Dataset Prestige)
data("Prestige")
# Hitung rata-rata income
m_income <- mean(Prestige$income)
cat("Rata-rata income:", m_income, "\n")
## Rata-rata income: 6797.902
# Hitung standar error
n_prestige <- nrow(Prestige)
se_income <- sd(Prestige$income) / sqrt(n_prestige)
cat("Standar error:", se_income, "\n")
## Standar error: 420.4089
# Nilai kritis t
tval <- qt(0.975, df = n_prestige - 1)
# Interval kepercayaan 95%
batas_bawah <- round(m_income - tval * se_income, 2)
batas_atas <- round(m_income + tval * se_income, 2)
cat(paste("KI 95%: [", batas_bawah, ",", batas_atas, "]\n"))
## KI 95%: [ 5963.92 , 7631.88 ]
# Interpretasi: dengan tingkat kepercayaan 95%, rata-rata income populasi
# berada dalam rentang [batas_bawah, batas_atas]