# PRAKTIKUM: ESTIMASI DISTRIBUSI DAN PARAMETER MODEL

library(probs)
## Warning: package 'probs' was built under R version 4.4.3
## 
## Attaching package: 'probs'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
# BAGIAN 1: TEOREMA LIMIT PUSAT (TLC)

# 1A. Hampiran Normal Terhadap Distribusi GEOMETRIK
par(mfrow = c(3, 1))
set.seed(123)
populasi_geo <- rgeom(20, 0.1)

# n = 2
contoh_geo1 <- urnsamples(populasi_geo, size = 2, replace = FALSE, ordered = FALSE)
mean_geo1   <- matrix(apply(contoh_geo1, 1, mean))

# n = 5
contoh_geo2 <- urnsamples(populasi_geo, size = 5, replace = FALSE, ordered = FALSE)
mean_geo2   <- matrix(apply(contoh_geo2, 1, mean))

# n = 10
contoh_geo3 <- urnsamples(populasi_geo, size = 10, replace = FALSE, ordered = FALSE)
mean_geo3   <- matrix(apply(contoh_geo3, 1, mean))

hist(mean_geo1, main = "Hampiran Normal Terhadap Geometrik (n = 2)",  xlab = "xbar")
hist(mean_geo2, main = "Hampiran Normal Terhadap Geometrik (n = 5)",  xlab = "xbar")
hist(mean_geo3, main = "Hampiran Normal Terhadap Geometrik (n = 10)", xlab = "xbar")

# Kesimpulan: semakin besar n, histogram makin mendekati kurva normal


# 1B. Hampiran Normal Terhadap Distribusi EKSPONENSIAL
par(mfrow = c(3, 1))
set.seed(123)
populasi_exp <- rexp(20)

contoh_exp1 <- urnsamples(populasi_exp, size = 2,  replace = FALSE, ordered = FALSE)
mean_exp1   <- matrix(apply(contoh_exp1, 1, mean))

contoh_exp2 <- urnsamples(populasi_exp, size = 5,  replace = FALSE, ordered = FALSE)
mean_exp2   <- matrix(apply(contoh_exp2, 1, mean))

contoh_exp3 <- urnsamples(populasi_exp, size = 10, replace = FALSE, ordered = FALSE)
mean_exp3   <- matrix(apply(contoh_exp3, 1, mean))

hist(mean_exp1, main = "Hampiran Normal Terhadap Eksponensial (n = 2)",  xlab = "xbar")
hist(mean_exp2, main = "Hampiran Normal Terhadap Eksponensial (n = 5)",  xlab = "xbar")
hist(mean_exp3, main = "Hampiran Normal Terhadap Eksponensial (n = 10)", xlab = "xbar")

# 1C. Hampiran Normal Terhadap Distribusi SERAGAM (Uniform)
par(mfrow = c(3, 1))
set.seed(123)
populasi_unif <- runif(20)

contoh_unif1 <- urnsamples(populasi_unif, size = 2,  replace = FALSE, ordered = FALSE)
mean_unif1   <- matrix(apply(contoh_unif1, 1, mean))

contoh_unif2 <- urnsamples(populasi_unif, size = 5,  replace = FALSE, ordered = FALSE)
mean_unif2   <- matrix(apply(contoh_unif2, 1, mean))

contoh_unif3 <- urnsamples(populasi_unif, size = 10, replace = FALSE, ordered = FALSE)
mean_unif3   <- matrix(apply(contoh_unif3, 1, mean))

hist(mean_unif1, main = "Hampiran Normal Terhadap Seragam (n = 2)",  xlab = "xbar")
hist(mean_unif2, main = "Hampiran Normal Terhadap Seragam (n = 5)",  xlab = "xbar")
hist(mean_unif3, main = "Hampiran Normal Terhadap Seragam (n = 10)", xlab = "xbar")

# 1D. Sebaran Percontohan dari Populasi NORMAL
par(mfrow = c(3, 1))
set.seed(1299)
populasi_norm <- rnorm(20, mean = 5, sd = sqrt(12))  # Normal(mu=5, sigma2=12)

# n = 3
contoh_norm1 <- urnsamples(populasi_norm, size = 3, replace = FALSE, ordered = FALSE)
mean_norm1   <- matrix(apply(contoh_norm1, 1, mean))
mean_xbar1   <- mean(mean_norm1)
var_xbar1    <- var(mean_norm1)

# n = 4
contoh_norm2 <- urnsamples(populasi_norm, size = 4, replace = FALSE, ordered = FALSE)
mean_norm2   <- matrix(apply(contoh_norm2, 1, mean))
mean_xbar2   <- mean(mean_norm2)
var_xbar2    <- var(mean_norm2)

# n = 15
contoh_norm3 <- urnsamples(populasi_norm, size = 15, replace = FALSE, ordered = FALSE)
mean_norm3   <- matrix(apply(contoh_norm3, 1, mean))
mean_xbar3   <- mean(mean_norm3)
var_xbar3    <- var(mean_norm3)

hist(mean_norm1, main = "Sebaran Percontohan Normal (n = 3)",  xlab = "xbar")
hist(mean_norm2, main = "Sebaran Percontohan Normal (n = 4)",  xlab = "xbar")
hist(mean_norm3, main = "Sebaran Percontohan Normal (n = 15)", xlab = "xbar")

# Tabel perbandingan mean dan varian
hasil_norm <- data.frame(
  "."       = c("mean", "varian"),
  "Populasi" = c(5, 12),
  "n=3"     = c(mean_xbar1, var_xbar1),
  "n=4"     = c(mean_xbar2, var_xbar2),
  "n=15"    = c(mean_xbar3, var_xbar3)
)
print(hasil_norm)
##        . Populasi      n.3      n.4      n.15
## 1   mean        5 4.809415 4.809415 4.8094152
## 2 varian       12 4.547044 3.207524 0.2672558
# Kesimpulan: mean xbar mendekati mu=5, varian mendekati sigma2/n = 12/n
# BAGIAN 2: KETAKBIASAN PENDUGA PARAMETER

# 2A. Penduga Mean (x-bar vs median)
# Membuktikan bahwa E(x-bar) = mu, bukan E(median) = mu

set.seed(123)
n <- 10

# Sebaran Normal
populasi1         <- rnorm(20)
mean_pop1         <- mean(populasi1)
sampel_normal1    <- urnsamples(populasi1, size = 10, replace = FALSE, ordered = FALSE)
mean_normal1      <- matrix(apply(sampel_normal1, 1, mean))
median_normal1    <- matrix(apply(sampel_normal1, 1, median))
harapan_mean_norm <- mean(mean_normal1)
harapan_med_norm  <- mean(median_normal1)

# Sebaran Eksponensial
populasi2        <- rexp(20)
mean_pop2        <- mean(populasi2)
sampel_exp2      <- urnsamples(populasi2, size = 10, replace = FALSE, ordered = FALSE)
mean_exp2b       <- matrix(apply(sampel_exp2, 1, mean))
median_exp2      <- matrix(apply(sampel_exp2, 1, median))
harapan_mean_exp <- mean(mean_exp2b)
harapan_med_exp  <- mean(median_exp2)

# Sebaran Uniform
populasi3         <- runif(20)
mean_pop3         <- mean(populasi3)
sampel_unif2      <- urnsamples(populasi3, size = 10, replace = FALSE, ordered = FALSE)
mean_unif2b       <- matrix(apply(sampel_unif2, 1, mean))
median_unif2      <- matrix(apply(sampel_unif2, 1, median))
harapan_mean_unif <- mean(mean_unif2b)
harapan_med_unif  <- mean(median_unif2)

# Tabel hasil
hasil_mean <- data.frame(
  "Hasil"               = c("mean_populasi", "harapan_mean_contoh", "harapan_median_contoh"),
  "Sebaran Normal"      = c(mean_pop1, harapan_mean_norm, harapan_med_norm),
  "Sebaran Eksponensial"= c(mean_pop2, harapan_mean_exp,  harapan_med_exp),
  "Sebaran Seragam"     = c(mean_pop3, harapan_mean_unif, harapan_med_unif)
)
print(hasil_mean)
##                   Hasil Sebaran.Normal Sebaran.Eksponensial Sebaran.Seragam
## 1         mean_populasi      0.1416238             1.113617       0.4436850
## 2   harapan_mean_contoh      0.1416238             1.113617       0.4436850
## 3 harapan_median_contoh      0.1174878             1.132277       0.4058455
# Kesimpulan: harapan x-bar = mu (penduga tak bias), harapan median ≠ mu


#  2B. Penduga Ragam (s² penyebut n-1 vs n) 
# Membuktikan bahwa E(s²) = sigma² menggunakan penyebut n-1

set.seed(888)
n <- 10

# Sebaran Normal
populasi_r1 <- rnorm(20)
sigma2_norm <- var(populasi_r1) * (20 - 1) / 20  # ragam populasi (penyebut N)

sampel_r1   <- urnsamples(populasi_r1, size = 10, replace = FALSE, ordered = FALSE)
s2_n1_norm  <- matrix(apply(sampel_r1, 1, var))          # penyebut n-1
E_s2_n1     <- mean(s2_n1_norm)
s2_n_norm   <- s2_n1_norm * (10 - 1) / 10               # penyebut n
E_s2_n      <- mean(s2_n_norm)

# Sebaran Eksponensial
populasi_r2  <- rexp(20)
sigma2_exp   <- var(populasi_r2) * (20 - 1) / 20

sampel_r2    <- urnsamples(populasi_r2, size = 10, replace = FALSE, ordered = FALSE)
s2_n1_exp    <- matrix(apply(sampel_r2, 1, var))
E_s2_n1_exp  <- mean(s2_n1_exp)
s2_n_exp     <- s2_n1_exp * (10 - 1) / 10
E_s2_n_exp   <- mean(s2_n_exp)

# Tabel hasil
hasil_ragam <- data.frame(
  "."                   = c("ragam populasi",
                             "nilai harapan ragam contoh (n-1)",
                             "nilai harapan ragam contoh (n)"),
  "Sebaran Normal"      = c(sigma2_norm, E_s2_n1,     E_s2_n),
  "Sebaran Eksponensial"= c(sigma2_exp,  E_s2_n1_exp, E_s2_n_exp)
)
print(hasil_ragam)
##                                  . Sebaran.Normal Sebaran.Eksponensial
## 1                   ragam populasi       1.298573            0.8957634
## 2 nilai harapan ragam contoh (n-1)       1.366919            0.9429088
## 3   nilai harapan ragam contoh (n)       1.230227            0.8486180
# Kesimpulan: s² dengan penyebut n-1 lebih mendekati sigma² → penduga tak bias
# BAGIAN 3: SELANG KEPERCAYAAN (CONFIDENCE INTERVAL)

# 3A. Simulasi Proporsi SK yang Memuat mu 
# Membandingkan ketepatan SK untuk berbagai ukuran contoh n

alpha <- 0.05
mu    <- 50
std   <- 10
k     <- 100  # banyak ulangan

# n = 10
set.seed(123)
n1           <- 10
sampel_sk1   <- matrix(rnorm(n1 * k, mu, std), k)
xbar_sk1     <- apply(sampel_sk1, 1, mean)
s_sk1        <- apply(sampel_sk1, 1, sd)
SE_sk1       <- s_sk1 / sqrt(n1)
z_val        <- qnorm(1 - alpha / 2)
SK1          <- (xbar_sk1 - z_val * SE_sk1 < mu) & (mu < xbar_sk1 + z_val * SE_sk1)
proporsi_sk1 <- sum(SK1) / k

# n = 30
set.seed(123)
n2           <- 30
sampel_sk2   <- matrix(rnorm(n2 * k, mu, std), k)
xbar_sk2     <- apply(sampel_sk2, 1, mean)
s_sk2        <- apply(sampel_sk2, 1, sd)
SE_sk2       <- s_sk2 / sqrt(n2)
SK2          <- (xbar_sk2 - z_val * SE_sk2 < mu) & (mu < xbar_sk2 + z_val * SE_sk2)
proporsi_sk2 <- sum(SK2) / k

# n = 100
set.seed(123)
n3           <- 100
sampel_sk3   <- matrix(rnorm(n3 * k, mu, std), k)
xbar_sk3     <- apply(sampel_sk3, 1, mean)
s_sk3        <- apply(sampel_sk3, 1, sd)
SE_sk3       <- s_sk3 / sqrt(n3)
SK3          <- (xbar_sk3 - z_val * SE_sk3 < mu) & (mu < xbar_sk3 + z_val * SE_sk3)
proporsi_sk3 <- sum(SK3) / k

# Tabel hasil
hasil_sk <- data.frame(
  "n"                         = c(10, 30, 100),
  "Ketepatan SK Sebaran Normal"= c(proporsi_sk1, proporsi_sk2, proporsi_sk3)
)
print(hasil_sk)
##     n Ketepatan.SK.Sebaran.Normal
## 1  10                        0.93
## 2  30                        0.93
## 3 100                        0.96
# Kesimpulan: semakin besar n, proporsi SK yang memuat mu mendekati 1-alpha = 0.95


#  3B. Visualisasi 100 Selang Kepercayaan 
# Grafik garis horizontal: biru = SK memuat mu, merah = SK tidak memuat mu
par(mfrow = c(1, 1))
matplot(
  rbind(xbar_sk2 - z_val * SE_sk2, xbar_sk2 + z_val * SE_sk2),
  rbind(1:k, 1:k),
  col  = ifelse(SK2, "blue", "red"),
  type = "l",
  lty  = 1,
  main = "Selang Kepercayaan 95% (n = 30)",
  xlab = "SK",
  ylab = "Banyak ulangan"
)
abline(v = mu)  # garis vertikal di mu = 50

# 3C. Interval Kepercayaan pada Data Nyata (Dataset Prestige)
data("Prestige")

# Hitung rata-rata income
m_income <- mean(Prestige$income)
cat("Rata-rata income:", m_income, "\n")
## Rata-rata income: 6797.902
# Hitung standar error
n_prestige <- nrow(Prestige)
se_income  <- sd(Prestige$income) / sqrt(n_prestige)
cat("Standar error:", se_income, "\n")
## Standar error: 420.4089
# Nilai kritis t
tval <- qt(0.975, df = n_prestige - 1)

# Interval kepercayaan 95%
batas_bawah <- round(m_income - tval * se_income, 2)
batas_atas  <- round(m_income + tval * se_income, 2)
cat(paste("KI 95%: [", batas_bawah, ",", batas_atas, "]\n"))
## KI 95%: [ 5963.92 , 7631.88 ]
# Interpretasi: dengan tingkat kepercayaan 95%, rata-rata income populasi
# berada dalam rentang [batas_bawah, batas_atas]