# ============================================================
# Algoritma Expectation-Maximization (EM) - Masalah 2 Koin
# Parameter: theta_A = 0.7, theta_B = 0.4
# Dataset: 5 set pelemparan koin
# ============================================================
# ----------------------------------------------------------
# DATA OBSERVASI
# Setiap baris: c(jumlah_H, jumlah_T)
# ----------------------------------------------------------
data_observasi <- list(
set1 = c(H = 5, T = 5),
set2 = c(H = 9, T = 1),
set3 = c(H = 8, T = 2),
set4 = c(H = 4, T = 6),
set5 = c(H = 7, T = 3)
)
# ----------------------------------------------------------
# PARAMETER AWAL
# Slide menggunakan: theta_A = 0.6, theta_B = 0.5
# Tes ini gunakan: theta_A = 0.7, theta_B = 0.4
# ----------------------------------------------------------
theta_A_init <- 0.7
theta_B_init <- 0.4
# ----------------------------------------------------------
# FUNGSI E-STEP
# Hitung probabilitas setiap set berasal dari koin A atau B
# ----------------------------------------------------------
e_step <- function(data, theta_A, theta_B) {
hasil <- lapply(data, function(set) {
H <- set["H"]
T <- set["T"]
n <- H + T
# Likelihood set ini dari koin A dan koin B
L_A <- theta_A^H * (1 - theta_A)^T
L_B <- theta_B^H * (1 - theta_B)^T
# Probabilitas posterior (asumsikan prior seimbang P(A)=P(B)=0.5)
denom <- L_A + L_B
p_A <- L_A / denom
p_B <- L_B / denom
# Estimasi kontribusi H dan T untuk masing-masing koin
H_A_est <- p_A * H
T_A_est <- p_A * T
H_B_est <- p_B * H
T_B_est <- p_B * T
list(p_A = p_A, p_B = p_B,
H_A = H_A_est, T_A = T_A_est,
H_B = H_B_est, T_B = T_B_est)
})
return(hasil)
}
# ----------------------------------------------------------
# FUNGSI M-STEP
# Update parameter theta_A dan theta_B
# ----------------------------------------------------------
m_step <- function(e_hasil) {
total_H_A <- sum(sapply(e_hasil, function(x) x$H_A))
total_T_A <- sum(sapply(e_hasil, function(x) x$T_A))
total_H_B <- sum(sapply(e_hasil, function(x) x$H_B))
total_T_B <- sum(sapply(e_hasil, function(x) x$T_B))
theta_A_baru <- total_H_A / (total_H_A + total_T_A)
theta_B_baru <- total_H_B / (total_H_B + total_T_B)
list(theta_A = theta_A_baru, theta_B = theta_B_baru,
total_H_A = total_H_A, total_T_A = total_T_A,
total_H_B = total_H_B, total_T_B = total_T_B)
}
# ----------------------------------------------------------
# FUNGSI EUCLIDEAN DISTANCE (cek konvergensi)
# ----------------------------------------------------------
euclidean_dist <- function(t_A_lama, t_B_lama, t_A_baru, t_B_baru) {
sqrt((t_A_baru - t_A_lama)^2 + (t_B_baru - t_B_lama)^2)
}
# ----------------------------------------------------------
# JALANKAN ALGORITMA EM
# ----------------------------------------------------------
cat("==========================================================\n")
## ==========================================================
cat(" ALGORITMA EM - MASALAH 2 KOIN\n")
## ALGORITMA EM - MASALAH 2 KOIN
cat("==========================================================\n")
## ==========================================================
cat(sprintf(" Parameter Awal : theta_A = %.1f | theta_B = %.1f\n",
theta_A_init, theta_B_init))
## Parameter Awal : theta_A = 0.7 | theta_B = 0.4
cat(" (Slide contoh : theta_A = 0.6 | theta_B = 0.5)\n")
## (Slide contoh : theta_A = 0.6 | theta_B = 0.5)
cat("\n")
# Tampilkan data observasi
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
cat(" DATA OBSERVASI\n")
## DATA OBSERVASI
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
for (i in seq_along(data_observasi)) {
s <- data_observasi[[i]]
cat(sprintf(" Set %d : %dH %dT (total %d lemparan)\n",
i, s["H"], s["T"], s["H"] + s["T"]))
}
## Set 1 : 5H 5T (total 10 lemparan)
## Set 2 : 9H 1T (total 10 lemparan)
## Set 3 : 8H 2T (total 10 lemparan)
## Set 4 : 4H 6T (total 10 lemparan)
## Set 5 : 7H 3T (total 10 lemparan)
cat("\n")
# Inisialisasi
theta_A <- theta_A_init
theta_B <- theta_B_init
max_iter <- 100
tol <- 1e-6
riwayat <- data.frame(iter = integer(), theta_A = numeric(),
theta_B = numeric(), delta = numeric())
for (iter in 1:max_iter) {
# ---- E-STEP ----
e_hasil <- e_step(data_observasi, theta_A, theta_B)
# ---- M-STEP ----
m_hasil <- m_step(e_hasil)
theta_A_baru <- m_hasil$theta_A
theta_B_baru <- m_hasil$theta_B
# Hitung perubahan (Euclidean Distance)
delta <- euclidean_dist(theta_A, theta_B, theta_A_baru, theta_B_baru)
# Simpan riwayat
riwayat <- rbind(riwayat, data.frame(
iter = iter,
theta_A = round(theta_A_baru, 6),
theta_B = round(theta_B_baru, 6),
delta = round(delta, 8)
))
# Cetak detail iterasi (5 iterasi pertama + saat konvergen)
if (iter <= 5 || delta < tol) {
cat(sprintf(" === ITERASI %d ===\n", iter))
cat(" [E-Step] Probabilitas & Estimasi per Set:\n")
cat(sprintf(" %-6s %-8s %-8s %-8s %-8s %-8s %-8s\n",
"Set", "P(A)", "P(B)", "H_A", "T_A", "H_B", "T_B"))
for (i in seq_along(e_hasil)) {
e <- e_hasil[[i]]
cat(sprintf(" %-6d %-8.4f %-8.4f %-8.4f %-8.4f %-8.4f %-8.4f\n",
i, e$p_A, e$p_B, e$H_A, e$T_A, e$H_B, e$T_B))
}
cat(sprintf("\n [M-Step] Total H_A=%.4f T_A=%.4f | H_B=%.4f T_B=%.4f\n",
m_hasil$total_H_A, m_hasil$total_T_A,
m_hasil$total_H_B, m_hasil$total_T_B))
cat(sprintf(" [Update] theta_A: %.4f -> %.6f | theta_B: %.4f -> %.6f\n",
theta_A, theta_A_baru, theta_B, theta_B_baru))
cat(sprintf(" [Delta] Euclidean Distance = %.8f\n\n", delta))
}
# Update
theta_A <- theta_A_baru
theta_B <- theta_B_baru
# Cek konvergensi
if (delta < tol) {
cat(sprintf(" >> KONVERGEN pada iterasi ke-%d (delta < %.0e)\n\n", iter, tol))
break
}
}
## === ITERASI 1 ===
## [E-Step] Probabilitas & Estimasi per Set:
## Set P(A) P(B) H_A T_A H_B T_B
## 1 0.3390 0.6610 1.6951 1.6951 3.3049 3.3049
## 2 0.9872 0.0128 8.8846 0.9872 0.1154 0.0128
## 3 0.9565 0.0435 7.6520 1.9130 0.3480 0.0870
## 4 0.1278 0.8722 0.5113 0.7669 3.4887 5.2331
## 5 0.8627 0.1373 6.0389 2.5881 0.9611 0.4119
##
## [M-Step] Total H_A=24.7818 T_A=7.9503 | H_B=8.2182 T_B=9.0497
## [Update] theta_A: 0.7000 -> 0.757111 | theta_B: 0.4000 -> 0.475921
## [Delta] Euclidean Distance = 0.09500371
##
## === ITERASI 2 ===
## [E-Step] Probabilitas & Estimasi per Set:
## Set P(A) P(B) H_A T_A H_B T_B
## 1 0.1789 0.8211 0.8944 0.8944 4.1056 4.1056
## 2 0.9680 0.0320 8.7119 0.9680 0.2881 0.0320
## 3 0.8981 0.1019 7.1846 1.7961 0.8154 0.2039
## 4 0.0597 0.9403 0.2387 0.3581 3.7613 5.6419
## 5 0.7196 0.2804 5.0375 2.1589 1.9625 0.8411
##
## [M-Step] Total H_A=22.0672 T_A=6.1756 | H_B=10.9328 T_B=10.8244
## [Update] theta_A: 0.7571 -> 0.781339 | theta_B: 0.4759 -> 0.502491
## [Delta] Euclidean Distance = 0.03595785
##
## === ITERASI 3 ===
## [E-Step] Probabilitas & Estimasi per Set:
## Set P(A) P(B) H_A T_A H_B T_B
## 1 0.1297 0.8703 0.6487 0.6487 4.3513 4.3513
## 2 0.9589 0.0411 8.6305 0.9589 0.3695 0.0411
## 3 0.8684 0.1316 6.9475 1.7369 1.0525 0.2631
## 4 0.0404 0.9596 0.1617 0.2426 3.8383 5.7574
## 5 0.6511 0.3489 4.5575 1.9532 2.4425 1.0468
##
## [M-Step] Total H_A=20.9459 T_A=5.5403 | H_B=12.0541 T_B=11.4597
## [Update] theta_A: 0.7813 -> 0.790823 | theta_B: 0.5025 -> 0.512640
## [Delta] Euclidean Distance = 0.01388987
##
## === ITERASI 4 ===
## [E-Step] Probabilitas & Estimasi per Set:
## Set P(A) P(B) H_A T_A H_B T_B
## 1 0.1129 0.8871 0.5644 0.5644 4.4356 4.4356
## 2 0.9550 0.0450 8.5952 0.9550 0.4048 0.0450
## 3 0.8552 0.1448 6.8420 1.7105 1.1580 0.2895
## 4 0.0342 0.9658 0.1368 0.2052 3.8632 5.7948
## 5 0.6218 0.3782 4.3523 1.8653 2.6477 1.1347
##
## [M-Step] Total H_A=20.4908 T_A=5.3004 | H_B=12.5092 T_B=11.6996
## [Update] theta_A: 0.7908 -> 0.794488 | theta_B: 0.5126 -> 0.516722
## [Delta] Euclidean Distance = 0.00548610
##
## === ITERASI 5 ===
## [E-Step] Probabilitas & Estimasi per Set:
## Set P(A) P(B) H_A T_A H_B T_B
## 1 0.1067 0.8933 0.5337 0.5337 4.4663 4.4663
## 2 0.9533 0.0467 8.5799 0.9533 0.4201 0.0467
## 3 0.8496 0.1504 6.7967 1.6992 1.2033 0.3008
## 4 0.0320 0.9680 0.1280 0.1920 3.8720 5.8080
## 5 0.6097 0.3903 4.2680 1.8291 2.7320 1.1709
##
## [M-Step] Total H_A=20.3062 T_A=5.2073 | H_B=12.6938 T_B=11.7927
## [Update] theta_A: 0.7945 -> 0.795901 | theta_B: 0.5167 -> 0.518399
## [Delta] Euclidean Distance = 0.00219320
##
## === ITERASI 14 ===
## [E-Step] Probabilitas & Estimasi per Set:
## Set P(A) P(B) H_A T_A H_B T_B
## 1 0.1030 0.8970 0.5150 0.5150 4.4850 4.4850
## 2 0.9520 0.0480 8.5681 0.9520 0.4319 0.0480
## 3 0.8455 0.1545 6.7640 1.6910 1.2360 0.3090
## 4 0.0307 0.9693 0.1228 0.1842 3.8772 5.8158
## 5 0.6015 0.3985 4.2105 1.8045 2.7895 1.1955
##
## [M-Step] Total H_A=20.1805 T_A=5.1468 | H_B=12.8195 T_B=11.8532
## [Update] theta_A: 0.7968 -> 0.796789 | theta_B: 0.5196 -> 0.519583
## [Delta] Euclidean Distance = 0.00000065
##
## >> KONVERGEN pada iterasi ke-14 (delta < 1e-06)
# ----------------------------------------------------------
# HASIL AKHIR
# ----------------------------------------------------------
cat("==========================================================\n")
## ==========================================================
cat(" HASIL AKHIR ALGORITMA EM\n")
## HASIL AKHIR ALGORITMA EM
cat("==========================================================\n")
## ==========================================================
cat(sprintf(" theta_A (prob H koin A) = %.6f\n", theta_A))
## theta_A (prob H koin A) = 0.796789
cat(sprintf(" theta_B (prob H koin B) = %.6f\n", theta_B))
## theta_B (prob H koin B) = 0.519583
cat("\n")
# Perbandingan dengan contoh slide
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
cat(" PERBANDINGAN HASIL\n")
## PERBANDINGAN HASIL
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
cat(sprintf(" %-20s %-12s %-12s\n", "Parameter", "theta_A", "theta_B"))
## Parameter theta_A theta_B
cat(sprintf(" %-20s %-12.4f %-12.4f\n", "Slide (0.6 / 0.5)", 0.7965, 0.5173))
## Slide (0.6 / 0.5) 0.7965 0.5173
cat(sprintf(" %-20s %-12.6f %-12.6f\n", "Tes ini (0.7 / 0.4)", theta_A, theta_B))
## Tes ini (0.7 / 0.4) 0.796789 0.519583
cat("\n")
# ----------------------------------------------------------
# KESIMPULAN IDENTIFIKASI KOIN PER SET
# ----------------------------------------------------------
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
cat(" KESIMPULAN IDENTIFIKASI KOIN\n")
## KESIMPULAN IDENTIFIKASI KOIN
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
e_final <- e_step(data_observasi, theta_A, theta_B)
cat(sprintf(" %-6s %-12s %-8s %-8s %-20s\n",
"Set", "Hasil", "P(Koin A)", "P(Koin B)", "Kesimpulan"))
## Set Hasil P(Koin A) P(Koin B) Kesimpulan
for (i in seq_along(e_final)) {
s <- data_observasi[[i]]
e <- e_final[[i]]
label <- ifelse(e$p_A > e$p_B, "Koin A", "Koin B")
cat(sprintf(" %-6d %dH %dT %-8.4f %-8.4f %-20s\n",
i, s["H"], s["T"], e$p_A, e$p_B, label))
}
## 1 5H 5T 0.1030 0.8970 Koin B
## 2 9H 1T 0.9520 0.0480 Koin A
## 3 8H 2T 0.8455 0.1545 Koin A
## 4 4H 6T 0.0307 0.9693 Koin B
## 5 7H 3T 0.6015 0.3985 Koin A
cat("\n")
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
cat(" INSIGHT\n")
## INSIGHT
cat("----------------------------------------------------------\n")
## ----------------------------------------------------------
cat(" 1. Dengan theta_A=0.7 (bias ke H) dan theta_B=0.4 (bias ke T),\n")
## 1. Dengan theta_A=0.7 (bias ke H) dan theta_B=0.4 (bias ke T),
cat(" pemisahan antara koin A dan B lebih tegas dibanding contoh.\n")
## pemisahan antara koin A dan B lebih tegas dibanding contoh.
cat(" 2. Set dengan banyak H diatribusikan kuat ke Koin A.\n")
## 2. Set dengan banyak H diatribusikan kuat ke Koin A.
cat(" 3. Set dengan banyak T diatribusikan kuat ke Koin B.\n")
## 3. Set dengan banyak T diatribusikan kuat ke Koin B.
cat(" 4. Nilai akhir theta konvergen mendekati nilai 'benar' dari data.\n")
## 4. Nilai akhir theta konvergen mendekati nilai 'benar' dari data.
cat(" 5. Jumlah iterasi hingga konvergen:\n")
## 5. Jumlah iterasi hingga konvergen:
cat(sprintf(" - Slide (0.6/0.5) : ~10 iterasi\n"))
## - Slide (0.6/0.5) : ~10 iterasi
cat(sprintf(" - Tes ini (0.7/0.4) : %d iterasi\n", nrow(riwayat)))
## - Tes ini (0.7/0.4) : 14 iterasi
cat("==========================================================\n")
## ==========================================================
# ----------------------------------------------------------
# TAMPILKAN RIWAYAT ITERASI RINGKAS
# ----------------------------------------------------------
cat("\n RIWAYAT KONVERGENSI THETA:\n")
##
## RIWAYAT KONVERGENSI THETA:
cat(sprintf(" %-8s %-12s %-12s %-15s\n", "Iterasi", "theta_A", "theta_B", "Delta"))
## Iterasi theta_A theta_B Delta
print_rows <- unique(c(1:5, nrow(riwayat)))
for (i in print_rows) {
r <- riwayat[i, ]
cat(sprintf(" %-8d %-12.6f %-12.6f %-15.8f\n",
r$iter, r$theta_A, r$theta_B, r$delta))
}
## 1 0.757111 0.475921 0.09500371
## 2 0.781339 0.502491 0.03595785
## 3 0.790823 0.512640 0.01388987
## 4 0.794488 0.516722 0.00548610
## 5 0.795901 0.518399 0.00219320
## 14 0.796789 0.519583 0.00000065
cat("==========================================================\n")
## ==========================================================