EM

# EM for a two-coin mixture: every set of tosses was produced by one of two
# coins (A or B) whose head probabilities theta_a and theta_b are unknown.

# Observed data: 5 sets of 10 tosses each.
# heads = count of "Angka" outcomes, tails = count of "Gambar" outcomes.
heads <- c(5, 9, 8, 4, 7)
tails <- c(5, 1, 2, 6, 3)
n_tosses <- 10

# The E-step uses n_tosses as the binomial size while the M-step uses the
# heads/tails counts, so the three must agree or the estimates are wrong
# without any visible error.
stopifnot(
  length(heads) == length(tails),
  all(heads + tails == n_tosses)
)

# Initial parameter guesses.
theta_a <- 0.3
theta_b <- 0.6

# A starting value of exactly 0 or 1 can give a set zero likelihood under
# both coins, which turns the weights below into NaN.
stopifnot(
  theta_a > 0, theta_a < 1,
  theta_b > 0, theta_b < 1
)

# Stopping rule: quit once the parameters move by no more than `tolerance`
# between two iterations, or after `max_iterations` passes as a safety net.
tolerance <- 1e-6
max_iterations <- 1000
diff <- 1 # starts above the tolerance so the loop body runs at least once
iteration <- 0

cat("Memulai Algoritma EM...\n")

## Memulai Algoritma EM...

cat(sprintf("Iterasi 0: theta_a = %.3f, theta_b = %.3f\n", theta_a, theta_b))

## Iterasi 0: theta_a = 0.300, theta_b = 0.600

# EM iteration loop
while (diff > tolerance && iteration < max_iterations) {
  iteration <- iteration + 1
  old_theta_a <- theta_a
  old_theta_b <- theta_b

  ### E-STEP (Expectation)
  # Binomial likelihood of each set under coin A and under coin B.
  prob_if_a <- dbinom(heads, size = n_tosses, prob = theta_a)
  prob_if_b <- dbinom(heads, size = n_tosses, prob = theta_b)

  # Normalise the two likelihoods into relative weights per set. No prior
  # term appears, so both coins are treated as equally likely a priori.
  weight_a <- prob_if_a / (prob_if_a + prob_if_b)
  weight_b <- 1 - weight_a

  # "Soft counts": expected number of heads and tails credited to each coin.
  expected_heads_a <- sum(weight_a * heads)
  expected_tails_a <- sum(weight_a * tails)

  expected_heads_b <- sum(weight_b * heads)
  expected_tails_b <- sum(weight_b * tails)

  ### M-STEP (Maximization)
  # New theta = share of heads among the tosses credited to that coin.
  theta_a <- expected_heads_a / (expected_heads_a + expected_tails_a)
  theta_b <- expected_heads_b / (expected_heads_b + expected_tails_b)

  # Euclidean distance between the old and new parameter pairs.
  diff <- sqrt((theta_a - old_theta_a)^2 + (theta_b - old_theta_b)^2)

  cat(sprintf(
    "Iterasi %d: theta_a = %.3f, theta_b = %.3f (diff = %.6f)\n",
    iteration, theta_a, theta_b, diff
  ))
}

# Reaching the cap means the estimates below are not converged values.
if (diff > tolerance) {
  warning(
    sprintf(
      "Algoritma EM belum konvergen setelah %d iterasi (diff = %.6g)",
      iteration, diff
    ),
    call. = FALSE
  )
}

## Iterasi 1: theta_a = 0.451, theta_b = 0.715 (diff = 0.189309)
## Iterasi 2: theta_a = 0.496, theta_b = 0.765 (diff = 0.067587)
## Iterasi 3: theta_a = 0.511, theta_b = 0.784 (diff = 0.024489)
## Iterasi 4: theta_a = 0.516, theta_b = 0.792 (diff = 0.009297)
## Iterasi 5: theta_a = 0.518, theta_b = 0.795 (diff = 0.003612)
## Iterasi 6: theta_a = 0.519, theta_b = 0.796 (diff = 0.001420)
## Iterasi 7: theta_a = 0.519, theta_b = 0.797 (diff = 0.000562)
## Iterasi 8: theta_a = 0.519, theta_b = 0.797 (diff = 0.000223)
## Iterasi 9: theta_a = 0.520, theta_b = 0.797 (diff = 0.000089)
## Iterasi 10: theta_a = 0.520, theta_b = 0.797 (diff = 0.000036)
## Iterasi 11: theta_a = 0.520, theta_b = 0.797 (diff = 0.000014)
## Iterasi 12: theta_a = 0.520, theta_b = 0.797 (diff = 0.000006)
## Iterasi 13: theta_a = 0.520, theta_b = 0.797 (diff = 0.000002)
## Iterasi 14: theta_a = 0.520, theta_b = 0.797 (diff = 0.000001)

# Final report: a header, then one line per coin with the estimate printed
# to four decimal places.
cat("\nHasil Akhir Konvergensi:\n", sep = "")

## 
## Hasil Akhir Konvergensi:

result_line_a <- sprintf("Theta A: %.4f\n", theta_a)
cat(result_line_a)

## Theta A: 0.5196

result_line_b <- sprintf("Theta B: %.4f\n", theta_b)
cat(result_line_b)

## Theta B: 0.7968

Dengan nilai Theta_A = 0,5196 dan Theta_B = 0,7968, kita dapat menebak koin mana yang paling mungkin digunakan pada tiap set:

5H 5T, kemungkinan besar Koin A, karena hasil yang seimbang cocok dengan probabilitas Koin A = 51,96% (hampir seimbang).
9H 1T, kemungkinan besar Koin B, karena cocok dengan probabilitas Koin B = 79,68% (lebih condong ke H).
8H 2T, kemungkinan besar Koin B, karena cocok dengan probabilitas Koin B = 79,68% (lebih condong ke H).
4H 6T, kemungkinan besar Koin A, karena hasil yang hampir seimbang cocok dengan probabilitas Koin A = 51,96% (hampir seimbang).
7H 3T, kemungkinan besar Koin B, karena cocok dengan probabilitas Koin B = 79,68% (lebih condong ke H).

Koin mana yang diberi label “A” atau “B” ditentukan oleh urutan nilai parameter awal: jika nilai awal Theta_A lebih besar daripada Theta_B, maka koin yang condong ke “Head” akan menjadi Koin A, dan sebaliknya.

# EM for a two-coin mixture, second run: both coins start from the same
# head probability.

# Observed data: 5 sets of 10 tosses each.
# heads = count of "Angka" outcomes, tails = count of "Gambar" outcomes.
heads <- c(5, 9, 8, 4, 7)
tails <- c(5, 1, 2, 6, 3)
n_tosses <- 10

# The E-step uses n_tosses as the binomial size while the M-step uses the
# heads/tails counts, so the three must agree or the estimates are wrong
# without any visible error.
stopifnot(
  length(heads) == length(tails),
  all(heads + tails == n_tosses)
)

# Initial parameter guesses (identical on purpose in this run).
theta_a <- 0.5
theta_b <- 0.5

# A starting value of exactly 0 or 1 can give a set zero likelihood under
# both coins, which turns the weights below into NaN.
stopifnot(
  theta_a > 0, theta_a < 1,
  theta_b > 0, theta_b < 1
)

# Stopping rule: quit once the parameters move by no more than `tolerance`
# between two iterations, or after `max_iterations` passes as a safety net.
tolerance <- 1e-6
max_iterations <- 1000
diff <- 1 # starts above the tolerance so the loop body runs at least once
iteration <- 0

cat("Memulai Algoritma EM...\n")

## Memulai Algoritma EM...

cat(sprintf("Iterasi 0: theta_a = %.3f, theta_b = %.3f\n", theta_a, theta_b))

## Iterasi 0: theta_a = 0.500, theta_b = 0.500

# EM iteration loop
while (diff > tolerance && iteration < max_iterations) {
  iteration <- iteration + 1
  old_theta_a <- theta_a
  old_theta_b <- theta_b

  ### E-STEP (Expectation)
  # Binomial likelihood of each set under coin A and under coin B.
  prob_if_a <- dbinom(heads, size = n_tosses, prob = theta_a)
  prob_if_b <- dbinom(heads, size = n_tosses, prob = theta_b)

  # Normalise the two likelihoods into relative weights per set. No prior
  # term appears, so both coins are treated as equally likely a priori.
  weight_a <- prob_if_a / (prob_if_a + prob_if_b)
  weight_b <- 1 - weight_a

  # "Soft counts": expected number of heads and tails credited to each coin.
  expected_heads_a <- sum(weight_a * heads)
  expected_tails_a <- sum(weight_a * tails)

  expected_heads_b <- sum(weight_b * heads)
  expected_tails_b <- sum(weight_b * tails)

  ### M-STEP (Maximization)
  # New theta = share of heads among the tosses credited to that coin.
  theta_a <- expected_heads_a / (expected_heads_a + expected_tails_a)
  theta_b <- expected_heads_b / (expected_heads_b + expected_tails_b)

  # Euclidean distance between the old and new parameter pairs.
  diff <- sqrt((theta_a - old_theta_a)^2 + (theta_b - old_theta_b)^2)

  cat(sprintf(
    "Iterasi %d: theta_a = %.3f, theta_b = %.3f (diff = %.6f)\n",
    iteration, theta_a, theta_b, diff
  ))
}

# Reaching the cap means the estimates below are not converged values.
if (diff > tolerance) {
  warning(
    sprintf(
      "Algoritma EM belum konvergen setelah %d iterasi (diff = %.6g)",
      iteration, diff
    ),
    call. = FALSE
  )
}

## Iterasi 1: theta_a = 0.660, theta_b = 0.660 (diff = 0.226274)
## Iterasi 2: theta_a = 0.660, theta_b = 0.660 (diff = 0.000000)

# Final report: a header, then one line per coin with the estimate printed
# to four decimal places.
cat("\nHasil Akhir Konvergensi:\n", sep = "")

## 
## Hasil Akhir Konvergensi:

result_line_a <- sprintf("Theta A: %.4f\n", theta_a)
cat(result_line_a)

## Theta A: 0.6600

result_line_b <- sprintf("Theta B: %.4f\n", theta_b)
cat(result_line_b)

## Theta B: 0.6600

Di sini algoritma EM gagal membedakan Koin A dan Koin B: Theta_A dan Theta_B berakhir pada nilai yang sama, yaitu 66%. Karena kedua parameter awal sama, setiap set mendapat bobot 0,5 untuk masing-masing koin, sehingga pembaruan Theta_A dan Theta_B selalu identik (66% adalah proporsi Head keseluruhan, yaitu 33 dari 50 lemparan).
Karena itu, saat menggunakan algoritma EM ini sebaiknya jangan memakai nilai awal Theta_A dan Theta_B yang sama; gunakan nilai acak atau nilai yang berbeda.

# EM for a two-coin mixture, third run: the two starting values differ only
# slightly (0.49 versus 0.51).

# Observed data: 5 sets of 10 tosses each.
# heads = count of "Angka" outcomes, tails = count of "Gambar" outcomes.
heads <- c(5, 9, 8, 4, 7)
tails <- c(5, 1, 2, 6, 3)
n_tosses <- 10

# The E-step uses n_tosses as the binomial size while the M-step uses the
# heads/tails counts, so the three must agree or the estimates are wrong
# without any visible error.
stopifnot(
  length(heads) == length(tails),
  all(heads + tails == n_tosses)
)

# Initial parameter guesses.
theta_a <- 0.49
theta_b <- 0.51

# A starting value of exactly 0 or 1 can give a set zero likelihood under
# both coins, which turns the weights below into NaN.
stopifnot(
  theta_a > 0, theta_a < 1,
  theta_b > 0, theta_b < 1
)

# Stopping rule: quit once the parameters move by no more than `tolerance`
# between two iterations, or after `max_iterations` passes as a safety net.
tolerance <- 1e-6
max_iterations <- 1000
diff <- 1 # starts above the tolerance so the loop body runs at least once
iteration <- 0

cat("Memulai Algoritma EM...\n")

## Memulai Algoritma EM...

cat(sprintf("Iterasi 0: theta_a = %.3f, theta_b = %.3f\n", theta_a, theta_b))

## Iterasi 0: theta_a = 0.490, theta_b = 0.510

# EM iteration loop
while (diff > tolerance && iteration < max_iterations) {
  iteration <- iteration + 1
  old_theta_a <- theta_a
  old_theta_b <- theta_b

  ### E-STEP (Expectation)
  # Binomial likelihood of each set under coin A and under coin B.
  prob_if_a <- dbinom(heads, size = n_tosses, prob = theta_a)
  prob_if_b <- dbinom(heads, size = n_tosses, prob = theta_b)

  # Normalise the two likelihoods into relative weights per set. No prior
  # term appears, so both coins are treated as equally likely a priori.
  weight_a <- prob_if_a / (prob_if_a + prob_if_b)
  weight_b <- 1 - weight_a

  # "Soft counts": expected number of heads and tails credited to each coin.
  expected_heads_a <- sum(weight_a * heads)
  expected_tails_a <- sum(weight_a * tails)

  expected_heads_b <- sum(weight_b * heads)
  expected_tails_b <- sum(weight_b * tails)

  ### M-STEP (Maximization)
  # New theta = share of heads among the tosses credited to that coin.
  theta_a <- expected_heads_a / (expected_heads_a + expected_tails_a)
  theta_b <- expected_heads_b / (expected_heads_b + expected_tails_b)

  # Euclidean distance between the old and new parameter pairs.
  diff <- sqrt((theta_a - old_theta_a)^2 + (theta_b - old_theta_b)^2)

  cat(sprintf(
    "Iterasi %d: theta_a = %.3f, theta_b = %.3f (diff = %.6f)\n",
    iteration, theta_a, theta_b, diff
  ))
}

# Reaching the cap means the estimates below are not converged values.
if (diff > tolerance) {
  warning(
    sprintf(
      "Algoritma EM belum konvergen setelah %d iterasi (diff = %.6g)",
      iteration, diff
    ),
    call. = FALSE
  )
}

## Iterasi 1: theta_a = 0.645, theta_b = 0.673 (diff = 0.225101)
## Iterasi 2: theta_a = 0.639, theta_b = 0.681 (diff = 0.010209)
## Iterasi 3: theta_a = 0.628, theta_b = 0.692 (diff = 0.015128)
## Iterasi 4: theta_a = 0.613, theta_b = 0.707 (diff = 0.021703)
## Iterasi 5: theta_a = 0.593, theta_b = 0.727 (diff = 0.028773)
## Iterasi 6: theta_a = 0.569, theta_b = 0.750 (diff = 0.032829)
## Iterasi 7: theta_a = 0.548, theta_b = 0.771 (diff = 0.029567)
## Iterasi 8: theta_a = 0.534, theta_b = 0.785 (diff = 0.020019)
## Iterasi 9: theta_a = 0.526, theta_b = 0.792 (diff = 0.010542)
## Iterasi 10: theta_a = 0.522, theta_b = 0.795 (diff = 0.004722)
## Iterasi 11: theta_a = 0.521, theta_b = 0.796 (diff = 0.001951)
## Iterasi 12: theta_a = 0.520, theta_b = 0.797 (diff = 0.000780)
## Iterasi 13: theta_a = 0.520, theta_b = 0.797 (diff = 0.000308)
## Iterasi 14: theta_a = 0.520, theta_b = 0.797 (diff = 0.000121)
## Iterasi 15: theta_a = 0.520, theta_b = 0.797 (diff = 0.000048)
## Iterasi 16: theta_a = 0.520, theta_b = 0.797 (diff = 0.000019)
## Iterasi 17: theta_a = 0.520, theta_b = 0.797 (diff = 0.000007)
## Iterasi 18: theta_a = 0.520, theta_b = 0.797 (diff = 0.000003)
## Iterasi 19: theta_a = 0.520, theta_b = 0.797 (diff = 0.000001)
## Iterasi 20: theta_a = 0.520, theta_b = 0.797 (diff = 0.000000)

# Final report: a header, then one line per coin with the estimate printed
# to four decimal places.
cat("\nHasil Akhir Konvergensi:\n", sep = "")

## 
## Hasil Akhir Konvergensi:

result_line_a <- sprintf("Theta A: %.4f\n", theta_a)
cat(result_line_a)

## Theta A: 0.5196

result_line_b <- sprintf("Theta B: %.4f\n", theta_b)
cat(result_line_b)

## Theta B: 0.7968

Di sini, walaupun nilai awal Theta_A = 0,49 dan Theta_B = 0,51 hanya sedikit berbeda dari nilai 0,5 pada percobaan sebelumnya, hasil akhirnya sangat berbeda dan sama dengan hasil untuk nilai awal Theta_A = 0,3 dan Theta_B = 0,6. Hal ini menunjukkan bahwa hasil akhir konvergensi sekitar 0,52 dan 0,80 untuk Theta_A dan Theta_B adalah titik di mana fungsi likelihood untuk data tersebut mencapai maksimum (estimasi MLE).

EM

Muhammad Kholilullah

2026-05-04