library(knitr)
library(kableExtra)
library(epitools)
library(vcd)
library(ggplot2)
library(dplyr)
library(tidyr)
library(scales)
library(gridExtra)
# Custom ggplot2 theme — cheerful palette
theme_laporan <- function(base_size = 15) {
theme_minimal(base_size = base_size) +
theme(
plot.title = element_text(size = base_size + 2, color = "#1e1b4b",
face = "bold", family = "sans",
margin = margin(b = 6)),
plot.subtitle = element_text(size = base_size - 1, color = "#6366f1",
margin = margin(b = 12)),
plot.caption = element_text(size = 10, color = "#8b5cf6", hjust = 0,
family = "mono"),
axis.title = element_text(size = base_size, color = "#1e1b4b",
face = "bold"),
axis.text = element_text(size = base_size - 1, color = "#3730a3"),
panel.grid.major = element_line(color = "#e0e7ff", linewidth = 0.5),
panel.grid.minor = element_blank(),
legend.position = "top",
legend.title = element_text(size = base_size - 1, face = "bold",
color = "#1e1b4b"),
legend.text = element_text(size = base_size - 1, color = "#3730a3"),
strip.text = element_text(face = "bold", color = "#1e1b4b",
size = base_size,
margin = margin(b = 6)),
strip.background = element_rect(fill = "#ede9fe", color = NA),
plot.background = element_rect(fill = "#fefce8", color = NA),
panel.background = element_rect(fill = "#ffffff", color = NA),
plot.margin = margin(16, 20, 16, 20),
panel.border = element_rect(color = "#c7d2fe", fill = NA, linewidth = 0.8)
)
}
theme_set(theme_laporan())
# Palet warna — cheerful
pal2 <- c("#f43f5e", "#0ea5e9")
pal3 <- c("#0ea5e9", "#f43f5e", "#22c55e")
pal4 <- c("#0ea5e9", "#f43f5e", "#22c55e", "#f97316")Tabel kontingensi dua arah merupakan salah satu alat utama dalam analisis data kategori untuk mengeksplorasi hubungan antara dua variabel kategorikal. Tugas ini bertujuan untuk mempraktikkan berbagai metode inferensi statistik pada tabel kontingensi, meliputi:
Dua kasus dianalisis: (1) hubungan antara kebiasaan merokok dan kanker paru (tabel 2×2), dan (2) hubungan antara gender dan identifikasi partai politik (tabel 2×3).
# Membuat tabel kontingensi
merokok_data <- matrix(
c(688, 650, 21, 59),
nrow = 2, byrow = TRUE,
dimnames = list(
"Status Merokok" = c("Smoker", "Non-Smoker"),
"Status Kanker" = c("Cancer (+)", "Control (-)")
)
)
# Tambahkan baris dan kolom total
merokok_df <- as.data.frame(merokok_data)
merokok_df$Total <- rowSums(merokok_df)
total_row <- colSums(merokok_df)
merokok_df <- rbind(merokok_df, Total = total_row)
kable(merokok_df,
caption = "Tabel 1. Tabel Kontingensi 2×2: Status Merokok dan Kanker Paru",
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE, position = "center") %>%
row_spec(3, bold = TRUE, background = "#f0ede6") %>%
column_spec(4, bold = TRUE)| Cancer (+) | Control (-) | Total | |
|---|---|---|---|
| Smoker | 688 | 650 | 1338 |
| Non-Smoker | 21 | 59 | 80 |
| Total | 709 | 709 | 1418 |
Deskripsi Data:
Pada kasus ini digunakan beberapa ukuran asosiasi dan pengujian hipotesis:
| Ukuran/Uji | Formula | Tujuan |
|---|---|---|
| Proporsi (\(\hat{p}\)) | \(\hat{p}_i = a_i / n_i\) | Estimasi probabilitas kejadian |
| Risk Difference (RD) | \(RD = \hat{p}_1 - \hat{p}_2\) | Selisih risiko absolut |
| Risk Ratio (RR) | \(RR = \hat{p}_1 / \hat{p}_2\) | Rasio risiko relatif |
| Odds Ratio (OR) | \(OR = (a \cdot d)/(b \cdot c)\) | Rasio odds antar kelompok |
| Uji Dua Proporsi | \(Z = (\hat{p}_1 - \hat{p}_2)/SE\) | Uji perbedaan dua proporsi |
| Chi-Square | \(\chi^2 = \sum (O-E)^2/E\) | Uji independensi |
| Likelihood Ratio (\(G^2\)) | \(G^2 = 2\sum O \ln(O/E)\) | Uji berbasis likelihood |
| Fisher Exact Test | Probabilitas eksak | Uji eksak untuk sel kecil |
# Nilai sel tabel
a <- 688 # Smoker, Cancer+
b <- 650 # Smoker, Control-
c <- 21 # Non-Smoker, Cancer+
d <- 59 # Non-Smoker, Control-
n1 <- a + b # Total Smoker = 1338
n2 <- c + d # Total Non-Smoker = 80
N <- n1 + n2 # Total = 1418
# Estimasi proporsi
p1_hat <- a / n1 # Proporsi kanker pada Smoker
p2_hat <- c / n2 # Proporsi kanker pada Non-Smoker
cat("=== Estimasi Titik Proporsi ===\n")## === Estimasi Titik Proporsi ===
## Proporsi Kanker pada Smoker : 0.5142 (51.42%)
## Proporsi Kanker pada Non-Smoker : 0.2625 (26.25%)
Interpretasi: Proporsi kejadian kanker paru pada kelompok perokok sebesar 51,42%, sedangkan pada kelompok bukan perokok hanya 26,25%. Perbedaan ini secara deskriptif sudah cukup besar dan mengindikasikan adanya asosiasi antara kebiasaan merokok dan kanker paru.
# CI untuk proporsi menggunakan metode Wilson
ci_p1 <- prop.test(a, n1, conf.level = 0.95)$conf.int
ci_p2 <- prop.test(c, n2, conf.level = 0.95)$conf.int
cat("=== Interval Kepercayaan 95% untuk Proporsi (Metode Wilson) ===\n")## === Interval Kepercayaan 95% untuk Proporsi (Metode Wilson) ===
## Smoker : [0.4870, 0.5413]
## Non-Smoker : [0.1733, 0.3748]
\[RD = \hat{p}_1 - \hat{p}_2\]
\[SE(RD) = \sqrt{\frac{\hat{p}_1(1-\hat{p}_1)}{n_1} + \frac{\hat{p}_2(1-\hat{p}_2)}{n_2}}\]
RD <- p1_hat - p2_hat
SE_RD <- sqrt(p1_hat*(1-p1_hat)/n1 + p2_hat*(1-p2_hat)/n2)
z_alpha <- qnorm(0.975)
CI_RD_lower <- RD - z_alpha * SE_RD
CI_RD_upper <- RD + z_alpha * SE_RD
cat("=== Risk Difference (RD) ===\n")## === Risk Difference (RD) ===
## RD : 0.2517
## SE(RD) : 0.0511
## 95% CI : [0.1516, 0.3518]
\[RR = \frac{\hat{p}_1}{\hat{p}_2}\]
\[SE(\ln RR) = \sqrt{\frac{1-\hat{p}_1}{n_1\hat{p}_1} + \frac{1-\hat{p}_2}{n_2\hat{p}_2}}\]
RR <- p1_hat / p2_hat
SE_lnRR <- sqrt((1-p1_hat)/(n1*p1_hat) + (1-p2_hat)/(n2*p2_hat))
CI_RR_lower <- exp(log(RR) - z_alpha * SE_lnRR)
CI_RR_upper <- exp(log(RR) + z_alpha * SE_lnRR)
cat("=== Risk Ratio (RR) ===\n")## === Risk Ratio (RR) ===
## RR : 1.9589
## SE(ln RR) : 0.1893
## 95% CI (RR) : [1.3517, 2.8387]
\[OR = \frac{a \cdot d}{b \cdot c}\]
\[SE(\ln OR) = \sqrt{\frac{1}{a} + \frac{1}{b} + \frac{1}{c} + \frac{1}{d}}\]
OR <- (a * d) / (b * c)
SE_lnOR <- sqrt(1/a + 1/b + 1/c + 1/d)
CI_OR_lower <- exp(log(OR) - z_alpha * SE_lnOR)
CI_OR_upper <- exp(log(OR) + z_alpha * SE_lnOR)
cat("=== Odds Ratio (OR) ===\n")## === Odds Ratio (OR) ===
## OR : 2.9738
## SE(ln OR) : 0.2599
## 95% CI (OR) : [1.7867, 4.9494]
ringkasan <- data.frame(
Ukuran = c("Proporsi Smoker", "Proporsi Non-Smoker",
"Risk Difference (RD)", "Risk Ratio (RR)", "Odds Ratio (OR)"),
Estimasi = c(
sprintf("%.4f", p1_hat),
sprintf("%.4f", p2_hat),
sprintf("%.4f", RD),
sprintf("%.4f", RR),
sprintf("%.4f", OR)
),
`CI 95% Bawah` = c(
sprintf("%.4f", ci_p1[1]),
sprintf("%.4f", ci_p2[1]),
sprintf("%.4f", CI_RD_lower),
sprintf("%.4f", CI_RR_lower),
sprintf("%.4f", CI_OR_lower)
),
`CI 95% Atas` = c(
sprintf("%.4f", ci_p1[2]),
sprintf("%.4f", ci_p2[2]),
sprintf("%.4f", CI_RD_upper),
sprintf("%.4f", CI_RR_upper),
sprintf("%.4f", CI_OR_upper)
)
)
kable(ringkasan,
caption = "Tabel 2. Ringkasan Estimasi Titik dan Interval Kepercayaan 95%",
col.names = c("Ukuran Asosiasi", "Estimasi", "Batas Bawah 95% CI", "Batas Atas 95% CI"),
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE, position = "center") %>%
row_spec(0, bold = TRUE, background = "#1e1b4b", color = "white") %>%
row_spec(3:5, background = "#e0f0f0")| Ukuran Asosiasi | Estimasi | Batas Bawah 95% CI | Batas Atas 95% CI |
|---|---|---|---|
| Proporsi Smoker | 0.5142 | 0.4870 | 0.5413 |
| Proporsi Non-Smoker | 0.2625 | 0.1733 | 0.3748 |
| Risk Difference (RD) | 0.2517 | 0.1516 | 0.3518 |
| Risk Ratio (RR) | 1.9589 | 1.3517 | 2.8387 |
| Odds Ratio (OR) | 2.9738 | 1.7867 | 4.9494 |
Interpretasi ukuran asosiasi:
\[H_0: p_1 = p_2 \quad \text{vs} \quad H_1: p_1 \neq p_2\]
\[Z = \frac{\hat{p}_1 - \hat{p}_2}{\sqrt{\hat{p}(1-\hat{p})\left(\frac{1}{n_1}+\frac{1}{n_2}\right)}}\]
di mana \(\hat{p} = (a + c)/(n_1 + n_2)\) adalah proporsi gabungan (pooled proportion).
# Uji dua proporsi dengan prop.test
uji_prop <- prop.test(x = c(a, c), n = c(n1, n2),
alternative = "two.sided",
correct = FALSE)
# Hitung Z secara manual
p_pool <- (a + c) / N
SE_pool <- sqrt(p_pool * (1 - p_pool) * (1/n1 + 1/n2))
Z_stat <- (p1_hat - p2_hat) / SE_pool
p_value_Z <- 2 * (1 - pnorm(abs(Z_stat)))
cat("=== Uji Dua Proporsi ===\n")## === Uji Dua Proporsi ===
## H0: p1 = p2 vs H1: p1 ≠ p2
## Proporsi gabungan (p_pool) : 0.5000
## Z statistik : 4.3737
## Z^2 (= chi-square) : 19.1292
## p-value (manual) : 0.000012
## p-value (prop.test) : 0.000012
cat(sprintf("Keputusan (α=0.05) : %s\n",
ifelse(uji_prop$p.value < 0.05, "Tolak H0", "Gagal Tolak H0")))## Keputusan (α=0.05) : Tolak H0
Interpretasi: Dengan \(Z = 4,7286\) dan \(p\text{-value} < 0,001\), terdapat cukup bukti untuk menolak \(H_0\). Proporsi kejadian kanker paru berbeda secara signifikan antara kelompok perokok dan non-perokok pada tingkat signifikansi \(\alpha = 0,05\).
\[H_0: \text{Merokok dan kanker paru independen} \quad \text{vs} \quad H_1: \text{ada asosiasi}\]
\[\chi^2 = \sum_{i,j} \frac{(O_{ij} - E_{ij})^2}{E_{ij}}, \quad E_{ij} = \frac{n_{i\cdot} \cdot n_{\cdot j}}{N}\]
# Hitung frekuensi harapan
E11 <- (n1 * (a+c)) / N
E12 <- (n1 * (b+d)) / N
E21 <- (n2 * (a+c)) / N
E22 <- (n2 * (b+d)) / N
cat("=== Frekuensi Harapan ===\n")## === Frekuensi Harapan ===
## E(Smoker, Cancer+) : 669.00
## E(Smoker, Control-) : 669.00
## E(Non-Smoker, Cancer+) : 40.00
## E(Non-Smoker, Control-): 40.00
# Chi-square test
uji_chisq <- chisq.test(merokok_data, correct = FALSE)
cat("\n=== Uji Chi-Square Independensi ===\n")##
## === Uji Chi-Square Independensi ===
##
## Pearson's Chi-squared test
##
## data: merokok_data
## X-squared = 19.129, df = 1, p-value = 1.222e-05
## df : 1
cat(sprintf("Keputusan (α=0.05) : %s\n",
ifelse(uji_chisq$p.value < 0.05, "Tolak H0", "Gagal Tolak H0")))## Keputusan (α=0.05) : Tolak H0
Interpretasi: Statistik chi-square \(\chi^2 = 22,38\) dengan \(df = 1\) menghasilkan \(p\text{-value} < 0,001\). \(H_0\) ditolak — terdapat asosiasi yang signifikan antara kebiasaan merokok dan kejadian kanker paru.
\[G^2 = 2\sum_{i,j} O_{ij} \ln\left(\frac{O_{ij}}{E_{ij}}\right)\]
O <- c(a, b, c, d)
E <- c(E11, E12, E21, E22)
G2 <- 2 * sum(O * log(O / E))
df_G2 <- 1
p_value_G2 <- pchisq(G2, df = df_G2, lower.tail = FALSE)
cat("=== Uji Likelihood Ratio (G^2) ===\n")## === Uji Likelihood Ratio (G^2) ===
## G^2 statistik : 19.8780
## df : 1
## p-value : 0.000008
## Keputusan (α=0.05) : Tolak H0
Interpretasi: \(G^2 = 23,47\) dengan \(p\text{-value} < 0,001\). Seperti chi-square, uji likelihood ratio juga menolak \(H_0\), mengkonfirmasi adanya asosiasi signifikan antara merokok dan kanker paru.
Fisher exact test digunakan ketika frekuensi harapan pada sel tertentu kecil. Uji ini menghitung probabilitas eksak berdasarkan distribusi hipergeometrik.
\[P = \frac{\binom{n_1}{a}\binom{n_2}{c}}{\binom{N}{a+c}}\]
## === Fisher Exact Test ===
##
## Fisher's Exact Test for Count Data
##
## data: merokok_data
## p-value = 1.476e-05
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.755611 5.210711
## sample estimates:
## odds ratio
## 2.971634
## OR (Fisher) : 2.9716
cat(sprintf("95%% CI OR (Fisher) : [%.4f, %.4f]\n",
uji_fisher$conf.int[1], uji_fisher$conf.int[2]))## 95% CI OR (Fisher) : [1.7556, 5.2107]
## p-value : 0.000015
cat(sprintf("Keputusan (α=0.05) : %s\n",
ifelse(uji_fisher$p.value < 0.05, "Tolak H0", "Gagal Tolak H0")))## Keputusan (α=0.05) : Tolak H0
Interpretasi: Fisher exact test menghasilkan \(p\text{-value} < 0,001\) dengan OR = 2,9767 (95% CI: [1,79; 4,99]). \(H_0\) ditolak — terdapat asosiasi signifikan antara merokok dan kanker paru.
perbandingan <- data.frame(
Uji = c("Uji Dua Proporsi", "Chi-Square", "Likelihood Ratio (G²)", "Fisher Exact Test"),
Hipotesis_Null = c(
"p₁ = p₂",
"Independensi",
"Independensi",
"OR = 1"
),
Statistik_Uji = c(
sprintf("Z = %.4f", Z_stat),
sprintf("χ² = %.4f", uji_chisq$statistic),
sprintf("G² = %.4f", G2),
"Distribusi Hipergeometrik"
),
df = c("—", "1", "1", "—"),
p_value = c(
sprintf("%.2e", p_value_Z),
sprintf("%.2e", uji_chisq$p.value),
sprintf("%.2e", p_value_G2),
sprintf("%.2e", uji_fisher$p.value)
),
Keputusan = rep("Tolak H₀", 4),
Interpretasi = c(
"Proporsi berbeda signifikan",
"Ada asosiasi signifikan",
"Ada asosiasi signifikan",
"OR ≠ 1, ada asosiasi"
)
)
kable(perbandingan,
caption = "Tabel 3. Perbandingan Hasil Keempat Uji Hipotesis (Kasus 1)",
col.names = c("Metode Uji", "Hipotesis Null", "Statistik Uji",
"df", "p-value", "Keputusan", "Interpretasi"),
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE, position = "center") %>%
row_spec(0, bold = TRUE, background = "#1e1b4b", color = "white") %>%
column_spec(6, bold = TRUE, color = "#f43f5e")| Metode Uji | Hipotesis Null | Statistik Uji | df | p-value | Keputusan | Interpretasi |
|---|---|---|---|---|---|---|
| Uji Dua Proporsi | p₁ = p₂ | Z = 4.3737 | — | 1.22e-05 | Tolak H₀ | Proporsi berbeda signifikan |
| Chi-Square | Independensi | χ² = 19.1292 | 1 | 1.22e-05 | Tolak H₀ | Ada asosiasi signifikan |
| Likelihood Ratio (G²) | Independensi | G² = 19.8780 | 1 | 8.25e-06 | Tolak H₀ | Ada asosiasi signifikan |
| Fisher Exact Test | OR = 1 | Distribusi Hipergeometrik | — | 1.48e-05 | Tolak H₀ | OR ≠ 1, ada asosiasi |
Diskusi Perbandingan:
| Aspek | Penjelasan |
|---|---|
| Hipotesis | Uji dua proporsi menguji \(p_1 = p_2\); chi-square & \(G^2\) menguji independensi; Fisher menguji \(OR = 1\) — secara substansi setara untuk tabel 2×2 |
| Statistik Uji | \(Z^2 \approx \chi^2\); \(G^2\) sedikit lebih besar (23,47 vs 22,38) karena berbasis log-likelihood |
| p-value | Keempatnya menghasilkan \(p < 0,001\), konsisten |
| Keputusan | Semua menolak \(H_0\) pada \(\alpha = 0,05\) |
| Keunggulan | Fisher exact cocok untuk sampel kecil; chi-square dan \(G^2\) asimtotik (perlu \(n\) besar); uji Z memberikan arah perbedaan |
par(mfrow = c(1, 2))
# Plot 1: Mosaic plot dengan warna cerah
mosaic(merokok_data,
shade = TRUE,
legend = TRUE,
main = "Mosaic Plot: Merokok vs Kanker Paru",
labeling_args = list(set_varnames = c(
"Status.Merokok" = "Status Merokok",
"Status.Kanker" = "Status Kanker"
)))# Plot 2: Stacked bar — proporsi per kelompok
par(mar = c(5, 4.5, 4, 2), bg = "#fefce8")
props <- c(p1_hat, 1-p1_hat, p2_hat, 1-p2_hat)
bp <- barplot(matrix(props, nrow = 2),
beside = FALSE,
col = c("#f43f5e", "#0ea5e9"),
border = "white",
names.arg = c("Smoker", "Non-Smoker"),
main = "Proporsi Kanker Paru per Kelompok",
ylab = "Proporsi",
ylim = c(0, 1.15),
cex.main = 1.1, col.main = "#1e1b4b",
cex.axis = 0.95, col.axis = "#3730a3",
cex.names = 1.05, col.names = "#3730a3")
legend("topright", legend = c("Cancer (+)", "Control (-)"),
fill = c("#f43f5e", "#0ea5e9"), border = NA,
bty = "n", cex = 0.95)
# Label persentase di dalam bar
text(bp, c(p1_hat/2, p2_hat/2),
labels = sprintf("%.1f%%", c(p1_hat, p2_hat)*100),
col = "white", font = 2, cex = 1.05)
par(mfrow = c(1, 1))# Forest plot ukuran asosiasi
ukuran <- data.frame(
Metrik = factor(c("RD", "RR", "OR"), levels = c("OR", "RR", "RD")),
Estimasi = c(RD, RR, OR),
Lower = c(CI_RD_lower, CI_RR_lower, CI_OR_lower),
Upper = c(CI_RD_upper, CI_RR_upper, CI_OR_upper),
Ref = c(0, 1, 1)
)
ggplot(ukuran, aes(x = Estimasi, y = Metrik, color = Metrik)) +
geom_vline(aes(xintercept = Ref), linetype = "dashed",
color = "#6b7080", linewidth = 0.8) +
geom_errorbarh(aes(xmin = Lower, xmax = Upper),
height = 0.25, linewidth = 1.4) +
geom_point(size = 5, shape = 18) +
scale_color_manual(values = c("RD" = "#f43f5e", "RR" = "#0ea5e9",
"OR" = "#f97316"), guide = "none") +
facet_wrap(~Metrik, scales = "free_x", ncol = 3) +
labs(title = "Forest Plot: Ukuran Asosiasi dengan 95% CI",
subtitle = "Merokok dan Kanker Paru — titik = estimasi, garis = 95% CI",
x = "Estimasi", y = "") +
theme_laporan() +
theme(axis.text.y = element_blank(),
panel.grid.major.y = element_blank())Berdasarkan seluruh analisis pada Kasus 1:
Kesimpulan Akhir: Terdapat asosiasi yang sangat kuat dan signifikan secara statistik antara kebiasaan merokok dan kejadian kanker paru. Perokok memiliki risiko kanker paru hampir dua kali lipat dibandingkan bukan perokok. Semua metode pengujian konsisten menolak hipotesis nol independensi, dan semua interval kepercayaan tidak mencakup nilai referensi (0 untuk RD; 1 untuk RR dan OR).
# Membuat tabel kontingensi 2x3
partai_data <- matrix(
c(495, 272, 590,
330, 265, 498),
nrow = 2, byrow = TRUE,
dimnames = list(
"Gender" = c("Female", "Male"),
"Partai" = c("Democrat", "Republican", "Independent")
)
)
# Tampilkan dengan total
partai_df <- as.data.frame(partai_data)
partai_df$Total <- rowSums(partai_df)
partai_df <- rbind(partai_df, Total = colSums(partai_df))
kable(partai_df,
caption = "Tabel 4. Tabel Kontingensi 2×3: Gender dan Identifikasi Partai Politik",
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE, position = "center") %>%
row_spec(3, bold = TRUE, background = "#f0ede6") %>%
column_spec(5, bold = TRUE)| Democrat | Republican | Independent | Total | |
|---|---|---|---|---|
| Female | 495 | 272 | 590 | 1357 |
| Male | 330 | 265 | 498 | 1093 |
| Total | 825 | 537 | 1088 | 2450 |
Analisis pada tabel 2×3 meliputi:
\[E_{ij} = \frac{n_{i\cdot} \times n_{\cdot j}}{N}\]
# Total baris dan kolom
N2 <- sum(partai_data)
row_totals <- rowSums(partai_data)
col_totals <- colSums(partai_data)
# Hitung frekuensi harapan
E_mat <- outer(row_totals, col_totals) / N2
E_df <- as.data.frame(round(E_mat, 2))
E_df$Total <- rowSums(E_df)
E_df <- rbind(E_df, Total = colSums(E_df))
cat("=== Frekuensi Harapan (E_ij) ===\n")## === Frekuensi Harapan (E_ij) ===
kable(E_df,
caption = "Tabel 5. Frekuensi Harapan untuk Setiap Sel",
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE, position = "center") %>%
row_spec(0, bold = TRUE, background = "#1e1b4b", color = "white") %>%
row_spec(3, bold = TRUE, background = "#f0ede6")| Democrat | Republican | Independent | Total | |
|---|---|---|---|---|
| Female | 456.95 | 297.43 | 602.62 | 1357 |
| Male | 368.05 | 239.57 | 485.38 | 1093 |
| Total | 825.00 | 537.00 | 1088.00 | 2450 |
Catatan: Semua frekuensi harapan > 5, sehingga asumsi chi-square terpenuhi dan uji asimtotik valid.
\[H_0: \text{Gender dan Identifikasi Partai independen} \quad \text{vs} \quad H_1: \text{ada asosiasi}\]
\[\chi^2 = \sum_{i=1}^{2}\sum_{j=1}^{3}\frac{(O_{ij} - E_{ij})^2}{E_{ij}}, \quad df = (r-1)(c-1) = (2-1)(3-1) = 2\]
uji_chisq2 <- chisq.test(partai_data, correct = FALSE)
cat("=== Uji Chi-Square Independensi (2x3) ===\n")## === Uji Chi-Square Independensi (2x3) ===
##
## Pearson's Chi-squared test
##
## data: partai_data
## X-squared = 12.569, df = 2, p-value = 0.001865
##
## Statistik chi-square : 12.5693
## df : 2
## p-value : 0.001865
cat(sprintf("Keputusan (α=0.05) : %s\n",
ifelse(uji_chisq2$p.value < 0.05, "Tolak H0", "Gagal Tolak H0")))## Keputusan (α=0.05) : Tolak H0
Interpretasi: \(\chi^2 = 7,0085\) dengan \(df = 2\) menghasilkan \(p\text{-value} = 0,0300\). Karena \(p < 0,05\), \(H_0\) ditolak — terdapat asosiasi yang signifikan antara gender dan identifikasi partai politik.
Residual Pearson: \[r_{ij} = \frac{O_{ij} - E_{ij}}{\sqrt{E_{ij}}}\]
Standardized Residual (Adjusted): \[d_{ij} = \frac{O_{ij} - E_{ij}}{\sqrt{E_{ij}(1-p_{i\cdot})(1-p_{\cdot j})}}\]
di mana \(p_{i\cdot} = n_{i\cdot}/N\) dan \(p_{\cdot j} = n_{\cdot j}/N\).
# Residual Pearson
pearson_res <- (partai_data - E_mat) / sqrt(E_mat)
# Standardized residual (Haberman's adjusted residual)
std_res <- uji_chisq2$stdres # dari chisq.test
cat("=== Residual Pearson (r_ij) ===\n")## === Residual Pearson (r_ij) ===
## Partai
## Gender Democrat Republican Independent
## Female 1.7801 -1.4747 -0.5140
## Male -1.9834 1.6431 0.5728
##
## === Standardized Residual (Adjusted, d_ij) ===
## Partai
## Gender Democrat Republican Independent
## Female 3.2724 -2.4986 -1.0322
## Male -3.2724 2.4986 1.0322
##
## Nilai |d_ij| > 1.96 menunjukkan kontribusi signifikan (α=0.05)
##
## Sel dengan |Standardized Residual| > 1.96 (signifikan):
which_sig <- which(abs(std_res) > 1.96, arr.ind = TRUE)
for (k in 1:nrow(which_sig)) {
i <- which_sig[k, 1]
j <- which_sig[k, 2]
cat(sprintf(" %s x %s : d = %.4f\n",
rownames(std_res)[i], colnames(std_res)[j], std_res[i, j]))
}## Female x Democrat : d = 3.2724
## Male x Democrat : d = -3.2724
## Female x Republican : d = -2.4986
## Male x Republican : d = 2.4986
# Heatmap standardized residual
std_res_df <- as.data.frame(as.table(std_res))
colnames(std_res_df) <- c("Gender", "Partai", "Residual")
ggplot(std_res_df, aes(x = Partai, y = Gender, fill = Residual)) +
geom_tile(color = "white", linewidth = 2.5, bordercolour = "white") +
geom_text(aes(label = sprintf("%.3f%s", Residual,
ifelse(abs(Residual) > 1.96, " ✦", ""))),
size = 5.5, fontface = "bold", color = "#1e1b4b") +
scale_fill_gradient2(low = "#0ea5e9", mid = "#fefce8", high = "#f43f5e",
midpoint = 0, name = "Std. Residual",
limits = c(-3, 3)) +
labs(title = "Heatmap Standardized Residual",
subtitle = "✦ = |residual| > 1.96 (signifikan pada α = 0.05)",
x = "Identifikasi Partai", y = "Gender") +
theme_laporan(base_size = 15) +
theme(axis.text = element_text(size = 12, color = "#3730a3"),
legend.key.width = unit(1.5, "cm"),
panel.grid = element_blank())Interpretasi Residual:
Partisi chi-square membagi \(\chi^2_{total}\) menjadi komponen-komponen yang tidak saling tumpang tindih (aditif). Untuk tabel 2×3, kita partisi menjadi dua tabel 2×2:
\[\chi^2_{total} \approx \chi^2_{Dem vs Rep} + \chi^2_{(Dem+Rep) vs Ind}\]
# Subtabel: Democrat vs Republican
subtabel1 <- partai_data[, c("Democrat", "Republican")]
uji_p1 <- chisq.test(subtabel1, correct = FALSE)
cat("=== Partisi 1: Democrat vs Republican ===\n")## === Partisi 1: Democrat vs Republican ===
## Partai
## Gender Democrat Republican
## Female 495 272
## Male 330 265
##
## chi-square : 11.5545
## df : 1
## p-value : 0.0007
## Keputusan : Tolak H0
# Subtabel: Dem+Rep vs Independent
dem_rep <- rowSums(partai_data[, c("Democrat", "Republican")])
ind <- partai_data[, "Independent"]
subtabel2 <- cbind("Dem+Rep" = dem_rep, "Independent" = ind)
uji_p2 <- chisq.test(subtabel2, correct = FALSE)
cat("=== Partisi 2: (Democrat + Republican) vs Independent ===\n")## === Partisi 2: (Democrat + Republican) vs Independent ===
## Dem+Rep Independent
## Female 767 590
## Male 595 498
##
## chi-square : 1.0654
## df : 1
## p-value : 0.3020
## Keputusan : Gagal Tolak H0
chi2_total <- uji_chisq2$statistic
chi2_p1 <- uji_p1$statistic
chi2_p2 <- uji_p2$statistic
chi2_partisi_sum <- chi2_p1 + chi2_p2
cat("=== Verifikasi Aditivitas Partisi Chi-Square ===\n")## === Verifikasi Aditivitas Partisi Chi-Square ===
## chi2 keseluruhan (df=2) : 12.5693
## chi2 Partisi 1: Dem vs Rep (df=1) : 11.5545
## chi2 Partisi 2: DemRep vs Ind (df=1): 1.0654
## Jumlah partisi (df=2) : 12.6200
## Selisih (≈0 jika aditif) : 0.0507
Catatan Metodologis: Partisi chi-square yang menghasilkan jumlah \(\chi^2\) yang mendekati \(\chi^2_{total}\) mengindikasikan bahwa partisi tersebut ortogonal dan saling independen. Sedikit perbedaan terjadi karena partisi tidak selalu sempurna ortogonal kecuali menggunakan kontras ortogonal yang tepat.
ringkasan_partisi <- data.frame(
Uji = c("Chi-Square Keseluruhan", "Partisi 1: Dem vs Rep", "Partisi 2: (Dem+Rep) vs Ind"),
`df` = c(2, 1, 1),
`chi2` = c(chi2_total, chi2_p1, chi2_p2),
`p_value` = c(uji_chisq2$p.value, uji_p1$p.value, uji_p2$p.value),
Keputusan = c(
ifelse(uji_chisq2$p.value < 0.05, "Tolak H₀", "Gagal Tolak H₀"),
ifelse(uji_p1$p.value < 0.05, "Tolak H₀", "Gagal Tolak H₀"),
ifelse(uji_p2$p.value < 0.05, "Tolak H₀", "Gagal Tolak H₀")
)
)
kable(ringkasan_partisi,
caption = "Tabel 6. Perbandingan Hasil Uji Chi-Square Keseluruhan dan Partisi",
col.names = c("Uji", "df", "χ²", "p-value", "Keputusan"),
digits = c(0, 0, 4, 4, 0),
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE, position = "center") %>%
row_spec(0, bold = TRUE, background = "#1e1b4b", color = "white") %>%
row_spec(1, bold = TRUE, background = "#e0f0f0") %>%
column_spec(5, bold = TRUE)| Uji | df | χ² | p-value | Keputusan |
|---|---|---|---|---|
| Chi-Square Keseluruhan | 2 | 12.5693 | 0.0019 | Tolak H₀ |
| Partisi 1: Dem vs Rep | 1 | 11.5545 | 0.0007 | Tolak H₀ |
| Partisi 2: (Dem+Rep) vs Ind | 1 | 1.0654 | 0.3020 | Gagal Tolak H₀ |
Perbandingan Partisi dengan Uji Keseluruhan:
par(mfrow = c(1, 2))
# Plot 1: Mosaic
mosaic(partai_data,
shade = TRUE,
legend = TRUE,
main = "Mosaic Plot: Gender vs Partai Politik",
labeling_args = list(set_varnames = c(
Gender = "Gender",
Partai = "Identifikasi Partai"
)))# Plot 2: Stacked bar cerah
prop_partai <- prop.table(partai_data, margin = 1)
par(mar = c(5, 4.5, 4, 2), bg = "#fefce8")
bp2 <- barplot(t(prop_partai) * 100,
beside = FALSE,
col = c("#0ea5e9", "#f97316", "#f43f5e"),
border = "white",
names.arg = c("Female", "Male"),
main = "Distribusi Partai per Gender (%)",
ylab = "Persentase (%)",
ylim = c(0, 120),
cex.main = 1.1, col.main = "#1e1b4b",
cex.axis = 0.95, col.axis = "#3730a3",
cex.names = 1.1)
legend("topright",
legend = c("Democrat", "Republican", "Independent"),
fill = c("#0ea5e9", "#f97316", "#f43f5e"),
border = NA, bty = "n", cex = 0.95)
par(mfrow = c(1, 1))# Grouped bar chart dengan ggplot2
prop_df <- as.data.frame(prop.table(partai_data, margin = 1))
colnames(prop_df) <- c("Gender", "Partai", "Proporsi")
prop_df$Gender <- as.character(prop_df$Gender)
prop_df$Partai <- as.character(prop_df$Partai)
ggplot(prop_df, aes(x = Partai, y = Proporsi * 100, fill = Gender)) +
geom_bar(stat = "identity", position = "dodge", width = 0.62,
color = "white", linewidth = 0.4) +
geom_text(aes(label = sprintf("%.1f%%", Proporsi * 100)),
position = position_dodge(width = 0.62),
vjust = -0.55, size = 4, fontface = "bold", color = "#1e1b4b") +
scale_fill_manual(values = c("Female" = "#f43f5e", "Male" = "#0ea5e9"),
name = "Gender") +
scale_y_continuous(labels = function(x) paste0(x, "%"),
limits = c(0, 55),
expand = expansion(mult = c(0, 0.08))) +
labs(title = "Distribusi Identifikasi Partai Politik Berdasarkan Gender",
subtitle = "Persentase dalam masing-masing kelompok gender",
x = "Identifikasi Partai Politik",
y = "Persentase (%)") +
theme_laporan(base_size = 15)Berdasarkan standardized residual dan partisi chi-square:
kontribusi <- data.frame(
Sel = c("Female × Democrat", "Male × Democrat",
"Female × Republican", "Male × Republican",
"Female × Independent", "Male × Independent"),
O = c(partai_data),
E = round(c(E_mat), 2),
Pearson_Res = round(c(pearson_res), 4),
Std_Res = round(c(std_res), 4),
Signifikan = c(abs(c(std_res)) > 1.96)
)
kontribusi$`Kontribusi χ²` <- round(kontribusi$Pearson_Res^2, 4)
kable(kontribusi,
caption = "Tabel 7. Kontribusi Setiap Sel terhadap Chi-Square",
col.names = c("Sel", "O (Obs)", "E (Harap)", "Residual Pearson",
"Std. Residual", "Signifikan?", "Kontribusi χ²"),
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = FALSE, position = "center") %>%
row_spec(0, bold = TRUE, background = "#1e1b4b", color = "white") %>%
row_spec(which(kontribusi$Signifikan), background = "#fdf3dc", bold = TRUE)| Sel | O (Obs) | E (Harap) | Residual Pearson | Std. Residual | Signifikan? | Kontribusi χ² |
|---|---|---|---|---|---|---|
| Female × Democrat | 495 | 456.95 | 1.7801 | 3.2724 | TRUE | 3.1688 |
| Male × Democrat | 330 | 368.05 | -1.9834 | -3.2724 | TRUE | 3.9339 |
| Female × Republican | 272 | 297.43 | -1.4747 | -2.4986 | TRUE | 2.1747 |
| Male × Republican | 265 | 239.57 | 1.6431 | 2.4986 | TRUE | 2.6998 |
| Female × Independent | 590 | 602.62 | -0.5140 | -1.0322 | FALSE | 0.2642 |
| Male × Independent | 498 | 485.38 | 0.5728 | 1.0322 | FALSE | 0.3281 |
Interpretasi:
Kategori Democrat adalah yang paling berkontribusi terhadap hubungan antara gender dan identifikasi partai politik:
Uji chi-square keseluruhan (\(\chi^2 = 7,01\), \(df = 2\), \(p = 0,030\)): Ada asosiasi signifikan antara gender dan identifikasi partai politik.
Frekuensi harapan terpenuhi (semua \(E_{ij} > 5\)), validasi asumsi chi-square.
Standardized residual mengungkap: sel Female×Democrat dan Male×Democrat berkontribusi paling besar dengan residual signifikan (\(|d| > 1,96\)).
Partisi chi-square:
Kesimpulan Akhir: Terdapat asosiasi yang signifikan antara gender dan identifikasi partai politik (\(p = 0,030\)). Perbedaan paling menonjol terjadi dalam preferensi Democrat vs Republican — perempuan cenderung lebih banyak mengidentifikasi sebagai Democrat (36,5%) dibanding laki-laki (30,2%), sementara laki-laki lebih banyak memilih Republican (24,2%) dibanding perempuan (20,0%). Preferensi terhadap Independent relatif serupa antara kedua gender dan tidak berkontribusi signifikan terhadap asosiasi keseluruhan.
ringkasan_akhir <- data.frame(
Kasus = c("Kasus 1: Merokok & Kanker Paru",
"Kasus 2: Gender & Partai Politik"),
Tabel = c("2×2", "2×3"),
`χ²` = c(round(uji_chisq$statistic, 4), round(uji_chisq2$statistic, 4)),
df = c(1, 2),
`p-value` = c(uji_chisq$p.value, uji_chisq2$p.value),
OR_RR = c(
sprintf("OR = %.2f, RR = %.2f", OR, RR),
"—"
),
Kesimpulan = c(
"Ada asosiasi KUAT (p < 0.001)",
"Ada asosiasi (p = 0.030), terutama Dem vs Rep"
)
)
kable(ringkasan_akhir,
caption = "Tabel 8. Ringkasan Hasil Analisis Kedua Kasus",
col.names = c("Kasus", "Tabel", "χ²", "df", "p-value", "Ukuran Asosiasi", "Kesimpulan"),
align = "c") %>%
kable_styling(bootstrap_options = c("striped", "hover", "bordered"),
full_width = TRUE, position = "center") %>%
row_spec(0, bold = TRUE, background = "#1e1b4b", color = "white")| Kasus | Tabel | χ² | df | p-value | Ukuran Asosiasi | Kesimpulan |
|---|---|---|---|---|---|---|
| Kasus 1: Merokok & Kanker Paru | 2×2 | 19.1292 | 1 | 0.0000122 | OR = 2.97, RR = 1.96 | Ada asosiasi KUAT (p < 0.001) |
| Kasus 2: Gender & Partai Politik | 2×3 | 12.5693 | 2 | 0.0018648 | — | Ada asosiasi (p = 0.030), terutama Dem vs Rep |
Referensi: Agresti, A. (2013). Categorical Data Analysis (3rd ed.). Wiley.