packages <- c("nnet", "MASS", "ggplot2", "dplyr", "tidyr",
"broom", "RColorBrewer", "pscl", "AER",
"lmtest", "gridExtra", "knitr", "kableExtra")
for (pkg in packages) {
if (!requireNamespace(pkg, quietly = TRUE)) {
install.packages(pkg, repos = "https://cloud.r-project.org")
}
}
library(nnet); library(MASS); library(ggplot2)
library(dplyr); library(tidyr); library(broom)
library(RColorBrewer); library(pscl); library(AER)
library(lmtest); library(gridExtra); library(knitr); library(kableExtra)Dataset ini berisi data dari 200 siswa yang
dikumpulkan untuk mengetahui pengaruh status sosial ekonomi
(ses) dan kemampuan menulis (write) terhadap
jenis program pendidikan (prog) yang diikuti siswa.
| Variabel | Tipe | Keterangan |
|---|---|---|
prog |
Respon | Jenis program: General, Academic, Vocational |
ses |
Prediktor | Status sosial ekonomi: Low, Middle, High |
write |
Prediktor | Skor kemampuan menulis (kontinu) |
data_multi <- data.frame(
ses = c(
"low","middle","high","low","middle","high","middle","middle","middle","middle",
"low","high","low","middle","low","low","low","middle","middle","high",
"middle","middle","low","low","low","high","low","middle","low","low",
"middle","low","low","low","middle","middle","middle","high","low","middle",
"middle","high","low","high","low","low","middle","low","middle","middle",
"middle","middle","low","low","middle","low","high","high","low","high",
"middle","middle","high","middle","middle","middle","low","middle","middle","middle",
"high","middle","high","low","middle","low","middle","middle","middle","middle",
"low","middle","high","low","high","low","middle","middle","middle","middle",
"high","high","high","middle","middle","middle","middle","middle","middle","middle",
"middle","middle","middle","middle","middle","low","low","middle","middle","middle",
"low","middle","middle","middle","middle","middle","high","high","middle","middle",
"low","middle","middle","high","middle","high","middle","middle","middle","high",
"middle","high","low","high","middle","high","middle","high","low","high",
"high","low","middle","middle","middle","middle","middle","low","middle","high",
"middle","high","middle","middle","low","middle","high","high","low","high",
"high","high","low","high","middle","middle","high","high","high","high",
"low","high","low","high","high","low","middle","low","low","high",
"high","middle","high","middle","high","high","high","high","high","middle",
"high","high","middle","high","high","high","middle","middle","middle","middle"),
write = c(
35,33,39,37,31,36,36,31,41,37,44,33,31,44,35,44,46,46,46,49,
39,44,47,44,37,38,49,44,41,50,44,39,40,46,41,39,33,59,41,47,
31,42,55,40,44,54,46,54,42,49,41,41,44,57,52,49,41,57,62,33,
54,40,52,57,49,46,57,49,52,53,54,57,41,52,57,54,52,52,44,46,
49,54,52,44,49,46,54,39,59,45,52,54,44,50,62,59,52,52,46,41,
52,52,55,41,49,59,54,54,59,55,62,54,54,54,59,57,61,52,59,62,
60,59,65,61,59,59,59,59,65,57,59,49,49,59,62,59,54,63,43,54,
52,57,57,57,61,62,62,65,57,59,59,62,65,62,62,62,54,62,65,62,
67,65,60,59,59,59,65,62,65,59,59,61,65,67,65,65,67,65,62,52,
63,59,65,59,67,67,60,67,62,54,65,62,59,60,63,65,63,67,65,62),
prog = c(
"vocation","general","vocation","vocation","vocation","general","vocation","vocation","vocation","vocation",
"vocation","academic","vocation","vocation","vocation","general","general","vocation","academic","vocation",
"general","vocation","vocation","vocation","academic","academic","general","general","academic","academic",
"general","vocation","academic","academic","vocation","vocation","vocation","academic","general","academic",
"general","academic","academic","vocation","academic","vocation","vocation","general","vocation","academic",
"academic","vocation","general","academic","academic","general","academic","general","vocation","general",
"vocation","academic","academic","vocation","vocation","vocation","general","academic","academic","general",
"academic","academic","academic","general","vocation","general","vocation","general","general","academic",
"vocation","academic","academic","general","vocation","academic","general","general","general","vocation",
"vocation","general","vocation","academic","vocation","general","academic","vocation","vocation","general",
"academic","vocation","vocation","vocation","general","academic","general","general","academic","academic",
"academic","academic","academic","academic","academic","vocation","academic","academic","academic","academic",
"vocation","academic","academic","academic","academic","academic","academic","academic","academic","academic",
"academic","academic","general","academic","academic","general","academic","academic","academic","academic",
"academic","academic","general","general","academic","general","general","general","academic","academic",
"academic","academic","academic","vocation","general","academic","academic","academic","academic","academic",
"general","general","academic","academic","academic","vocation","general","academic","academic","general",
"general","academic","academic","vocation","academic","academic","academic","academic","academic","academic",
"academic","academic","academic","general","academic","academic","academic","academic","academic","academic",
"academic","academic","academic","academic","academic","academic","vocation","academic","academic","academic")
)
data_multi$ses <- factor(data_multi$ses, levels = c("low","middle","high"))
data_multi$prog <- factor(data_multi$prog, levels = c("general","academic","vocation"))
cat("Dimensi data:", nrow(data_multi), "baris x", ncol(data_multi), "kolom\n")## Dimensi data: 200 baris x 3 kolom
# Tabel frekuensi prog
kable(as.data.frame(table(Program = data_multi$prog)),
caption = "Distribusi Jenis Program") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Program | Freq |
|---|---|
| general | 45 |
| academic | 105 |
| vocation | 50 |
# Tabel frekuensi ses
kable(as.data.frame(table(SES = data_multi$ses)),
caption = "Distribusi Status Sosial Ekonomi") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| SES | Freq |
|---|---|
| low | 47 |
| middle | 95 |
| high | 58 |
# Ringkasan write
kable(data.frame(
Min = min(data_multi$write),
Q1 = quantile(data_multi$write, 0.25),
Median = median(data_multi$write),
Mean = round(mean(data_multi$write), 2),
Q3 = quantile(data_multi$write, 0.75),
Max = max(data_multi$write)
), caption = "Statistik Write Score") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Min | Q1 | Median | Mean | Q3 | Max | |
|---|---|---|---|---|---|---|
| 25% | 31 | 45.75 | 54 | 52.77 | 60 | 67 |
ggplot(data_multi, aes(x = ses, fill = prog)) +
geom_bar(position = "dodge", color = "white") +
scale_fill_brewer(palette = "Set2", name = "Program") +
labs(title = "Distribusi Jenis Program per Status Sosial Ekonomi",
x = "SES", y = "Frekuensi") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Distribusi program per SES
ggplot(data_multi, aes(x = prog, y = write, fill = prog)) +
geom_boxplot(alpha = 0.7) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Distribusi Write Score per Jenis Program",
x = "Program", y = "Write Score") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5),
legend.position = "none")Write score per program
# Kategori referensi: "general"
model_multi <- multinom(prog ~ ses + write, data = data_multi, trace = FALSE)
summary(model_multi)## Call:
## multinom(formula = prog ~ ses + write, data = data_multi, trace = FALSE)
##
## Coefficients:
## (Intercept) sesmiddle seshigh write
## academic -2.851973 0.5332914 1.1628257 0.05792480
## vocation 2.366097 0.8246384 0.1802176 -0.05567514
##
## Std. Errors:
## (Intercept) sesmiddle seshigh write
## academic 1.166437 0.4437319 0.5142215 0.02141092
## vocation 1.174251 0.4901237 0.6484508 0.02333135
##
## Residual Deviance: 359.9635
## AIC: 375.9635
z_multi <- summary(model_multi)$coefficients / summary(model_multi)$standard.errors
p_multi <- (1 - pnorm(abs(z_multi), 0, 1)) * 2
kable(round(p_multi, 4), caption = "P-value Koefisien") %>%
kable_styling(bootstrap_options = c("striped","hover"))| (Intercept) | sesmiddle | seshigh | write | |
|---|---|---|---|---|
| academic | 0.0145 | 0.2294 | 0.0237 | 0.0068 |
| vocation | 0.0439 | 0.0925 | 0.7811 | 0.0170 |
or_multi <- exp(coef(model_multi))
ci_multi <- exp(confint(model_multi))
kable(round(or_multi, 4), caption = "Odds Ratio") %>%
kable_styling(bootstrap_options = c("striped","hover"))| (Intercept) | sesmiddle | seshigh | write | |
|---|---|---|---|---|
| academic | 0.0577 | 1.7045 | 3.1990 | 1.0596 |
| vocation | 10.6557 | 2.2811 | 1.1975 | 0.9458 |
null_multi <- multinom(prog ~ 1, data = data_multi, trace = FALSE)
lr_test <- anova(null_multi, model_multi)
mcfadden_m <- as.numeric(1 - logLik(model_multi)/logLik(null_multi))
cat("AIC :", round(AIC(model_multi), 2), "\n")## AIC : 375.96
## BIC : 402.35
## Pseudo R² : 0.1182
## Likelihood ratio tests of Multinomial Models
##
## Response: prog
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 1 398 408.1933
## 2 ses + write 392 359.9635 1 vs 2 6 48.2299 1.063001e-08
pred_multi <- predict(model_multi, type = "class")
conf_mat <- table(Aktual = data_multi$prog, Prediksi = pred_multi)
kable(conf_mat, caption = "Confusion Matrix") %>%
kable_styling(bootstrap_options = c("striped","hover"))| general | academic | vocation | |
|---|---|---|---|
| general | 7 | 27 | 11 |
| academic | 4 | 92 | 9 |
| vocation | 4 | 23 | 23 |
## Akurasi: 61 %
write_seq <- seq(min(data_multi$write), max(data_multi$write), length.out = 100)
pred_df <- expand.grid(write = write_seq, ses = levels(data_multi$ses))
prob_pred <- predict(model_multi, newdata = pred_df, type = "probs")
pred_df <- cbind(pred_df, prob_pred)
pred_long <- pivot_longer(pred_df, cols = c("general","academic","vocation"),
names_to = "prog", values_to = "prob")
ggplot(pred_long, aes(x = write, y = prob, color = prog)) +
geom_line(linewidth = 1.1) +
facet_wrap(~ ses, labeller = labeller(
ses = c(low = "SES: Low", middle = "SES: Middle", high = "SES: High"))) +
scale_color_brewer(palette = "Set2", name = "Program") +
labs(title = "Probabilitas Prediksi berdasarkan Write Score & SES",
x = "Write Score", y = "Probabilitas") +
theme_minimal(base_size = 12) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Probabilitas prediksi per write score & SES
Data tabel kontingensi tiga arah tentang kepercayaan terhadap kehidupan setelah kematian (afterlife).
| Variabel | Tipe | Keterangan |
|---|---|---|
opinion |
Respon | yes > undecided > no (ordinal) |
race |
Prediktor | white / black |
gender |
Prediktor | female / male |
data_ord_raw <- data.frame(
race = c(rep("white",3), rep("white",3), rep("black",3), rep("black",3)),
gender = c(rep("female",3), rep("male",3), rep("female",3), rep("male",3)),
opinion= rep(c("yes","undecided","no"), 4),
n = c(371,49,74, 250,45,71, 64,9,15, 25,5,13)
)
# Expand ke unit record
data_ord <- data_ord_raw[rep(seq_len(nrow(data_ord_raw)), data_ord_raw$n),
c("race","gender","opinion")]
rownames(data_ord) <- NULL
data_ord$opinion <- factor(data_ord$opinion,
levels = c("no","undecided","yes"), ordered = TRUE)
data_ord$race <- factor(data_ord$race, levels = c("white","black"))
data_ord$gender <- factor(data_ord$gender, levels = c("female","male"))
cat("Total observasi:", nrow(data_ord), "\n")## Total observasi: 991
kable(as.data.frame(table(Opinion = data_ord$opinion)),
caption = "Distribusi Opinion") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Opinion | Freq |
|---|---|
| no | 173 |
| undecided | 108 |
| yes | 710 |
kable(as.data.frame(ftable(data_ord$race, data_ord$gender, data_ord$opinion)),
caption = "Tabel Kontingensi 3 Arah") %>%
kable_styling(bootstrap_options = c("striped","hover"))| Var1 | Var2 | Var3 | Freq |
|---|---|---|---|
| white | female | no | 74 |
| black | female | no | 15 |
| white | male | no | 71 |
| black | male | no | 13 |
| white | female | undecided | 49 |
| black | female | undecided | 9 |
| white | male | undecided | 45 |
| black | male | undecided | 5 |
| white | female | yes | 371 |
| black | female | yes | 64 |
| white | male | yes | 250 |
| black | male | yes | 25 |
data_ord_raw$opinion <- factor(data_ord_raw$opinion, levels = c("no","undecided","yes"))
ggplot(data_ord_raw, aes(x = gender, y = n, fill = opinion)) +
geom_bar(stat = "identity", position = "fill", color = "white") +
facet_wrap(~ race) +
scale_fill_manual(
values = c("no"="#d73027","undecided"="#fee08b","yes"="#1a9850"),
name = "Opinion") +
labs(title = "Proporsi Pendapat tentang Afterlife berdasarkan Ras dan Gender",
x = "Gender", y = "Proporsi") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Proporsi opinion per race dan gender
## Call:
## polr(formula = opinion ~ race + gender, data = data_ord, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## raceblack -0.2657 0.2003 -1.327
## gendermale -0.3786 0.1407 -2.691
##
## Intercepts:
## Value Std. Error t value
## no|undecided -1.7579 0.1116 -15.7487
## undecided|yes -1.1266 0.1008 -11.1765
##
## Residual Deviance: 1547.736
## AIC: 1555.736
ct_ord <- coef(summary(model_ord))
p_ord <- pnorm(abs(ct_ord[,"t value"]), lower.tail = FALSE) * 2
ct_out <- cbind(round(ct_ord, 4), "p.value" = round(p_ord, 5))
kable(ct_out, caption = "Koefisien, SE, t-value, dan P-value") %>%
kable_styling(bootstrap_options = c("striped","hover"))| Value | Std. Error | t value | p.value | |
|---|---|---|---|---|
| raceblack | -0.2657 | 0.2003 | -1.3268 | 0.18457 |
| gendermale | -0.3786 | 0.1407 | -2.6911 | 0.00712 |
| no|undecided | -1.7579 | 0.1116 | -15.7487 | 0.00000 |
| undecided|yes | -1.1266 | 0.1008 | -11.1765 | 0.00000 |
or_ord <- exp(coef(model_ord))
ci_ord <- exp(confint(model_ord))
or_tbl <- data.frame(OR = round(or_ord, 4),
Lower95 = round(ci_ord[,1], 4),
Upper95 = round(ci_ord[,2], 4))
kable(or_tbl, caption = "Odds Ratio dan Interval Kepercayaan 95%") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| OR | Lower95 | Upper95 | |
|---|---|---|---|
| raceblack | 0.7666 | 0.5210 | 1.1444 |
| gendermale | 0.6849 | 0.5198 | 0.9025 |
null_ord <- polr(opinion ~ 1, data = data_ord, Hess = TRUE)
mcfadden_o <- as.numeric(1 - logLik(model_ord)/logLik(null_ord))
cat("AIC:", round(AIC(model_ord), 2), "\n")## AIC: 1555.74
## BIC: 1575.33
## Pseudo R² McFadden: 0.0054
## Likelihood ratio tests of ordinal regression models
##
## Response: opinion
## Model Resid. df Resid. Dev Test Df LR stat. Pr(Chi)
## 1 1 989 1556.197
## 2 race + gender 987 1547.736 1 vs 2 2 8.460807 0.01454652
new_data_ord <- expand.grid(race = c("white","black"), gender = c("female","male"))
prob_ord <- predict(model_ord, newdata = new_data_ord, type = "probs")
prob_ord_df <- cbind(new_data_ord, prob_ord)
prob_long <- pivot_longer(prob_ord_df, cols = c("no","undecided","yes"),
names_to = "opinion", values_to = "prob")
prob_long$opinion <- factor(prob_long$opinion, levels = c("no","undecided","yes"))
prob_long$group <- paste(prob_long$race, prob_long$gender, sep = " - ")
ggplot(prob_long, aes(x = group, y = prob, fill = opinion)) +
geom_bar(stat = "identity", color = "white") +
scale_fill_manual(
values = c("no"="#d73027","undecided"="#fee08b","yes"="#1a9850"),
name = "Opinion") +
labs(title = "Probabilitas Prediksi Ordinal per Kelompok",
x = "Kelompok", y = "Probabilitas") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5),
axis.text.x = element_text(angle = 20, hjust = 1))Probabilitas prediksi ordinal per kelompok
or_df_ord <- data.frame(
term = names(or_ord),
OR = or_ord,
lower = ci_ord[,1],
upper = ci_ord[,2]
)
ggplot(or_df_ord, aes(x = term, y = OR)) +
geom_point(size = 4, color = "#2166ac") +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, color = "#2166ac") +
geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
labs(title = "Odds Ratio dengan CI 95% — Regresi Ordinal",
x = "Prediktor", y = "Odds Ratio") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Forest plot OR regresi ordinal
Dataset Horseshoe Crabs (n = 173). Variabel respon adalah
sat (jumlah satelit — data cacahan), dengan prediktor
berupa karakteristik fisik kepiting.
| Variabel | Tipe | Keterangan |
|---|---|---|
sat |
Respon | Jumlah kepiting jantan satelit (cacahan ≥ 0) |
weight |
Prediktor | Berat kepiting betina (kg) |
width |
Prediktor | Lebar karapas (cm) |
color |
Prediktor | Warna cangkang (1–4) |
spine |
Prediktor | Kondisi duri (1–3) |
crabs_raw <- "crab,sat,weight,width,color,spine
1,8,3.050,28.3,2,3
2,0,1.550,22.5,3,3
3,9,2.300,26.0,1,1
4,0,2.100,24.8,3,3
5,4,2.600,26.0,3,3
6,0,2.100,23.8,2,3
7,0,2.350,26.5,1,1
8,0,1.900,24.7,3,2
9,0,1.950,23.7,2,1
10,0,2.150,25.6,3,3
11,0,2.150,24.3,3,3
12,0,2.650,25.8,2,3
13,11,3.050,28.2,2,3
14,0,1.850,21.0,4,2
15,14,2.300,26.0,2,1
16,8,2.950,27.1,1,1
17,1,2.000,25.2,2,3
18,1,3.000,29.0,2,3
19,0,2.200,24.7,4,3
20,5,2.700,27.4,2,3
21,4,1.950,23.2,2,2
22,3,2.300,25.0,1,2
23,1,1.600,22.5,2,1
24,2,2.600,26.7,3,3
25,3,2.000,25.8,4,3
26,0,1.300,26.2,4,3
27,3,3.150,28.7,2,3
28,5,2.700,26.8,2,1
29,0,2.600,27.5,4,3
30,0,2.100,24.9,2,3
31,4,3.200,29.3,1,1
32,0,2.600,25.8,1,3
33,0,2.000,25.7,2,2
34,8,2.000,25.7,2,1
35,5,2.700,26.7,2,1
36,0,1.850,23.7,4,3
37,0,2.650,26.8,2,3
38,6,3.150,27.5,2,3
39,0,1.900,23.4,4,3
40,6,2.800,27.9,2,3
41,3,3.100,27.5,3,3
42,5,2.800,26.1,1,1
43,6,2.500,27.7,1,1
44,5,3.300,30.0,2,1
45,9,3.250,28.5,3,1
46,4,2.800,28.9,3,3
47,6,2.600,28.2,2,3
48,4,2.100,25.0,2,3
49,3,3.000,28.5,2,3
50,3,3.600,30.3,2,1
51,5,2.100,24.7,4,3
52,5,2.900,27.7,2,3
53,6,2.700,27.4,1,1
54,4,1.600,22.9,2,3
55,5,2.000,25.7,2,1
56,15,3.000,28.3,2,3
57,3,2.700,27.2,2,3
58,3,2.300,26.2,3,3
59,0,2.750,27.8,2,1
60,0,2.250,25.5,4,3
61,0,2.550,27.1,3,3
62,5,2.050,24.5,3,3
63,3,2.450,27.0,3,1
64,5,2.150,26.0,2,3
65,1,2.800,28.0,2,3
66,8,3.050,30.0,2,3
67,10,3.200,29.0,2,3
68,0,2.400,26.2,2,3
69,0,1.300,26.5,2,1
70,3,2.400,26.2,2,3
71,7,2.800,25.6,3,3
72,1,1.650,23.0,3,3
73,0,1.800,23.0,3,3
74,6,2.250,25.4,2,3
75,0,1.900,24.2,3,3
76,0,1.600,22.9,2,2
77,3,2.200,26.0,3,2
78,4,2.250,25.4,2,3
79,0,1.200,25.7,3,3
80,5,2.100,25.1,2,3
81,0,2.250,24.5,3,2
82,0,2.900,27.5,4,3
83,0,1.650,23.1,3,3
84,4,2.550,25.9,3,1
85,0,2.300,25.8,2,3
86,3,2.250,27.0,4,3
87,0,3.050,28.5,2,3
88,0,2.750,25.5,4,1
89,0,1.900,23.5,4,3
90,0,1.700,24.0,2,2
91,5,3.850,29.7,2,1
92,0,2.550,26.8,2,1
93,0,2.450,26.7,4,3
94,0,3.200,28.7,2,1
95,0,1.550,23.1,3,3
96,1,2.800,29.0,2,1
97,0,2.250,25.5,3,3
98,1,1.967,26.5,3,3
99,1,2.200,24.5,3,3
100,1,3.000,28.5,3,3
101,1,2.867,28.2,2,3
102,1,1.600,24.5,2,3
103,1,2.550,27.5,2,3
104,4,2.550,24.7,2,2
105,1,2.000,25.2,2,1
106,1,2.900,27.3,3,3
107,1,2.400,26.3,2,3
108,1,3.100,29.0,2,3
109,2,1.900,25.3,2,3
110,4,2.300,26.5,2,3
111,3,3.250,27.8,2,3
112,6,2.500,27.0,2,3
113,0,2.100,25.7,3,3
114,2,2.100,25.0,2,3
115,2,3.325,31.9,2,3
116,0,1.800,23.7,4,3
117,12,3.225,29.3,4,3
118,0,1.400,22.0,3,3
119,5,2.400,25.0,2,3
120,6,2.500,27.0,3,3
121,6,1.800,23.8,3,3
122,2,3.275,30.2,1,1
123,0,2.225,26.2,3,3
124,2,1.650,24.2,2,3
125,3,2.900,27.4,2,3
126,0,2.300,25.4,2,2
127,3,3.200,28.4,3,3
128,4,1.475,22.5,4,3
129,2,2.025,26.2,2,3
130,6,2.300,24.9,2,1
131,6,1.950,24.5,1,2
132,0,1.800,25.1,2,3
133,4,2.900,28.0,2,1
134,10,2.250,25.8,4,3
135,7,3.050,27.9,2,3
136,0,2.200,24.9,2,3
137,5,3.100,28.4,2,1
138,5,2.400,27.2,3,3
139,6,2.250,25.0,2,2
140,6,2.625,27.5,2,3
141,7,5.200,33.5,2,1
142,3,3.325,30.5,2,3
143,3,2.925,29.0,3,3
144,0,2.000,24.3,2,1
145,0,2.400,25.8,2,3
146,8,2.100,25.0,4,3
147,4,3.725,31.7,2,1
148,4,3.025,29.5,2,3
149,10,1.900,24.0,3,3
150,9,3.000,30.0,2,3
151,4,2.850,27.6,2,3
152,0,2.300,26.2,2,3
153,0,2.000,23.1,2,1
154,0,1.600,22.9,2,1
155,0,1.900,24.5,4,3
156,4,1.950,24.7,2,3
157,0,3.200,28.3,2,3
158,2,1.850,23.9,2,3
159,0,1.800,23.8,3,3
160,4,3.500,29.8,3,2
161,4,2.350,26.5,2,3
162,3,2.275,26.0,2,3
163,8,3.050,28.2,2,3
164,0,2.150,25.7,4,3
165,7,2.750,26.5,2,3
166,0,2.200,25.8,2,3
167,0,1.800,24.1,3,3
168,2,2.175,26.2,3,3
169,3,2.750,26.1,3,3
170,4,3.275,29.0,3,3
171,0,2.625,28.0,1,1
172,0,2.625,27.0,4,3
173,0,2.000,24.5,2,2"
data_crabs <- read.csv(text = crabs_raw)
data_crabs$color <- factor(data_crabs$color, labels = c("light","medium","dark","darker"))
data_crabs$spine <- factor(data_crabs$spine, labels = c("both_good","one_worn","both_worn"))
cat("Dimensi data:", nrow(data_crabs), "baris x", ncol(data_crabs), "kolom\n")## Dimensi data: 173 baris x 6 kolom
kable(summary(data_crabs[, c("sat","weight","width")]),
caption = "Statistik Deskriptif Variabel Kontinu") %>%
kable_styling(bootstrap_options = c("striped","hover"))| sat | weight | width | |
|---|---|---|---|
| Min. : 0.000 | Min. :1.200 | Min. :21.0 | |
| 1st Qu.: 0.000 | 1st Qu.:2.000 | 1st Qu.:24.9 | |
| Median : 2.000 | Median :2.350 | Median :26.1 | |
| Mean : 2.919 | Mean :2.437 | Mean :26.3 | |
| 3rd Qu.: 5.000 | 3rd Qu.:2.850 | 3rd Qu.:27.7 | |
| Max. :15.000 | Max. :5.200 | Max. :33.5 |
cat("Mean sat:", round(mean(data_crabs$sat), 3),
"| Var sat:", round(var(data_crabs$sat), 3),
"| Rasio Var/Mean:", round(var(data_crabs$sat)/mean(data_crabs$sat), 3), "\n")## Mean sat: 2.919 | Var sat: 9.912 | Rasio Var/Mean: 3.396
ggplot(data_crabs, aes(x = sat)) +
geom_histogram(bins = 16, fill = "#4393c3", color = "white") +
geom_vline(aes(xintercept = mean(sat)), color = "red",
linetype = "dashed", linewidth = 1) +
labs(title = "Distribusi Jumlah Satelit (sat)",
subtitle = paste("Mean =", round(mean(data_crabs$sat), 2)),
x = "Jumlah Satelit", y = "Frekuensi") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Distribusi jumlah satelit
p_w <- ggplot(data_crabs, aes(x = weight, y = sat)) +
geom_point(alpha = 0.5, color = "#2c7bb6") +
geom_smooth(method = "loess", color = "red", se = TRUE) +
labs(title = "Sat vs Weight", x = "Weight (kg)", y = "Jumlah Satelit") +
theme_minimal()
p_wd <- ggplot(data_crabs, aes(x = width, y = sat)) +
geom_point(alpha = 0.5, color = "#1a9641") +
geom_smooth(method = "loess", color = "red", se = TRUE) +
labs(title = "Sat vs Width", x = "Width (cm)", y = "Jumlah Satelit") +
theme_minimal()
grid.arrange(p_w, p_wd, ncol = 2)Hubungan sat dengan weight dan width
p_c <- data_crabs %>%
group_by(color) %>%
summarise(mean_sat = mean(sat), se = sd(sat)/sqrt(n())) %>%
ggplot(aes(x = color, y = mean_sat, fill = color)) +
geom_bar(stat = "identity", color = "white") +
geom_errorbar(aes(ymin = mean_sat-se, ymax = mean_sat+se), width = 0.2) +
scale_fill_brewer(palette = "YlOrBr") +
labs(title = "Rata-rata Satelit per Warna", x = "Warna", y = "Mean Sat") +
theme_minimal() + theme(legend.position = "none")
p_s <- data_crabs %>%
group_by(spine) %>%
summarise(mean_sat = mean(sat), se = sd(sat)/sqrt(n())) %>%
ggplot(aes(x = spine, y = mean_sat, fill = spine)) +
geom_bar(stat = "identity", color = "white") +
geom_errorbar(aes(ymin = mean_sat-se, ymax = mean_sat+se), width = 0.2) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Rata-rata Satelit per Kondisi Duri", x = "Duri", y = "Mean Sat") +
theme_minimal() + theme(legend.position = "none")
grid.arrange(p_c, p_s, ncol = 2)Rata-rata satelit per color dan spine
model_pois <- glm(sat ~ weight + width + color + spine,
family = poisson(link = "log"),
data = data_crabs)
summary(model_pois)##
## Call:
## glm(formula = sat ~ weight + width + color + spine, family = poisson(link = "log"),
## data = data_crabs)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.36180 0.96655 -0.374 0.70817
## weight 0.49647 0.16626 2.986 0.00283 **
## width 0.01675 0.04892 0.342 0.73207
## colormedium -0.26485 0.16811 -1.575 0.11515
## colordark -0.51371 0.19536 -2.629 0.00855 **
## colordarker -0.53086 0.22692 -2.339 0.01931 *
## spineone_worn -0.15037 0.21358 -0.704 0.48139
## spineboth_worn 0.08728 0.11993 0.728 0.46674
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 632.79 on 172 degrees of freedom
## Residual deviance: 549.59 on 165 degrees of freedom
## AIC: 920.88
##
## Number of Fisher Scoring iterations: 6
irr_tbl <- data.frame(
IRR = round(exp(coef(model_pois)), 4),
Lower95 = round(exp(confint(model_pois))[,1], 4),
Upper95 = round(exp(confint(model_pois))[,2], 4)
)
kable(irr_tbl, caption = "IRR dan CI 95% — Model Poisson") %>%
kable_styling(bootstrap_options = c("striped","hover"))| IRR | Lower95 | Upper95 | |
|---|---|---|---|
| (Intercept) | 0.6964 | 0.1057 | 4.6710 |
| weight | 1.6429 | 1.1877 | 2.2782 |
| width | 1.0169 | 0.9232 | 1.1183 |
| colormedium | 0.7673 | 0.5563 | 1.0767 |
| colordark | 0.5983 | 0.4097 | 0.8824 |
| colordarker | 0.5881 | 0.3762 | 0.9175 |
| spineone_worn | 0.8604 | 0.5571 | 1.2906 |
| spineboth_worn | 1.0912 | 0.8655 | 1.3854 |
## Deviance/df: 3.331
##
## Overdispersion test
##
## data: model_pois
## z = 5.1494, p-value = 1.307e-07
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 3.104872
model_quasi <- glm(sat ~ weight + width + color + spine,
family = quasipoisson(link = "log"),
data = data_crabs)
summary(model_quasi)##
## Call:
## glm(formula = sat ~ weight + width + color + spine, family = quasipoisson(link = "log"),
## data = data_crabs)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.36180 1.73852 -0.208 0.8354
## weight 0.49647 0.29905 1.660 0.0988 .
## width 0.01675 0.08799 0.190 0.8493
## colormedium -0.26485 0.30238 -0.876 0.3824
## colordark -0.51371 0.35139 -1.462 0.1457
## colordarker -0.53086 0.40815 -1.301 0.1952
## spineone_worn -0.15037 0.38415 -0.391 0.6960
## spineboth_worn 0.08728 0.21571 0.405 0.6863
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasipoisson family taken to be 3.235255)
##
## Null deviance: 632.79 on 172 degrees of freedom
## Residual deviance: 549.59 on 165 degrees of freedom
## AIC: NA
##
## Number of Fisher Scoring iterations: 6
##
## Call:
## glm.nb(formula = sat ~ weight + width + color + spine, data = data_crabs,
## init.theta = 0.9649768526, link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.277348 1.950163 -0.142 0.8869
## weight 0.700891 0.356475 1.966 0.0493 *
## width -0.002437 0.099663 -0.024 0.9805
## colormedium -0.320756 0.372720 -0.861 0.3895
## colordark -0.596268 0.417349 -1.429 0.1531
## colordarker -0.579003 0.466470 -1.241 0.2145
## spineone_worn -0.242703 0.398367 -0.609 0.5424
## spineboth_worn 0.042779 0.248431 0.172 0.8633
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(0.965) family taken to be 1)
##
## Null deviance: 220.67 on 172 degrees of freedom
## Residual deviance: 196.51 on 165 degrees of freedom
## AIC: 763.32
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 0.965
## Std. Err.: 0.176
##
## 2 x log-likelihood: -745.320
##
## IRR Negative Binomial:
irr_nb <- data.frame(
IRR = round(exp(coef(model_nb)), 4),
Lower95 = round(exp(confint(model_nb))[,1], 4),
Upper95 = round(exp(confint(model_nb))[,2], 4)
)
kable(irr_nb, caption = "IRR — Negative Binomial") %>%
kable_styling(bootstrap_options = c("striped","hover"))| IRR | Lower95 | Upper95 | |
|---|---|---|---|
| (Intercept) | 0.7578 | 0.0115 | 48.4076 |
| weight | 2.0155 | 0.9184 | 4.4909 |
| width | 0.9976 | 0.8027 | 1.2409 |
| colormedium | 0.7256 | 0.3315 | 1.4984 |
| colordark | 0.5509 | 0.2355 | 1.2328 |
| colordarker | 0.5605 | 0.2157 | 1.4181 |
| spineone_worn | 0.7845 | 0.3694 | 1.7124 |
| spineboth_worn | 1.0437 | 0.6371 | 1.6814 |
kable(data.frame(
Model = c("Poisson", "Negative Binomial"),
AIC = round(c(AIC(model_pois), AIC(model_nb)), 2),
Deviance = round(c(deviance(model_pois), deviance(model_nb)), 2),
df = c(model_pois$df.residual, model_nb$df.residual)
), caption = "Perbandingan Model") %>%
kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)| Model | AIC | Deviance | df |
|---|---|---|---|
| Poisson | 920.88 | 549.59 | 165 |
| Negative Binomial | 763.32 | 196.51 | 165 |
data_crabs$fitted_pois <- fitted(model_pois)
data_crabs$fitted_nb <- fitted(model_nb)
ggplot(data_crabs) +
geom_point(aes(x = sat, y = fitted_pois, color = "Poisson"), alpha = 0.6) +
geom_point(aes(x = sat, y = fitted_nb, color = "Neg.Binom"), alpha = 0.6) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
scale_color_manual(values = c("Poisson"="#2166ac","Neg.Binom"="#d6604d"),
name = "Model") +
labs(title = "Observed vs Fitted: Poisson vs Negative Binomial",
x = "Observed (sat)", y = "Fitted") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Observed vs Fitted — Poisson vs Negative Binomial
irr_df <- data.frame(
term = names(coef(model_pois)),
IRR = exp(coef(model_pois)),
lower = exp(confint(model_pois))[,1],
upper = exp(confint(model_pois))[,2]
) %>% filter(term != "(Intercept)")
ggplot(irr_df, aes(x = reorder(term, IRR), y = IRR)) +
geom_point(size = 4, color = "#4dac26") +
geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.3, color = "#4dac26") +
geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
coord_flip() +
labs(title = "Incidence Rate Ratio (IRR) dengan CI 95% — Regresi Poisson",
x = "Prediktor", y = "IRR") +
theme_minimal(base_size = 13) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))Forest plot IRR regresi Poisson
kable(data.frame(
Analisis = c("Multinomial Logistik", "Ordinal Logistik", "Regresi Poisson"),
Data = c("200 siswa", "932 responden", "173 kepiting"),
Respon = c("prog (3 kategori)", "opinion (3 ordinal)", "sat (cacahan)"),
Prediktor = c("ses + write", "race + gender", "weight + width + color + spine"),
`AIC Model` = c(round(AIC(model_multi),1), round(AIC(model_ord),1), round(AIC(model_pois),1))
), caption = "Ringkasan Seluruh Model") %>%
kable_styling(bootstrap_options = c("striped","hover","bordered"))| Analisis | Data | Respon | Prediktor | AIC.Model |
|---|---|---|---|---|
| Multinomial Logistik | 200 siswa | prog (3 kategori) | ses + write | 376.0 |
| Ordinal Logistik | 932 responden | opinion (3 ordinal) | race + gender | 1555.7 |
| Regresi Poisson | 173 kepiting | sat (cacahan) | weight + width + color + spine | 920.9 |
Catatan: - Untuk model Poisson, jika terdeteksi overdispersi, gunakan Quasi-Poisson atau Negative Binomial. - Untuk model ordinal, uji asumsi proportional odds menggunakan
brant::brant(model_ord). - Semua model menggunakan kategori referensi yang disebutkan pada masing-masing bagian.