packages <- c("nnet", "MASS", "ggplot2", "dplyr", "tidyr",
              "broom", "RColorBrewer", "pscl", "AER",
              "lmtest", "gridExtra", "knitr", "kableExtra")

for (pkg in packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "https://cloud.r-project.org")
  }
}

library(nnet); library(MASS); library(ggplot2)
library(dplyr); library(tidyr); library(broom)
library(RColorBrewer); library(pscl); library(AER)
library(lmtest); library(gridExtra); library(knitr); library(kableExtra)

1 Regresi Logistik Multinomial

1.1 Deskripsi Data

Dataset ini berisi data dari 200 siswa yang dikumpulkan untuk mengetahui pengaruh status sosial ekonomi (ses) dan kemampuan menulis (write) terhadap jenis program pendidikan (prog) yang diikuti siswa.

Variabel Tipe Keterangan
prog Respon Jenis program: General, Academic, Vocational
ses Prediktor Status sosial ekonomi: Low, Middle, High
write Prediktor Skor kemampuan menulis (kontinu)

1.2 Input Data

data_multi <- data.frame(
  ses = c(
    "low","middle","high","low","middle","high","middle","middle","middle","middle",
    "low","high","low","middle","low","low","low","middle","middle","high",
    "middle","middle","low","low","low","high","low","middle","low","low",
    "middle","low","low","low","middle","middle","middle","high","low","middle",
    "middle","high","low","high","low","low","middle","low","middle","middle",
    "middle","middle","low","low","middle","low","high","high","low","high",
    "middle","middle","high","middle","middle","middle","low","middle","middle","middle",
    "high","middle","high","low","middle","low","middle","middle","middle","middle",
    "low","middle","high","low","high","low","middle","middle","middle","middle",
    "high","high","high","middle","middle","middle","middle","middle","middle","middle",
    "middle","middle","middle","middle","middle","low","low","middle","middle","middle",
    "low","middle","middle","middle","middle","middle","high","high","middle","middle",
    "low","middle","middle","high","middle","high","middle","middle","middle","high",
    "middle","high","low","high","middle","high","middle","high","low","high",
    "high","low","middle","middle","middle","middle","middle","low","middle","high",
    "middle","high","middle","middle","low","middle","high","high","low","high",
    "high","high","low","high","middle","middle","high","high","high","high",
    "low","high","low","high","high","low","middle","low","low","high",
    "high","middle","high","middle","high","high","high","high","high","middle",
    "high","high","middle","high","high","high","middle","middle","middle","middle"),
  write = c(
    35,33,39,37,31,36,36,31,41,37,44,33,31,44,35,44,46,46,46,49,
    39,44,47,44,37,38,49,44,41,50,44,39,40,46,41,39,33,59,41,47,
    31,42,55,40,44,54,46,54,42,49,41,41,44,57,52,49,41,57,62,33,
    54,40,52,57,49,46,57,49,52,53,54,57,41,52,57,54,52,52,44,46,
    49,54,52,44,49,46,54,39,59,45,52,54,44,50,62,59,52,52,46,41,
    52,52,55,41,49,59,54,54,59,55,62,54,54,54,59,57,61,52,59,62,
    60,59,65,61,59,59,59,59,65,57,59,49,49,59,62,59,54,63,43,54,
    52,57,57,57,61,62,62,65,57,59,59,62,65,62,62,62,54,62,65,62,
    67,65,60,59,59,59,65,62,65,59,59,61,65,67,65,65,67,65,62,52,
    63,59,65,59,67,67,60,67,62,54,65,62,59,60,63,65,63,67,65,62),
  prog = c(
    "vocation","general","vocation","vocation","vocation","general","vocation","vocation","vocation","vocation",
    "vocation","academic","vocation","vocation","vocation","general","general","vocation","academic","vocation",
    "general","vocation","vocation","vocation","academic","academic","general","general","academic","academic",
    "general","vocation","academic","academic","vocation","vocation","vocation","academic","general","academic",
    "general","academic","academic","vocation","academic","vocation","vocation","general","vocation","academic",
    "academic","vocation","general","academic","academic","general","academic","general","vocation","general",
    "vocation","academic","academic","vocation","vocation","vocation","general","academic","academic","general",
    "academic","academic","academic","general","vocation","general","vocation","general","general","academic",
    "vocation","academic","academic","general","vocation","academic","general","general","general","vocation",
    "vocation","general","vocation","academic","vocation","general","academic","vocation","vocation","general",
    "academic","vocation","vocation","vocation","general","academic","general","general","academic","academic",
    "academic","academic","academic","academic","academic","vocation","academic","academic","academic","academic",
    "vocation","academic","academic","academic","academic","academic","academic","academic","academic","academic",
    "academic","academic","general","academic","academic","general","academic","academic","academic","academic",
    "academic","academic","general","general","academic","general","general","general","academic","academic",
    "academic","academic","academic","vocation","general","academic","academic","academic","academic","academic",
    "general","general","academic","academic","academic","vocation","general","academic","academic","general",
    "general","academic","academic","vocation","academic","academic","academic","academic","academic","academic",
    "academic","academic","academic","general","academic","academic","academic","academic","academic","academic",
    "academic","academic","academic","academic","academic","academic","vocation","academic","academic","academic")
)

data_multi$ses  <- factor(data_multi$ses,  levels = c("low","middle","high"))
data_multi$prog <- factor(data_multi$prog, levels = c("general","academic","vocation"))

cat("Dimensi data:", nrow(data_multi), "baris x", ncol(data_multi), "kolom\n")
## Dimensi data: 200 baris x 3 kolom

1.3 Statistik Deskriptif

# Tabel frekuensi prog
kable(as.data.frame(table(Program = data_multi$prog)),
      caption = "Distribusi Jenis Program") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Distribusi Jenis Program
Program Freq
general 45
academic 105
vocation 50
# Tabel frekuensi ses
kable(as.data.frame(table(SES = data_multi$ses)),
      caption = "Distribusi Status Sosial Ekonomi") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Distribusi Status Sosial Ekonomi
SES Freq
low 47
middle 95
high 58
# Ringkasan write
kable(data.frame(
  Min    = min(data_multi$write),
  Q1     = quantile(data_multi$write, 0.25),
  Median = median(data_multi$write),
  Mean   = round(mean(data_multi$write), 2),
  Q3     = quantile(data_multi$write, 0.75),
  Max    = max(data_multi$write)
), caption = "Statistik Write Score") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Statistik Write Score
Min Q1 Median Mean Q3 Max
25% 31 45.75 54 52.77 60 67

1.4 Visualisasi Eksplorasi

ggplot(data_multi, aes(x = ses, fill = prog)) +
  geom_bar(position = "dodge", color = "white") +
  scale_fill_brewer(palette = "Set2", name = "Program") +
  labs(title = "Distribusi Jenis Program per Status Sosial Ekonomi",
       x = "SES", y = "Frekuensi") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Distribusi program per SES

Distribusi program per SES

ggplot(data_multi, aes(x = prog, y = write, fill = prog)) +
  geom_boxplot(alpha = 0.7) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Distribusi Write Score per Jenis Program",
       x = "Program", y = "Write Score") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5),
        legend.position = "none")
Write score per program

Write score per program

1.5 Pemodelan

# Kategori referensi: "general"
model_multi <- multinom(prog ~ ses + write, data = data_multi, trace = FALSE)
summary(model_multi)
## Call:
## multinom(formula = prog ~ ses + write, data = data_multi, trace = FALSE)
## 
## Coefficients:
##          (Intercept) sesmiddle   seshigh       write
## academic   -2.851973 0.5332914 1.1628257  0.05792480
## vocation    2.366097 0.8246384 0.1802176 -0.05567514
## 
## Std. Errors:
##          (Intercept) sesmiddle   seshigh      write
## academic    1.166437 0.4437319 0.5142215 0.02141092
## vocation    1.174251 0.4901237 0.6484508 0.02333135
## 
## Residual Deviance: 359.9635 
## AIC: 375.9635

1.5.1 Nilai Z dan P-value

z_multi <- summary(model_multi)$coefficients / summary(model_multi)$standard.errors
p_multi  <- (1 - pnorm(abs(z_multi), 0, 1)) * 2

kable(round(p_multi, 4), caption = "P-value Koefisien") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
P-value Koefisien
(Intercept) sesmiddle seshigh write
academic 0.0145 0.2294 0.0237 0.0068
vocation 0.0439 0.0925 0.7811 0.0170

1.5.2 Odds Ratio

or_multi <- exp(coef(model_multi))
ci_multi <- exp(confint(model_multi))

kable(round(or_multi, 4), caption = "Odds Ratio") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
Odds Ratio
(Intercept) sesmiddle seshigh write
academic 0.0577 1.7045 3.1990 1.0596
vocation 10.6557 2.2811 1.1975 0.9458

1.6 Uji Kebaikan Model

null_multi  <- multinom(prog ~ 1, data = data_multi, trace = FALSE)
lr_test     <- anova(null_multi, model_multi)
mcfadden_m  <- as.numeric(1 - logLik(model_multi)/logLik(null_multi))

cat("AIC         :", round(AIC(model_multi), 2), "\n")
## AIC         : 375.96
cat("BIC         :", round(BIC(model_multi), 2), "\n")
## BIC         : 402.35
cat("Pseudo R²   :", round(mcfadden_m, 4), "\n")
## Pseudo R²   : 0.1182
print(lr_test)
## Likelihood ratio tests of Multinomial Models
## 
## Response: prog
##         Model Resid. df Resid. Dev   Test    Df LR stat.      Pr(Chi)
## 1           1       398   408.1933                                   
## 2 ses + write       392   359.9635 1 vs 2     6  48.2299 1.063001e-08

1.7 Confusion Matrix & Akurasi

pred_multi <- predict(model_multi, type = "class")
conf_mat   <- table(Aktual = data_multi$prog, Prediksi = pred_multi)

kable(conf_mat, caption = "Confusion Matrix") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
Confusion Matrix
general academic vocation
general 7 27 11
academic 4 92 9
vocation 4 23 23
cat("Akurasi:", round(sum(diag(conf_mat))/sum(conf_mat)*100, 2), "%\n")
## Akurasi: 61 %

1.8 Visualisasi Probabilitas Prediksi

write_seq <- seq(min(data_multi$write), max(data_multi$write), length.out = 100)
pred_df   <- expand.grid(write = write_seq, ses = levels(data_multi$ses))
prob_pred <- predict(model_multi, newdata = pred_df, type = "probs")
pred_df   <- cbind(pred_df, prob_pred)
pred_long <- pivot_longer(pred_df, cols = c("general","academic","vocation"),
                           names_to = "prog", values_to = "prob")

ggplot(pred_long, aes(x = write, y = prob, color = prog)) +
  geom_line(linewidth = 1.1) +
  facet_wrap(~ ses, labeller = labeller(
    ses = c(low = "SES: Low", middle = "SES: Middle", high = "SES: High"))) +
  scale_color_brewer(palette = "Set2", name = "Program") +
  labs(title = "Probabilitas Prediksi berdasarkan Write Score & SES",
       x = "Write Score", y = "Probabilitas") +
  theme_minimal(base_size = 12) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Probabilitas prediksi per write score & SES

Probabilitas prediksi per write score & SES


2 Regresi Logistik Ordinal

2.1 Deskripsi Data

Data tabel kontingensi tiga arah tentang kepercayaan terhadap kehidupan setelah kematian (afterlife).

Variabel Tipe Keterangan
opinion Respon yes > undecided > no (ordinal)
race Prediktor white / black
gender Prediktor female / male

2.2 Input Data

data_ord_raw <- data.frame(
  race   = c(rep("white",3), rep("white",3), rep("black",3), rep("black",3)),
  gender = c(rep("female",3), rep("male",3), rep("female",3), rep("male",3)),
  opinion= rep(c("yes","undecided","no"), 4),
  n      = c(371,49,74, 250,45,71, 64,9,15, 25,5,13)
)

# Expand ke unit record
data_ord <- data_ord_raw[rep(seq_len(nrow(data_ord_raw)), data_ord_raw$n),
                          c("race","gender","opinion")]
rownames(data_ord) <- NULL

data_ord$opinion <- factor(data_ord$opinion,
                            levels = c("no","undecided","yes"), ordered = TRUE)
data_ord$race    <- factor(data_ord$race,   levels = c("white","black"))
data_ord$gender  <- factor(data_ord$gender, levels = c("female","male"))

cat("Total observasi:", nrow(data_ord), "\n")
## Total observasi: 991

2.3 Statistik Deskriptif

kable(as.data.frame(table(Opinion = data_ord$opinion)),
      caption = "Distribusi Opinion") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Distribusi Opinion
Opinion Freq
no 173
undecided 108
yes 710
kable(as.data.frame(ftable(data_ord$race, data_ord$gender, data_ord$opinion)),
      caption = "Tabel Kontingensi 3 Arah") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
Tabel Kontingensi 3 Arah
Var1 Var2 Var3 Freq
white female no 74
black female no 15
white male no 71
black male no 13
white female undecided 49
black female undecided 9
white male undecided 45
black male undecided 5
white female yes 371
black female yes 64
white male yes 250
black male yes 25

2.4 Visualisasi Eksplorasi

data_ord_raw$opinion <- factor(data_ord_raw$opinion, levels = c("no","undecided","yes"))

ggplot(data_ord_raw, aes(x = gender, y = n, fill = opinion)) +
  geom_bar(stat = "identity", position = "fill", color = "white") +
  facet_wrap(~ race) +
  scale_fill_manual(
    values = c("no"="#d73027","undecided"="#fee08b","yes"="#1a9850"),
    name = "Opinion") +
  labs(title = "Proporsi Pendapat tentang Afterlife berdasarkan Ras dan Gender",
       x = "Gender", y = "Proporsi") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Proporsi opinion per race dan gender

Proporsi opinion per race dan gender

2.5 Pemodelan

model_ord <- polr(opinion ~ race + gender, data = data_ord, Hess = TRUE)
summary(model_ord)
## Call:
## polr(formula = opinion ~ race + gender, data = data_ord, Hess = TRUE)
## 
## Coefficients:
##              Value Std. Error t value
## raceblack  -0.2657     0.2003  -1.327
## gendermale -0.3786     0.1407  -2.691
## 
## Intercepts:
##               Value    Std. Error t value 
## no|undecided   -1.7579   0.1116   -15.7487
## undecided|yes  -1.1266   0.1008   -11.1765
## 
## Residual Deviance: 1547.736 
## AIC: 1555.736

2.5.1 Koefisien + P-value

ct_ord <- coef(summary(model_ord))
p_ord  <- pnorm(abs(ct_ord[,"t value"]), lower.tail = FALSE) * 2
ct_out <- cbind(round(ct_ord, 4), "p.value" = round(p_ord, 5))

kable(ct_out, caption = "Koefisien, SE, t-value, dan P-value") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
Koefisien, SE, t-value, dan P-value
Value Std. Error t value p.value
raceblack -0.2657 0.2003 -1.3268 0.18457
gendermale -0.3786 0.1407 -2.6911 0.00712
no&#124;undecided -1.7579 0.1116 -15.7487 0.00000
undecided&#124;yes -1.1266 0.1008 -11.1765 0.00000

2.5.2 Odds Ratio & CI

or_ord  <- exp(coef(model_ord))
ci_ord  <- exp(confint(model_ord))
or_tbl  <- data.frame(OR = round(or_ord, 4),
                       Lower95 = round(ci_ord[,1], 4),
                       Upper95 = round(ci_ord[,2], 4))

kable(or_tbl, caption = "Odds Ratio dan Interval Kepercayaan 95%") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Odds Ratio dan Interval Kepercayaan 95%
OR Lower95 Upper95
raceblack 0.7666 0.5210 1.1444
gendermale 0.6849 0.5198 0.9025

2.6 Uji Kebaikan Model

null_ord   <- polr(opinion ~ 1, data = data_ord, Hess = TRUE)
mcfadden_o <- as.numeric(1 - logLik(model_ord)/logLik(null_ord))

cat("AIC:", round(AIC(model_ord), 2), "\n")
## AIC: 1555.74
cat("BIC:", round(BIC(model_ord), 2), "\n")
## BIC: 1575.33
cat("Pseudo R² McFadden:", round(mcfadden_o, 4), "\n")
## Pseudo R² McFadden: 0.0054
print(anova(null_ord, model_ord))
## Likelihood ratio tests of ordinal regression models
## 
## Response: opinion
##           Model Resid. df Resid. Dev   Test    Df LR stat.    Pr(Chi)
## 1             1       989   1556.197                                 
## 2 race + gender       987   1547.736 1 vs 2     2 8.460807 0.01454652

2.7 Probabilitas Prediksi

new_data_ord <- expand.grid(race = c("white","black"), gender = c("female","male"))
prob_ord     <- predict(model_ord, newdata = new_data_ord, type = "probs")
prob_ord_df  <- cbind(new_data_ord, prob_ord)
prob_long    <- pivot_longer(prob_ord_df, cols = c("no","undecided","yes"),
                              names_to = "opinion", values_to = "prob")
prob_long$opinion <- factor(prob_long$opinion, levels = c("no","undecided","yes"))
prob_long$group   <- paste(prob_long$race, prob_long$gender, sep = " - ")

ggplot(prob_long, aes(x = group, y = prob, fill = opinion)) +
  geom_bar(stat = "identity", color = "white") +
  scale_fill_manual(
    values = c("no"="#d73027","undecided"="#fee08b","yes"="#1a9850"),
    name = "Opinion") +
  labs(title = "Probabilitas Prediksi Ordinal per Kelompok",
       x = "Kelompok", y = "Probabilitas") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5),
        axis.text.x = element_text(angle = 20, hjust = 1))
Probabilitas prediksi ordinal per kelompok

Probabilitas prediksi ordinal per kelompok

2.8 Forest Plot Odds Ratio

or_df_ord <- data.frame(
  term  = names(or_ord),
  OR    = or_ord,
  lower = ci_ord[,1],
  upper = ci_ord[,2]
)

ggplot(or_df_ord, aes(x = term, y = OR)) +
  geom_point(size = 4, color = "#2166ac") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.2, color = "#2166ac") +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  labs(title = "Odds Ratio dengan CI 95% — Regresi Ordinal",
       x = "Prediktor", y = "Odds Ratio") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Forest plot OR regresi ordinal

Forest plot OR regresi ordinal


3 Regresi Poisson

3.1 Deskripsi Data

Dataset Horseshoe Crabs (n = 173). Variabel respon adalah sat (jumlah satelit — data cacahan), dengan prediktor berupa karakteristik fisik kepiting.

Variabel Tipe Keterangan
sat Respon Jumlah kepiting jantan satelit (cacahan ≥ 0)
weight Prediktor Berat kepiting betina (kg)
width Prediktor Lebar karapas (cm)
color Prediktor Warna cangkang (1–4)
spine Prediktor Kondisi duri (1–3)

3.2 Input Data

crabs_raw <- "crab,sat,weight,width,color,spine
1,8,3.050,28.3,2,3
2,0,1.550,22.5,3,3
3,9,2.300,26.0,1,1
4,0,2.100,24.8,3,3
5,4,2.600,26.0,3,3
6,0,2.100,23.8,2,3
7,0,2.350,26.5,1,1
8,0,1.900,24.7,3,2
9,0,1.950,23.7,2,1
10,0,2.150,25.6,3,3
11,0,2.150,24.3,3,3
12,0,2.650,25.8,2,3
13,11,3.050,28.2,2,3
14,0,1.850,21.0,4,2
15,14,2.300,26.0,2,1
16,8,2.950,27.1,1,1
17,1,2.000,25.2,2,3
18,1,3.000,29.0,2,3
19,0,2.200,24.7,4,3
20,5,2.700,27.4,2,3
21,4,1.950,23.2,2,2
22,3,2.300,25.0,1,2
23,1,1.600,22.5,2,1
24,2,2.600,26.7,3,3
25,3,2.000,25.8,4,3
26,0,1.300,26.2,4,3
27,3,3.150,28.7,2,3
28,5,2.700,26.8,2,1
29,0,2.600,27.5,4,3
30,0,2.100,24.9,2,3
31,4,3.200,29.3,1,1
32,0,2.600,25.8,1,3
33,0,2.000,25.7,2,2
34,8,2.000,25.7,2,1
35,5,2.700,26.7,2,1
36,0,1.850,23.7,4,3
37,0,2.650,26.8,2,3
38,6,3.150,27.5,2,3
39,0,1.900,23.4,4,3
40,6,2.800,27.9,2,3
41,3,3.100,27.5,3,3
42,5,2.800,26.1,1,1
43,6,2.500,27.7,1,1
44,5,3.300,30.0,2,1
45,9,3.250,28.5,3,1
46,4,2.800,28.9,3,3
47,6,2.600,28.2,2,3
48,4,2.100,25.0,2,3
49,3,3.000,28.5,2,3
50,3,3.600,30.3,2,1
51,5,2.100,24.7,4,3
52,5,2.900,27.7,2,3
53,6,2.700,27.4,1,1
54,4,1.600,22.9,2,3
55,5,2.000,25.7,2,1
56,15,3.000,28.3,2,3
57,3,2.700,27.2,2,3
58,3,2.300,26.2,3,3
59,0,2.750,27.8,2,1
60,0,2.250,25.5,4,3
61,0,2.550,27.1,3,3
62,5,2.050,24.5,3,3
63,3,2.450,27.0,3,1
64,5,2.150,26.0,2,3
65,1,2.800,28.0,2,3
66,8,3.050,30.0,2,3
67,10,3.200,29.0,2,3
68,0,2.400,26.2,2,3
69,0,1.300,26.5,2,1
70,3,2.400,26.2,2,3
71,7,2.800,25.6,3,3
72,1,1.650,23.0,3,3
73,0,1.800,23.0,3,3
74,6,2.250,25.4,2,3
75,0,1.900,24.2,3,3
76,0,1.600,22.9,2,2
77,3,2.200,26.0,3,2
78,4,2.250,25.4,2,3
79,0,1.200,25.7,3,3
80,5,2.100,25.1,2,3
81,0,2.250,24.5,3,2
82,0,2.900,27.5,4,3
83,0,1.650,23.1,3,3
84,4,2.550,25.9,3,1
85,0,2.300,25.8,2,3
86,3,2.250,27.0,4,3
87,0,3.050,28.5,2,3
88,0,2.750,25.5,4,1
89,0,1.900,23.5,4,3
90,0,1.700,24.0,2,2
91,5,3.850,29.7,2,1
92,0,2.550,26.8,2,1
93,0,2.450,26.7,4,3
94,0,3.200,28.7,2,1
95,0,1.550,23.1,3,3
96,1,2.800,29.0,2,1
97,0,2.250,25.5,3,3
98,1,1.967,26.5,3,3
99,1,2.200,24.5,3,3
100,1,3.000,28.5,3,3
101,1,2.867,28.2,2,3
102,1,1.600,24.5,2,3
103,1,2.550,27.5,2,3
104,4,2.550,24.7,2,2
105,1,2.000,25.2,2,1
106,1,2.900,27.3,3,3
107,1,2.400,26.3,2,3
108,1,3.100,29.0,2,3
109,2,1.900,25.3,2,3
110,4,2.300,26.5,2,3
111,3,3.250,27.8,2,3
112,6,2.500,27.0,2,3
113,0,2.100,25.7,3,3
114,2,2.100,25.0,2,3
115,2,3.325,31.9,2,3
116,0,1.800,23.7,4,3
117,12,3.225,29.3,4,3
118,0,1.400,22.0,3,3
119,5,2.400,25.0,2,3
120,6,2.500,27.0,3,3
121,6,1.800,23.8,3,3
122,2,3.275,30.2,1,1
123,0,2.225,26.2,3,3
124,2,1.650,24.2,2,3
125,3,2.900,27.4,2,3
126,0,2.300,25.4,2,2
127,3,3.200,28.4,3,3
128,4,1.475,22.5,4,3
129,2,2.025,26.2,2,3
130,6,2.300,24.9,2,1
131,6,1.950,24.5,1,2
132,0,1.800,25.1,2,3
133,4,2.900,28.0,2,1
134,10,2.250,25.8,4,3
135,7,3.050,27.9,2,3
136,0,2.200,24.9,2,3
137,5,3.100,28.4,2,1
138,5,2.400,27.2,3,3
139,6,2.250,25.0,2,2
140,6,2.625,27.5,2,3
141,7,5.200,33.5,2,1
142,3,3.325,30.5,2,3
143,3,2.925,29.0,3,3
144,0,2.000,24.3,2,1
145,0,2.400,25.8,2,3
146,8,2.100,25.0,4,3
147,4,3.725,31.7,2,1
148,4,3.025,29.5,2,3
149,10,1.900,24.0,3,3
150,9,3.000,30.0,2,3
151,4,2.850,27.6,2,3
152,0,2.300,26.2,2,3
153,0,2.000,23.1,2,1
154,0,1.600,22.9,2,1
155,0,1.900,24.5,4,3
156,4,1.950,24.7,2,3
157,0,3.200,28.3,2,3
158,2,1.850,23.9,2,3
159,0,1.800,23.8,3,3
160,4,3.500,29.8,3,2
161,4,2.350,26.5,2,3
162,3,2.275,26.0,2,3
163,8,3.050,28.2,2,3
164,0,2.150,25.7,4,3
165,7,2.750,26.5,2,3
166,0,2.200,25.8,2,3
167,0,1.800,24.1,3,3
168,2,2.175,26.2,3,3
169,3,2.750,26.1,3,3
170,4,3.275,29.0,3,3
171,0,2.625,28.0,1,1
172,0,2.625,27.0,4,3
173,0,2.000,24.5,2,2"

data_crabs        <- read.csv(text = crabs_raw)
data_crabs$color  <- factor(data_crabs$color, labels = c("light","medium","dark","darker"))
data_crabs$spine  <- factor(data_crabs$spine, labels = c("both_good","one_worn","both_worn"))

cat("Dimensi data:", nrow(data_crabs), "baris x", ncol(data_crabs), "kolom\n")
## Dimensi data: 173 baris x 6 kolom

3.3 Statistik Deskriptif

kable(summary(data_crabs[, c("sat","weight","width")]),
      caption = "Statistik Deskriptif Variabel Kontinu") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
Statistik Deskriptif Variabel Kontinu
sat weight width
Min. : 0.000 Min. :1.200 Min. :21.0
1st Qu.: 0.000 1st Qu.:2.000 1st Qu.:24.9
Median : 2.000 Median :2.350 Median :26.1
Mean : 2.919 Mean :2.437 Mean :26.3
3rd Qu.: 5.000 3rd Qu.:2.850 3rd Qu.:27.7
Max. :15.000 Max. :5.200 Max. :33.5
cat("Mean sat:", round(mean(data_crabs$sat), 3),
    "| Var sat:", round(var(data_crabs$sat), 3),
    "| Rasio Var/Mean:", round(var(data_crabs$sat)/mean(data_crabs$sat), 3), "\n")
## Mean sat: 2.919 | Var sat: 9.912 | Rasio Var/Mean: 3.396

3.4 Visualisasi Eksplorasi

ggplot(data_crabs, aes(x = sat)) +
  geom_histogram(bins = 16, fill = "#4393c3", color = "white") +
  geom_vline(aes(xintercept = mean(sat)), color = "red",
             linetype = "dashed", linewidth = 1) +
  labs(title = "Distribusi Jumlah Satelit (sat)",
       subtitle = paste("Mean =", round(mean(data_crabs$sat), 2)),
       x = "Jumlah Satelit", y = "Frekuensi") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Distribusi jumlah satelit

Distribusi jumlah satelit

p_w <- ggplot(data_crabs, aes(x = weight, y = sat)) +
  geom_point(alpha = 0.5, color = "#2c7bb6") +
  geom_smooth(method = "loess", color = "red", se = TRUE) +
  labs(title = "Sat vs Weight", x = "Weight (kg)", y = "Jumlah Satelit") +
  theme_minimal()

p_wd <- ggplot(data_crabs, aes(x = width, y = sat)) +
  geom_point(alpha = 0.5, color = "#1a9641") +
  geom_smooth(method = "loess", color = "red", se = TRUE) +
  labs(title = "Sat vs Width", x = "Width (cm)", y = "Jumlah Satelit") +
  theme_minimal()

grid.arrange(p_w, p_wd, ncol = 2)
Hubungan sat dengan weight dan width

Hubungan sat dengan weight dan width

p_c <- data_crabs %>%
  group_by(color) %>%
  summarise(mean_sat = mean(sat), se = sd(sat)/sqrt(n())) %>%
  ggplot(aes(x = color, y = mean_sat, fill = color)) +
  geom_bar(stat = "identity", color = "white") +
  geom_errorbar(aes(ymin = mean_sat-se, ymax = mean_sat+se), width = 0.2) +
  scale_fill_brewer(palette = "YlOrBr") +
  labs(title = "Rata-rata Satelit per Warna", x = "Warna", y = "Mean Sat") +
  theme_minimal() + theme(legend.position = "none")

p_s <- data_crabs %>%
  group_by(spine) %>%
  summarise(mean_sat = mean(sat), se = sd(sat)/sqrt(n())) %>%
  ggplot(aes(x = spine, y = mean_sat, fill = spine)) +
  geom_bar(stat = "identity", color = "white") +
  geom_errorbar(aes(ymin = mean_sat-se, ymax = mean_sat+se), width = 0.2) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Rata-rata Satelit per Kondisi Duri", x = "Duri", y = "Mean Sat") +
  theme_minimal() + theme(legend.position = "none")

grid.arrange(p_c, p_s, ncol = 2)
Rata-rata satelit per color dan spine

Rata-rata satelit per color dan spine

3.5 Pemodelan Poisson

model_pois <- glm(sat ~ weight + width + color + spine,
                  family = poisson(link = "log"),
                  data = data_crabs)
summary(model_pois)
## 
## Call:
## glm(formula = sat ~ weight + width + color + spine, family = poisson(link = "log"), 
##     data = data_crabs)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)   
## (Intercept)    -0.36180    0.96655  -0.374  0.70817   
## weight          0.49647    0.16626   2.986  0.00283 **
## width           0.01675    0.04892   0.342  0.73207   
## colormedium    -0.26485    0.16811  -1.575  0.11515   
## colordark      -0.51371    0.19536  -2.629  0.00855 **
## colordarker    -0.53086    0.22692  -2.339  0.01931 * 
## spineone_worn  -0.15037    0.21358  -0.704  0.48139   
## spineboth_worn  0.08728    0.11993   0.728  0.46674   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 632.79  on 172  degrees of freedom
## Residual deviance: 549.59  on 165  degrees of freedom
## AIC: 920.88
## 
## Number of Fisher Scoring iterations: 6

3.5.1 Incidence Rate Ratio (IRR)

irr_tbl <- data.frame(
  IRR     = round(exp(coef(model_pois)), 4),
  Lower95 = round(exp(confint(model_pois))[,1], 4),
  Upper95 = round(exp(confint(model_pois))[,2], 4)
)

kable(irr_tbl, caption = "IRR dan CI 95% — Model Poisson") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
IRR dan CI 95% — Model Poisson
IRR Lower95 Upper95
(Intercept) 0.6964 0.1057 4.6710
weight 1.6429 1.1877 2.2782
width 1.0169 0.9232 1.1183
colormedium 0.7673 0.5563 1.0767
colordark 0.5983 0.4097 0.8824
colordarker 0.5881 0.3762 0.9175
spineone_worn 0.8604 0.5571 1.2906
spineboth_worn 1.0912 0.8655 1.3854

3.6 Uji Overdispersi

cat("Deviance/df:", round(model_pois$deviance / model_pois$df.residual, 3), "\n")
## Deviance/df: 3.331
print(dispersiontest(model_pois))
## 
##  Overdispersion test
## 
## data:  model_pois
## z = 5.1494, p-value = 1.307e-07
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   3.104872

3.7 Model Quasi-Poisson (Koreksi Overdispersi)

model_quasi <- glm(sat ~ weight + width + color + spine,
                   family = quasipoisson(link = "log"),
                   data = data_crabs)
summary(model_quasi)
## 
## Call:
## glm(formula = sat ~ weight + width + color + spine, family = quasipoisson(link = "log"), 
##     data = data_crabs)
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    -0.36180    1.73852  -0.208   0.8354  
## weight          0.49647    0.29905   1.660   0.0988 .
## width           0.01675    0.08799   0.190   0.8493  
## colormedium    -0.26485    0.30238  -0.876   0.3824  
## colordark      -0.51371    0.35139  -1.462   0.1457  
## colordarker    -0.53086    0.40815  -1.301   0.1952  
## spineone_worn  -0.15037    0.38415  -0.391   0.6960  
## spineboth_worn  0.08728    0.21571   0.405   0.6863  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for quasipoisson family taken to be 3.235255)
## 
##     Null deviance: 632.79  on 172  degrees of freedom
## Residual deviance: 549.59  on 165  degrees of freedom
## AIC: NA
## 
## Number of Fisher Scoring iterations: 6

3.8 Model Negative Binomial

model_nb <- glm.nb(sat ~ weight + width + color + spine, data = data_crabs)
summary(model_nb)
## 
## Call:
## glm.nb(formula = sat ~ weight + width + color + spine, data = data_crabs, 
##     init.theta = 0.9649768526, link = log)
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    -0.277348   1.950163  -0.142   0.8869  
## weight          0.700891   0.356475   1.966   0.0493 *
## width          -0.002437   0.099663  -0.024   0.9805  
## colormedium    -0.320756   0.372720  -0.861   0.3895  
## colordark      -0.596268   0.417349  -1.429   0.1531  
## colordarker    -0.579003   0.466470  -1.241   0.2145  
## spineone_worn  -0.242703   0.398367  -0.609   0.5424  
## spineboth_worn  0.042779   0.248431   0.172   0.8633  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for Negative Binomial(0.965) family taken to be 1)
## 
##     Null deviance: 220.67  on 172  degrees of freedom
## Residual deviance: 196.51  on 165  degrees of freedom
## AIC: 763.32
## 
## Number of Fisher Scoring iterations: 1
## 
## 
##               Theta:  0.965 
##           Std. Err.:  0.176 
## 
##  2 x log-likelihood:  -745.320
cat("\nIRR Negative Binomial:\n")
## 
## IRR Negative Binomial:
irr_nb <- data.frame(
  IRR     = round(exp(coef(model_nb)), 4),
  Lower95 = round(exp(confint(model_nb))[,1], 4),
  Upper95 = round(exp(confint(model_nb))[,2], 4)
)
kable(irr_nb, caption = "IRR — Negative Binomial") %>%
  kable_styling(bootstrap_options = c("striped","hover"))
IRR — Negative Binomial
IRR Lower95 Upper95
(Intercept) 0.7578 0.0115 48.4076
weight 2.0155 0.9184 4.4909
width 0.9976 0.8027 1.2409
colormedium 0.7256 0.3315 1.4984
colordark 0.5509 0.2355 1.2328
colordarker 0.5605 0.2157 1.4181
spineone_worn 0.7845 0.3694 1.7124
spineboth_worn 1.0437 0.6371 1.6814

3.9 Perbandingan Model

kable(data.frame(
  Model = c("Poisson", "Negative Binomial"),
  AIC   = round(c(AIC(model_pois), AIC(model_nb)), 2),
  Deviance = round(c(deviance(model_pois), deviance(model_nb)), 2),
  df   = c(model_pois$df.residual, model_nb$df.residual)
), caption = "Perbandingan Model") %>%
  kable_styling(bootstrap_options = c("striped","hover"), full_width = FALSE)
Perbandingan Model
Model AIC Deviance df
Poisson 920.88 549.59 165
Negative Binomial 763.32 196.51 165

3.10 Visualisasi Hasil

data_crabs$fitted_pois <- fitted(model_pois)
data_crabs$fitted_nb   <- fitted(model_nb)

ggplot(data_crabs) +
  geom_point(aes(x = sat, y = fitted_pois, color = "Poisson"), alpha = 0.6) +
  geom_point(aes(x = sat, y = fitted_nb,   color = "Neg.Binom"), alpha = 0.6) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  scale_color_manual(values = c("Poisson"="#2166ac","Neg.Binom"="#d6604d"),
                     name = "Model") +
  labs(title = "Observed vs Fitted: Poisson vs Negative Binomial",
       x = "Observed (sat)", y = "Fitted") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Observed vs Fitted — Poisson vs Negative Binomial

Observed vs Fitted — Poisson vs Negative Binomial

irr_df <- data.frame(
  term  = names(coef(model_pois)),
  IRR   = exp(coef(model_pois)),
  lower = exp(confint(model_pois))[,1],
  upper = exp(confint(model_pois))[,2]
) %>% filter(term != "(Intercept)")

ggplot(irr_df, aes(x = reorder(term, IRR), y = IRR)) +
  geom_point(size = 4, color = "#4dac26") +
  geom_errorbar(aes(ymin = lower, ymax = upper), width = 0.3, color = "#4dac26") +
  geom_hline(yintercept = 1, linetype = "dashed", color = "red") +
  coord_flip() +
  labs(title = "Incidence Rate Ratio (IRR) dengan CI 95% — Regresi Poisson",
       x = "Prediktor", y = "IRR") +
  theme_minimal(base_size = 13) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))
Forest plot IRR regresi Poisson

Forest plot IRR regresi Poisson


4 Ringkasan Analisis

kable(data.frame(
  Analisis = c("Multinomial Logistik", "Ordinal Logistik", "Regresi Poisson"),
  Data = c("200 siswa", "932 responden", "173 kepiting"),
  Respon = c("prog (3 kategori)", "opinion (3 ordinal)", "sat (cacahan)"),
  Prediktor = c("ses + write", "race + gender", "weight + width + color + spine"),
  `AIC Model` = c(round(AIC(model_multi),1), round(AIC(model_ord),1), round(AIC(model_pois),1))
), caption = "Ringkasan Seluruh Model") %>%
  kable_styling(bootstrap_options = c("striped","hover","bordered"))
Ringkasan Seluruh Model
Analisis Data Respon Prediktor AIC.Model
Multinomial Logistik 200 siswa prog (3 kategori) ses + write 376.0
Ordinal Logistik 932 responden opinion (3 ordinal) race + gender 1555.7
Regresi Poisson 173 kepiting sat (cacahan) weight + width + color + spine 920.9

Catatan: - Untuk model Poisson, jika terdeteksi overdispersi, gunakan Quasi-Poisson atau Negative Binomial. - Untuk model ordinal, uji asumsi proportional odds menggunakan brant::brant(model_ord). - Semua model menggunakan kategori referensi yang disebutkan pada masing-masing bagian.