1. Pendahuluan

Distribusi Poisson digunakan untuk memodelkan jumlah kejadian dalam suatu interval tertentu.
Analisis ini mencakup simulasi data, statistik, visualisasi, serta uji kesesuaian distribusi.

2. Load Library

library(ggplot2)
library(fitdistrplus)
## Loading required package: MASS
## Loading required package: survival

3. Simulasi Data

set.seed(123)
lambda_true <- 3
n <- 200
x <- rpois(n, lambda_true)

mean_x <- mean(x)
var_x  <- var(x)
lambda_hat <- mean_x

cat("Jumlah observasi:", n, "\n")
## Jumlah observasi: 200
cat("Mean:", mean_x, "\n")
## Mean: 3.005
cat("Var:", var_x, "\n")
## Var: 2.487412
cat("Estimasi lambda:", lambda_hat, "\n")
## Estimasi lambda: 3.005

4. Tabel Frekuensi Observasi

tab_obs <- as.data.frame(table(x))
colnames(tab_obs) <- c("k","freq")
tab_obs$k <- as.integer(as.character(tab_obs$k))
tab_obs
##   k freq
## 1 0    6
## 2 1   25
## 3 2   55
## 4 3   44
## 5 4   36
## 6 5   20
## 7 6    9
## 8 7    4
## 9 8    1

5. Probabilitas Teoretik & Frekuensi Harapan

k_max <- max(tab_obs$k, qpois(0.999, lambda_hat))
k_vals <- 0:k_max
pmf_theo <- dpois(k_vals, lambda_hat)
expected_freq <- pmf_theo * n

obs_extended <- merge(data.frame(k=k_vals), tab_obs, by="k", all.x=TRUE)
obs_extended$freq[is.na(obs_extended$freq)] <- 0
obs_extended$expected <- expected_freq
obs_extended
##     k freq   expected
## 1   0    6  9.9077509
## 2   1   25 29.7727914
## 3   2   55 44.7336190
## 4   3   44 44.8081750
## 5   4   36 33.6621415
## 6   5   20 20.2309470
## 7   6    9 10.1323326
## 8   7    4  4.3496657
## 9   8    1  1.6338432
## 10  9    0  0.5455221
## 11 10    0  0.1639294

6. Grafik Observed vs Expected

df_plot <- obs_extended
df_plot$label <- factor(df_plot$k, levels=df_plot$k)

ggplot(df_plot, aes(x=label)) +
  geom_bar(aes(y=freq), stat="identity", alpha=0.6) +
  geom_point(aes(y=expected), size=2, color="red") +
  geom_line(aes(y=expected, group=1), color="red", linetype="dashed") +
  labs(title=paste("Observed vs Expected Poisson — λ =", round(lambda_hat,3)),
       x="Jumlah Kejadian (k)", y="Frekuensi") +
  theme_minimal()

7. Grafik PMF Teoretik

pmf_df <- data.frame(k=k_vals, pmf=pmf_theo)

ggplot(pmf_df, aes(x=k, y=pmf)) +
  geom_col() +
  labs(title=paste("PMF Distribusi Poisson (λ =", round(lambda_hat,3),")"),
       x="k", y="P(X=k)") +
  theme_minimal()

8. Contoh Perhitungan CDF

cat("P(X <= 2) =", ppois(2, lambda_hat), "\n")
## P(X <= 2) = 0.4220708
cat("P(X >= 4) =", 1 - ppois(3, lambda_hat), "\n")
## P(X >= 4) = 0.3538883

9. Uji Chi-Square Goodness-of-Fit

9.1 Penggabungan Tail

combine_tail <- function(k, obs, exp, min_expected = 5) {
  k2 <- k ; obs2 <- obs ; exp2 <- exp
  
  while(any(exp2 < min_expected) && length(exp2) > 1) {
    i <- length(exp2)
    exp2[i-1] <- exp2[i-1] + exp2[i]
    obs2[i-1] <- obs2[i-1] + obs2[i]
    k2 <- k2[-i]
    exp2 <- exp2[-i]
    obs2 <- obs2[-i]
  }
  data.frame(k=k2, obs=obs2, exp=exp2)
}

comb_df <- combine_tail(obs_extended$k, obs_extended$freq, obs_extended$expected)
comb_df
##   k obs       exp
## 1 0   6  9.907751
## 2 1  25 29.772791
## 3 2  55 44.733619
## 4 3  44 44.808175
## 5 4  36 33.662141
## 6 5  20 20.230947
## 7 6   9 10.132333
## 8 7   5  6.692960

9.2 Perhitungan Manual

chisq_stat <- sum((comb_df$obs - comb_df$exp)^2 / comb_df$exp)
df_chisq <- nrow(comb_df) - 1 - 1
p_value <- 1 - pchisq(chisq_stat, df = df_chisq)

cat("Chi-square:", chisq_stat, "\n")
## Chi-square: 5.39687
cat("df:", df_chisq, "\n")
## df: 6
cat("p-value:", p_value, "\n")
## p-value: 0.4940079

9.3 Chi-Square dengan Fungsi R

chisq_builtin <- suppressWarnings(
  chisq.test(
    x = comb_df$obs,
    p = comb_df$exp / sum(comb_df$exp),
    rescale.p = FALSE
  )
)
chisq_builtin
## 
##  Chi-squared test for given probabilities
## 
## data:  comb_df$obs
## X-squared = 5.3953, df = 7, p-value = 0.6118

10. Estimasi λ (MLE)

fit <- fitdist(x, "pois", method = "mle")
fit
## Fitting of the distribution ' pois ' by maximum likelihood 
## Parameters:
##        estimate Std. Error
## lambda    3.005  0.1225765

11. Probabilitas Tambahan

cat("P(X >= 6) =", 1 - ppois(5, lambda_hat), "\n")
## P(X >= 6) = 0.08442288

12. Bootstrap CI

B <- 2000
boot_means <- replicate(B, mean(sample(x, replace = TRUE)))
quantile(boot_means, c(0.025, 0.975))
##     2.5%    97.5% 
## 2.799875 3.230000

13. Kesimpulan

  1. Mean dan varians mendekati λ → karakteristik Poisson terpenuhi.
  2. Grafik observed vs expected mendukung kecocokan distribusi Poisson.
  3. Uji Chi-square menghasilkan p-value besar → model Poisson cocok.
  4. Estimasi λ stabil melalui MLE dan bootstrap.

Analisis Poisson selesai dilakukan.