Distribusi Poisson digunakan untuk memodelkan jumlah kejadian dalam
suatu interval tertentu.
Analisis ini mencakup simulasi data, statistik, visualisasi, serta uji
kesesuaian distribusi.
library(ggplot2)
library(fitdistrplus)
## Loading required package: MASS
## Loading required package: survival
set.seed(123)
lambda_true <- 3
n <- 200
x <- rpois(n, lambda_true)
mean_x <- mean(x)
var_x <- var(x)
lambda_hat <- mean_x
cat("Jumlah observasi:", n, "\n")
## Jumlah observasi: 200
cat("Mean:", mean_x, "\n")
## Mean: 3.005
cat("Var:", var_x, "\n")
## Var: 2.487412
cat("Estimasi lambda:", lambda_hat, "\n")
## Estimasi lambda: 3.005
tab_obs <- as.data.frame(table(x))
colnames(tab_obs) <- c("k","freq")
tab_obs$k <- as.integer(as.character(tab_obs$k))
tab_obs
## k freq
## 1 0 6
## 2 1 25
## 3 2 55
## 4 3 44
## 5 4 36
## 6 5 20
## 7 6 9
## 8 7 4
## 9 8 1
k_max <- max(tab_obs$k, qpois(0.999, lambda_hat))
k_vals <- 0:k_max
pmf_theo <- dpois(k_vals, lambda_hat)
expected_freq <- pmf_theo * n
obs_extended <- merge(data.frame(k=k_vals), tab_obs, by="k", all.x=TRUE)
obs_extended$freq[is.na(obs_extended$freq)] <- 0
obs_extended$expected <- expected_freq
obs_extended
## k freq expected
## 1 0 6 9.9077509
## 2 1 25 29.7727914
## 3 2 55 44.7336190
## 4 3 44 44.8081750
## 5 4 36 33.6621415
## 6 5 20 20.2309470
## 7 6 9 10.1323326
## 8 7 4 4.3496657
## 9 8 1 1.6338432
## 10 9 0 0.5455221
## 11 10 0 0.1639294
df_plot <- obs_extended
df_plot$label <- factor(df_plot$k, levels=df_plot$k)
ggplot(df_plot, aes(x=label)) +
geom_bar(aes(y=freq), stat="identity", alpha=0.6) +
geom_point(aes(y=expected), size=2, color="red") +
geom_line(aes(y=expected, group=1), color="red", linetype="dashed") +
labs(title=paste("Observed vs Expected Poisson — λ =", round(lambda_hat,3)),
x="Jumlah Kejadian (k)", y="Frekuensi") +
theme_minimal()
pmf_df <- data.frame(k=k_vals, pmf=pmf_theo)
ggplot(pmf_df, aes(x=k, y=pmf)) +
geom_col() +
labs(title=paste("PMF Distribusi Poisson (λ =", round(lambda_hat,3),")"),
x="k", y="P(X=k)") +
theme_minimal()
cat("P(X <= 2) =", ppois(2, lambda_hat), "\n")
## P(X <= 2) = 0.4220708
cat("P(X >= 4) =", 1 - ppois(3, lambda_hat), "\n")
## P(X >= 4) = 0.3538883
combine_tail <- function(k, obs, exp, min_expected = 5) {
k2 <- k ; obs2 <- obs ; exp2 <- exp
while(any(exp2 < min_expected) && length(exp2) > 1) {
i <- length(exp2)
exp2[i-1] <- exp2[i-1] + exp2[i]
obs2[i-1] <- obs2[i-1] + obs2[i]
k2 <- k2[-i]
exp2 <- exp2[-i]
obs2 <- obs2[-i]
}
data.frame(k=k2, obs=obs2, exp=exp2)
}
comb_df <- combine_tail(obs_extended$k, obs_extended$freq, obs_extended$expected)
comb_df
## k obs exp
## 1 0 6 9.907751
## 2 1 25 29.772791
## 3 2 55 44.733619
## 4 3 44 44.808175
## 5 4 36 33.662141
## 6 5 20 20.230947
## 7 6 9 10.132333
## 8 7 5 6.692960
chisq_stat <- sum((comb_df$obs - comb_df$exp)^2 / comb_df$exp)
df_chisq <- nrow(comb_df) - 1 - 1
p_value <- 1 - pchisq(chisq_stat, df = df_chisq)
cat("Chi-square:", chisq_stat, "\n")
## Chi-square: 5.39687
cat("df:", df_chisq, "\n")
## df: 6
cat("p-value:", p_value, "\n")
## p-value: 0.4940079
chisq_builtin <- suppressWarnings(
chisq.test(
x = comb_df$obs,
p = comb_df$exp / sum(comb_df$exp),
rescale.p = FALSE
)
)
chisq_builtin
##
## Chi-squared test for given probabilities
##
## data: comb_df$obs
## X-squared = 5.3953, df = 7, p-value = 0.6118
fit <- fitdist(x, "pois", method = "mle")
fit
## Fitting of the distribution ' pois ' by maximum likelihood
## Parameters:
## estimate Std. Error
## lambda 3.005 0.1225765
cat("P(X >= 6) =", 1 - ppois(5, lambda_hat), "\n")
## P(X >= 6) = 0.08442288
B <- 2000
boot_means <- replicate(B, mean(sample(x, replace = TRUE)))
quantile(boot_means, c(0.025, 0.975))
## 2.5% 97.5%
## 2.799875 3.230000
Analisis Poisson selesai dilakukan.