# ANALISIS DATA CHAT MASUK PER JAM (POISSON)
# 1. Load Package (tidak perlu install berulang)
library(readxl)
library(ggplot2)
# 2. Import the data
# The workbook is looked up relative to the current working directory.
data_chat <- read_excel(path = "data_chat_masuk_per_jam_7hari.xlsx")

# 3. Inspect the data: first rows, column names, then the full structure.
# Lines starting with "##" are the console output recorded from a previous run.
cat("=== PREVIEW DATA ===\n")
## === PREVIEW DATA ===
print(head(data_chat, n = 6L))
## # A tibble: 6 × 4
## tanggal jam_mulai jam_selesai jumlah_chat
## <chr> <chr> <chr> <dbl>
## 1 2026-01-06 00:00 01:00 0
## 2 2026-01-06 01:00 02:00 1
## 3 2026-01-06 02:00 03:00 2
## 4 2026-01-06 03:00 04:00 3
## 5 2026-01-06 04:00 05:00 0
## 6 2026-01-06 05:00 06:00 0

cat("\n=== NAMA KOLOM ===\n")
##
## === NAMA KOLOM ===
print(names(data_chat))
## [1] "tanggal" "jam_mulai" "jam_selesai" "jumlah_chat"

cat("\n=== STRUKTUR DATA ===\n")
##
## === STRUKTUR DATA ===
str(object = data_chat)
## tibble [168 × 4] (S3: tbl_df/tbl/data.frame)
## $ tanggal : chr [1:168] "2026-01-06" "2026-01-06" "2026-01-06" "2026-01-06" ...
## $ jam_mulai : chr [1:168] "00:00" "01:00" "02:00" "03:00" ...
## $ jam_selesai: chr [1:168] "01:00" "02:00" "03:00" "04:00" ...
## $ jumlah_chat: num [1:168] 0 1 2 3 0 0 4 8 9 7 ...
# 4. Extract the variable of interest: the hourly chat counts as a plain
# numeric vector.
chat <- as.numeric(data_chat[["jumlah_chat"]])

# 5. Descriptive statistics
cat("\n=== STATISTIK DESKRIPTIF ===\n")
##
## === STATISTIK DESKRIPTIF ===

# Each label is printed followed by the corresponding statistic of `chat`.
# local() keeps the helper list and the loop variable out of the workspace.
local({
  summaries <- list(
    "Mean:" = mean,
    "Median:" = median,
    "Varians:" = var,
    "Minimum:" = min,
    "Maksimum:" = max
  )
  for (label in names(summaries)) {
    cat(label, summaries[[label]](chat), "\n")
  }
})
## Mean: 7.410714
## Median: 7
## Varians: 27.3812
## Minimum: 0
## Maksimum: 23

# Five-number summary plus the mean
cat("\nRingkasan Data:\n")
##
## Ringkasan Data:
print(summary(object = chat))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.000 7.000 7.411 11.000 23.000
# 6. Histogram of the hourly chat counts, using bins of width 1
ggplot(data = data_chat, mapping = aes(x = jumlah_chat)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "skyblue") +
  ggtitle("Histogram Jumlah Chat per Jam") +
  xlab("Jumlah Chat") +
  ylab("Frekuensi") +
  theme_minimal()

# 7. Estimate the Poisson parameter
# The sample mean of the hourly counts is used as the Poisson rate (lambda);
# for a Poisson model the sample mean is the maximum-likelihood estimate.
lambda <- mean(chat)
cat("\nNilai Lambda (λ):", lambda, "\n")
##
## Nilai Lambda (λ): 7.410714
# 8. Observed distribution vs fitted Poisson
# The Poisson probabilities live in their own data frame so that every layer
# is given its data explicitly and nothing is inherited from ggplot().
x_vals <- 0:max(chat)
y_vals <- dpois(x_vals, lambda)
poisson_df <- data.frame(
  jumlah_chat = x_vals,
  prob = y_vals
)

# Combined plot: density-scaled histogram of the data (bin width 1, so bar
# height equals relative frequency) with the Poisson probabilities overlaid
# as red points joined by a line.
# after_stat(density) replaces the `..density..` notation, which has been
# deprecated since ggplot2 3.4.0 and emitted a lifecycle warning.
ggplot() +
  geom_histogram(
    data = data_chat,
    aes(x = jumlah_chat, y = after_stat(density)),
    binwidth = 1,
    fill = "lightblue",
    color = "black"
  ) +
  geom_point(
    data = poisson_df,
    aes(x = jumlah_chat, y = prob),
    color = "red", size = 2
  ) +
  geom_line(
    data = poisson_df,
    aes(x = jumlah_chat, y = prob),
    color = "red"
  ) +
  labs(
    title = "Histogram dan Distribusi Poisson",
    x = "Jumlah Chat",
    y = "Density"
  ) +
  theme_minimal()

# 9. Chi-square goodness-of-fit test against Poisson(lambda)
#
# For the chi-square approximation to be valid:
#   * the cell probabilities must cover the whole support, so the last cell
#     carries the upper tail P(X >= max(chat)) instead of being rescaled;
#   * cells with a small expected count are pooled with their neighbours
#     (conventional threshold: expected count of at least 5);
#   * lambda was estimated from the same data, so the degrees of freedom are
#     (number of cells - 1 - 1).
# The earlier un-pooled version of this test (24 cells) reported
# X-squared = 5671.8 on df = 23 together with the warning "Chi-squared
# approximation may be incorrect"; most likely that statistic was inflated by
# cells whose expected count was close to zero.

# Observed frequency of each distinct count
observed <- table(chat)
# Every count from 0 up to the largest one observed
x_all <- 0:max(chat)

# Observed frequencies aligned to x_all, with 0 for counts that never occurred
observed_full <- rep(0, length(x_all))
names(observed_full) <- x_all
observed_full[names(observed)] <- observed

# Poisson cell probabilities; the final cell absorbs the upper tail so the
# probabilities sum to 1 without rescaling
expected_prob <- dpois(x_all, lambda)
expected_prob[length(x_all)] <-
  ppois(max(chat) - 1, lambda, lower.tail = FALSE)

# Pool adjacent cells, left to right, until each pooled cell has an expected
# count of at least min_expected
min_expected <- 5
expected_count <- length(chat) * expected_prob
cell_id <- integer(length(x_all))
cell <- 1L
running <- 0
for (i in seq_along(x_all)) {
  cell_id[i] <- cell
  running <- running + expected_count[i]
  if (running >= min_expected) {
    cell <- cell + 1L
    running <- 0
  }
}
# A trailing run that never reached the threshold joins the previous cell
if (cell > 1L && any(cell_id == cell)) {
  cell_id[cell_id == cell] <- cell - 1L
}

observed_pooled <- as.vector(tapply(observed_full, cell_id, sum))
prob_pooled <- as.vector(tapply(expected_prob, cell_id, sum))

# Readable cell labels such as "0-3", "4", ..., "14+" (last cell is open-ended)
cell_lo <- as.vector(tapply(x_all, cell_id, min))
cell_hi <- as.vector(tapply(x_all, cell_id, max))
cell_label <- ifelse(
  cell_lo == cell_hi,
  as.character(cell_lo),
  paste0(cell_lo, "-", cell_hi)
)
cell_label[length(cell_label)] <- paste0(cell_lo[length(cell_lo)], "+")
names(observed_pooled) <- cell_label

# One degree of freedom for the sum constraint, one for the estimated lambda
df_adjusted <- length(observed_pooled) - 2
if (df_adjusted < 1) {
  stop(
    "Too few cells remain after pooling for a Poisson goodness-of-fit test",
    call. = FALSE
  )
}

# chisq.test() supplies the statistic and expected counts; its df and p-value
# assume fully specified probabilities, so both are replaced afterwards.
# rescale.p only guards against floating-point drift in sum(prob_pooled).
chisq <- chisq.test(observed_pooled, p = prob_pooled, rescale.p = TRUE)
chisq$parameter <- c(df = df_adjusted)
chisq$p.value <- unname(
  pchisq(chisq$statistic, df = df_adjusted, lower.tail = FALSE)
)
chisq$method <-
  "Chi-squared goodness-of-fit test for Poisson (pooled cells, df = k - 2)"

cat("\n=== UJI CHI-SQUARE ===\n")
##
## === UJI CHI-SQUARE ===
print(chisq)

# 10. Automatic interpretation at the 5% significance level
# A p-value above 0.05 means the Poisson hypothesis is not rejected.
# Note: the descriptive statistics show a variance (27.38) well above the
# mean (7.41), i.e. overdispersion relative to a Poisson model.
if (chisq$p.value > 0.05) {
  cat("\nKesimpulan: Data MENGIKUTI distribusi Poisson\n")
} else {
  cat("\nKesimpulan: Data TIDAK mengikuti distribusi Poisson\n")
}