# ANALISIS DATA CHAT MASUK PER JAM (POISSON)
# 1. Load Package (tidak perlu install berulang)
library(readxl)
library(ggplot2)
# 2. Import data: read the hourly incoming-chat workbook from the working
#    directory. The preview below shows it arrives as a 168 x 4 tibble.
data_chat <- read_excel(path = "data_chat_masuk_per_jam_7hari.xlsx")
# 3. Inspect the data: first rows, column names, then the column types.
cat("=== PREVIEW DATA ===\n")
## === PREVIEW DATA ===
print(head(data_chat, n = 6L))
## # A tibble: 6 × 4
##   tanggal    jam_mulai jam_selesai jumlah_chat
##   <chr>      <chr>     <chr>             <dbl>
## 1 2026-01-06 00:00     01:00                 0
## 2 2026-01-06 01:00     02:00                 1
## 3 2026-01-06 02:00     03:00                 2
## 4 2026-01-06 03:00     04:00                 3
## 5 2026-01-06 04:00     05:00                 0
## 6 2026-01-06 05:00     06:00                 0
cat("\n=== NAMA KOLOM ===\n")
## 
## === NAMA KOLOM ===
print(names(data_chat))
## [1] "tanggal"     "jam_mulai"   "jam_selesai" "jumlah_chat"
cat("\n=== STRUKTUR DATA ===\n")
## 
## === STRUKTUR DATA ===
utils::str(data_chat)
## tibble [168 × 4] (S3: tbl_df/tbl/data.frame)
##  $ tanggal    : chr [1:168] "2026-01-06" "2026-01-06" "2026-01-06" "2026-01-06" ...
##  $ jam_mulai  : chr [1:168] "00:00" "01:00" "02:00" "03:00" ...
##  $ jam_selesai: chr [1:168] "01:00" "02:00" "03:00" "04:00" ...
##  $ jumlah_chat: num [1:168] 0 1 2 3 0 0 4 8 9 7 ...
# 4. Extract the analysis variable: the per-hour chat counts as a plain
#    numeric vector.
chat <- as.numeric(data_chat[["jumlah_chat"]])
# 5. Descriptive statistics
cat("\n=== STATISTIK DESKRIPTIF ===\n")
## 
## === STATISTIK DESKRIPTIF ===
# Each printed label is paired with the function that produces its value, so
# a single loop emits the report lines in a fixed order.
stat_funs <- list(
  "Mean:" = mean,
  "Median:" = median,
  "Varians:" = var,
  "Minimum:" = min,
  "Maksimum:" = max
)
for (stat_label in names(stat_funs)) {
  cat(stat_label, stat_funs[[stat_label]](chat), "\n")
}
## Mean: 7.410714
## Median: 7
## Varians: 27.3812
## Minimum: 0
## Maksimum: 23
# Five-number summary plus the mean.
cat("\nRingkasan Data:\n")
## 
## Ringkasan Data:
print(summary(chat))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.000   7.000   7.411  11.000  23.000
# 6. Histogram of the observed hourly chat counts, using bins of width 1.
chat_histogram <- ggplot(data = data_chat, mapping = aes(x = jumlah_chat)) +
  geom_histogram(binwidth = 1, colour = "black", fill = "skyblue") +
  ggtitle("Histogram Jumlah Chat per Jam") +
  xlab("Jumlah Chat") +
  ylab("Frekuensi") +
  theme_minimal()
# Evaluating the plot object at top level renders it.
chat_histogram

# 7. Estimate the Poisson parameter.
# The sample mean is used as the estimate of the Poisson rate lambda.
lambda <- mean(chat)
cat("\nNilai Lambda (λ):", lambda, "\n")
## 
## Nilai Lambda (λ): 7.410714
# 8. Plot the observed data against the fitted Poisson distribution.
# Poisson probabilities for every count from 0 up to the observed maximum,
# kept in their own data frame so the layers below can use separate data.
x_vals <- 0:max(chat)
y_vals <- dpois(x_vals, lambda)

poisson_df <- data.frame(
  jumlah_chat = x_vals,
  prob = y_vals
)

# Combined plot: density-scaled histogram of the data with the Poisson
# probabilities overlaid as red points joined by a line. With binwidth = 1
# the histogram density equals the relative frequency of each bin, so both
# layers share the same y scale.
ggplot() +
  geom_histogram(data = data_chat,
                 # after_stat() replaces the `..density..` notation that was
                 # deprecated in ggplot2 3.4.0.
                 aes(x = jumlah_chat, y = after_stat(density)),
                 binwidth = 1,
                 fill = "lightblue",
                 color = "black") +
  geom_point(data = poisson_df,
             aes(x = jumlah_chat, y = prob),
             color = "red", size = 2) +
  geom_line(data = poisson_df,
            aes(x = jumlah_chat, y = prob),
            color = "red") +
  labs(title = "Histogram dan Distribusi Poisson",
       x = "Jumlah Chat",
       y = "Density") +
  theme_minimal()

# 9. Chi-square goodness-of-fit test of the counts against Poisson(lambda).
#
# Pearson's chi-square approximation needs adequately large expected counts
# in every cell, and the reference distribution loses one additional degree
# of freedom because lambda was estimated from the same data. The test
# therefore:
#   (a) assigns the upper-tail mass P(X >= max(chat)) to the last cell
#       instead of renormalising a truncated pmf,
#   (b) pools adjacent cells until each pooled cell has an expected count of
#       at least `min_expected`,
#   (c) recomputes the p-value with df = (number of pooled cells) - 2.
# NOTE(review): output recorded from the earlier unpooled test
# (X-squared = 5671.8, df = 23) no longer applies; re-run to refresh it.

# Minimum expected count per cell (the conventional rule of thumb).
min_expected <- 5
n_obs <- length(chat)

# Every count value from 0 up to the observed maximum.
x_all <- 0:max(chat)

# Observed frequency of each value in x_all; values never seen stay at 0.
observed <- table(chat)
observed_full <- rep(0, length(x_all))
names(observed_full) <- x_all
observed_full[names(observed)] <- observed

# Poisson cell probabilities. The last cell absorbs the whole upper tail so
# the probabilities sum to 1 without rescaling.
expected_prob <- dpois(x_all, lambda)
expected_prob[length(x_all)] <- ppois(max(chat) - 1, lambda,
                                      lower.tail = FALSE)
expected_count <- n_obs * expected_prob

# Walk the cells left to right, closing a pooled cell as soon as its
# accumulated expected count reaches the threshold.
cell_group <- integer(length(x_all))
group_id <- 1L
group_expected <- 0
for (i in seq_along(x_all)) {
  cell_group[i] <- group_id
  group_expected <- group_expected + expected_count[i]
  if (group_expected >= min_expected) {
    group_id <- group_id + 1L
    group_expected <- 0
  }
}
# A pooled cell still open after the loop is below the threshold; fold it
# into the previous pooled cell.
if (cell_group[length(cell_group)] == group_id && group_id > 1L) {
  cell_group[cell_group == group_id] <- group_id - 1L
}

observed_pooled <- as.vector(tapply(observed_full, cell_group, sum))
expected_pooled <- as.vector(tapply(expected_prob, cell_group, sum))

# One df is lost to the sum constraint and one to the estimated lambda.
df_adjusted <- length(observed_pooled) - 2L
if (df_adjusted < 1L) {
  stop("Too few cells remain after pooling to test the Poisson fit",
       call. = FALSE)
}

# chisq.test() supplies the statistic; rescale.p only guards against
# floating-point rounding in the pooled probabilities. Its df and p-value
# assume fully specified probabilities, so both are overwritten.
chisq <- chisq.test(observed_pooled, p = expected_pooled, rescale.p = TRUE)
chisq$parameter <- c(df = df_adjusted)
chisq$p.value <- unname(
  pchisq(chisq$statistic, df = df_adjusted, lower.tail = FALSE)
)

cat("\n=== UJI CHI-SQUARE ===\n")
print(chisq)

# 10. Automatic interpretation at the 5% significance level: a p-value above
#     alpha means the Poisson hypothesis is not rejected.
alpha <- 0.05
if (chisq$p.value > alpha) {
  cat("\nKesimpulan: Data MENGIKUTI distribusi Poisson\n")
} else {
  cat("\nKesimpulan: Data TIDAK mengikuti distribusi Poisson\n")
}