Poisson Non Homogen
LIBRARY
## Loading required package: car
## Loading required package: carData
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
LOAD DATASET
## tibble [61 × 4] (S3: tbl_df/tbl/data.frame)
## $ Bulan : chr [1:61] "01-2019" "02-2019" "03-2019" "04-2019" ...
## $ Frekuensi: num [1:61] 1941 2113 2058 1688 1807 ...
## $ Besaran : num [1:61] 2.21e+10 2.40e+10 2.11e+10 1.84e+10 1.82e+10 ...
## $ t : int [1:61] 1 2 3 4 5 6 7 8 9 10 ...
## Bulan Frekuensi Besaran t
## Length:61 Min. : 34 Min. :8.939e+08 Min. : 1
## Class :character 1st Qu.: 216 1st Qu.:6.988e+09 1st Qu.:16
## Mode :character Median : 484 Median :1.334e+10 Median :31
## Mean :1006 Mean :1.889e+10 Mean :31
## 3rd Qu.:1835 3rd Qu.:2.317e+10 3rd Qu.:46
## Max. :3277 Max. :1.615e+11 Max. :61
STATISTIK DESKRIPTIF
## [1] 1006.41
## [1] 770571.6
## [1] 765.6639
VISUALISASI DATA
par(mfrow=c(2,2))
plot(data$t, data$Frekuensi, type = "l", col = "blue",
main = "Frekuensi Klaim Bulanan",
xlab = "Waktu (Bulan)", ylab = "Frekuensi")
points(data$t, data$Frekuensi, pch = 19, col = "blue")
plot(data$t, data$Besaran, type = "l", col = "red",
main = "Total Besaran Klaim Bulanan",
xlab = "Waktu (Bulan)", ylab = "Besaran (Rp)")
points(data$t, data$Besaran, pch = 19, col = "red")
hist(data$Frekuensi, breaks = 20, col = "lightblue",
main = "Distribusi Frekuensi Klaim",
xlab = "Frekuensi")
hist(data$Besaran, breaks = 20, col = "lightcoral",
main = "Distribusi Besaran Klaim",
xlab = "Besaran (Rp)")PEMODELAN POISSON: HOMOGEN VS NON-HOMOGEN
model_homogen <- glm(Frekuensi ~ 1, family = poisson, data = data)
model_linear <- glm(Frekuensi ~ t, family = poisson, data = data)
model_quad <- glm(Frekuensi ~ t + I(t^2), family = poisson, data = data)Model Homogen (λ konstan)
##
## Call:
## glm(formula = Frekuensi ~ 1, family = poisson, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.914145 0.004036 1713 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 47770 on 60 degrees of freedom
## Residual deviance: 47770 on 60 degrees of freedom
## AIC: 48273
##
## Number of Fisher Scoring iterations: 5
Model Non-Homogen Linear (λ(t) = exp(β₀ + β₁t))
##
## Call:
## glm(formula = Frekuensi ~ t, family = poisson, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.7444087 0.0067789 1142.4 <2e-16 ***
## t -0.0316419 0.0002504 -126.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 47770 on 60 degrees of freedom
## Residual deviance: 30332 on 59 degrees of freedom
## AIC: 30837
##
## Number of Fisher Scoring iterations: 5
Model Non-Homogen Kuadratik (λ(t) = exp(β₀ + β₁t + β₂t²))
##
## Call:
## glm(formula = Frekuensi ~ t + I(t^2), family = poisson, data = data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.8950563 0.0093752 842.12 <2e-16 ***
## t -0.0502531 0.0008685 -57.86 <2e-16 ***
## I(t^2) 0.0003414 0.0000152 22.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 47770 on 60 degrees of freedom
## Residual deviance: 29838 on 58 degrees of freedom
## AIC: 30345
##
## Number of Fisher Scoring iterations: 5
UJI OVERDISPERSION
##
## Overdispersion test
##
## data: model_homogen
## z = 7.9915, p-value = 6.666e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 753.112
LIKELIHOOD RATIO TEST
Homogen vs Linear
## Analysis of Deviance Table
##
## Model 1: Frekuensi ~ 1
## Model 2: Frekuensi ~ t
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 60 47770
## 2 59 30332 1 17438 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Linear vs Kuadratik
## Analysis of Deviance Table
##
## Model 1: Frekuensi ~ t
## Model 2: Frekuensi ~ t + I(t^2)
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 59 30332
## 2 58 29838 1 493.54 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
KRITERIA PEMILIHAN MODEL
data.frame(
Model = c("Homogen", "Linear", "Kuadratik"),
AIC = c(AIC(model_homogen), AIC(model_linear), AIC(model_quad)),
BIC = c(BIC(model_homogen), BIC(model_linear), BIC(model_quad)),
Deviance = c(deviance(model_homogen), deviance(model_linear), deviance(model_quad))
)## Model AIC BIC Deviance
## 1 Homogen 48273.46 48275.57 47770.14
## 2 Linear 30837.01 30841.24 30331.69
## 3 Kuadratik 30345.48 30351.81 29838.16
VISUALISASI PERBANDINGAN MODEL
data$fitted_homogen <- fitted(model_homogen)
data$fitted_linear <- fitted(model_linear)
data$fitted_quad <- fitted(model_quad)
plot(data$t, data$Frekuensi, pch = 19, col = "black",
main = "Perbandingan Model Poisson",
xlab = "Waktu (Bulan)", ylab = "Frekuensi",
ylim = range(c(data$Frekuensi, data$fitted_quad)))
lines(data$t, data$fitted_homogen, col = "green", lwd = 2, lty = 2)
lines(data$t, data$fitted_linear, col = "blue", lwd = 2)
lines(data$t, data$fitted_quad, col = "red", lwd = 2)
legend("topright",
legend = c("Data", "Homogen", "Linear", "Kuadratik"),
col = c("black", "green", "blue", "red"),
pch = c(19, NA, NA, NA),
lty = c(NA, 2, 1, 1), lwd = 2)
# KOEFISIEN MODEL TERPILIH ## Model Linear
## (Intercept) t
## 7.74440866 -0.03164189
FUNGSI INTENSITAS λ(t)
plot(data$t, fitted(model_quad), type = "l", col = "darkred", lwd = 2,
main = "Fungsi Intensitas λ(t) - Model Kuadratik",
xlab = "Waktu (Bulan)", ylab = "λ(t)")
grid()
# DIAGNOSTIK MODEL
par(mfrow=c(2,2))
plot(fitted(model_quad), residuals(model_quad, type = "deviance"),
xlab = "Fitted Values", ylab = "Deviance Residuals",
main = "Residual vs Fitted")
abline(h = 0, col = "red", lty = 2)
qqnorm(residuals(model_quad, type = "deviance"))
qqline(residuals(model_quad, type = "deviance"), col = "red")
plot(fitted(model_quad), sqrt(abs(residuals(model_quad, type = "deviance"))),
xlab = "Fitted Values", ylab = "√|Deviance Residuals|",
main = "Scale-Location")
plot(cooks.distance(model_quad), type = "h",
main = "Cook's Distance", xlab = "Observation")
abline(h = 4/nrow(data), col = "red", lty = 2)PREDIKSI 12 BULAN KE DEPAN
n_pred <- 12
new_data <- data.frame(t = (max(data$t) + 1):(max(data$t) + n_pred))
pred <- predict(model_quad, newdata = new_data, type = "response", se.fit = TRUE)
pred_df <- data.frame(
Bulan = new_data$t,
Prediksi = pred$fit,
SE = pred$se.fit,
Lower = pred$fit - 1.96 * pred$se.fit,
Upper = pred$fit + 1.96 * pred$se.fit
)
pred_df## Bulan Prediksi SE Lower Upper
## 1 62 442.2378 7.498071 427.5416 456.9340
## 2 63 438.9011 7.872309 423.4714 454.3308
## 3 64 435.8872 8.262211 419.6933 452.0811
## 4 65 433.1897 8.668022 416.2004 450.1790
## 5 66 430.8029 9.090080 412.9864 448.6195
## 6 67 428.7220 9.528817 410.0455 447.3985
## 7 68 426.9426 9.984749 407.3725 446.5127
## 8 69 425.4610 10.458477 404.9624 445.9596
## 9 70 424.2742 10.950685 402.8108 445.7375
## 10 71 423.3797 11.462134 400.9139 445.8454
## 11 72 422.7756 11.993667 399.2680 446.2832
## 12 73 422.4609 12.546208 397.8703 447.0514
VISUALISASI PREDIKSI
plot(data$t, data$Frekuensi, pch = 19, col = "black",
xlim = c(1, max(new_data$t)),
ylim = range(c(data$Frekuensi, pred_df$Upper)),
main = "Prediksi Frekuensi Klaim (12 Bulan)",
xlab = "Waktu (Bulan)", ylab = "Frekuensi")
lines(data$t, fitted(model_quad), col = "blue", lwd = 2)
lines(pred_df$Bulan, pred_df$Prediksi, col = "red", lwd = 2, lty = 2)
lines(pred_df$Bulan, pred_df$Lower, col = "red", lwd = 1, lty = 3)
lines(pred_df$Bulan, pred_df$Upper, col = "red", lwd = 1, lty = 3)
polygon(c(pred_df$Bulan, rev(pred_df$Bulan)),
c(pred_df$Lower, rev(pred_df$Upper)),
col = rgb(1, 0, 0, 0.2), border = NA)
legend("topleft",
legend = c("Data", "Fitted", "Prediksi", "95% CI"),
col = c("black", "blue", "red", "red"),
pch = c(19, NA, NA, NA),
lty = c(NA, 1, 2, 3), lwd = 2)
# MODEL NEGATIVE BINOMIAL
##
## Call:
## glm.nb(formula = Frekuensi ~ t + I(t^2), data = data, init.theta = 1.443907116,
## link = log)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 8.0770472 0.3305842 24.433 < 2e-16 ***
## t -0.0688446 0.0246103 -2.797 0.00515 **
## I(t^2) 0.0006475 0.0003848 1.683 0.09242 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for Negative Binomial(1.4439) family taken to be 1)
##
## Null deviance: 93.808 on 60 degrees of freedom
## Residual deviance: 67.724 on 58 degrees of freedom
## AIC: 951.01
##
## Number of Fisher Scoring iterations: 1
##
##
## Theta: 1.444
## Std. Err.: 0.238
##
## 2 x log-likelihood: -943.011
PERBANDINGAN AIC: POISSON VS NEGATIVE BINOMIAL
## Model AIC
## 1 Poisson 30345.4778
## 2 Negative Binomial 951.0115
VISUALISASI PERBANDINGAN
data$fitted_nb <- fitted(nb_model)
plot(data$t, data$Frekuensi, pch = 19, col = "black",
main = "Poisson vs Negative Binomial",
xlab = "Waktu (Bulan)", ylab = "Frekuensi")
lines(data$t, fitted(model_quad), col = "blue", lwd = 2)
lines(data$t, data$fitted_nb, col = "purple", lwd = 2, lty = 2)
legend("topright",
legend = c("Data", "Poisson", "Negative Binomial"),
col = c("black", "blue", "purple"),
pch = c(19, NA, NA),
lty = c(NA, 1, 2), lwd = 2)