1. LOAD DATA
bi_f <- read_excel("D:/File/Lomba/SOA/2026/BI_Final.xlsx", sheet = "freq")
bi_s <- read_excel("D:/File/Lomba/SOA/2026/BI_Final.xlsx", sheet = "sev")
cl_f <- read_excel("D:/File/Lomba/SOA/2026/CL_Final.xlsx", sheet = "freq")
cl_s <- read_excel("D:/File/Lomba/SOA/2026/CL_Final.xlsx", sheet = "sev")
ef_f <- read_excel("D:/File/Lomba/SOA/2026/EF_Final.xlsx", sheet = "freq")
ef_s <- read_excel("D:/File/Lomba/SOA/2026/EF_Final.xlsx", sheet = "sev")
wc_f <- read_excel("D:/File/Lomba/SOA/2026/WC_Final.xlsx", sheet = "freq")
wc_s <- read_excel("D:/File/Lomba/SOA/2026/WC_Final.xlsx", sheet = "sev")
2. Uji Distribusi Parametrik
estimasi_parametrik <- function(df, kolom_target, mode, nama_dataset) {
cat("\n====================================================================\n")
cat(sprintf("MENGANALISIS %s: %s (Mode: %s)\n", toupper(nama_dataset), kolom_target, toupper(mode)))
cat("====================================================================\n")
if (!(kolom_target %in% names(df))) {
cat(sprintf("Kolom '%s' tidak ditemukan di dataset %s!\n", kolom_target, nama_dataset))
return(NULL)
}
# ------------------------------------------------------------------
# JALUR A: SEVERITY (UANG) -> FITUR AUTO-SCALING & ANTI-BADAI
# ------------------------------------------------------------------
if (mode == "sev") {
data_bersih <- df[[kolom_target]][!is.na(df[[kolom_target]]) & df[[kolom_target]] > 0]
if(length(data_bersih) < 10) return(cat("Data terlalu sedikit.\n"))
# JURUS AUTO-SCALING: Cegah otak R meledak karena angka raksasa
teks_skala <- ""
if (max(data_bersih) > 1000000) {
data_bersih <- data_bersih / 1000000
teks_skala <- "(Skala: Jutaan)"
cat(" Angka terlalu besar. Menggunakan Skala Jutaan untuk mencegah error matematis R.\n")
} else if (max(data_bersih) > 1000) {
data_bersih <- data_bersih / 1000
teks_skala <- "(Skala: Ribuan)"
cat(" Angka terlalu besar. Menggunakan Skala Ribuan untuk mencegah error matematis R.\n")
}
cat("Sedang melatih kandidat distribusi keparahan...\n")
# JURUS ANTI-BADAI (try): Kalau 1 gagal, yang lain tetap jalan
list_fit <- list()
nama_fit <- c()
fit_ln <- try(fitdist(data_bersih, "lnorm"), silent = TRUE)
if (!inherits(fit_ln, "try-error")) { list_fit[[length(list_fit)+1]] <- fit_ln; nama_fit <- c(nama_fit, "Lognormal") }
fit_gm <- try(fitdist(data_bersih, "gamma"), silent = TRUE)
if (!inherits(fit_gm, "try-error")) { list_fit[[length(list_fit)+1]] <- fit_gm; nama_fit <- c(nama_fit, "Gamma") }
fit_wb <- try(fitdist(data_bersih, "weibull"), silent = TRUE)
if (!inherits(fit_wb, "try-error")) { list_fit[[length(list_fit)+1]] <- fit_wb; nama_fit <- c(nama_fit, "Weibull") }
fit_ex <- try(fitdist(data_bersih, "exp"), silent = TRUE)
if (!inherits(fit_ex, "try-error")) { list_fit[[length(list_fit)+1]] <- fit_ex; nama_fit <- c(nama_fit, "Exponential") }
# Evaluasi & Visualisasi jika ada model yang berhasil
if (length(list_fit) > 0) {
par(mfrow = c(2, 2), oma = c(0, 0, 2, 0))
denscomp(list_fit, legendtext = nama_fit)
cdfcomp(list_fit, legendtext = nama_fit)
qqcomp(list_fit, legendtext = nama_fit)
ppcomp(list_fit, legendtext = nama_fit)
mtext(sprintf("Distribusi Keparahan %s - %s", teks_skala, nama_dataset), outer = TRUE, cex = 1.2, font = 2)
gof_hasil <- gofstat(list_fit, fitnames = nama_fit)
tabel_aic <- data.frame(Distribusi = nama_fit, AIC = gof_hasil$aic)
tabel_aic <- tabel_aic[order(tabel_aic$AIC), ]
print(tabel_aic)
cat(sprintf("\n PEMENANG SEVERITY: ** %s **\n", tabel_aic$Distribusi[1]))
} else {
cat("Semua kandidat distribusi gagal menghitung parameternya.\n")
}
# ------------------------------------------------------------------
# JALUR B: FREQUENCY (COUNT) -> DENGAN JURUS BUNGKAM WARNING
# ------------------------------------------------------------------
} else if (mode == "freq") {
df_bersih <- df[!is.na(df[[kolom_target]]), , drop = FALSE]
vektor_data <- df_bersih[[kolom_target]]
# Bungkus dengan suppressWarnings agar R tidak ngedumel
fit_p <- try(suppressWarnings(fitdist(vektor_data, "pois")), silent = TRUE)
fit_nb <- try(suppressWarnings(fitdist(vektor_data, "nbinom")), silent = TRUE)
if (!inherits(fit_p, "try-error") & !inherits(fit_nb, "try-error")) {
par(mfrow = c(1, 2), oma = c(0, 0, 2, 0))
denscomp(list(fit_p, fit_nb), legendtext = c("Poisson", "Negative Binomial"), fitlty = 1)
cdfcomp(list(fit_p, fit_nb), legendtext = c("Poisson", "Negative Binomial"), fitlty = 1)
mtext(sprintf("Distribusi Frekuensi - %s", nama_dataset), outer = TRUE, cex = 1.2, font = 2)
}
cat("Sedang melatih 4 model frekuensi tingkat lanjut (Termasuk Zero-Inflated)...\n")
rumus <- as.formula(paste(kolom_target, "~ 1"))
AIC_pois <- NA; AIC_nb <- NA; AIC_zip <- NA; AIC_zinb <- NA
# Bungkus semuanya dengan suppressWarnings
m_pois <- try(suppressWarnings(glm(rumus, family = poisson, data = df_bersih)), silent = TRUE)
if (!inherits(m_pois, "try-error")) AIC_pois <- AIC(m_pois)
m_nb <- try(suppressWarnings(glm.nb(rumus, data = df_bersih)), silent = TRUE)
if (!inherits(m_nb, "try-error")) AIC_nb <- AIC(m_nb)
m_zip <- try(suppressWarnings(zeroinfl(rumus, dist = "poisson", data = df_bersih)), silent = TRUE)
if (!inherits(m_zip, "try-error")) AIC_zip <- AIC(m_zip)
m_zinb <- try(suppressWarnings(zeroinfl(rumus, dist = "negbin", data = df_bersih)), silent = TRUE)
if (!inherits(m_zinb, "try-error")) AIC_zinb <- AIC(m_zinb)
tabel_aic <- data.frame(
Distribusi = c("Poisson", "Negative Binomial", "Zero-Inflated Poisson", "Zero-Infl. Neg. Binomial"),
AIC = c(AIC_pois, AIC_nb, AIC_zip, AIC_zinb)
)
tabel_aic <- tabel_aic[order(tabel_aic$AIC), ]
print(tabel_aic)
cat(sprintf("\n PEMENANG FREQUENCY: ** %s **\n", tabel_aic$Distribusi[1]))
} else {
cat(" Mode tidak dikenali! Gunakan 'sev' atau 'freq'.\n")
}
}
daftar_freq <- list("BI_Freq" = bi_f, "CL_Freq" = cl_f, "EF_Freq" = ef_f, "WC_Freq" = wc_f )
daftar_sev <- list("BI_Sev" = bi_s, "CL_Sev" = cl_s, "EF_Sev" = ef_s, "WC_Sev" = wc_s)
# >>> SIMPAN SEMUA GRAFIK KE 1 FILE PDF <<<
pdf("Laporan_Distribusi_Lengkap.pdf", width = 10, height = 7)
for (nama in names(daftar_freq)) {
estimasi_parametrik(df = daftar_freq[[nama]], kolom_target = "claim_count", mode = "freq", nama_dataset = nama)
}
##
## ====================================================================
## MENGANALISIS BI_FREQ: claim_count (Mode: FREQ)
## ====================================================================
## Sedang melatih 4 model frekuensi tingkat lanjut (Termasuk Zero-Inflated)...
## Distribusi AIC
## 4 Zero-Infl. Neg. Binomial 63145.04
## 2 Negative Binomial 63198.19
## 3 Zero-Inflated Poisson 63268.14
## 1 Poisson 71157.62
##
## PEMENANG FREQUENCY: ** Zero-Infl. Neg. Binomial **
##
## ====================================================================
## MENGANALISIS CL_FREQ: claim_count (Mode: FREQ)
## ====================================================================
## Sedang melatih 4 model frekuensi tingkat lanjut (Termasuk Zero-Inflated)...
## Distribusi AIC
## 4 Zero-Infl. Neg. Binomial 153337.2
## 2 Negative Binomial 153365.3
## 3 Zero-Inflated Poisson 153628.8
## 1 Poisson 159881.6
##
## PEMENANG FREQUENCY: ** Zero-Infl. Neg. Binomial **
##
## ====================================================================
## MENGANALISIS EF_FREQ: claim_count (Mode: FREQ)
## ====================================================================
## Sedang melatih 4 model frekuensi tingkat lanjut (Termasuk Zero-Inflated)...
## Distribusi AIC
## 2 Negative Binomial 56680.49
## 4 Zero-Infl. Neg. Binomial 56682.52
## 3 Zero-Inflated Poisson 57934.69
## 1 Poisson 59602.53
##
## PEMENANG FREQUENCY: ** Negative Binomial **
##
## ====================================================================
## MENGANALISIS WC_FREQ: claim_count (Mode: FREQ)
## ====================================================================
## Sedang melatih 4 model frekuensi tingkat lanjut (Termasuk Zero-Inflated)...
## Distribusi AIC
## 3 Zero-Inflated Poisson 20140.75
## 2 Negative Binomial 20140.93
## 4 Zero-Infl. Neg. Binomial 20142.75
## 1 Poisson 20145.53
##
## PEMENANG FREQUENCY: ** Zero-Inflated Poisson **
for (nama in names(daftar_sev)) {
estimasi_parametrik(df = daftar_sev[[nama]], kolom_target = "claim_amount", mode = "sev", nama_dataset = nama)
}
##
## ====================================================================
## MENGANALISIS BI_SEV: claim_amount (Mode: SEV)
## ====================================================================
## Angka terlalu besar. Menggunakan Skala Jutaan untuk mencegah error matematis R.
## Sedang melatih kandidat distribusi keparahan...
## Distribusi AIC
## Lognormal Lognormal 47325.45
## Weibull Weibull 48948.73
## Gamma Gamma 49414.00
## Exponential Exponential 49669.89
##
## PEMENANG SEVERITY: ** Lognormal **
##
## ====================================================================
## MENGANALISIS CL_SEV: claim_amount (Mode: SEV)
## ====================================================================
## Angka terlalu besar. Menggunakan Skala Jutaan untuk mencegah error matematis R.
## Sedang melatih kandidat distribusi keparahan...
## Distribusi AIC
## Lognormal Lognormal 109311.9
## Weibull Weibull 116194.9
## Gamma Gamma 124021.8
## Exponential Exponential 186929.1
##
## PEMENANG SEVERITY: ** Lognormal **
##
## ====================================================================
## MENGANALISIS EF_SEV: claim_amount (Mode: SEV)
## ====================================================================
## Angka terlalu besar. Menggunakan Skala Jutaan untuk mencegah error matematis R.
## Sedang melatih kandidat distribusi keparahan...
## Distribusi AIC
## Lognormal Lognormal -28055.67
## Gamma Gamma -26678.95
## Weibull Weibull -24940.90
## Exponential Exponential -23288.24
##
## PEMENANG SEVERITY: ** Lognormal **
##
## ====================================================================
## MENGANALISIS WC_SEV: claim_amount (Mode: SEV)
## ====================================================================
## Angka terlalu besar. Menggunakan Skala Ribuan untuk mencegah error matematis R.
## Sedang melatih kandidat distribusi keparahan...
## Distribusi AIC
## Lognormal Lognormal 9941.718
## Weibull Weibull 10689.964
## Gamma Gamma 11080.987
## Exponential Exponential 11700.724
##
## PEMENANG SEVERITY: ** Lognormal **
dev.off() # Wajib dijalankan untuk menyimpan PDF-nya
## png
## 2
cat("\n LAPORAN SELESAI DICETAK!\n")
##
## LAPORAN SELESAI DICETAK!
3. Modelling GLM
cat("\n====================================================================\n")
##
## ====================================================================
cat("1. MEMBANGUN MODEL FREKUENSI (ZERO-INFLATED NEGATIVE BINOMIAL)\n")
## 1. MEMBANGUN MODEL FREKUENSI (ZERO-INFLATED NEGATIVE BINOMIAL)
cat("====================================================================\n")
## ====================================================================
# Rumus ZINB di R terbagi jadi 2 bagian yang dipisah dengan tanda "|" (pipa):
# Bagian Kiri : Prediktor untuk menghitung jumlah klaim (Count Model)
# Bagian Kanan : Prediktor untuk menebak apakah dia "Pasti Nol" atau tidak (Zero Model)
# Jika bagian kanan diisi "1", artinya probabilitas Nol-nya dianggap sama untuk semua orang.
# TIPS AKTUARIS: Jangan lupa tambahkan offset(log(exposure)) jika kamu punya
# kolom durasi polis (misal: polis cuma aktif 0.5 tahun).
# Jika TIDAK ADA kolom exposure, hapus bagian '+ offset(log(exposure))'
model_bi_freq <- zeroinfl(
claim_count ~ station_id + solar_system + production_load + energy_backup_score + supply_chain_index + avg_crew_exp + maintenance_freq + safety_compliance | 1,
data = bi_f,
dist = "negbin",
offset = log(exposure) # <--- Buka tanda pagar ) jika punya kolom exposure
)
# Tampilkan rapor hasil belajar mesin
summary(model_bi_freq)
##
## Call:
## zeroinfl(formula = claim_count ~ station_id + solar_system + production_load +
## energy_backup_score + supply_chain_index + avg_crew_exp + maintenance_freq +
## safety_compliance | 1, data = bi_f, offset = log(exposure), dist = "negbin")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.3033 -0.2635 -0.2411 -0.2039 14.9273
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.090e-01 1.101e-01 0.990 0.322115
## station_idA2 -5.641e-02 1.023e-01 -0.551 0.581327
## station_idA3 7.950e-02 9.913e-02 0.802 0.422590
## station_idA4 -7.402e-02 1.024e-01 -0.723 0.469649
## station_idA5 -4.031e-02 1.010e-01 -0.399 0.689762
## station_idA6 -4.497e-02 1.004e-01 -0.448 0.654221
## station_idA7 -1.129e-01 1.022e-01 -1.105 0.269012
## station_idA8 1.112e-01 9.936e-02 1.119 0.263211
## station_idA9 4.507e-02 9.925e-02 0.454 0.649755
## station_idB1 1.478e-01 9.813e-02 1.506 0.131945
## station_idB2 1.814e-01 9.728e-02 1.864 0.062288 .
## station_idB3 7.253e-03 1.001e-01 0.072 0.942210
## station_idB4 7.248e-02 9.969e-02 0.727 0.467196
## station_idB5 8.924e-02 1.001e-01 0.891 0.372791
## station_idB6 2.081e-02 9.999e-02 0.208 0.835112
## station_idB7 4.188e-02 1.000e-01 0.419 0.675418
## station_idB8 6.652e-03 1.007e-01 0.066 0.947342
## station_idB9 -6.519e-02 1.017e-01 -0.641 0.521591
## station_idG1 4.559e-02 9.863e-02 0.462 0.643943
## station_idG2 1.418e-01 9.958e-02 1.424 0.154489
## station_idG3 4.813e-02 1.001e-01 0.481 0.630813
## station_idG4 6.984e-02 9.935e-02 0.703 0.482106
## station_idG5 4.433e-02 1.010e-01 0.439 0.660655
## station_idG6 1.104e-01 9.836e-02 1.122 0.261872
## station_idG7 -7.696e-05 1.023e-01 -0.001 0.999400
## station_idG8 7.035e-02 1.005e-01 0.700 0.483808
## station_idG9 5.636e-02 1.003e-01 0.562 0.574200
## station_idUnknown -1.278e+01 1.714e+02 -0.075 0.940585
## solar_systemHelionis Cluster -5.577e-02 3.704e-02 -1.506 0.132148
## solar_systemUnknown -1.289e+01 1.568e+02 -0.082 0.934478
## solar_systemZeta -1.299e-02 2.996e-02 -0.434 0.664494
## production_load -3.588e-02 4.692e-02 -0.765 0.444507
## energy_backup_score 3.009e-02 9.479e-03 3.174 0.001502 **
## supply_chain_index 9.102e-02 4.653e-02 1.956 0.050442 .
## avg_crew_exp -2.214e-03 1.547e-03 -1.431 0.152550
## maintenance_freq -1.699e-02 6.679e-03 -2.543 0.010978 *
## safety_compliance 1.884e-04 9.485e-03 0.020 0.984149
## Log(theta) 6.264e-01 1.831e-01 3.420 0.000626 ***
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.49076 0.06438 23.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 1.8708
## Number of iterations in BFGS optimization: 56
## Log-likelihood: -3.188e+04 on 39 Df
cat("\n====================================================================\n")
##
## ====================================================================
cat("2. MEMBANGUN MODEL KEPARAHAN (LOGNORMAL)\n")
## 2. MEMBANGUN MODEL KEPARAHAN (LOGNORMAL)
cat("====================================================================\n")
## ====================================================================
# Karena pemenangnya Lognormal
# Targetnya (claim_amount) kita log-kan, lalu kita pakai family = gaussian (Normal)
model_bi_sev <- glm(
log(claim_amount) ~ station_id + solar_system + production_load + safety_compliance + energy_backup_score,
data = bi_s,
family = gaussian(link = "identity")
)
# Tampilkan rapor hasil belajar mesin
summary(model_bi_sev)
##
## Call:
## glm(formula = log(claim_amount) ~ station_id + solar_system +
## production_load + safety_compliance + energy_backup_score,
## family = gaussian(link = "identity"), data = bi_s)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.5571167 0.0780012 186.627 <2e-16 ***
## station_idA2 -0.0498769 0.0919181 -0.543 0.587
## station_idA3 0.0288717 0.0881457 0.328 0.743
## station_idA4 0.1016590 0.0921364 1.103 0.270
## station_idA5 -0.0240292 0.0908337 -0.265 0.791
## station_idA6 -0.0199222 0.0902398 -0.221 0.825
## station_idA7 0.0847100 0.0923482 0.917 0.359
## station_idA8 0.1115526 0.0881311 1.266 0.206
## station_idA9 0.0291830 0.0886845 0.329 0.742
## station_idB1 0.0061368 0.0869126 0.071 0.944
## station_idB2 0.0069892 0.0860043 0.081 0.935
## station_idB3 0.0161371 0.0894507 0.180 0.857
## station_idB4 -0.0431139 0.0888252 -0.485 0.627
## station_idB5 0.0274912 0.0888454 0.309 0.757
## station_idB6 -0.0225411 0.0895225 -0.252 0.801
## station_idB7 0.1116898 0.0891285 1.253 0.210
## station_idB8 -0.0082140 0.0899678 -0.091 0.927
## station_idB9 -0.0957387 0.0915047 -1.046 0.295
## station_idG1 0.0867753 0.0881005 0.985 0.325
## station_idG2 0.0205741 0.0880563 0.234 0.815
## station_idG3 0.0112271 0.0893266 0.126 0.900
## station_idG4 0.0397896 0.0882837 0.451 0.652
## station_idG5 -0.0552048 0.0899935 -0.613 0.540
## station_idG6 -0.0171968 0.0872327 -0.197 0.844
## station_idG7 -0.0063079 0.0914469 -0.069 0.945
## station_idG8 -0.0521652 0.0893059 -0.584 0.559
## station_idG9 0.1062914 0.0893280 1.190 0.234
## solar_systemHelionis Cluster 0.0137102 0.0331950 0.413 0.680
## solar_systemZeta 0.0041606 0.0266889 0.156 0.876
## production_load 0.0007926 0.0418773 0.019 0.985
## safety_compliance 0.0005911 0.0084411 0.070 0.944
## energy_backup_score -0.0011281 0.0084261 -0.134 0.894
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.433664)
##
## Null deviance: 14368 on 10032 degrees of freedom
## Residual deviance: 14338 on 10001 degrees of freedom
## AIC: 32121
##
## Number of Fisher Scoring iterations: 2
cat("\n====================================================================\n")
##
## ====================================================================
cat("ITERASI 2: MODEL FREKUENSI BI (VERSI RAMPING)\n")
## ITERASI 2: MODEL FREKUENSI BI (VERSI RAMPING)
cat("====================================================================\n")
## ====================================================================
# Kita HANYA memasukkan 3 variabel pemenang dari hasil sebelumnya
model_bi_freq_v2 <- zeroinfl(
claim_count ~ energy_backup_score + supply_chain_index + maintenance_freq | 1,
data = bi_f,
dist = "negbin"
)
# Lihat laporannya, apakah bintangnya (***) semakin kuat?
summary(model_bi_freq_v2)
##
## Call:
## zeroinfl(formula = claim_count ~ energy_backup_score + supply_chain_index +
## maintenance_freq | 1, data = bi_f, dist = "negbin")
##
## Pearson residuals:
## Min 1Q Median 3Q Max
## -0.2492 -0.2424 -0.2398 -0.2370 10.2632
##
## Count model coefficients (negbin with log link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.868784 0.094940 -9.151 <2e-16 ***
## energy_backup_score 0.024476 0.009199 2.661 0.0078 **
## supply_chain_index 0.078015 0.045232 1.725 0.0846 .
## maintenance_freq -0.014352 0.006497 -2.209 0.0272 *
## Log(theta) 0.149119 0.203154 0.734 0.4629
##
## Zero-inflation model coefficients (binomial with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.2492 0.1085 11.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Theta = 1.1608
## Number of iterations in BFGS optimization: 36
## Log-likelihood: -3.156e+04 on 6 Df
# ==========================================================
# UJI KESAKTIAN: Bandingkan Model Lama vs Model Baru
# ==========================================================
cat("\n--- PERBANDINGAN SKOR AIC (Pilih yang paling kecil) ---\n")
##
## --- PERBANDINGAN SKOR AIC (Pilih yang paling kecil) ---
cat("AIC Model Freq LAMA (Banyak Variabel Sampah) :", AIC(model_bi_freq), "\n")
## AIC Model Freq LAMA (Banyak Variabel Sampah) : 63829.27
cat("AIC Model Freq BARU (Hanya Variabel Penting) :", AIC(model_bi_freq_v2), "\n")
## AIC Model Freq BARU (Hanya Variabel Penting) : 63136.13
cat("\n====================================================================\n")
##
## ====================================================================
cat("ITERASI 2: MODEL KEPARAHAN BI (VERSI INTERCEPT-ONLY)\n")
## ITERASI 2: MODEL KEPARAHAN BI (VERSI INTERCEPT-ONLY)
cat("====================================================================\n")
## ====================================================================
# Karena SEMUA variabel sebelumnya tidak ada yang dapat bintang,
# kita akan buat model "Harga Dasar" (Intercept-Only) pakai angka '1'.
# Ini artinya: "Tebak harga klaim hanya dari rata-rata global saja".
model_bi_sev_v2 <- glm(
log(claim_amount) ~ 1,
data = bi_s,
family = gaussian(link = "identity")
)
summary(model_bi_sev_v2)
##
## Call:
## glm(formula = log(claim_amount) ~ 1, family = gaussian(link = "identity"),
## data = bi_s)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.57531 0.01195 1220 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 1.432202)
##
## Null deviance: 14368 on 10032 degrees of freedom
## Residual deviance: 14368 on 10032 degrees of freedom
## AIC: 32079
##
## Number of Fisher Scoring iterations: 2
cat("\n--- PERBANDINGAN SKOR AIC SEVERITY ---\n")
##
## --- PERBANDINGAN SKOR AIC SEVERITY ---
cat("AIC Model Sev LAMA (Banyak Variabel Sampah) :", AIC(model_bi_sev), "\n")
## AIC Model Sev LAMA (Banyak Variabel Sampah) : 32120.59
cat("AIC Model Sev BARU (Hanya Harga Dasar) :", AIC(model_bi_sev_v2), "\n")
## AIC Model Sev BARU (Hanya Harga Dasar) : 32079.4
4. Pricing
Pure Premium
cat("\n====================================================================\n")
##
## ====================================================================
cat("TAHAP 3: PRICING (MENGHITUNG PURE PREMIUM BI)\n")
## TAHAP 3: PRICING (MENGHITUNG PURE PREMIUM BI)
cat("====================================================================\n")
## ====================================================================
# -----------------------------------------------------------------------------
# 1. MENGHITUNG EXPECTED FREQUENCY (Berapa kali diprediksi klaim?)
# -----------------------------------------------------------------------------
# Fungsi predict() dengan type = "response" sangat ajaib. Dia akan otomatis
# mengalikan Probabilitas Nol (dari Zero-model) dengan Jumlah Klaim (dari Count-model).
bi_f$pred_freq <- predict(model_bi_freq_v2, newdata = bi_f, type = "response")
# -----------------------------------------------------------------------------
# 2. MENGHITUNG EXPECTED SEVERITY (Berapa kerugian rata-ratanya?)
# -----------------------------------------------------------------------------
# Ingat! Model keparahan kita adalah Lognormal Intercept-Only.
# Artinya, mesin menyimpulkan BIAYA KERUSAKAN ITU SAMA RATA untuk semua stasiun.
# Rumus aktuaria untuk nilai ekspektasi (rata-rata sesungguhnya) Lognormal adalah:
# E[Y] = exp(mu + (sigma^2) / 2)
mu <- coef(model_bi_sev_v2)[1] # Intercept (14.57531)
sigma_kuadrat <- summary(model_bi_sev_v2)$dispersion # Dispersion (1.432202)
rata_rata_keparahan <- exp(mu + (sigma_kuadrat / 2))
cat("Besaran Flat Severity (Biaya Rata-Rata per Klaim BI) : Rp", formatC(rata_rata_keparahan, format="f", big.mark=",", digits=2), "\n")
## Besaran Flat Severity (Biaya Rata-Rata per Klaim BI) : Rp 4,374,953.52
# Masukkan harga flat ini ke seluruh baris nasabah
bi_f$pred_sev <- rata_rata_keparahan
# -----------------------------------------------------------------------------
# 3. MENGHITUNG PURE PREMIUM (HARGA DASAR ASURANSI)
# -----------------------------------------------------------------------------
bi_f$pure_premium <- bi_f$pred_freq * bi_f$pred_sev
# Tampilkan 10 nasabah pertama dan harga premi mereka!
cat("\n--- BUKU TARIF (RATEBOOK) SEMENTARA UNTUK 10 STASIUN PERTAMA ---\n")
##
## --- BUKU TARIF (RATEBOOK) SEMENTARA UNTUK 10 STASIUN PERTAMA ---
tampilan_pricing <- bi_f[, c("energy_backup_score", "supply_chain_index", "maintenance_freq", "pred_freq", "pure_premium")]
print(head(tampilan_pricing, 10))
## # A tibble: 10 × 5
## energy_backup_score supply_chain_index maintenance_freq pred_freq
## <dbl> <dbl> <dbl> <dbl>
## 1 2 0.078 0 0.0988
## 2 2 0.962 5 0.0985
## 3 4 0.746 2 0.106
## 4 2 0.717 6 0.0952
## 5 2 0.881 0 0.105
## 6 2 0.363 0 0.101
## 7 3 0.312 5 0.0959
## 8 1 0.723 3 0.0971
## 9 1 0.826 4 0.0965
## 10 4 0.748 0 0.109
## # ℹ 1 more variable: pure_premium <dbl>
cat("\nPRICING SELESAI! Kolom 'pure_premium' sudah ditambahkan ke datamu.\n")
##
## PRICING SELESAI! Kolom 'pure_premium' sudah ditambahkan ke datamu.
Deducible dan Limit
# ==============================================================================
# TAHAP 4: SIMULASI DESAIN PRODUK (DEDUCTIBLE & LIMIT) - REVISI
# ==============================================================================
# Pastikan library actuar sudah di-load
library(actuar)
## Warning: package 'actuar' was built under R version 4.5.2
##
## Attaching package: 'actuar'
## The following objects are masked from 'package:stats':
##
## sd, var
## The following object is masked from 'package:grDevices':
##
## cm
cat("\n====================================================================\n")
##
## ====================================================================
cat("SIMULASI DESAIN PRODUK: BUSINESS INTERRUPTION (BI)\n")
## SIMULASI DESAIN PRODUK: BUSINESS INTERRUPTION (BI)
cat("====================================================================\n")
## ====================================================================
# 1. Masukkan Parameter Lognormal Murni dari Hasil GLM-mu
mu <- 14.57531
sigma_kuadrat <- 1.432202
sigma <- sqrt(sigma_kuadrat)
cat("Parameter Lognormal Terkunci -> Mu:", mu, "| Sigma:", sigma, "\n\n")
## Parameter Lognormal Terkunci -> Mu: 14.57531 | Sigma: 1.196746
# 2. Skenario Produk Asuransi
skenario <- data.frame(
Nama_Produk = c(
"1. Polos (Tanpa Potongan)",
"2. Ada Deductible (Risiko Sendiri Rp 1 Juta)",
"3. Ada Limit (Maksimal Klaim Rp 10 Juta)",
"4. Kombinasi (Ded 1 Juta, Limit 10 Juta)"
),
Deductible = c(0, 1000000, 0, 1000000),
Limit = c(Inf, Inf, 10000000, 10000000) # Inf = Infinity (Tak Terhingga)
)
# 3. Hitung Ulang Keparahan Menggunakan Fungsi levlnorm() yang BENAR
skenario$LEV_Limit <- levlnorm(skenario$Limit, meanlog = mu, sdlog = sigma)
skenario$LEV_Deduct <- levlnorm(skenario$Deductible, meanlog = mu, sdlog = sigma)
# Rata-rata UANG YANG KELUAR DARI KAS ASURANSI
skenario$Net_Severity_Asuransi <- skenario$LEV_Limit - skenario$LEV_Deduct
# 4. Tampilkan Hasil Desain Produk
tampilan_produk <- skenario[, c("Nama_Produk", "Deductible", "Limit", "Net_Severity_Asuransi")]
tampilan_produk$Deductible <- formatC(tampilan_produk$Deductible, format="f", big.mark=",", digits=0)
tampilan_produk$Limit <- formatC(tampilan_produk$Limit, format="f", big.mark=",", digits=0)
tampilan_produk$Net_Severity_Asuransi <- formatC(tampilan_produk$Net_Severity_Asuransi, format="f", big.mark=",", digits=2)
print(tampilan_produk)
## Nama_Produk Deductible Limit
## 1 1. Polos (Tanpa Potongan) 0 Inf
## 2 2. Ada Deductible (Risiko Sendiri Rp 1 Juta) 1,000,000 Inf
## 3 3. Ada Limit (Maksimal Klaim Rp 10 Juta) 0 10,000,000
## 4 4. Kombinasi (Ded 1 Juta, Limit 10 Juta) 1,000,000 10,000,000
## Net_Severity_Asuransi
## 1 4,374,973.41
## 2 3,491,149.62
## 3 3,335,264.89
## 4 2,451,441.09
cat("\n====================================================================\n")
##
## ====================================================================
cat("BAGAIMANA PENGARUHNYA KE PREMI NASABAH?\n")
## BAGAIMANA PENGARUHNYA KE PREMI NASABAH?
cat("====================================================================\n")
## ====================================================================
# Asumsi Stasiun A (Freq = 0.0952)
frekuensi_nasabah_A <- 0.0952
cat("Jika Stasiun A (Freq = 0.0952) membeli produk ini, maka Harga Pure Premium-nya:\n")
## Jika Stasiun A (Freq = 0.0952) membeli produk ini, maka Harga Pure Premium-nya:
for(i in 1:nrow(skenario)){
premi <- frekuensi_nasabah_A * skenario$Net_Severity_Asuransi[i]
cat(skenario$Nama_Produk[i], " -> Rp", formatC(premi, format="f", big.mark=",", digits=2), "\n")
}
## 1. Polos (Tanpa Potongan) -> Rp 416,497.47
## 2. Ada Deductible (Risiko Sendiri Rp 1 Juta) -> Rp 332,357.44
## 3. Ada Limit (Maksimal Klaim Rp 10 Juta) -> Rp 317,517.22
## 4. Kombinasi (Ded 1 Juta, Limit 10 Juta) -> Rp 233,377.19