############################################################
## 1. BACA DATA & PERBAIKI NAMA KOLOM
############################################################
# CSV dari Excel Indonesia biasanya pakai pemisah ';'
dat <- read.csv2(
"D:/leasing.csv",
header = TRUE,
check.names = FALSE,
stringsAsFactors = FALSE
)
# Cek nama kolom awal
names(dat)
## [1] "No" "Kategori" "Y" "Xl" "X2" "X3"
## [7] "X4" "XS" "X6" "X7" "X8" "X9"
## [13] "X10" "X11" "X12"
# Dari Excel: "No" "Kategori" "Y" "Xl" "X2" "X3" "X4" "XS" "X6" "X7" "X8" "X9" "X10" "X11" "X12"
# Ganti Xl -> X1, XS -> X5
names(dat)[names(dat) == "Xl"] <- "X1"
names(dat)[names(dat) == "XS"] <- "X5"
# Pastikan Y numerik, buat Yg = Y - 1 (untuk Geometric dengan support 0,1,2,...)
dat$Y <- as.numeric(dat$Y)
dat$Yg <- dat$Y - 1
# Pastikan variabel numerik benar-benar numeric
dat$No <- as.numeric(dat$No)
dat$X1 <- as.numeric(dat$X1) # usia
dat$X8 <- as.numeric(dat$X8) # lama menetap
dat$X9 <- as.numeric(dat$X9) # lama bekerja
############################################################
## 2. SET VARIABEL KATEGORI (FAKTOR)
############################################################
# Kategori (kalau mau dipakai suatu saat)
dat$Kategori <- factor(dat$Kategori)
# X2: Jenis kelamin (0=Pria, 1=Wanita)
dat$X2 <- factor(
dat$X2,
levels = c(0, 1),
labels = c("Pria", "Wanita")
)
# X3: Status perkawinan (0,1,2)
dat$X3 <- factor(
dat$X3,
levels = c(0, 1, 2),
labels = c("BelumMenikah", "Menikah", "Cerai")
)
# X4: Down Payment / DP
dat$X4 <- factor(
dat$X4,
levels = c(0, 1, 2, 3),
labels = c("<=20", "20.1-30", "30.1-50", ">50")
)
# X5: Tenor
dat$X5 <- factor(
dat$X5,
levels = c(0, 1, 2),
labels = c("0-24", "25-36", "37-48")
)
# X6: Pendidikan
dat$X6 <- factor(
dat$X6,
levels = c(0, 1, 2, 3),
labels = c("SD", "SLTP", "SLTA", "PT")
)
# X7: Status rumah
dat$X7 <- factor(
dat$X7,
levels = c(0, 1),
labels = c("Keluarga", "Sendiri")
)
# X10: Angsuran bulanan
dat$X10 <- factor(
dat$X10,
levels = c(0, 1, 2, 3),
labels = c("<=2jt", "2-3jt", "3-4jt", ">4jt")
)
# X11: Premi
dat$X11 <- factor(
dat$X11,
levels = c(0, 1, 2, 3),
labels = c("<=2jt", "2-3jt", "3-4jt", ">4jt")
)
# X12: Tipe pekerjaan (10 kategori)
dat$X12 <- factor(
dat$X12,
levels = 0:9,
labels = c(
"Guru", "KaryawanSwasta", "PNS",
"Pengusaha", "Petani", "Pensiunan",
"MahasiswaPelajar", "IRT", "PegawaiBUMN", "Lainnya"
)
)
# Cek struktur akhir (respon + X1–X12)
str(dat[, c("Yg", paste0("X", 1:12))])
## 'data.frame': 336 obs. of 13 variables:
## $ Yg : num 1 2 2 2 2 2 3 3 3 3 ...
## $ X1 : num 40 45 27 47 40 32 45 25 38 39 ...
## $ X2 : Factor w/ 2 levels "Pria","Wanita": 1 1 1 1 1 1 2 1 1 1 ...
## $ X3 : Factor w/ 3 levels "BelumMenikah",..: 2 2 2 2 2 2 2 1 2 2 ...
## $ X4 : Factor w/ 4 levels "<=20","20.1-30",..: 2 2 2 2 2 2 2 2 3 2 ...
## $ X5 : Factor w/ 3 levels "0-24","25-36",..: 3 1 3 3 2 3 1 3 2 2 ...
## $ X6 : Factor w/ 4 levels "SD","SLTP","SLTA",..: 2 4 1 3 2 3 3 3 4 4 ...
## $ X7 : Factor w/ 2 levels "Keluarga","Sendiri": 1 1 2 2 2 2 2 1 2 2 ...
## $ X8 : num 10 5 3 45 5 32 19 25 10 8 ...
## $ X9 : num 1 1 0 3 0 6 0 0 1 5 ...
## $ X10: Factor w/ 4 levels "<=2jt","2-3jt",..: 4 1 4 3 3 4 4 4 2 4 ...
## $ X11: Factor w/ 4 levels "<=2jt","2-3jt",..: 4 1 4 4 4 4 4 4 4 4 ...
## $ X12: Factor w/ 10 levels "Guru","KaryawanSwasta",..: 4 4 4 4 4 4 4 2 4 2 ...
############################################################
## 3. BERSIHKAN DATA (HAPUS NA SEBELUM PEMODELAN)
############################################################
dat <- dat[complete.cases(dat[, c("Yg", paste0("X", 1:12))]), ]
############################################################
## 4. UJI OVERDISPERSI (POISSON) – VALIDASI PAKAI GEOMETRIC
############################################################
mod_pois <- glm(
Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 +
X8 + X9 + X10 + X11 + X12,
family = poisson(link = "log"),
data = dat
)
dispersiontest(mod_pois)
##
## Overdispersion test
##
## data: mod_pois
## z = 11, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion
## 4.877605
# Jika p-value sangat kecil & dispersion > 1 → overdispersi → Geometric lebih cocok
############################################################
## 5. REGRESI GEOMETRIK (FULL MODEL) – LINK BENAR
############################################################
mod_full <- vglm(
Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 +
X8 + X9 + X10 + X11 + X12,
family = geometric(link = "logitlink"), # BUKAN "log"
data = dat
)
summary(mod_full)
##
## Call:
## vglm(formula = Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 +
## X10 + X11 + X12, family = geometric(link = "logitlink"),
## data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7110978 0.8497621 -2.014 0.044 *
## X1 0.0018654 0.0093183 0.200 0.841
## X2Wanita -0.0443132 0.1972398 -0.225 0.822
## X3Menikah -0.2422605 0.2221942 -1.090 0.276
## X3Cerai -0.1351978 0.3528233 -0.383 0.702
## X420.1-30 -0.5337759 0.4561845 -1.170 0.242
## X430.1-50 -0.6064751 0.4756521 -1.275 0.202
## X4>50 -0.4631805 0.6646531 -0.697 0.486
## X525-36 -0.2577941 0.2300541 -1.121 0.262
## X537-48 -0.3308859 0.2713462 -1.219 0.223
## X6SLTP -0.0123001 0.2063055 -0.060 0.952
## X6SLTA 0.1020464 0.1652110 0.618 0.537
## X6PT 0.0766652 0.1967650 0.390 0.697
## X7Sendiri -0.0799826 0.1394525 -0.574 0.566
## X8 -0.0066830 0.0059591 -1.121 0.262
## X9 0.0005706 0.0112714 0.051 0.960
## X102-3jt 0.1516316 0.1754449 0.864 0.387
## X103-4jt 0.1767444 0.2325613 0.760 0.447
## X10>4jt -0.0112781 0.2421106 -0.047 0.963
## X112-3jt 0.1805234 0.1992094 0.906 0.365
## X113-4jt -0.0363407 0.2118582 -0.172 0.864
## X11>4jt 0.0874703 0.2335412 0.375 0.708
## X12KaryawanSwasta -0.2001489 0.5801275 -0.345 0.730
## X12PNS -0.3237058 0.7706743 -0.420 0.674
## X12Pengusaha -0.0277984 0.5747180 -0.048 0.961
## X12Petani 0.4200062 0.9630544 0.436 0.663
## X12Pensiunan 0.6984904 1.2543918 0.557 0.578
## X12MahasiswaPelajar -0.6045491 0.6831834 -0.885 0.376
## X12IRT -0.1902249 0.6511829 -0.292 0.770
## X12PegawaiBUMN -0.9347347 1.1884344 -0.787 0.432
## X12Lainnya 0.1003382 0.7846712 0.128 0.898
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Name of linear predictor: logitlink(prob)
##
## Log-likelihood: -1251.533 on 303 degrees of freedom
##
## Number of Fisher scoring iterations: 5
##
## No Hauck-Donner effect found in any of the estimates
############################################################
## 6. STEPWISE SELEKSI MODEL (VGAM: step4vglm)
############################################################
mod_step <- step4vglm(
mod_full,
trace = TRUE # tampilkan proses penghapusan/penambahan
)
## Start: AIC=2565.07
## Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 +
## X12
##
## Df logLik AIC
## - X12 9 -1254.0 2552.1
## - X6 3 -1251.8 2559.7
## - X11 3 -1252.0 2560.1
## - X4 3 -1252.3 2560.5
## - X10 3 -1252.3 2560.7
## - X3 2 -1252.2 2562.3
## - X5 2 -1252.2 2562.5
## - X9 1 -1251.5 2563.1
## - X1 1 -1251.5 2563.1
## - X2 1 -1251.6 2563.1
## - X7 1 -1251.7 2563.4
## - X8 1 -1252.1 2564.3
## <none> -1251.5 2565.1
##
## Step: AIC=2552.08
## Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11
##
## Df logLik AIC
## - X6 3 -1254.4 2546.8
## - X11 3 -1254.4 2546.9
## - X4 3 -1254.8 2547.5
## - X10 3 -1255.0 2548.1
## - X3 2 -1254.5 2549.0
## - X5 2 -1255.0 2549.9
## - X2 1 -1254.0 2550.1
## - X7 1 -1254.2 2550.4
## - X1 1 -1254.2 2550.4
## - X9 1 -1254.2 2550.4
## - X8 1 -1254.5 2551.1
## <none> -1254.0 2552.1
##
## Step: AIC=2546.78
## Yg ~ X1 + X2 + X3 + X4 + X5 + X7 + X8 + X9 + X10 + X11
##
## Df logLik AIC
## - X11 3 -1254.8 2541.5
## - X4 3 -1255.1 2542.2
## - X10 3 -1255.4 2542.8
## - X3 2 -1254.8 2543.6
## - X5 2 -1255.4 2544.8
## - X2 1 -1254.4 2544.8
## - X1 1 -1254.5 2545.1
## - X7 1 -1254.5 2545.1
## - X9 1 -1254.6 2545.1
## - X8 1 -1254.9 2545.8
## <none> -1254.4 2546.8
##
## Step: AIC=2541.52
## Yg ~ X1 + X2 + X3 + X4 + X5 + X7 + X8 + X9 + X10
##
## Df logLik AIC
## - X4 3 -1255.5 2536.9
## - X10 3 -1255.8 2537.5
## - X3 2 -1255.2 2538.4
## - X2 1 -1254.8 2539.5
## - X7 1 -1254.9 2539.7
## - X1 1 -1254.9 2539.8
## - X5 2 -1255.9 2539.8
## - X9 1 -1255.0 2540.1
## - X8 1 -1255.2 2540.5
## <none> -1254.8 2541.5
##
## Step: AIC=2536.92
## Yg ~ X1 + X2 + X3 + X5 + X7 + X8 + X9 + X10
##
## Df logLik AIC
## - X10 3 -1256.7 2533.3
## - X3 2 -1256.0 2534.0
## - X2 1 -1255.5 2534.9
## - X7 1 -1255.5 2535.1
## - X1 1 -1255.6 2535.3
## - X5 2 -1256.7 2535.3
## - X9 1 -1255.8 2535.6
## - X8 1 -1256.0 2536.1
## <none> -1255.5 2536.9
##
## Step: AIC=2533.31
## Yg ~ X1 + X2 + X3 + X5 + X7 + X8 + X9
##
## Df logLik AIC
## - X3 2 -1257.2 2530.3
## - X2 1 -1256.7 2531.3
## - X7 1 -1256.7 2531.4
## - X1 1 -1256.8 2531.6
## - X5 2 -1258.0 2531.9
## - X9 1 -1257.1 2532.2
## - X8 1 -1257.1 2532.2
## <none> -1256.7 2533.3
##
## Step: AIC=2530.33
## Yg ~ X1 + X2 + X5 + X7 + X8 + X9
##
## Df logLik AIC
## - X2 1 -1257.2 2528.3
## - X1 1 -1257.2 2528.4
## - X7 1 -1257.2 2528.5
## - X8 1 -1257.5 2528.9
## - X9 1 -1257.5 2529.0
## - X5 2 -1258.6 2529.1
## <none> -1257.2 2530.3
##
## Step: AIC=2528.34
## Yg ~ X1 + X5 + X7 + X8 + X9
##
## Df logLik AIC
## - X1 1 -1257.2 2526.4
## - X7 1 -1257.2 2526.5
## - X8 1 -1257.5 2526.9
## - X9 1 -1257.5 2527.0
## - X5 2 -1258.6 2527.1
## <none> -1257.2 2528.3
##
## Step: AIC=2526.41
## Yg ~ X5 + X7 + X8 + X9
##
## Df logLik AIC
## - X7 1 -1257.2 2524.5
## - X8 1 -1257.5 2524.9
## - X9 1 -1257.5 2525.0
## - X5 2 -1258.6 2525.2
## <none> -1257.2 2526.4
##
## Step: AIC=2524.48
## Yg ~ X5 + X8 + X9
##
## Df logLik AIC
## - X8 1 -1257.5 2523.0
## - X9 1 -1257.6 2523.2
## - X5 2 -1258.6 2523.3
## <none> -1257.2 2524.5
##
## Step: AIC=2522.96
## Yg ~ X5 + X9
##
## Df logLik AIC
## - X9 1 -1257.8 2521.7
## - X5 2 -1258.9 2521.8
## <none> -1257.5 2523.0
##
## Step: AIC=2521.69
## Yg ~ X5
##
## Df logLik AIC
## - X5 2 -1259.2 2520.3
## <none> -1257.8 2521.7
##
## Step: AIC=2520.35
## Yg ~ 1
summary(mod_step)
##
## Call:
## vglm(formula = Yg ~ 1, family = geometric(link = "logitlink"),
## data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.73833 0.05646 -48.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Name of linear predictor: logitlink(prob)
##
## Log-likelihood: -1259.177 on 333 degrees of freedom
##
## Number of Fisher scoring iterations: 2
##
## No Hauck-Donner effect found in any of the estimates
############################################################
## 7. ODDS RATIO & INTERVAL KEPERCAYAAN
############################################################
coef_mod <- coef(mod_step)
OR <- exp(coef_mod)
# CI 95% dari VGAM, lalu di-exp untuk OR
CI <- exp(confintvglm(mod_step))
OR_table <- data.frame(
Variabel = names(OR),
OR = round(OR, 4),
CI_Lower = round(CI[, 1], 4),
CI_Upper = round(CI[, 2], 4)
)
print(OR_table)
## Variabel OR CI_Lower CI_Upper
## (Intercept) (Intercept) 0.0647 0.0579 0.0722
mod_full <- vglm(
Yg ~ X5,
family = geometric(link = "logitlink"), # BUKAN "log"
data = dat
)
summary(mod_full)
##
## Call:
## vglm(formula = Yg ~ X5, family = geometric(link = "logitlink"),
## data = dat)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4481 0.1903 -12.864 <2e-16 ***
## X525-36 -0.2862 0.2050 -1.396 0.1628
## X537-48 -0.3565 0.2121 -1.681 0.0928 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Name of linear predictor: logitlink(prob)
##
## Log-likelihood: -1257.847 on 331 degrees of freedom
##
## Number of Fisher scoring iterations: 4
##
## No Hauck-Donner effect found in any of the estimates