############################################################
## 1. BACA DATA & PERBAIKI NAMA KOLOM
############################################################

# CSV dari Excel Indonesia biasanya pakai pemisah ';'
dat <- read.csv2(
  "D:/leasing.csv",
  header = TRUE,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Cek nama kolom awal
names(dat)
##  [1] "No"       "Kategori" "Y"        "Xl"       "X2"       "X3"      
##  [7] "X4"       "XS"       "X6"       "X7"       "X8"       "X9"      
## [13] "X10"      "X11"      "X12"
# Dari Excel: "No" "Kategori" "Y" "Xl" "X2" "X3" "X4" "XS" "X6" "X7" "X8" "X9" "X10" "X11" "X12"

# Ganti Xl -> X1, XS -> X5
names(dat)[names(dat) == "Xl"] <- "X1"
names(dat)[names(dat) == "XS"] <- "X5"

# Pastikan Y numerik, buat Yg = Y - 1 (untuk Geometric dengan support 0,1,2,...)
dat$Y  <- as.numeric(dat$Y)
dat$Yg <- dat$Y - 1

# Pastikan variabel numerik benar-benar numeric
dat$No <- as.numeric(dat$No)
dat$X1 <- as.numeric(dat$X1)   # usia
dat$X8 <- as.numeric(dat$X8)   # lama menetap
dat$X9 <- as.numeric(dat$X9)   # lama bekerja
############################################################
## 2. SET VARIABEL KATEGORI (FAKTOR)
############################################################

# Kategori (kalau mau dipakai suatu saat)
dat$Kategori <- factor(dat$Kategori)

# X2: Jenis kelamin (0=Pria, 1=Wanita)
dat$X2 <- factor(
  dat$X2,
  levels = c(0, 1),
  labels = c("Pria", "Wanita")
)

# X3: Status perkawinan (0,1,2)
dat$X3 <- factor(
  dat$X3,
  levels = c(0, 1, 2),
  labels = c("BelumMenikah", "Menikah", "Cerai")
)

# X4: Down Payment / DP
dat$X4 <- factor(
  dat$X4,
  levels = c(0, 1, 2, 3),
  labels = c("<=20", "20.1-30", "30.1-50", ">50")
)

# X5: Tenor
dat$X5 <- factor(
  dat$X5,
  levels = c(0, 1, 2),
  labels = c("0-24", "25-36", "37-48")
)

# X6: Pendidikan
dat$X6 <- factor(
  dat$X6,
  levels = c(0, 1, 2, 3),
  labels = c("SD", "SLTP", "SLTA", "PT")
)

# X7: Status rumah
dat$X7 <- factor(
  dat$X7,
  levels = c(0, 1),
  labels = c("Keluarga", "Sendiri")
)

# X10: Angsuran bulanan
dat$X10 <- factor(
  dat$X10,
  levels = c(0, 1, 2, 3),
  labels = c("<=2jt", "2-3jt", "3-4jt", ">4jt")
)

# X11: Premi
dat$X11 <- factor(
  dat$X11,
  levels = c(0, 1, 2, 3),
  labels = c("<=2jt", "2-3jt", "3-4jt", ">4jt")
)

# X12: Tipe pekerjaan (10 kategori)
dat$X12 <- factor(
  dat$X12,
  levels = 0:9,
  labels = c(
    "Guru", "KaryawanSwasta", "PNS",
    "Pengusaha", "Petani", "Pensiunan",
    "MahasiswaPelajar", "IRT", "PegawaiBUMN", "Lainnya"
  )
)

# Cek struktur akhir (respon + X1–X12)
str(dat[, c("Yg", paste0("X", 1:12))])
## 'data.frame':    336 obs. of  13 variables:
##  $ Yg : num  1 2 2 2 2 2 3 3 3 3 ...
##  $ X1 : num  40 45 27 47 40 32 45 25 38 39 ...
##  $ X2 : Factor w/ 2 levels "Pria","Wanita": 1 1 1 1 1 1 2 1 1 1 ...
##  $ X3 : Factor w/ 3 levels "BelumMenikah",..: 2 2 2 2 2 2 2 1 2 2 ...
##  $ X4 : Factor w/ 4 levels "<=20","20.1-30",..: 2 2 2 2 2 2 2 2 3 2 ...
##  $ X5 : Factor w/ 3 levels "0-24","25-36",..: 3 1 3 3 2 3 1 3 2 2 ...
##  $ X6 : Factor w/ 4 levels "SD","SLTP","SLTA",..: 2 4 1 3 2 3 3 3 4 4 ...
##  $ X7 : Factor w/ 2 levels "Keluarga","Sendiri": 1 1 2 2 2 2 2 1 2 2 ...
##  $ X8 : num  10 5 3 45 5 32 19 25 10 8 ...
##  $ X9 : num  1 1 0 3 0 6 0 0 1 5 ...
##  $ X10: Factor w/ 4 levels "<=2jt","2-3jt",..: 4 1 4 3 3 4 4 4 2 4 ...
##  $ X11: Factor w/ 4 levels "<=2jt","2-3jt",..: 4 1 4 4 4 4 4 4 4 4 ...
##  $ X12: Factor w/ 10 levels "Guru","KaryawanSwasta",..: 4 4 4 4 4 4 4 2 4 2 ...
############################################################
## 3. BERSIHKAN DATA (HAPUS NA SEBELUM PEMODELAN)
############################################################

dat <- dat[complete.cases(dat[, c("Yg", paste0("X", 1:12))]), ]

############################################################
## 4. UJI OVERDISPERSI (POISSON) – VALIDASI PAKAI GEOMETRIC
############################################################

mod_pois <- glm(
  Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 +
        X8 + X9 + X10 + X11 + X12,
  family = poisson(link = "log"),
  data   = dat
)

dispersiontest(mod_pois)
## 
##  Overdispersion test
## 
## data:  mod_pois
## z = 11, p-value < 2.2e-16
## alternative hypothesis: true dispersion is greater than 1
## sample estimates:
## dispersion 
##   4.877605
# Jika p-value sangat kecil & dispersion > 1 → overdispersi → Geometric lebih cocok

############################################################
## 5. REGRESI GEOMETRIK (FULL MODEL) – LINK BENAR
############################################################

mod_full <- vglm(
  Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 +
        X8 + X9 + X10 + X11 + X12,
  family = geometric(link = "logitlink"),  # BUKAN "log"
  data   = dat
)

summary(mod_full)
## 
## Call:
## vglm(formula = Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + 
##     X10 + X11 + X12, family = geometric(link = "logitlink"), 
##     data = dat)
## 
## Coefficients: 
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -1.7110978  0.8497621  -2.014    0.044 *
## X1                   0.0018654  0.0093183   0.200    0.841  
## X2Wanita            -0.0443132  0.1972398  -0.225    0.822  
## X3Menikah           -0.2422605  0.2221942  -1.090    0.276  
## X3Cerai             -0.1351978  0.3528233  -0.383    0.702  
## X420.1-30           -0.5337759  0.4561845  -1.170    0.242  
## X430.1-50           -0.6064751  0.4756521  -1.275    0.202  
## X4>50               -0.4631805  0.6646531  -0.697    0.486  
## X525-36             -0.2577941  0.2300541  -1.121    0.262  
## X537-48             -0.3308859  0.2713462  -1.219    0.223  
## X6SLTP              -0.0123001  0.2063055  -0.060    0.952  
## X6SLTA               0.1020464  0.1652110   0.618    0.537  
## X6PT                 0.0766652  0.1967650   0.390    0.697  
## X7Sendiri           -0.0799826  0.1394525  -0.574    0.566  
## X8                  -0.0066830  0.0059591  -1.121    0.262  
## X9                   0.0005706  0.0112714   0.051    0.960  
## X102-3jt             0.1516316  0.1754449   0.864    0.387  
## X103-4jt             0.1767444  0.2325613   0.760    0.447  
## X10>4jt             -0.0112781  0.2421106  -0.047    0.963  
## X112-3jt             0.1805234  0.1992094   0.906    0.365  
## X113-4jt            -0.0363407  0.2118582  -0.172    0.864  
## X11>4jt              0.0874703  0.2335412   0.375    0.708  
## X12KaryawanSwasta   -0.2001489  0.5801275  -0.345    0.730  
## X12PNS              -0.3237058  0.7706743  -0.420    0.674  
## X12Pengusaha        -0.0277984  0.5747180  -0.048    0.961  
## X12Petani            0.4200062  0.9630544   0.436    0.663  
## X12Pensiunan         0.6984904  1.2543918   0.557    0.578  
## X12MahasiswaPelajar -0.6045491  0.6831834  -0.885    0.376  
## X12IRT              -0.1902249  0.6511829  -0.292    0.770  
## X12PegawaiBUMN      -0.9347347  1.1884344  -0.787    0.432  
## X12Lainnya           0.1003382  0.7846712   0.128    0.898  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Name of linear predictor: logitlink(prob) 
## 
## Log-likelihood: -1251.533 on 303 degrees of freedom
## 
## Number of Fisher scoring iterations: 5 
## 
## No Hauck-Donner effect found in any of the estimates
############################################################
## 6. STEPWISE SELEKSI MODEL (VGAM: step4vglm)
############################################################

mod_step <- step4vglm(
  mod_full,
  trace = TRUE   # tampilkan proses penghapusan/penambahan
)
## Start:  AIC=2565.07
## Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11 + 
##     X12
## 
##        Df  logLik    AIC
## - X12   9 -1254.0 2552.1
## - X6    3 -1251.8 2559.7
## - X11   3 -1252.0 2560.1
## - X4    3 -1252.3 2560.5
## - X10   3 -1252.3 2560.7
## - X3    2 -1252.2 2562.3
## - X5    2 -1252.2 2562.5
## - X9    1 -1251.5 2563.1
## - X1    1 -1251.5 2563.1
## - X2    1 -1251.6 2563.1
## - X7    1 -1251.7 2563.4
## - X8    1 -1252.1 2564.3
## <none>    -1251.5 2565.1
## 
## Step:  AIC=2552.08
## Yg ~ X1 + X2 + X3 + X4 + X5 + X6 + X7 + X8 + X9 + X10 + X11
## 
##        Df  logLik    AIC
## - X6    3 -1254.4 2546.8
## - X11   3 -1254.4 2546.9
## - X4    3 -1254.8 2547.5
## - X10   3 -1255.0 2548.1
## - X3    2 -1254.5 2549.0
## - X5    2 -1255.0 2549.9
## - X2    1 -1254.0 2550.1
## - X7    1 -1254.2 2550.4
## - X1    1 -1254.2 2550.4
## - X9    1 -1254.2 2550.4
## - X8    1 -1254.5 2551.1
## <none>    -1254.0 2552.1
## 
## Step:  AIC=2546.78
## Yg ~ X1 + X2 + X3 + X4 + X5 + X7 + X8 + X9 + X10 + X11
## 
##        Df  logLik    AIC
## - X11   3 -1254.8 2541.5
## - X4    3 -1255.1 2542.2
## - X10   3 -1255.4 2542.8
## - X3    2 -1254.8 2543.6
## - X5    2 -1255.4 2544.8
## - X2    1 -1254.4 2544.8
## - X1    1 -1254.5 2545.1
## - X7    1 -1254.5 2545.1
## - X9    1 -1254.6 2545.1
## - X8    1 -1254.9 2545.8
## <none>    -1254.4 2546.8
## 
## Step:  AIC=2541.52
## Yg ~ X1 + X2 + X3 + X4 + X5 + X7 + X8 + X9 + X10
## 
##        Df  logLik    AIC
## - X4    3 -1255.5 2536.9
## - X10   3 -1255.8 2537.5
## - X3    2 -1255.2 2538.4
## - X2    1 -1254.8 2539.5
## - X7    1 -1254.9 2539.7
## - X1    1 -1254.9 2539.8
## - X5    2 -1255.9 2539.8
## - X9    1 -1255.0 2540.1
## - X8    1 -1255.2 2540.5
## <none>    -1254.8 2541.5
## 
## Step:  AIC=2536.92
## Yg ~ X1 + X2 + X3 + X5 + X7 + X8 + X9 + X10
## 
##        Df  logLik    AIC
## - X10   3 -1256.7 2533.3
## - X3    2 -1256.0 2534.0
## - X2    1 -1255.5 2534.9
## - X7    1 -1255.5 2535.1
## - X1    1 -1255.6 2535.3
## - X5    2 -1256.7 2535.3
## - X9    1 -1255.8 2535.6
## - X8    1 -1256.0 2536.1
## <none>    -1255.5 2536.9
## 
## Step:  AIC=2533.31
## Yg ~ X1 + X2 + X3 + X5 + X7 + X8 + X9
## 
##        Df  logLik    AIC
## - X3    2 -1257.2 2530.3
## - X2    1 -1256.7 2531.3
## - X7    1 -1256.7 2531.4
## - X1    1 -1256.8 2531.6
## - X5    2 -1258.0 2531.9
## - X9    1 -1257.1 2532.2
## - X8    1 -1257.1 2532.2
## <none>    -1256.7 2533.3
## 
## Step:  AIC=2530.33
## Yg ~ X1 + X2 + X5 + X7 + X8 + X9
## 
##        Df  logLik    AIC
## - X2    1 -1257.2 2528.3
## - X1    1 -1257.2 2528.4
## - X7    1 -1257.2 2528.5
## - X8    1 -1257.5 2528.9
## - X9    1 -1257.5 2529.0
## - X5    2 -1258.6 2529.1
## <none>    -1257.2 2530.3
## 
## Step:  AIC=2528.34
## Yg ~ X1 + X5 + X7 + X8 + X9
## 
##        Df  logLik    AIC
## - X1    1 -1257.2 2526.4
## - X7    1 -1257.2 2526.5
## - X8    1 -1257.5 2526.9
## - X9    1 -1257.5 2527.0
## - X5    2 -1258.6 2527.1
## <none>    -1257.2 2528.3
## 
## Step:  AIC=2526.41
## Yg ~ X5 + X7 + X8 + X9
## 
##        Df  logLik    AIC
## - X7    1 -1257.2 2524.5
## - X8    1 -1257.5 2524.9
## - X9    1 -1257.5 2525.0
## - X5    2 -1258.6 2525.2
## <none>    -1257.2 2526.4
## 
## Step:  AIC=2524.48
## Yg ~ X5 + X8 + X9
## 
##        Df  logLik    AIC
## - X8    1 -1257.5 2523.0
## - X9    1 -1257.6 2523.2
## - X5    2 -1258.6 2523.3
## <none>    -1257.2 2524.5
## 
## Step:  AIC=2522.96
## Yg ~ X5 + X9
## 
##        Df  logLik    AIC
## - X9    1 -1257.8 2521.7
## - X5    2 -1258.9 2521.8
## <none>    -1257.5 2523.0
## 
## Step:  AIC=2521.69
## Yg ~ X5
## 
##        Df  logLik    AIC
## - X5    2 -1259.2 2520.3
## <none>    -1257.8 2521.7
## 
## Step:  AIC=2520.35
## Yg ~ 1
summary(mod_step)
## 
## Call:
## vglm(formula = Yg ~ 1, family = geometric(link = "logitlink"), 
##     data = dat)
## 
## Coefficients: 
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.73833    0.05646   -48.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Name of linear predictor: logitlink(prob) 
## 
## Log-likelihood: -1259.177 on 333 degrees of freedom
## 
## Number of Fisher scoring iterations: 2 
## 
## No Hauck-Donner effect found in any of the estimates
############################################################
## 7. ODDS RATIO & INTERVAL KEPERCAYAAN
############################################################

coef_mod <- coef(mod_step)
OR       <- exp(coef_mod)

# CI 95% dari VGAM, lalu di-exp untuk OR
CI <- exp(confintvglm(mod_step))

OR_table <- data.frame(
  Variabel = names(OR),
  OR       = round(OR, 4),
  CI_Lower = round(CI[, 1], 4),
  CI_Upper = round(CI[, 2], 4)
)

print(OR_table)
##                Variabel     OR CI_Lower CI_Upper
## (Intercept) (Intercept) 0.0647   0.0579   0.0722
mod_full <- vglm(
  Yg ~  X5,
  family = geometric(link = "logitlink"),  # BUKAN "log"
  data   = dat
)

summary(mod_full)
## 
## Call:
## vglm(formula = Yg ~ X5, family = geometric(link = "logitlink"), 
##     data = dat)
## 
## Coefficients: 
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -2.4481     0.1903 -12.864   <2e-16 ***
## X525-36      -0.2862     0.2050  -1.396   0.1628    
## X537-48      -0.3565     0.2121  -1.681   0.0928 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Name of linear predictor: logitlink(prob) 
## 
## Log-likelihood: -1257.847 on 331 degrees of freedom
## 
## Number of Fisher scoring iterations: 4 
## 
## No Hauck-Donner effect found in any of the estimates