library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the German credit data set from a local CSV file; any text columns are
# turned into factors on import.
# NOTE(review): the absolute path is specific to one machine -- adjust it (or
# make it relative to the project) before running elsewhere.
data_german <- read.csv(
  file = "C:/Users/Asus/Downloads/german_credit.csv",
  sep = ",",
  stringsAsFactors = TRUE
)
# Compact overview: one line per column with its type and first values.
dplyr::glimpse(data_german)
## Rows: 1,000
## Columns: 21
## $ Creditability      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Account_Balance    <int> 1, 1, 2, 1, 1, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 1, ~
## $ Duration_Credit    <int> 18, 9, 12, 12, 12, 10, 8, 6, 18, 24, 11, 30, 6, 48,~
## $ Credit_History     <int> 4, 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 3, 2, 2, 4, ~
## $ Purpose            <int> 2, 0, 9, 0, 0, 0, 0, 0, 3, 3, 0, 1, 3, 10, 3, 3, 0,~
## $ Credit_Amount      <int> 1049, 2799, 841, 2122, 2171, 2241, 3398, 1361, 1098~
## $ Savings            <int> 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 2, 5, 3, 1, ~
## $ Length_employment  <int> 2, 3, 4, 3, 3, 2, 4, 2, 1, 1, 3, 4, 4, 1, 4, 3, 3, ~
## $ Instalment_percent <int> 4, 2, 2, 3, 4, 1, 1, 2, 4, 1, 2, 1, 1, 2, 2, 2, 1, ~
## $ Sex_Marital        <int> 2, 3, 2, 3, 3, 3, 3, 3, 2, 2, 3, 4, 2, 3, 4, 3, 3, ~
## $ Guarantors         <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Duration_address   <int> 4, 2, 4, 2, 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 4, 3, 2, ~
## $ Valuable_asset     <int> 2, 1, 1, 1, 2, 1, 1, 1, 3, 4, 1, 3, 3, 4, 3, 1, 1, ~
## $ Age                <int> 21, 36, 23, 39, 38, 48, 39, 40, 65, 23, 36, 24, 31,~
## $ Concurrent_Credits <int> 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ~
## $ Type_apartment     <int> 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, ~
## $ Exist_Credits      <int> 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, ~
## $ Occupation         <int> 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 2, 3, 2, ~
## $ dependents         <int> 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, ~
## $ Telephone          <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, ~
## $ Foreign            <int> 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
# The categorical attributes (and the target, Creditability) are stored as
# integer codes; convert them to factors in one pass so that summary() and the
# WOE/IV helpers treat them as categories instead of numbers.
data_german <- dplyr::mutate(
  data_german,
  dplyr::across(
    dplyr::all_of(c(
      "Creditability", "Account_Balance", "Credit_History", "Purpose",
      "Savings", "Length_employment", "Sex_Marital", "Guarantors",
      "Valuable_asset", "Type_apartment", "Occupation", "Telephone",
      "Foreign"
    )),
    as.factor
  )
)

# Level counts for the factors, five-number summaries for the numeric columns.
summary(data_german)
##  Creditability Account_Balance Duration_Credit Credit_History    Purpose   
##  0:300         1:274           Min.   : 4.0    0: 40          3      :280  
##  1:700         2:269           1st Qu.:12.0    1: 49          0      :234  
##                3: 63           Median :18.0    2:530          2      :181  
##                4:394           Mean   :20.9    3: 88          1      :103  
##                                3rd Qu.:24.0    4:293          9      : 97  
##                                Max.   :72.0                   6      : 50  
##                                                               (Other): 55  
##  Credit_Amount   Savings Length_employment Instalment_percent Sex_Marital
##  Min.   :  250   1:603   1: 62             Min.   :1.000      1: 50      
##  1st Qu.: 1366   2:103   2:172             1st Qu.:2.000      2:310      
##  Median : 2320   3: 63   3:339             Median :3.000      3:548      
##  Mean   : 3271   4: 48   4:174             Mean   :2.973      4: 92      
##  3rd Qu.: 3972   5:183   5:253             3rd Qu.:4.000                 
##  Max.   :18424                             Max.   :4.000                 
##                                                                          
##  Guarantors Duration_address Valuable_asset      Age        Concurrent_Credits
##  1:907      Min.   :1.000    1:282          Min.   :19.00   Min.   :1.000     
##  2: 41      1st Qu.:2.000    2:232          1st Qu.:27.00   1st Qu.:3.000     
##  3: 52      Median :3.000    3:332          Median :33.00   Median :3.000     
##             Mean   :2.845    4:154          Mean   :35.54   Mean   :2.675     
##             3rd Qu.:4.000                   3rd Qu.:42.00   3rd Qu.:3.000     
##             Max.   :4.000                   Max.   :75.00   Max.   :3.000     
##                                                                               
##  Type_apartment Exist_Credits   Occupation   dependents    Telephone Foreign
##  1:179          Min.   :1.000   1: 22      Min.   :1.000   1:596     1:963  
##  2:714          1st Qu.:1.000   2:200      1st Qu.:1.000   2:404     2: 37  
##  3:107          Median :1.000   3:630      Median :1.000                    
##                 Mean   :1.407   4:148      Mean   :1.155                    
##                 3rd Qu.:2.000              3rd Qu.:1.000                    
##                 Max.   :4.000              Max.   :2.000                    
## 
#library(discretization)
#disk.chim <- chiM(cbind(x=data_german$Duration_Credit,class=data_german$Creditability), 0.01)


# Variable binning (disabled)
#data_german$DurationCreditGroup <- cut(data_german$Duration_Credit, breaks = unlist(disk.chim$cutp), labels=1:2, include.lowest = T)
#install.packages("woeBinning")
#install.packages("blorr")
#install.packages("woebin")
library(InformationValue)
library(woeBinning)
library(blorr)
#library(woebin)
# Result table with one row per column of data_german (row order = column
# order). IV and Predictiveness start as the placeholder sequence 0, 1, 2, ...
# and are overwritten below for every predictor; row 1 (the target) keeps its
# placeholder. The placeholder length is derived from the data instead of
# being hard-coded to 21 columns.
IV.data <- data.frame(
  variabel = names(data_german),
  IV = seq_len(ncol(data_german)) - 1L,
  Predictiveness = seq_len(ncol(data_german)) - 1L
)
# Supervised binning of Duration_Credit against the target.
binning <- woe.binning(
  data_german, "Creditability", "Duration_Credit",
  event.class = "0"
)
# binning[[3]] is the total information value of the binned variable; store it
# in the row that belongs to this variable (looked up by name, not position).
IV.data[match("Duration_Credit", IV.data$variabel), 2] <-
  round(binning[[3]], 3)
df.with.binned.vars.added <- woe.binning.deploy(
  data_german, binning,
  add.woe.or.dum.var = "woe"
)
# The original code read column 23 of the deployed frame, i.e. the second
# column after the 21 input columns.
# NOTE(review): assumes woe.binning.deploy appends the binned factor followed
# by its WOE column -- confirm against the woeBinning documentation.
# woeBinning reports WOE multiplied by 100 (see the table printed below), so
# divide by 100 to get the plain log-ratio scale.
WOE_Duration_Credit <-
  df.with.binned.vars.added[, ncol(data_german) + 2L] / 100
binning
## [[1]]
## [1] "Duration_Credit"
## 
## [[2]]
##                 woe cutpoints.final cutpoints.final[-1] iv.total.final   1   0
## (-Inf,6]  124.59370            -Inf                   6      0.2537678  73   9
## (6,15]     36.53869               6                  15      0.2537678 269  80
## (15,30]   -10.83411              15                  30      0.2537678 268 128
## (30, Inf] -76.63288              30                 Inf      0.2537678  90  83
## Missing          NA             Inf             Missing      0.2537678   0   0
##           col.perc.a col.perc.b     iv.bins
## (-Inf,6]   0.1042857  0.0300000 0.092555320
## (6,15]     0.3842857  0.2666667 0.042976457
## (15,30]    0.3828571  0.4266667 0.004746374
## (30, Inf]  0.1285714  0.2766667 0.113489646
## Missing    0.0000000  0.0000000          NA
## 
## [[3]]
## iv.total.final 
##      0.2537678
# Bin each remaining numeric predictor against the target, record its total
# information value in IV.data and keep the per-observation WOE values.
# One loop replaces seven copy-pasted stanzas; the IV.data row is looked up by
# variable name and the WOE column position is derived from the input width
# instead of the literal 23.
binned_woe <- list()
for (predictor in c(
  "Credit_Amount", "Instalment_percent", "Duration_address", "Age",
  "Concurrent_Credits", "Exist_Credits", "dependents"
)) {
  binning <- if (predictor == "dependents") {
    # dependents is the only variable binned with explicit tuning parameters.
    woe.binning(
      data_german, "Creditability", predictor,
      min.perc.total = 0.05, min.perc.class = 0.1,
      stop.limit = 0.1, event.class = "0"
    )
  } else {
    woe.binning(data_german, "Creditability", predictor, event.class = "0")
  }
  # binning[[3]] is the total information value of the binned variable.
  IV.data[match(predictor, IV.data$variabel), 2] <- round(binning[[3]], 3)
  df.with.binned.vars.added <- woe.binning.deploy(
    data_german, binning,
    add.woe.or.dum.var = "woe"
  )
  # NOTE(review): assumes woe.binning.deploy appends the binned factor followed
  # by its WOE column (the original code read column 23 of a 21-column input)
  # -- confirm against the woeBinning documentation.
  # The package's WOE values are scaled by 100, hence the division.
  binned_woe[[predictor]] <-
    df.with.binned.vars.added[, ncol(data_german) + 2L] / 100
}

# Expose the results under the individual names used by the rest of the script.
WOE_Credit_Amount <- binned_woe[["Credit_Amount"]]
WOE_Instalment_percent <- binned_woe[["Instalment_percent"]]
WOE_Duration_address <- binned_woe[["Duration_address"]]
WOE_Age <- binned_woe[["Age"]]
WOE_Concurrent_Credits <- binned_woe[["Concurrent_Credits"]]
WOE_Exist_Credits <- binned_woe[["Exist_Credits"]]
WOE_dependents <- binned_woe[["dependents"]]
# WOE and information value of the categorical Sex_Marital attribute, with
# Creditability == 1 treated as the "good" class throughout (the default of
# WOE() and WOETable(), now spelled out to match the IV() call).
WOE_Sex <- WOE(
  data_german$Sex_Marital, data_german$Creditability,
  valueOfGood = 1
)
# Store the IV in the row belonging to Sex_Marital (looked up by name rather
# than the literal row number 10).
IV.data[match("Sex_Marital", IV.data$variabel), 2] <- round(
  IV(data_german$Sex_Marital, data_german$Creditability, valueOfGood = 1),
  3
)

# Per-category breakdown: goods, bads, their shares, WOE and IV contribution.
Tabel_WOE_gender <- WOETable(
  data_german$Sex_Marital, data_german$Creditability,
  valueOfGood = 1
)
Tabel_WOE_gender
##   CAT GOODS BADS TOTAL      PCT_G      PCT_B        WOE          IV
## 1   1    30   20    50 0.04285714 0.06666667 -0.4418328 0.010519827
## 2   2   201  109   310 0.28714286 0.36333333 -0.2353408 0.017930730
## 3   3   402  146   548 0.57428571 0.48666667  0.1655476 0.014505124
## 4   4    67   25    92 0.09571429 0.08333333  0.1385189 0.001714996
# WOE vectors and information values for the remaining categorical predictors.
# The eleven copy-pasted WOE()/IV() pairs are replaced by one pass over the
# variable names, and each IV is written to the IV.data row found by name
# instead of a hard-coded row number.
categorical_predictors <- c(
  "Account_Balance", "Credit_History", "Purpose", "Savings",
  "Length_employment", "Guarantors", "Valuable_asset", "Type_apartment",
  "Occupation", "Telephone", "Foreign"
)

# Per-observation WOE of each predictor (named list, one vector per variable).
categorical_woe <- lapply(
  data_german[categorical_predictors],
  function(x) WOE(x, data_german$Creditability)
)

# Information value per predictor, rounded to three decimals; as.numeric()
# drops any attributes IV() attaches so a plain number is stored.
categorical_iv <- vapply(
  data_german[categorical_predictors],
  function(x) {
    as.numeric(round(IV(x, data_german$Creditability, valueOfGood = 1), 3))
  },
  numeric(1)
)
IV.data[match(categorical_predictors, IV.data$variabel), 2] <-
  unname(categorical_iv)

# Expose the WOE vectors under the individual names used by the rest of the
# script.
WOE_Account_Balance <- categorical_woe[["Account_Balance"]]
WOE_Credit_History <- categorical_woe[["Credit_History"]]
WOE_Purpose <- categorical_woe[["Purpose"]]
WOE_Savings <- categorical_woe[["Savings"]]
WOE_Length_employment <- categorical_woe[["Length_employment"]]
WOE_Guarantors <- categorical_woe[["Guarantors"]]
WOE_Valuable_asset <- categorical_woe[["Valuable_asset"]]
WOE_Type_apartment <- categorical_woe[["Type_apartment"]]
WOE_Occupation <- categorical_woe[["Occupation"]]
WOE_Telephone <- categorical_woe[["Telephone"]]
WOE_Foreign <- categorical_woe[["Foreign"]]
# Label every predictor by its information value:
#   IV <  0.02          -> "unpredictive"
#   0.02 <= IV <= 0.1   -> "weak"
#   0.1  <  IV <= 0.3   -> "medium"
#   IV >  0.3           -> "strong"
# Row 1 is the target itself and keeps its placeholder. The rows are derived
# from the table size instead of the literal 2:21, and the labelling is
# vectorised; an NA information value yields an NA label rather than an error.
predictor_rows <- seq_len(nrow(IV.data))[-1]
iv_scores <- IV.data[predictor_rows, 2]
# The column starts out as integer placeholders; make it character first so
# the labels can be stored without implicit coercion.
IV.data[, 3] <- as.character(IV.data[, 3])
IV.data[predictor_rows, 3] <- ifelse(
  iv_scores < 0.02, "unpredictive",
  ifelse(
    iv_scores <= 0.1, "weak",
    ifelse(iv_scores <= 0.3, "medium", "strong")
  )
)
IV.data
##              variabel    IV Predictiveness
## 1       Creditability 0.000              0
## 2     Account_Balance 0.666         strong
## 3     Duration_Credit 0.254         medium
## 4      Credit_History 0.293         medium
## 5             Purpose 0.169         medium
## 6       Credit_Amount 0.115         medium
## 7             Savings 0.196         medium
## 8   Length_employment 0.086           weak
## 9  Instalment_percent 0.024           weak
## 10        Sex_Marital 0.045           weak
## 11         Guarantors 0.032           weak
## 12   Duration_address 0.003   unpredictive
## 13     Valuable_asset 0.113         medium
## 14                Age 0.089           weak
## 15 Concurrent_Credits 0.042           weak
## 16     Type_apartment 0.085           weak
## 17      Exist_Credits 0.010   unpredictive
## 18         Occupation 0.009   unpredictive
## 19         dependents 0.000   unpredictive
## 20          Telephone 0.006   unpredictive
## 21            Foreign 0.044           weak
# Modelling frame: the WOE-transformed versions of the seven predictors rated
# "medium" or "strong" in IV.data, plus the target as `status`.
dataakhir.WOE <- data.frame(
  WOE_Account_Balance = WOE_Account_Balance,
  WOE_Duration_Credit = WOE_Duration_Credit,
  WOE_Credit_History = WOE_Credit_History,
  WOE_Purpose = WOE_Purpose,
  WOE_Credit_Amount = WOE_Credit_Amount,
  WOE_Savings = WOE_Savings,
  WOE_Valuable_asset = WOE_Valuable_asset,
  status = data_german$Creditability
)

# Determine the weight of each variable: fit a logistic regression of the
# target on the WOE-coded predictors.
modelWOE <- glm(
  status ~ WOE_Account_Balance + WOE_Duration_Credit + WOE_Credit_History +
    WOE_Purpose + WOE_Credit_Amount + WOE_Savings + WOE_Valuable_asset,
  data = dataakhir.WOE,
  family = "binomial"
)

# Fitted intercept and per-variable weights.
coef(modelWOE)
##         (Intercept) WOE_Account_Balance WOE_Duration_Credit  WOE_Credit_History 
##           0.8384709           0.8203047           0.8173847           0.7807412 
##         WOE_Purpose   WOE_Credit_Amount         WOE_Savings  WOE_Valuable_asset 
##           0.9778155           0.4455526           0.7589765           0.4626983