library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the German credit data set.
# The location defaults to the path originally hard-coded in this script, but
# it can be overridden through the GERMAN_CREDIT_CSV environment variable so
# the analysis also runs on machines without that directory layout.
german_credit_path <- Sys.getenv(
  "GERMAN_CREDIT_CSV",
  unset = "C:/Users/Asus/Downloads/german_credit.csv"
)
# Fail early with an explicit message that names the path that was tried.
if (!file.exists(german_credit_path)) {
  stop("German credit CSV not found: ", german_credit_path, call. = FALSE)
}
data_german <- read.csv(german_credit_path, sep = ",", stringsAsFactors = TRUE)
# Structural overview: column names, types and the first few values.
glimpse(data_german)
## Rows: 1,000
## Columns: 21
## $ Creditability <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Account_Balance <int> 1, 1, 2, 1, 1, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 1, ~
## $ Duration_Credit <int> 18, 9, 12, 12, 12, 10, 8, 6, 18, 24, 11, 30, 6, 48,~
## $ Credit_History <int> 4, 4, 2, 4, 4, 4, 4, 4, 4, 2, 4, 4, 4, 3, 2, 2, 4, ~
## $ Purpose <int> 2, 0, 9, 0, 0, 0, 0, 0, 3, 3, 0, 1, 3, 10, 3, 3, 0,~
## $ Credit_Amount <int> 1049, 2799, 841, 2122, 2171, 2241, 3398, 1361, 1098~
## $ Savings <int> 1, 1, 2, 1, 1, 1, 1, 1, 1, 3, 1, 2, 1, 2, 5, 3, 1, ~
## $ Length_employment <int> 2, 3, 4, 3, 3, 2, 4, 2, 1, 1, 3, 4, 4, 1, 4, 3, 3, ~
## $ Instalment_percent <int> 4, 2, 2, 3, 4, 1, 1, 2, 4, 1, 2, 1, 1, 2, 2, 2, 1, ~
## $ Sex_Marital <int> 2, 3, 2, 3, 3, 3, 3, 3, 2, 2, 3, 4, 2, 3, 4, 3, 3, ~
## $ Guarantors <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
## $ Duration_address <int> 4, 2, 4, 2, 4, 3, 4, 4, 4, 4, 2, 4, 4, 4, 4, 3, 2, ~
## $ Valuable_asset <int> 2, 1, 1, 1, 2, 1, 1, 1, 3, 4, 1, 3, 3, 4, 3, 1, 1, ~
## $ Age <int> 21, 36, 23, 39, 38, 48, 39, 40, 65, 23, 36, 24, 31,~
## $ Concurrent_Credits <int> 3, 3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, ~
## $ Type_apartment <int> 1, 1, 1, 1, 2, 1, 2, 2, 2, 1, 1, 1, 2, 2, 1, 1, 2, ~
## $ Exist_Credits <int> 1, 2, 1, 2, 2, 2, 2, 1, 2, 1, 2, 2, 1, 1, 2, 1, 2, ~
## $ Occupation <int> 3, 3, 2, 2, 2, 2, 2, 2, 1, 1, 3, 3, 3, 4, 2, 3, 2, ~
## $ dependents <int> 1, 2, 1, 2, 1, 2, 1, 2, 1, 1, 2, 1, 1, 1, 1, 2, 2, ~
## $ Telephone <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, ~
## $ Foreign <int> 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, ~
# Columns recoded from integer to factor. Every column not listed here
# (Duration_Credit, Credit_Amount, Instalment_percent, Duration_address, Age,
# Concurrent_Credits, Exist_Credits, dependents) is left as an integer.
categorical_columns <- c(
  "Creditability", "Account_Balance", "Credit_History", "Purpose", "Savings",
  "Length_employment", "Sex_Marital", "Guarantors", "Valuable_asset",
  "Type_apartment", "Occupation", "Telephone", "Foreign"
)
# Convert all listed columns in one step; the column order is unchanged.
data_german[categorical_columns] <- lapply(
  data_german[categorical_columns],
  as.factor
)
# Level counts for the factors, five-number summaries for the integer columns.
summary(data_german)
## Creditability Account_Balance Duration_Credit Credit_History Purpose
## 0:300 1:274 Min. : 4.0 0: 40 3 :280
## 1:700 2:269 1st Qu.:12.0 1: 49 0 :234
## 3: 63 Median :18.0 2:530 2 :181
## 4:394 Mean :20.9 3: 88 1 :103
## 3rd Qu.:24.0 4:293 9 : 97
## Max. :72.0 6 : 50
## (Other): 55
## Credit_Amount Savings Length_employment Instalment_percent Sex_Marital
## Min. : 250 1:603 1: 62 Min. :1.000 1: 50
## 1st Qu.: 1366 2:103 2:172 1st Qu.:2.000 2:310
## Median : 2320 3: 63 3:339 Median :3.000 3:548
## Mean : 3271 4: 48 4:174 Mean :2.973 4: 92
## 3rd Qu.: 3972 5:183 5:253 3rd Qu.:4.000
## Max. :18424 Max. :4.000
##
## Guarantors Duration_address Valuable_asset Age Concurrent_Credits
## 1:907 Min. :1.000 1:282 Min. :19.00 Min. :1.000
## 2: 41 1st Qu.:2.000 2:232 1st Qu.:27.00 1st Qu.:3.000
## 3: 52 Median :3.000 3:332 Median :33.00 Median :3.000
## Mean :2.845 4:154 Mean :35.54 Mean :2.675
## 3rd Qu.:4.000 3rd Qu.:42.00 3rd Qu.:3.000
## Max. :4.000 Max. :75.00 Max. :3.000
##
## Type_apartment Exist_Credits Occupation dependents Telephone Foreign
## 1:179 Min. :1.000 1: 22 Min. :1.000 1:596 1:963
## 2:714 1st Qu.:1.000 2:200 1st Qu.:1.000 2:404 2: 37
## 3:107 Median :1.000 3:630 Median :1.000
## Mean :1.407 4:148 Mean :1.155
## 3rd Qu.:2.000 3rd Qu.:1.000
## Max. :4.000 Max. :2.000
##
#library(discretization)
#disk.chim <- chiM(cbind(x=data_german$Duration_Credit,class=data_german$Creditability), 0.01)
# Bin the variable using the cut points computed above
#data_german$DurationCreditGroup <- cut(data_german$Duration_Credit, breaks = unlist(disk.chim$cutp), labels=1:2, include.lowest = T)
#install.packages("woeBinning")
#install.packages("blorr")
#install.packages("woebin")
library(InformationValue)
library(woeBinning)
library(blorr)
#library(woebin)
# Scaffold for the information-value summary: one row per column of
# data_german, in the same order, so row i describes column i.
# IV and Predictiveness start as integer placeholders (0, 1, 2, ...) that are
# overwritten later for every predictor; only the first row keeps its 0.
# The placeholder length is derived from data_german rather than hard-coded,
# so the table still builds if the number of columns changes.
iv_placeholder <- seq_along(data_german) - 1L
IV.data <- data.frame(
  variabel = names(data_german),
  IV = iv_placeholder,
  Predictiveness = iv_placeholder
)
# Bin Duration_Credit against the target with woeBinning.
binning <- woe.binning(data_german, "Creditability", "Duration_Credit",
                       event.class = "0")
# The third element of the result is the total information value of the
# binned variable (printed below as iv.total.final).
IV.data[3, 2] <- round(binning[[3]], 3)
# Deploying the binning appends the binned variable and its WOE to a copy of
# data_german.
df.with.binned.vars.added <- woe.binning.deploy(data_german, binning,
                                                add.woe.or.dum.var = "woe")
# Select the WOE column by name instead of by position 23: a positional index
# silently picks the wrong column as soon as data_german gains or loses a
# column. The guard makes a naming mismatch fail loudly.
woe_column <- "woe.Duration_Credit.binned"
stopifnot(woe_column %in% names(df.with.binned.vars.added))
# woeBinning reports WOE multiplied by 100 (e.g. 124.59 for
# log(0.1043 / 0.0300)); divide by 100 to get the natural-log scale.
WOE_Duration_Credit <- df.with.binned.vars.added[[woe_column]] / 100
binning
## [[1]]
## [1] "Duration_Credit"
##
## [[2]]
## woe cutpoints.final cutpoints.final[-1] iv.total.final 1 0
## (-Inf,6] 124.59370 -Inf 6 0.2537678 73 9
## (6,15] 36.53869 6 15 0.2537678 269 80
## (15,30] -10.83411 15 30 0.2537678 268 128
## (30, Inf] -76.63288 30 Inf 0.2537678 90 83
## Missing NA Inf Missing 0.2537678 0 0
## col.perc.a col.perc.b iv.bins
## (-Inf,6] 0.1042857 0.0300000 0.092555320
## (6,15] 0.3842857 0.2666667 0.042976457
## (15,30] 0.3828571 0.4266667 0.004746374
## (30, Inf] 0.1285714 0.2766667 0.113489646
## Missing 0.0000000 0.0000000 NA
##
## [[3]]
## iv.total.final
## 0.2537678
# Bin one numeric predictor against Creditability with woeBinning and deploy
# the binning on data_german.
#   pred_var: name of the predictor column in data_german.
#   ...:      extra tuning arguments forwarded to woe.binning().
# Returns a list with
#   binning:  the woe.binning() result,
#   deployed: data_german plus the binned variable and its WOE column,
#   iv:       total information value (third element of the binning result),
#             rounded to 3 decimals,
#   woe:      the deployed WOE values divided by 100, because woeBinning
#             reports WOE multiplied by 100.
# The WOE column is selected by name rather than by position 23, so the
# result stays correct if data_german gains or loses columns; the guard makes
# a naming mismatch fail loudly.
woe_bin_numeric <- function(pred_var, ...) {
  fit <- woe.binning(data_german, "Creditability", pred_var,
                     event.class = "0", ...)
  deployed <- woe.binning.deploy(data_german, fit,
                                 add.woe.or.dum.var = "woe")
  woe_column <- paste0("woe.", pred_var, ".binned")
  stopifnot(woe_column %in% names(deployed))
  list(
    binning = fit,
    deployed = deployed,
    iv = round(fit[[3]], 3),
    woe = deployed[[woe_column]] / 100
  )
}

# For each predictor, store the IV in the row of IV.data that carries the
# predictor's name and keep the WOE-coded values for the model.
res <- woe_bin_numeric("Credit_Amount")
IV.data[IV.data$variabel == "Credit_Amount", 2] <- res$iv
WOE_Credit_Amount <- res$woe

res <- woe_bin_numeric("Instalment_percent")
IV.data[IV.data$variabel == "Instalment_percent", 2] <- res$iv
WOE_Instalment_percent <- res$woe

res <- woe_bin_numeric("Duration_address")
IV.data[IV.data$variabel == "Duration_address", 2] <- res$iv
WOE_Duration_address <- res$woe

res <- woe_bin_numeric("Age")
IV.data[IV.data$variabel == "Age", 2] <- res$iv
WOE_Age <- res$woe

res <- woe_bin_numeric("Concurrent_Credits")
IV.data[IV.data$variabel == "Concurrent_Credits", 2] <- res$iv
WOE_Concurrent_Credits <- res$woe

res <- woe_bin_numeric("Exist_Credits")
IV.data[IV.data$variabel == "Exist_Credits", 2] <- res$iv
WOE_Exist_Credits <- res$woe

# dependents is the only predictor binned with explicit tuning arguments.
res <- woe_bin_numeric("dependents", min.perc.total = 0.05,
                       min.perc.class = 0.1, stop.limit = 0.1)
IV.data[IV.data$variabel == "dependents", 2] <- res$iv
WOE_dependents <- res$woe

# Leave the same globals behind as the step-by-step version did: the binning
# and the deployed data frame of the last predictor processed.
binning <- res$binning
df.with.binned.vars.added <- res$deployed
# Weight of evidence of Sex_Marital with respect to Creditability, via the
# InformationValue package (presumably one value per observation; confirm
# against the package documentation).
WOE_Sex <- with(data_german, WOE(Sex_Marital, Creditability))
# Total information value of Sex_Marital, with Creditability == 1 treated as
# the "good" class; stored in row 10 of IV.data, the Sex_Marital row.
IV.data[10, 2] <- round(
  with(data_german, IV(Sex_Marital, Creditability, valueOfGood = 1)),
  3
)
# Per-category breakdown (goods, bads, shares, WOE and IV contribution).
Tabel_WOE_gender <- with(data_german, WOETable(Sex_Marital, Creditability))
print(Tabel_WOE_gender)
## CAT GOODS BADS TOTAL PCT_G PCT_B WOE IV
## 1 1 30 20 50 0.04285714 0.06666667 -0.4418328 0.010519827
## 2 2 201 109 310 0.28714286 0.36333333 -0.2353408 0.017930730
## 3 3 402 146 548 0.57428571 0.48666667 0.1655476 0.014505124
## 4 4 67 25 92 0.09571429 0.08333333 0.1385189 0.001714996
# Remaining categorical predictors handled with the InformationValue package.
categorical_predictors <- c(
  "Account_Balance", "Credit_History", "Purpose", "Savings",
  "Length_employment", "Guarantors", "Valuable_asset", "Type_apartment",
  "Occupation", "Telephone", "Foreign"
)

# WOE coding of every listed predictor against Creditability.
woe_by_predictor <- lapply(
  data_german[categorical_predictors],
  function(x) WOE(x, data_german$Creditability)
)
# Information value of every listed predictor, with Creditability == 1 as the
# "good" class.
iv_by_predictor <- vapply(
  data_german[categorical_predictors],
  function(x) IV(x, data_german$Creditability, valueOfGood = 1),
  numeric(1)
)

# Row i of IV.data describes column i of data_german, so the column positions
# give the rows to fill (2, 4, 5, 7, 8, 11, 13, 16, 18, 20 and 21).
IV.data[match(categorical_predictors, names(data_german)), 2] <-
  round(iv_by_predictor, 3)

# Expose the WOE codings under the names used by the later modelling step.
WOE_Account_Balance <- woe_by_predictor[["Account_Balance"]]
WOE_Credit_History <- woe_by_predictor[["Credit_History"]]
WOE_Purpose <- woe_by_predictor[["Purpose"]]
WOE_Savings <- woe_by_predictor[["Savings"]]
WOE_Length_employment <- woe_by_predictor[["Length_employment"]]
WOE_Guarantors <- woe_by_predictor[["Guarantors"]]
WOE_Valuable_asset <- woe_by_predictor[["Valuable_asset"]]
WOE_Type_apartment <- woe_by_predictor[["Type_apartment"]]
WOE_Occupation <- woe_by_predictor[["Occupation"]]
WOE_Telephone <- woe_by_predictor[["Telephone"]]
WOE_Foreign <- woe_by_predictor[["Foreign"]]
# Label each predictor by the size of its information value:
#   IV <  0.02          -> "unpredictive"
#   0.02 <= IV <= 0.1   -> "weak"
#   0.1  <  IV <= 0.3   -> "medium"
#   IV >  0.3           -> "strong"
# Row 1 is the target (Creditability) and is skipped, so it keeps its
# placeholder value. The rows are derived from the table instead of the
# hard-coded 2:21, and the labelling is vectorised; an NA information value
# yields an NA label rather than aborting the script.
predictor_rows <- seq_len(nrow(IV.data))[-1]
iv_values <- IV.data[predictor_rows, 2]
IV.data[predictor_rows, 3] <- ifelse(
  iv_values < 0.02, "unpredictive",
  ifelse(
    iv_values <= 0.1, "weak",
    ifelse(iv_values <= 0.3, "medium", "strong")
  )
)
IV.data
## variabel IV Predictiveness
## 1 Creditability 0.000 0
## 2 Account_Balance 0.666 strong
## 3 Duration_Credit 0.254 medium
## 4 Credit_History 0.293 medium
## 5 Purpose 0.169 medium
## 6 Credit_Amount 0.115 medium
## 7 Savings 0.196 medium
## 8 Length_employment 0.086 weak
## 9 Instalment_percent 0.024 weak
## 10 Sex_Marital 0.045 weak
## 11 Guarantors 0.032 weak
## 12 Duration_address 0.003 unpredictive
## 13 Valuable_asset 0.113 medium
## 14 Age 0.089 weak
## 15 Concurrent_Credits 0.042 weak
## 16 Type_apartment 0.085 weak
## 17 Exist_Credits 0.010 unpredictive
## 18 Occupation 0.009 unpredictive
## 19 dependents 0.000 unpredictive
## 20 Telephone 0.006 unpredictive
## 21 Foreign 0.044 weak
# Modelling table: the WOE-coded versions of the seven predictors labelled
# "medium" or "strong" in IV.data, plus the target as `status`.
dataakhir.WOE <- data.frame(
  WOE_Account_Balance,
  WOE_Duration_Credit,
  WOE_Credit_History,
  WOE_Purpose,
  WOE_Credit_Amount,
  WOE_Savings,
  WOE_Valuable_asset,
  status = data_german$Creditability
)
# Determine the weight of each variable: fit a binomial GLM of status on the
# WOE-coded predictors; the fitted coefficients are the weights.
modelWOE <- glm(
  status ~ WOE_Account_Balance + WOE_Duration_Credit + WOE_Credit_History +
    WOE_Purpose + WOE_Credit_Amount + WOE_Savings + WOE_Valuable_asset,
  family = "binomial",
  data = dataakhir.WOE
)
coef(modelWOE)
## (Intercept) WOE_Account_Balance WOE_Duration_Credit WOE_Credit_History
## 0.8384709 0.8203047 0.8173847 0.7807412
## WOE_Purpose WOE_Credit_Amount WOE_Savings WOE_Valuable_asset
## 0.9778155 0.4455526 0.7589765 0.4626983