library(AUC)
## AUC 0.3.0
## Type AUCNews() to see the change log and ?AUC to get an overview.
# Load the example churn data set (columns used here: predictions, labels).
data(churn)

# Area under each performance curve, computed from the churn predictions and
# the observed labels. The recorded console output follows each call.
with(churn, auc(sensitivity(predictions, labels)))
## [1] 0.8026259
with(churn, auc(specificity(predictions, labels)))
## [1] 0.4591936
with(churn, auc(accuracy(predictions, labels)))
## [1] 0.5034279
with(churn, auc(roc(predictions, labels)))
## [1] 0.8439201

# Draw the same four curves, one plot per curve.
with(churn, plot(sensitivity(predictions, labels)))

with(churn, plot(specificity(predictions, labels)))

with(churn, plot(accuracy(predictions, labels)))

with(churn, plot(roc(predictions, labels)))
#feature selection
library(mlbench)
# Load the Ozone data from mlbench and give its 13 columns descriptive names.
# The names are assigned by position, so their order must match the columns.
data(Ozone, package = "mlbench")
inputData <- setNames(
  Ozone,
  c(
    "Month", "Day_of_month", "Day_of_week",
    "ozone_reading",
    "pressure_height", "Wind_speed", "Humidity",
    "Temperature_Sandburg", "Temperature_ElMonte",
    "Inversion_base_height", "Pressure_gradient",
    "Inversion_temperature", "Visibility"
  )
)
#NA imputation
library(DMwR)
## Loading required package: lattice
## Loading required package: grid

# Fill in missing values with k-nearest-neighbour imputation (DMwR).
inputData <- knnImputation(inputData)

# Split the imputed data by role.
# The three calendar columns:
inputData_cat <- inputData[c("Month", "Day_of_month", "Day_of_week")]
# The nine remaining predictor columns:
inputData_cont <- inputData[c(
  "pressure_height", "Wind_speed", "Humidity",
  "Temperature_Sandburg", "Temperature_ElMonte",
  "Inversion_base_height", "Pressure_gradient",
  "Inversion_temperature", "Visibility"
)]

# Response variable: its name, its values as a plain vector, and the same
# values wrapped in a one-column data frame.
response_name <- "ozone_reading"
response <- inputData[[response_name]]
inputData_response <- data.frame(ozone_reading = inputData[["ozone_reading"]])
#1. random forest method
library(party)
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# Fit a conditional random forest of ozone_reading on every other column,
# using 50 trees and 2 candidate variables per split.
# The argument is spelled `controls` in full: cforest() has no `control`
# argument, so the shorter spelling only worked through partial argument
# matching.
cf1 <- cforest(
  ozone_reading ~ .,
  data = inputData,
  controls = cforest_unbiased(mtry = 2, ntree = 50)
)
varimp(cf1) # get variable importance, based on mean decrease in accuracy
## Month Day_of_month Day_of_week
## 3.7062044 -0.1382535 -0.2174799
## pressure_height Wind_speed Humidity
## 2.9864310 0.1739157 4.0674793
## Temperature_Sandburg Temperature_ElMonte Inversion_base_height
## 10.9701438 11.5560742 6.0519120
## Pressure_gradient Inversion_temperature Visibility
## 2.3410749 9.4198043 1.5428216
# conditional = TRUE adjusts the importance for correlations between predictors.
varimp(cf1, conditional = TRUE)
## Month Day_of_month Day_of_week
## 0.56589104 -0.08416212 -0.01287185
## pressure_height Wind_speed Humidity
## 0.17733925 -0.04787313 0.43068883
## Temperature_Sandburg Temperature_ElMonte Inversion_base_height
## 0.91190947 2.18890604 0.87562096
## Pressure_gradient Inversion_temperature Visibility
## 0.29950532 0.90544473 0.21006645
# AUC-based importance, described as more robust towards class imbalance.
# NOTE(review): ozone_reading is modelled as a numeric response here, and the
# AUC-based measure is documented for binary classification; confirm this
# call adds anything beyond varimp(cf1) for this model.
varimpAUC(cf1)
## Month Day_of_month Day_of_week
## 3.81324789 0.03788994 -0.04052355
## pressure_height Wind_speed Humidity
## 3.09446180 0.02728874 3.89792164
## Temperature_Sandburg Temperature_ElMonte Inversion_base_height
## 10.92087203 12.04959959 5.06100818
## Pressure_gradient Inversion_temperature Visibility
## 2.26375315 10.22862976 1.62766000
#2. relative importance
library(relaimpo)
## Loading required package: MASS
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
##
## melanoma
## Loading required package: survey
## Loading required package: Matrix
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:boot':
##
## aml
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
## Loading required package: mitools
## This is the global version of package relaimpo.
## If you are a non-US user, a version with the interesting additional metric pmvd is available
## from Ulrike Groempings web site at prof.beuth-hochschule.de/groemping.
# Ordinary linear model of ozone_reading on every other column.
lmMod <- lm(ozone_reading ~ ., data = inputData)

# LMG relative-importance shares. With rela = TRUE the shares are normalised
# to sum to 1, as the recorded values below do.
relImportance <- calc.relimp(
  lmMod,
  type = "lmg",
  rela = TRUE
)

# Show the shares from most to least important.
sort(relImportance$lmg, decreasing = TRUE)
## Temperature_ElMonte Temperature_Sandburg Month
## 0.184722438 0.164540381 0.163371978
## Inversion_temperature pressure_height Inversion_base_height
## 0.137890248 0.087594494 0.083696664
## Humidity Visibility Day_of_month
## 0.068573808 0.039202230 0.031248599
## Pressure_gradient Day_of_week Wind_speed
## 0.026557629 0.008371262 0.004230269
#3. MARS in earth package
library(earth)
## Loading required package: plotmo
## Loading required package: plotrix
## Loading required package: TeachingDemos
# Fit a MARS model of ozone_reading on every other column.
marsModel <- earth(
  ozone_reading ~ .,
  data = inputData
)

# Estimate variable importance from the fitted model, then plot it.
ev <- evimp(marsModel)
plot(ev)

#4. stepwise regression
# The two models that bound the stepwise search: every predictor (upper)
# and intercept only (lower).
all.mod <- lm(ozone_reading ~ ., data = inputData)
base.mod <- lm(ozone_reading ~ 1, data = inputData)

# Start from the intercept-only model and add or drop terms in both
# directions, printing the trace of every step.
stepMod <- step(
  base.mod,
  scope = list(upper = all.mod, lower = base.mod),
  direction = "both",
  steps = 1000,
  trace = 1
)
## Start: AIC=1514.09
## ozone_reading ~ 1
##
## Df Sum of Sq RSS AIC
## + Temperature_Sandburg 1 13565.4 9223.7 1185.0
## + Temperature_ElMonte 1 13383.8 9405.3 1192.2
## + Inversion_temperature 1 12085.6 10703.5 1239.5
## + Month 11 9210.1 13579.0 1346.6
## + pressure_height 1 7906.1 14883.1 1360.2
## + Inversion_base_height 1 7638.0 15151.2 1366.7
## + Humidity 1 4697.7 18091.5 1431.6
## + Visibility 1 3943.0 18846.1 1446.6
## + Pressure_gradient 1 1220.0 21569.1 1496.0
## <none> 22789.2 1514.1
## + Wind_speed 1 0.6 22788.6 1516.1
## + Day_of_week 6 279.0 22510.1 1521.6
## + Day_of_month 30 772.7 22016.4 1561.5
##
## Step: AIC=1185.05
## ozone_reading ~ Temperature_Sandburg
##
## Df Sum of Sq RSS AIC
## + Month 11 1355.1 7868.6 1148.9
## + Humidity 1 877.7 8346.0 1150.5
## + Inversion_base_height 1 868.8 8355.0 1150.8
## + Temperature_ElMonte 1 633.3 8590.4 1161.0
## + Visibility 1 374.9 8848.8 1171.9
## + Inversion_temperature 1 353.9 8869.8 1172.7
## + Pressure_gradient 1 159.5 9064.2 1180.7
## + pressure_height 1 68.4 9155.3 1184.3
## <none> 9223.7 1185.0
## + Wind_speed 1 1.5 9222.2 1187.0
## + Day_of_week 6 120.8 9102.9 1192.2
## + Day_of_month 30 630.3 8593.4 1219.1
## - Temperature_Sandburg 1 13565.4 22789.2 1514.1
##
## Step: AIC=1148.89
## ozone_reading ~ Temperature_Sandburg + Month
##
## Df Sum of Sq RSS AIC
## + Temperature_ElMonte 1 1445.9 6422.7 1076.6
## + Inversion_temperature 1 864.3 7004.4 1108.3
## + Inversion_base_height 1 849.1 7019.5 1109.1
## + Visibility 1 641.5 7227.1 1119.8
## + Humidity 1 593.2 7275.4 1122.2
## + Wind_speed 1 161.6 7707.0 1143.3
## + pressure_height 1 102.1 7766.5 1146.1
## + Pressure_gradient 1 93.1 7775.6 1146.5
## <none> 7868.6 1148.9
## + Day_of_week 6 120.0 7748.6 1155.3
## + Day_of_month 30 632.7 7235.9 1178.2
## - Month 11 1355.1 9223.7 1185.0
## - Temperature_Sandburg 1 5710.4 13579.0 1346.6
##
## Step: AIC=1076.58
## ozone_reading ~ Temperature_Sandburg + Month + Temperature_ElMonte
##
## Df Sum of Sq RSS AIC
## + Humidity 1 731.20 5691.5 1034.3
## + Visibility 1 375.27 6047.5 1056.5
## + Inversion_base_height 1 262.08 6160.7 1063.3
## + pressure_height 1 119.72 6303.0 1071.7
## + Wind_speed 1 75.73 6347.0 1074.2
## + Inversion_temperature 1 52.17 6370.6 1075.6
## <none> 6422.7 1076.6
## + Pressure_gradient 1 26.95 6395.8 1077.0
## - Temperature_Sandburg 1 149.49 6572.2 1083.0
## + Day_of_week 6 80.32 6342.4 1084.0
## + Day_of_month 30 523.90 5898.8 1105.4
## - Temperature_ElMonte 1 1445.90 7868.6 1148.9
## - Month 11 2167.69 8590.4 1161.0
##
## Step: AIC=1034.34
## ozone_reading ~ Temperature_Sandburg + Month + Temperature_ElMonte +
## Humidity
##
## Df Sum of Sq RSS AIC
## + Pressure_gradient 1 113.67 5577.9 1029.0
## + Visibility 1 73.23 5618.3 1031.6
## + Inversion_base_height 1 65.14 5626.4 1032.1
## + Wind_speed 1 54.82 5636.7 1032.8
## <none> 5691.5 1034.3
## + pressure_height 1 24.71 5666.8 1034.8
## + Inversion_temperature 1 23.77 5667.8 1034.8
## + Day_of_week 6 113.25 5578.3 1039.0
## - Temperature_Sandburg 1 129.49 5821.0 1040.6
## + Day_of_month 30 521.20 5170.3 1059.2
## - Humidity 1 731.20 6422.7 1076.6
## - Month 11 1735.93 7427.5 1109.8
## - Temperature_ElMonte 1 1583.91 7275.4 1122.2
##
## Step: AIC=1028.96
## ozone_reading ~ Temperature_Sandburg + Month + Temperature_ElMonte +
## Humidity + Pressure_gradient
##
## Df Sum of Sq RSS AIC
## + Visibility 1 90.83 5487.0 1025.0
## + Wind_speed 1 49.09 5528.8 1027.7
## + Inversion_base_height 1 44.43 5533.4 1028.0
## <none> 5577.9 1029.0
## + pressure_height 1 23.03 5554.8 1029.4
## + Inversion_temperature 1 4.56 5573.3 1030.7
## + Day_of_week 6 106.13 5471.7 1033.9
## - Pressure_gradient 1 113.67 5691.5 1034.3
## - Temperature_Sandburg 1 158.47 5736.3 1037.2
## + Day_of_month 30 492.28 5085.6 1055.1
## - Humidity 1 817.92 6395.8 1077.0
## - Temperature_ElMonte 1 1033.34 6611.2 1089.2
## - Month 11 1810.91 7388.8 1109.9
##
## Step: AIC=1024.95
## ozone_reading ~ Temperature_Sandburg + Month + Temperature_ElMonte +
## Humidity + Pressure_gradient + Visibility
##
## Df Sum of Sq RSS AIC
## + Wind_speed 1 35.66 5451.4 1024.6
## + pressure_height 1 31.20 5455.8 1024.9
## <none> 5487.0 1025.0
## + Inversion_base_height 1 29.90 5457.1 1025.0
## + Inversion_temperature 1 0.70 5486.3 1026.9
## - Visibility 1 90.83 5577.9 1029.0
## + Day_of_week 6 111.27 5375.8 1029.5
## - Pressure_gradient 1 131.27 5618.3 1031.6
## - Temperature_Sandburg 1 160.10 5647.1 1033.5
## + Day_of_month 30 469.31 5017.7 1052.2
## - Humidity 1 560.14 6047.2 1058.5
## - Temperature_ElMonte 1 848.40 6335.4 1075.6
## - Month 11 1886.76 7373.8 1111.1
##
## Step: AIC=1024.56
## ozone_reading ~ Temperature_Sandburg + Month + Temperature_ElMonte +
## Humidity + Pressure_gradient + Visibility + Wind_speed
##
## Df Sum of Sq RSS AIC
## + pressure_height 1 49.37 5402.0 1023.2
## <none> 5451.4 1024.6
## - Wind_speed 1 35.66 5487.0 1025.0
## + Inversion_base_height 1 20.40 5431.0 1025.2
## + Inversion_temperature 1 0.00 5451.4 1026.6
## - Visibility 1 77.40 5528.8 1027.7
## + Day_of_week 6 103.01 5348.4 1029.6
## - Pressure_gradient 1 124.52 5575.9 1030.8
## - Temperature_Sandburg 1 151.32 5602.7 1032.6
## + Day_of_month 30 464.47 4986.9 1052.0
## - Humidity 1 554.48 6005.8 1058.0
## - Temperature_ElMonte 1 825.34 6276.7 1074.2
## - Month 11 1921.47 7372.8 1113.1
##
## Step: AIC=1023.23
## ozone_reading ~ Temperature_Sandburg + Month + Temperature_ElMonte +
## Humidity + Pressure_gradient + Visibility + Wind_speed +
## pressure_height
##
## Df Sum of Sq RSS AIC
## <none> 5402.0 1023.2
## + Inversion_base_height 1 22.66 5379.3 1023.7
## - pressure_height 1 49.37 5451.4 1024.6
## - Wind_speed 1 53.83 5455.8 1024.9
## + Inversion_temperature 1 2.53 5399.5 1025.1
## - Visibility 1 84.40 5486.4 1026.9
## + Day_of_week 6 95.02 5307.0 1028.7
## - Pressure_gradient 1 121.40 5523.4 1029.4
## - Temperature_Sandburg 1 190.02 5592.0 1033.9
## + Day_of_month 30 468.21 4933.8 1050.0
## - Humidity 1 456.50 5858.5 1050.9
## - Temperature_ElMonte 1 825.63 6227.6 1073.3
## - Month 11 1624.84 7026.8 1097.5
# Names of the variables retained by the stepwise search.
# The term labels of the final model are used instead of its coefficient
# names. A factor predictor such as Month (11 Df in the trace above)
# contributes one coefficient per non-reference level, and those coefficient
# names are not column names of inputData. Term labels never include the
# intercept, so no separate filtering step is needed.
shortlistedVars <- attr(terms(stepMod), "term.labels")
#5. Boruta
library(Boruta)
## Loading required package: ranger
# Run Boruta with ozone_reading as the response.
# The formula names the column inside `data`, so `.` expands to the other
# columns only. The earlier form, `response ~ .`, took the response from an
# external vector and left ozone_reading in `data` as a predictor, so the
# target leaked into the feature set and was itself reported as a confirmed
# attribute.
# NOTE: the transcript recorded below was produced with the earlier formula.
boruta_output <- Boruta(
  ozone_reading ~ .,
  data = na.omit(inputData),
  doTrace = 2
)
## 1. run of importance source...
## 2. run of importance source...
## 3. run of importance source...
## 4. run of importance source...
## 5. run of importance source...
## 6. run of importance source...
## 7. run of importance source...
## 8. run of importance source...
## 9. run of importance source...
## 10. run of importance source...
## 11. run of importance source...
## After 11 iterations, +3 secs:
## confirmed 10 attributes: Humidity, Inversion_base_height, Inversion_temperature, Month, ozone_reading and 5 more;
## rejected 1 attribute: Day_of_week;
## still have 2 attributes left.
## 12. run of importance source...
## 13. run of importance source...
## 14. run of importance source...
## 15. run of importance source...
## After 15 iterations, +3.8 secs:
## rejected 1 attribute: Day_of_month;
## still have 1 attribute left.
## 16. run of importance source...
## 17. run of importance source...
## 18. run of importance source...
## 19. run of importance source...
## 20. run of importance source...
## 21. run of importance source...
## 22. run of importance source...
## 23. run of importance source...
## 24. run of importance source...
## 25. run of importance source...
## 26. run of importance source...
## 27. run of importance source...
## 28. run of importance source...
## 29. run of importance source...
## 30. run of importance source...
## 31. run of importance source...
## 32. run of importance source...
## 33. run of importance source...
## 34. run of importance source...
## 35. run of importance source...
## 36. run of importance source...
## 37. run of importance source...
## 38. run of importance source...
## 39. run of importance source...
## 40. run of importance source...
## 41. run of importance source...
## 42. run of importance source...
## 43. run of importance source...
## 44. run of importance source...
## 45. run of importance source...
## 46. run of importance source...
## 47. run of importance source...
## 48. run of importance source...
## 49. run of importance source...
## 50. run of importance source...
## 51. run of importance source...
## 52. run of importance source...
## 53. run of importance source...
## 54. run of importance source...
## 55. run of importance source...
## 56. run of importance source...
## 57. run of importance source...
## 58. run of importance source...
## 59. run of importance source...
## 60. run of importance source...
## 61. run of importance source...
## 62. run of importance source...
## 63. run of importance source...
## 64. run of importance source...
## 65. run of importance source...
## 66. run of importance source...
## 67. run of importance source...
## 68. run of importance source...
## 69. run of importance source...
## 70. run of importance source...
## 71. run of importance source...
## 72. run of importance source...
## 73. run of importance source...
## 74. run of importance source...
## 75. run of importance source...
## 76. run of importance source...
## 77. run of importance source...
## 78. run of importance source...
## 79. run of importance source...
## 80. run of importance source...
## 81. run of importance source...
## 82. run of importance source...
## 83. run of importance source...
## 84. run of importance source...
## 85. run of importance source...
## 86. run of importance source...
## 87. run of importance source...
## 88. run of importance source...
## 89. run of importance source...
## 90. run of importance source...
## 91. run of importance source...
## After 91 iterations, +20 secs:
## rejected 1 attribute: Wind_speed;
## no more attributes left.
# Names of the attributes whose final decision is "Confirmed" or "Tentative".
boruta_signif <- getSelectedAttributes(boruta_output, withTentative = TRUE)
boruta_signif
## [1] "Month" "ozone_reading"
## [3] "pressure_height" "Humidity"
## [5] "Temperature_Sandburg" "Temperature_ElMonte"
## [7] "Inversion_base_height" "Pressure_gradient"
## [9] "Inversion_temperature" "Visibility"
#6. information value and woe
library(woe)
library(riv)
## Loading required package: rrcov
## Loading required package: robustbase
##
## Attaching package: 'robustbase'
## The following object is masked from 'package:survival':
##
## heart
## The following object is masked from 'package:boot':
##
## salinity
## Scalable Robust Estimators with High Breakdown Point (version 1.4-3)
## Loading required package: quantreg
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
## The following object is masked from 'package:base':
##
## backsolve
##
## Attaching package: 'quantreg'
## The following object is masked from 'package:survival':
##
## untangle.specials
# Information value of every variable in german_data against the "gb"
# outcome column, requested in summary form (this result is passed to
# iv.plot.summary() further down). verbose = TRUE prints the progress log.
iv_df <- iv.mult(
  german_data,
  y = "gb",
  summary = TRUE,
  verbose = TRUE
)
## Started processing of data frame: german_data
## Calling iv.str for variable: ca_status
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.67
## Calling iv.num for variable: duration
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 5
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.26
## Formatting output
## Calling iv.str for variable: credit_history
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.29
## Calling iv.str for variable: purpose
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.17
## Calling iv.num for variable: credit_amount
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 5
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.21
## Formatting output
## Calling iv.str for variable: savings
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.2
## Calling iv.str for variable: present_employment_since
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.09
## Calling iv.num for variable: installment_rate_income
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 2
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.02
## Formatting output
## Calling iv.str for variable: status_sex
## Assuming good = level 'good' and bad = level 'bad'
## Warning in iv.str(df, x, y, verbose = verbose): Some group for outcome 0 has zero count. This will result in -Inf or Inf WOE. Replacing - ODDS=1, WoE=0, MIV=0.
## The bin is either too small or suspiciously predictive.
## You should fix this before running any model. It does not make any sense to keep WoE = 0 for such bin.
## Information Value NaN
## Calling iv.str for variable: other_debtors
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.03
## Calling iv.num for variable: present_residence_since
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 1
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0
## Formatting output
## Calling iv.str for variable: property
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.11
## Calling iv.num for variable: age
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 5
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.13
## Formatting output
## Calling iv.str for variable: other_installment
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.06
## Calling iv.str for variable: housing
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.08
## Calling iv.num for variable: existing_credits
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 2
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.01
## Formatting output
## Calling iv.str for variable: job
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.01
## Calling iv.num for variable: liable_maintenance_people
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 1
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0
## Formatting output
## Calling iv.str for variable: telephone
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.01
## Calling iv.str for variable: foreign_worker
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.04
## Preparing summary
# Same computation with summary = FALSE; this non-summarised result is what
# iv.replace.woe() receives further down.
iv <- iv.mult(
  german_data,
  y = "gb",
  summary = FALSE,
  verbose = TRUE
)
## Started processing of data frame: german_data
## Calling iv.str for variable: ca_status
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.67
## Calling iv.num for variable: duration
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 5
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.26
## Formatting output
## Calling iv.str for variable: credit_history
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.29
## Calling iv.str for variable: purpose
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.17
## Calling iv.num for variable: credit_amount
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 5
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.21
## Formatting output
## Calling iv.str for variable: savings
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.2
## Calling iv.str for variable: present_employment_since
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.09
## Calling iv.num for variable: installment_rate_income
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 2
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.02
## Formatting output
## Calling iv.str for variable: status_sex
## Assuming good = level 'good' and bad = level 'bad'
## Warning in iv.str(df, x, y, verbose = verbose): Some group for outcome 0 has zero count. This will result in -Inf or Inf WOE. Replacing - ODDS=1, WoE=0, MIV=0.
## The bin is either too small or suspiciously predictive.
## You should fix this before running any model. It does not make any sense to keep WoE = 0 for such bin.
## Information Value NaN
## Calling iv.str for variable: other_debtors
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.03
## Calling iv.num for variable: present_residence_since
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 1
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0
## Formatting output
## Calling iv.str for variable: property
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.11
## Calling iv.num for variable: age
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 5
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.13
## Formatting output
## Calling iv.str for variable: other_installment
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.06
## Calling iv.str for variable: housing
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.08
## Calling iv.num for variable: existing_credits
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 2
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0.01
## Formatting output
## Calling iv.str for variable: job
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.01
## Calling iv.num for variable: liable_maintenance_people
## Building rpart model
## Model finished
## Sending model to tree parser
## Rules parsed: 1
## Mapping nodes to data
## SQL Merge
## DF Merge
## Calling iv.str for nodes
## Information Value 0
## Formatting output
## Calling iv.str for variable: telephone
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.01
## Calling iv.str for variable: foreign_worker
## Assuming good = level 'good' and bad = level 'bad'
## Information Value 0.04
# Plot the information-value summary.
iv.plot.summary(iv_df)

# Add the weight-of-evidence columns to the data frame; the log below lists
# each variable next to its new "<variable>_woe" column.
german_data_iv <- iv.replace.woe(
  german_data,
  iv,
  verbose = TRUE
)
## Var Name: ca_status
## WOE Name: ca_status_woe
## Var Name: duration
## WOE Name: duration_woe
## Var Name: credit_history
## WOE Name: credit_history_woe
## Var Name: purpose
## WOE Name: purpose_woe
## Var Name: credit_amount
## WOE Name: credit_amount_woe
## Var Name: savings
## WOE Name: savings_woe
## Var Name: present_employment_since
## WOE Name: present_employment_since_woe
## Var Name: installment_rate_income
## WOE Name: installment_rate_income_woe
## Var Name: status_sex
## WOE Name: status_sex_woe
## Var Name: other_debtors
## WOE Name: other_debtors_woe
## Var Name: present_residence_since
## WOE Name: present_residence_since_woe
## Var Name: property
## WOE Name: property_woe
## Var Name: age
## WOE Name: age_woe
## Var Name: other_installment
## WOE Name: other_installment_woe
## Var Name: housing
## WOE Name: housing_woe
## Var Name: existing_credits
## WOE Name: existing_credits_woe
## Var Name: job
## WOE Name: job_woe
## Var Name: liable_maintenance_people
## WOE Name: liable_maintenance_people_woe
## Var Name: telephone
## WOE Name: telephone_woe
## Var Name: foreign_worker
## WOE Name: foreign_worker_woe