### Setting Working Directory
# NOTE(review): this absolute path is machine-specific; the script only runs
# unchanged where this directory exists. It is kept because the relative
# file name passed to read.csv() below is resolved against it.
setwd("C:/Users/rpandey/Desktop/Classes")
## Load library "partykit" (provides ctree / ctree_control) for the Decision Tree
library(partykit)
## Loading required package: grid
## Read the data in the file
# stringsAsFactors is set explicitly: since R 4.0.0 read.csv() defaults to
# FALSE, which would load the text columns as character instead of the
# factors shown in the recorded str() output further down.
cust_data <- read.csv("Default_On_Payment.csv", stringsAsFactors = TRUE)
# Keep only the rows that have no missing value in any column
cust_data <- cust_data[complete.cases(cust_data), ]
## Create a new Factor variable "Default_Payment" in the table
# 1 -> "Default", anything else -> "Non Default"; the levels are fixed
# explicitly so both always exist, in this order, whatever the data contains.
cust_data$Default_Payment <- factor(
  ifelse(cust_data$Default_On_Payment == 1, "Default", "Non Default"),
  levels = c("Default", "Non Default")
)
## Check the distribution on new variable "Default_Payment"
table(cust_data$Default_Payment)
##
## Default Non Default
## 12001 28118
# Pie chart of the Default / Non Default counts (same table as printed above)
pie(table(cust_data$Default_Payment))
# Quick checks and exploration of the data
# dim() reports the number of rows and columns
dim(cust_data)
## [1] 40119 23
# str() lists the type of every column.
# NOTE(review): in the output below Duration_in_Months and Credit_Amount are
# listed as factors although their levels look numeric - presumably caused by
# non-numeric entries in the CSV; confirm before using them as numeric inputs.
str(cust_data)
## 'data.frame': 40119 obs. of 23 variables:
## $ Customer_ID : int 100015 100031 100046 100103 100104 100128 100148 100164 100182 100230 ...
## $ Status_Checking_Acc : Factor w/ 5 levels "2","A11","A12",..: 5 2 3 5 2 2 2 5 3 5 ...
## $ Duration_in_Months : Factor w/ 34 levels "10","11","12",..: 14 3 4 12 12 1 2 12 8 3 ...
## $ Credit_History : Factor w/ 6 levels "46","A30","A31",..: 4 6 6 4 4 4 6 6 4 4 ...
## $ Purposre_Credit_Taken : Factor w/ 12 levels "","A143","A40",..: 3 7 7 3 3 7 3 7 7 3 ...
## $ Credit_Amount : Factor w/ 923 levels "","1007","10127",..: 419 602 877 153 511 373 615 353 875 150 ...
## $ Savings_Acc : Factor w/ 7 levels "","2","A61","A62",..: 3 3 3 3 3 3 3 4 3 5 ...
## $ Years_At_Present_Employment: Factor w/ 7 levels "","A172","A71",..: 5 6 4 5 4 7 5 7 5 5 ...
## $ Inst_Rt_Income : int 3 4 4 2 4 3 1 4 4 2 ...
## $ Marital_Status_Gender : Factor w/ 6 levels "","A192","A91",..: 4 4 5 5 4 5 5 5 6 4 ...
## $ Other_Debtors_Guarantors : Factor w/ 5 levels "","A101","A102",..: 2 2 4 4 2 2 2 2 4 2 ...
## $ Current_Address_Yrs : int 3 3 4 2 1 4 2 4 2 2 ...
## $ Property : Factor w/ 5 levels "","A121","A122",..: 2 2 2 2 3 2 2 3 2 3 ...
## $ Age : int 21 58 23 31 27 52 40 52 25 26 ...
## $ Other_Inst_Plans : Factor w/ 4 levels "","A141","A142",..: 4 4 4 4 4 4 4 2 4 4 ...
## $ Housing : Factor w/ 4 levels "","A151","A152",..: 2 3 3 3 3 3 3 3 3 3 ...
## $ Num_CC : int 1 4 2 1 1 1 2 2 1 1 ...
## $ Job : Factor w/ 5 levels "","A171","A172",..: 4 3 4 4 4 3 3 4 3 4 ...
## $ Dependents : int 1 1 1 1 1 1 2 1 1 1 ...
## $ Telephone : Factor w/ 3 levels "","A191","A192": 2 3 2 3 2 2 2 2 2 2 ...
## $ Foreign_Worker : Factor w/ 3 levels "","A201","A202": 2 2 2 2 2 2 2 2 2 2 ...
## $ Default_On_Payment : int 1 0 0 0 1 0 0 0 0 1 ...
## $ Default_Payment : Factor w/ 2 levels "Default","Non Default": 1 2 2 2 1 2 2 2 2 1 ...
# Show the first six rows to eyeball the raw values of every column
head(cust_data)
## Customer_ID Status_Checking_Acc Duration_in_Months Credit_History
## 1 100015 A14 27 A32
## 2 100031 A11 12 A34
## 3 100046 A12 13 A34
## 4 100103 A14 24 A32
## 5 100104 A11 24 A32
## 6 100128 A11 10 A32
## Purposre_Credit_Taken Credit_Amount Savings_Acc
## 1 A40 2570 A61
## 2 A43 385 A61
## 3 A43 882 A61
## 4 A40 1393 A61
## 5 A40 3123 A61
## 6 A43 2315 A61
## Years_At_Present_Employment Inst_Rt_Income Marital_Status_Gender
## 1 A73 3 A92
## 2 A74 4 A92
## 3 A72 4 A93
## 4 A73 2 A93
## 5 A72 4 A92
## 6 A75 3 A93
## Other_Debtors_Guarantors Current_Address_Yrs Property Age
## 1 A101 3 A121 21
## 2 A101 3 A121 58
## 3 A103 4 A121 23
## 4 A103 2 A121 31
## 5 A101 1 A122 27
## 6 A101 4 A121 52
## Other_Inst_Plans Housing Num_CC Job Dependents Telephone Foreign_Worker
## 1 A143 A151 1 A173 1 A191 A201
## 2 A143 A152 4 A172 1 A192 A201
## 3 A143 A152 2 A173 1 A191 A201
## 4 A143 A152 1 A173 1 A192 A201
## 5 A143 A152 1 A173 1 A191 A201
## 6 A143 A152 1 A172 1 A191 A201
## Default_On_Payment Default_Payment
## 1 1 Default
## 2 0 Non Default
## 3 0 Non Default
## 4 0 Non Default
## 5 1 Default
## 6 0 Non Default
# Show the last six rows. Row names are carried over from the file as read,
# so the last label (40120 in the output below) can exceed the row count
# reported by dim() once complete.cases() has removed rows.
tail(cust_data)
## Customer_ID Status_Checking_Acc Duration_in_Months Credit_History
## 40115 986899 A13 15 A32
## 40116 986907 A11 24 A32
## 40117 986936 A12 24 A34
## 40118 986939 A12 60 A32
## 40119 986944 A12 36 A30
## 40120 986974 A11 20 A34
## Purposre_Credit_Taken Credit_Amount Savings_Acc
## 40115 A46 392 A61
## 40116 A40 1285 A65
## 40117 A49 1935 A61
## 40118 A40 14027 A61
## 40119 A43 3804 A61
## 40120 A40 2235 A61
## Years_At_Present_Employment Inst_Rt_Income Marital_Status_Gender
## 40115 A72 4 A92
## 40116 A74 4 A92
## 40117 A75 4 A91
## 40118 A74 4 A93
## 40119 A73 4 A92
## 40120 A73 4 A94
## Other_Debtors_Guarantors Current_Address_Yrs Property Age
## 40115 A101 4 A122 23
## 40116 A101 4 A124 32
## 40117 A101 4 A121 31
## 40118 A101 2 A124 27
## 40119 A101 1 A123 42
## 40120 A103 2 A122 33
## Other_Inst_Plans Housing Num_CC Job Dependents Telephone
## 40115 A143 A151 1 A173 1 A192
## 40116 A143 A151 1 A173 1 A191
## 40117 A143 A152 2 A173 1 A192
## 40118 A143 A152 1 A174 1 A192
## 40119 A143 A152 1 A173 1 A192
## 40120 A141 A151 2 A173 1 A191
## Foreign_Worker Default_On_Payment Default_Payment
## 40115 A201 0 Non Default
## 40116 A201 1 Default
## 40117 A201 1 Default
## 40118 A201 1 Default
## 40119 A201 1 Default
## 40120 A202 1 Default
# Per-column summary: quartiles for integer columns, level counts for factors.
# NOTE(review): levels with a count of 0 in the output below (e.g. "", "2",
# "46") are unused factor levels - presumably left over from the row(s)
# removed by complete.cases(); confirm against the raw CSV.
summary(cust_data)
## Customer_ID Status_Checking_Acc Duration_in_Months Credit_History
## Min. :100015 2 : 0 24 : 7386 46 : 0
## 1st Qu.:325149 A11:10990 12 : 7179 A30: 1610
## Median :551443 A12:10799 18 : 4536 A31: 1965
## Mean :552085 A13: 2531 36 : 3326 A32:21256
## 3rd Qu.:778206 A14:15799 6 : 3009 A33: 3530
## Max. :999961 15 : 2567 A34:11758
## (Other):12116
## Purposre_Credit_Taken Credit_Amount Savings_Acc
## A43 :11230 1258 : 121 : 0
## A40 : 9381 1275 : 121 2 : 0
## A42 : 7270 1262 : 120 A61:24191
## A41 : 4131 1393 : 120 A62: 4130
## A49 : 3890 1478 : 120 A63: 2530
## A46 : 2009 1424 : 83 A64: 1923
## (Other): 2208 (Other):39434 A65: 7345
## Years_At_Present_Employment Inst_Rt_Income Marital_Status_Gender
## : 0 Min. :1.000 : 0
## A172: 0 1st Qu.:2.000 A192: 0
## A71 : 2485 Median :3.000 A91 : 2008
## A72 : 6900 Mean :2.973 A92 :12445
## A73 :13603 3rd Qu.:4.000 A93 :21977
## A74 : 6978 Max. :4.000 A94 : 3689
## A75 :10153
## Other_Debtors_Guarantors Current_Address_Yrs Property Age
## : 0 Min. :1.000 : 0 Min. :19.00
## A101:36392 1st Qu.:2.000 A121:11309 1st Qu.:27.00
## A102: 1645 Median :3.000 A122: 9309 Median :33.00
## A103: 2082 Mean :2.845 A123:13317 Mean :35.54
## A201: 0 3rd Qu.:4.000 A124: 6184 3rd Qu.:42.00
## Max. :4.000 Max. :75.00
##
## Other_Inst_Plans Housing Num_CC Job
## : 0 : 0 Min. :1.000 : 0
## A141: 5573 A151: 7182 1st Qu.:1.000 A171: 882
## A142: 1885 A152:28603 Median :1.000 A172: 8018
## A143:32661 A153: 4334 Mean :1.407 A173:25278
## 3rd Qu.:2.000 A174: 5941
## Max. :4.000
##
## Dependents Telephone Foreign_Worker Default_On_Payment
## Min. :1.000 : 0 : 0 Min. :0.0000
## 1st Qu.:1.000 A191:23903 A201:38634 1st Qu.:0.0000
## Median :1.000 A192:16216 A202: 1485 Median :0.0000
## Mean :1.155 Mean :0.2991
## 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :2.000 Max. :1.0000
##
## Default_Payment
## Default :12001
## Non Default:28118
##
##
##
##
##
# Conditional inference tree for Default_Payment (the factor derived from
# Default_On_Payment), with Housing and Status_Checking_Acc as predictors.
# Growth settings: split criterion threshold of 0.95, and at least 100
# observations both to attempt a split and in every terminal node.
ctrl <- ctree_control(
  minbucket = 100,
  minsplit = 100,
  mincriterion = 0.95
)
fit <- ctree(
  formula = Default_Payment ~ Housing + Status_Checking_Acc,
  data = cust_data,
  control = ctrl
)
# Draw the fitted tree, then print its text representation (nodes and splits)
plot(fit, main = "Conditional Inference Tree for Default_Payment ")
print(fit)
##
## Model formula:
## Default_Payment ~ Housing + Status_Checking_Acc
##
## Fitted party:
## [1] root
## | [2] Status_Checking_Acc in A11, A12
## | | [3] Housing in A151, A153
## | | | [4] Status_Checking_Acc in A11: Default (n = 4172, err = 45.2%)
## | | | [5] Status_Checking_Acc in A12
## | | | | [6] Housing in A151: Non Default (n = 1926, err = 47.9%)
## | | | | [7] Housing in A153: Default (n = 1165, err = 44.9%)
## | | [8] Housing in A152
## | | | [9] Status_Checking_Acc in A11: Non Default (n = 6818, err = 45.9%)
## | | | [10] Status_Checking_Acc in A12: Non Default (n = 7708, err = 33.9%)
## | [11] Status_Checking_Acc in A13, A14
## | | [12] Status_Checking_Acc in A14
## | | | [13] Housing in A151: Non Default (n = 2325, err = 17.3%)
## | | | [14] Housing in A152, A153
## | | | | [15] Housing in A153: Non Default (n = 1285, err = 12.6%)
## | | | | [16] Housing in A152: Non Default (n = 12189, err = 10.5%)
## | | [17] Status_Checking_Acc in A13
## | | | [18] Housing in A151, A152
## | | | | [19] Housing in A151: Non Default (n = 322, err = 12.4%)
## | | | | [20] Housing in A152: Non Default (n = 1888, err = 21.3%)
## | | | [21] Housing in A153: Non Default (n = 321, err = 37.7%)
##
## Number of inner nodes: 10
## Number of terminal nodes: 11
### summary() of the fitted tree
# NOTE(review): as the output below shows, this call only prints a
# Length / Class / Mode table with one row per node (21 rows of class
# constparty); it does not list the splits. The split details are in the
# print(fit) output above.
summary(fit)
## Length Class Mode
## 1 21 constparty list
## 2 9 constparty list
## 3 5 constparty list
## 4 1 constparty list
## 5 3 constparty list
## 6 1 constparty list
## 7 1 constparty list
## 8 3 constparty list
## 9 1 constparty list
## 10 1 constparty list
## 11 11 constparty list
## 12 5 constparty list
## 13 1 constparty list
## 14 3 constparty list
## 15 1 constparty list
## 16 1 constparty list
## 17 5 constparty list
## 18 3 constparty list
## 19 1 constparty list
## 20 1 constparty list
## 21 1 constparty list
# This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
# When you click the Knit button, a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk from the R Markdown template: summarises the `cars` data set
# (columns speed and dist, as shown in the output below).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# You can also embed plots, for example:
# Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.