### Setting Working Directory
# The data folder defaults to the original author's location. Set the
# CREDIT_DATA_DIR environment variable to run the script on another machine
# without editing this file.
data_dir <- Sys.getenv("CREDIT_DATA_DIR", unset = "")
if (!nzchar(data_dir)) {
  data_dir <- "C:/Users/rpandey/Desktop/Classes"
}
# Fail early with a readable message instead of setwd()'s generic error.
if (!dir.exists(data_dir)) {
  stop("Data directory not found: ", data_dir, call. = FALSE)
}
setwd(data_dir)

## Load library "partykit" for running the Decision Tree
library(partykit)
## Loading required package: grid
## Read the data in the file
# stringsAsFactors = TRUE is spelled out because read.csv() stopped converting
# strings to factors by default in R 4.0.0; the recorded str() output and the
# tree splits further down show the categorical columns as factors.
cust_data <- read.csv("Default_On_Payment.csv", stringsAsFactors = TRUE)

# Keep only the rows that have no missing value in any column.
# NOTE(review): the recorded str() output lists Duration_in_Months and
# Credit_Amount as factors, and several columns carry empty or stray levels
# with zero rows. That suggests a malformed record in the CSV - confirm
# against the raw file before using those columns as numeric predictors.
cust_data <- cust_data[complete.cases(cust_data), ]

## Derive the factor column "Default_Payment" from the 0/1 indicator column:
## 1 becomes "Default", any other non-missing value becomes "Non Default"
cust_data[["Default_Payment"]] <- factor(
  ifelse(
    cust_data[["Default_On_Payment"]] == 1,
    "Default",
    "Non Default"
  )
)

## Class balance of the new variable "Default_Payment"
# The counts are tabulated once and reused for both the printout and the chart.
payment_counts <- table(cust_data$Default_Payment)
payment_counts
## 
##     Default Non Default 
##       12001       28118
# Same counts drawn as a pie chart
pie(payment_counts)

# Quick structural checks: row/column counts first, then per-column types
c(nrow(cust_data), ncol(cust_data))
## [1] 40119    23
str(object = cust_data)
## 'data.frame':    40119 obs. of  23 variables:
##  $ Customer_ID                : int  100015 100031 100046 100103 100104 100128 100148 100164 100182 100230 ...
##  $ Status_Checking_Acc        : Factor w/ 5 levels "2","A11","A12",..: 5 2 3 5 2 2 2 5 3 5 ...
##  $ Duration_in_Months         : Factor w/ 34 levels "10","11","12",..: 14 3 4 12 12 1 2 12 8 3 ...
##  $ Credit_History             : Factor w/ 6 levels "46","A30","A31",..: 4 6 6 4 4 4 6 6 4 4 ...
##  $ Purposre_Credit_Taken      : Factor w/ 12 levels "","A143","A40",..: 3 7 7 3 3 7 3 7 7 3 ...
##  $ Credit_Amount              : Factor w/ 923 levels "","1007","10127",..: 419 602 877 153 511 373 615 353 875 150 ...
##  $ Savings_Acc                : Factor w/ 7 levels "","2","A61","A62",..: 3 3 3 3 3 3 3 4 3 5 ...
##  $ Years_At_Present_Employment: Factor w/ 7 levels "","A172","A71",..: 5 6 4 5 4 7 5 7 5 5 ...
##  $ Inst_Rt_Income             : int  3 4 4 2 4 3 1 4 4 2 ...
##  $ Marital_Status_Gender      : Factor w/ 6 levels "","A192","A91",..: 4 4 5 5 4 5 5 5 6 4 ...
##  $ Other_Debtors_Guarantors   : Factor w/ 5 levels "","A101","A102",..: 2 2 4 4 2 2 2 2 4 2 ...
##  $ Current_Address_Yrs        : int  3 3 4 2 1 4 2 4 2 2 ...
##  $ Property                   : Factor w/ 5 levels "","A121","A122",..: 2 2 2 2 3 2 2 3 2 3 ...
##  $ Age                        : int  21 58 23 31 27 52 40 52 25 26 ...
##  $ Other_Inst_Plans           : Factor w/ 4 levels "","A141","A142",..: 4 4 4 4 4 4 4 2 4 4 ...
##  $ Housing                    : Factor w/ 4 levels "","A151","A152",..: 2 3 3 3 3 3 3 3 3 3 ...
##  $ Num_CC                     : int  1 4 2 1 1 1 2 2 1 1 ...
##  $ Job                        : Factor w/ 5 levels "","A171","A172",..: 4 3 4 4 4 3 3 4 3 4 ...
##  $ Dependents                 : int  1 1 1 1 1 1 2 1 1 1 ...
##  $ Telephone                  : Factor w/ 3 levels "","A191","A192": 2 3 2 3 2 2 2 2 2 2 ...
##  $ Foreign_Worker             : Factor w/ 3 levels "","A201","A202": 2 2 2 2 2 2 2 2 2 2 ...
##  $ Default_On_Payment         : int  1 0 0 0 1 0 0 0 0 1 ...
##  $ Default_Payment            : Factor w/ 2 levels "Default","Non Default": 1 2 2 2 1 2 2 2 2 1 ...
# First six rows of the cleaned data
head(cust_data, n = 6L)
##   Customer_ID Status_Checking_Acc Duration_in_Months Credit_History
## 1      100015                 A14                 27            A32
## 2      100031                 A11                 12            A34
## 3      100046                 A12                 13            A34
## 4      100103                 A14                 24            A32
## 5      100104                 A11                 24            A32
## 6      100128                 A11                 10            A32
##   Purposre_Credit_Taken Credit_Amount Savings_Acc
## 1                   A40          2570         A61
## 2                   A43           385         A61
## 3                   A43           882         A61
## 4                   A40          1393         A61
## 5                   A40          3123         A61
## 6                   A43          2315         A61
##   Years_At_Present_Employment Inst_Rt_Income Marital_Status_Gender
## 1                         A73              3                   A92
## 2                         A74              4                   A92
## 3                         A72              4                   A93
## 4                         A73              2                   A93
## 5                         A72              4                   A92
## 6                         A75              3                   A93
##   Other_Debtors_Guarantors Current_Address_Yrs Property Age
## 1                     A101                   3     A121  21
## 2                     A101                   3     A121  58
## 3                     A103                   4     A121  23
## 4                     A103                   2     A121  31
## 5                     A101                   1     A122  27
## 6                     A101                   4     A121  52
##   Other_Inst_Plans Housing Num_CC  Job Dependents Telephone Foreign_Worker
## 1             A143    A151      1 A173          1      A191           A201
## 2             A143    A152      4 A172          1      A192           A201
## 3             A143    A152      2 A173          1      A191           A201
## 4             A143    A152      1 A173          1      A192           A201
## 5             A143    A152      1 A173          1      A191           A201
## 6             A143    A152      1 A172          1      A191           A201
##   Default_On_Payment Default_Payment
## 1                  1         Default
## 2                  0     Non Default
## 3                  0     Non Default
## 4                  0     Non Default
## 5                  1         Default
## 6                  0     Non Default
# Last six rows of the cleaned data
tail(cust_data, n = 6L)
##       Customer_ID Status_Checking_Acc Duration_in_Months Credit_History
## 40115      986899                 A13                 15            A32
## 40116      986907                 A11                 24            A32
## 40117      986936                 A12                 24            A34
## 40118      986939                 A12                 60            A32
## 40119      986944                 A12                 36            A30
## 40120      986974                 A11                 20            A34
##       Purposre_Credit_Taken Credit_Amount Savings_Acc
## 40115                   A46           392         A61
## 40116                   A40          1285         A65
## 40117                   A49          1935         A61
## 40118                   A40         14027         A61
## 40119                   A43          3804         A61
## 40120                   A40          2235         A61
##       Years_At_Present_Employment Inst_Rt_Income Marital_Status_Gender
## 40115                         A72              4                   A92
## 40116                         A74              4                   A92
## 40117                         A75              4                   A91
## 40118                         A74              4                   A93
## 40119                         A73              4                   A92
## 40120                         A73              4                   A94
##       Other_Debtors_Guarantors Current_Address_Yrs Property Age
## 40115                     A101                   4     A122  23
## 40116                     A101                   4     A124  32
## 40117                     A101                   4     A121  31
## 40118                     A101                   2     A124  27
## 40119                     A101                   1     A123  42
## 40120                     A103                   2     A122  33
##       Other_Inst_Plans Housing Num_CC  Job Dependents Telephone
## 40115             A143    A151      1 A173          1      A192
## 40116             A143    A151      1 A173          1      A191
## 40117             A143    A152      2 A173          1      A192
## 40118             A143    A152      1 A174          1      A192
## 40119             A143    A152      1 A173          1      A192
## 40120             A141    A151      2 A173          1      A191
##       Foreign_Worker Default_On_Payment Default_Payment
## 40115           A201                  0     Non Default
## 40116           A201                  1         Default
## 40117           A201                  1         Default
## 40118           A201                  1         Default
## 40119           A201                  1         Default
## 40120           A202                  1         Default
# Per-column summary: quantiles for numeric columns, level counts for factors
summary(object = cust_data)
##   Customer_ID     Status_Checking_Acc Duration_in_Months Credit_History
##  Min.   :100015   2  :    0           24     : 7386      46 :    0     
##  1st Qu.:325149   A11:10990           12     : 7179      A30: 1610     
##  Median :551443   A12:10799           18     : 4536      A31: 1965     
##  Mean   :552085   A13: 2531           36     : 3326      A32:21256     
##  3rd Qu.:778206   A14:15799           6      : 3009      A33: 3530     
##  Max.   :999961                       15     : 2567      A34:11758     
##                                       (Other):12116                    
##  Purposre_Credit_Taken Credit_Amount   Savings_Acc
##  A43    :11230         1258   :  121      :    0  
##  A40    : 9381         1275   :  121   2  :    0  
##  A42    : 7270         1262   :  120   A61:24191  
##  A41    : 4131         1393   :  120   A62: 4130  
##  A49    : 3890         1478   :  120   A63: 2530  
##  A46    : 2009         1424   :   83   A64: 1923  
##  (Other): 2208         (Other):39434   A65: 7345  
##  Years_At_Present_Employment Inst_Rt_Income  Marital_Status_Gender
##      :    0                  Min.   :1.000       :    0           
##  A172:    0                  1st Qu.:2.000   A192:    0           
##  A71 : 2485                  Median :3.000   A91 : 2008           
##  A72 : 6900                  Mean   :2.973   A92 :12445           
##  A73 :13603                  3rd Qu.:4.000   A93 :21977           
##  A74 : 6978                  Max.   :4.000   A94 : 3689           
##  A75 :10153                                                       
##  Other_Debtors_Guarantors Current_Address_Yrs Property          Age       
##      :    0               Min.   :1.000           :    0   Min.   :19.00  
##  A101:36392               1st Qu.:2.000       A121:11309   1st Qu.:27.00  
##  A102: 1645               Median :3.000       A122: 9309   Median :33.00  
##  A103: 2082               Mean   :2.845       A123:13317   Mean   :35.54  
##  A201:    0               3rd Qu.:4.000       A124: 6184   3rd Qu.:42.00  
##                           Max.   :4.000                    Max.   :75.00  
##                                                                           
##  Other_Inst_Plans Housing          Num_CC        Job       
##      :    0           :    0   Min.   :1.000       :    0  
##  A141: 5573       A151: 7182   1st Qu.:1.000   A171:  882  
##  A142: 1885       A152:28603   Median :1.000   A172: 8018  
##  A143:32661       A153: 4334   Mean   :1.407   A173:25278  
##                                3rd Qu.:2.000   A174: 5941  
##                                Max.   :4.000               
##                                                            
##    Dependents    Telephone    Foreign_Worker Default_On_Payment
##  Min.   :1.000       :    0       :    0     Min.   :0.0000    
##  1st Qu.:1.000   A191:23903   A201:38634     1st Qu.:0.0000    
##  Median :1.000   A192:16216   A202: 1485     Median :0.0000    
##  Mean   :1.155                               Mean   :0.2991    
##  3rd Qu.:1.000                               3rd Qu.:1.0000    
##  Max.   :2.000                               Max.   :1.0000    
##                                                                
##     Default_Payment 
##  Default    :12001  
##  Non Default:28118  
##                     
##                     
##                     
##                     
## 
# Conditional inference tree for Default_Payment
# Control settings: a split is implemented only when the test criterion
# (1 - p-value) exceeds 0.95; a node needs a weight sum of at least 100 to be
# considered for splitting, and every terminal node must keep at least 100.
ctrl <- ctree_control(
  mincriterion = 0.95,
  minsplit = 100,
  minbucket = 100
)
# Model the default label from housing type and checking-account status only.
fit <- ctree(
  formula = Default_Payment ~ Housing + Status_Checking_Acc,
  data = cust_data,
  control = ctrl
)
plot(x = fit, main = "Conditional Inference Tree for Default_Payment ")

print(x = fit)
## 
## Model formula:
## Default_Payment ~ Housing + Status_Checking_Acc
## 
## Fitted party:
## [1] root
## |   [2] Status_Checking_Acc in A11, A12
## |   |   [3] Housing in A151, A153
## |   |   |   [4] Status_Checking_Acc in A11: Default (n = 4172, err = 45.2%)
## |   |   |   [5] Status_Checking_Acc in A12
## |   |   |   |   [6] Housing in A151: Non Default (n = 1926, err = 47.9%)
## |   |   |   |   [7] Housing in A153: Default (n = 1165, err = 44.9%)
## |   |   [8] Housing in A152
## |   |   |   [9] Status_Checking_Acc in A11: Non Default (n = 6818, err = 45.9%)
## |   |   |   [10] Status_Checking_Acc in A12: Non Default (n = 7708, err = 33.9%)
## |   [11] Status_Checking_Acc in A13, A14
## |   |   [12] Status_Checking_Acc in A14
## |   |   |   [13] Housing in A151: Non Default (n = 2325, err = 17.3%)
## |   |   |   [14] Housing in A152, A153
## |   |   |   |   [15] Housing in A153: Non Default (n = 1285, err = 12.6%)
## |   |   |   |   [16] Housing in A152: Non Default (n = 12189, err = 10.5%)
## |   |   [17] Status_Checking_Acc in A13
## |   |   |   [18] Housing in A151, A152
## |   |   |   |   [19] Housing in A151: Non Default (n = 322, err = 12.4%)
## |   |   |   |   [20] Housing in A152: Non Default (n = 1888, err = 21.3%)
## |   |   |   [21] Housing in A153: Non Default (n = 321, err = 37.7%)
## 
## Number of inner nodes:    10
## Number of terminal nodes: 11
### Per-node overview of the fitted tree
# As the recorded output shows, summary() on this object only tabulates
# Length/Class/Mode for each of the 21 nodes; the split details themselves
# are in the print(fit) output.
summary(object = fit)
##    Length Class      Mode
## 1  21     constparty list
## 2   9     constparty list
## 3   5     constparty list
## 4   1     constparty list
## 5   3     constparty list
## 6   1     constparty list
## 7   1     constparty list
## 8   3     constparty list
## 9   1     constparty list
## 10  1     constparty list
## 11 11     constparty list
## 12  5     constparty list
## 13  1     constparty list
## 14  3     constparty list
## 15  1     constparty list
## 16  1     constparty list
## 17  5     constparty list
## 18  3     constparty list
## 19  1     constparty list
## 20  1     constparty list
## 21  1     constparty list

## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Example chunk from the R Markdown template: summarise the built-in cars data
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

## Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.