Problem

We are attempting to predict if a person is likely to have be a high risk when we lend them money.

We are analyzing a dataset with over 30,000 entries of previous customers and their risk level for Stark Enterprises. We are building a decision tree with that data and using it for our predictions.

This data comprises dozens of variables about previous customers who applied for credit. We narrowed the variables down to a small handful we thought would be useful. We chose CNT_CHILDREN, AMT_INCOME_TOTAL, AMT_CREDIT, AMT_ANNUITY, AMT_GOODS_PRICE, and DAYS_EMPLOYED as our variables.

library(rpart)
library(rpart.plot)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(tidyr)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ROSE)
## Loaded ROSE 0.0-4
cred <- read.csv("credit_8.csv", header = TRUE)
names(cred)
##  [1] "X"                           "SK_ID_CURR"                 
##  [3] "TARGET"                      "NAME_CONTRACT_TYPE"         
##  [5] "CODE_GENDER"                 "FLAG_OWN_CAR"               
##  [7] "FLAG_OWN_REALTY"             "CNT_CHILDREN"               
##  [9] "AMT_INCOME_TOTAL"            "AMT_CREDIT"                 
## [11] "AMT_ANNUITY"                 "AMT_GOODS_PRICE"            
## [13] "NAME_TYPE_SUITE"             "NAME_INCOME_TYPE"           
## [15] "NAME_EDUCATION_TYPE"         "NAME_FAMILY_STATUS"         
## [17] "NAME_HOUSING_TYPE"           "DAYS_BIRTH"                 
## [19] "DAYS_EMPLOYED"               "DAYS_REGISTRATION"          
## [21] "DAYS_ID_PUBLISH"             "OWN_CAR_AGE"                
## [23] "FLAG_MOBIL"                  "FLAG_EMP_PHONE"             
## [25] "FLAG_WORK_PHONE"             "FLAG_CONT_MOBILE"           
## [27] "FLAG_PHONE"                  "FLAG_EMAIL"                 
## [29] "OCCUPATION_TYPE"             "CNT_FAM_MEMBERS"            
## [31] "REGION_RATING_CLIENT"        "REGION_RATING_CLIENT_W_CITY"
## [33] "WEEKDAY_APPR_PROCESS_START"  "HOUR_APPR_PROCESS_START"    
## [35] "REG_REGION_NOT_LIVE_REGION"  "REG_REGION_NOT_WORK_REGION" 
## [37] "LIVE_REGION_NOT_WORK_REGION" "REG_CITY_NOT_LIVE_CITY"     
## [39] "REG_CITY_NOT_WORK_CITY"      "LIVE_CITY_NOT_WORK_CITY"    
## [41] "ORGANIZATION_TYPE"           "DAYS_LAST_PHONE_CHANGE"     
## [43] "FLAG_DOCUMENT_2"             "FLAG_DOCUMENT_3"            
## [45] "FLAG_DOCUMENT_4"             "FLAG_DOCUMENT_5"            
## [47] "FLAG_DOCUMENT_6"             "FLAG_DOCUMENT_7"            
## [49] "FLAG_DOCUMENT_8"             "FLAG_DOCUMENT_9"            
## [51] "FLAG_DOCUMENT_10"            "FLAG_DOCUMENT_11"           
## [53] "FLAG_DOCUMENT_12"            "FLAG_DOCUMENT_13"           
## [55] "FLAG_DOCUMENT_14"            "FLAG_DOCUMENT_15"           
## [57] "FLAG_DOCUMENT_16"            "FLAG_DOCUMENT_17"           
## [59] "FLAG_DOCUMENT_18"            "FLAG_DOCUMENT_19"           
## [61] "FLAG_DOCUMENT_20"            "FLAG_DOCUMENT_21"           
## [63] "AMT_REQ_CREDIT_BUREAU_HOUR"  "AMT_REQ_CREDIT_BUREAU_DAY"  
## [65] "AMT_REQ_CREDIT_BUREAU_WEEK"  "AMT_REQ_CREDIT_BUREAU_MON"  
## [67] "AMT_REQ_CREDIT_BUREAU_QRT"   "AMT_REQ_CREDIT_BUREAU_YEAR"
cred <- drop_na(cred)
head(cred,10)
##     X SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR
## 1   1     264634      0         Cash loans           M            Y
## 2   2     343697      0         Cash loans           M            Y
## 3   3     169422      0         Cash loans           M            Y
## 4   4     302321      1         Cash loans           F            Y
## 5   8     126681      0    Revolving loans           M            Y
## 6   9     177072      0    Revolving loans           M            Y
## 7  16     397136      0         Cash loans           M            Y
## 8  25     425686      0    Revolving loans           M            Y
## 9  30     392005      0    Revolving loans           F            Y
## 10 31     180988      0         Cash loans           F            Y
##    FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY
## 1                Y            0           225000   594121.5     32229.0
## 2                N            0           382500   595453.5     30532.5
## 3                N            0           157500   534204.0     31941.0
## 4                Y            0           135000   280170.0     29547.0
## 5                Y            0            94500   180000.0      9000.0
## 6                Y            0           180000   180000.0      9000.0
## 7                N            0           130500   687600.0     18135.0
## 8                Y            1           112500   270000.0     13500.0
## 9                Y            0            45000   135000.0      6750.0
## 10               Y            0           135000  1339884.0     39307.5
##    AMT_GOODS_PRICE NAME_TYPE_SUITE     NAME_INCOME_TYPE
## 1           472500   Unaccompanied              Working
## 2           445500   Unaccompanied Commercial associate
## 3           495000   Unaccompanied              Working
## 4           247500   Unaccompanied        State servant
## 5           180000   Unaccompanied              Working
## 6           180000   Unaccompanied Commercial associate
## 7           450000          Family              Working
## 8           270000   Unaccompanied              Working
## 9           135000   Unaccompanied            Pensioner
## 10         1170000          Family Commercial associate
##              NAME_EDUCATION_TYPE NAME_FAMILY_STATUS   NAME_HOUSING_TYPE
## 1               Higher education            Married    Rented apartment
## 2               Higher education     Civil marriage Municipal apartment
## 3  Secondary / secondary special          Separated Municipal apartment
## 4               Higher education     Civil marriage   House / apartment
## 5  Secondary / secondary special            Married   House / apartment
## 6              Incomplete higher     Civil marriage        With parents
## 7  Secondary / secondary special            Married   House / apartment
## 8  Secondary / secondary special            Married   House / apartment
## 9  Secondary / secondary special     Civil marriage   House / apartment
## 10             Incomplete higher            Married   House / apartment
##    DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE
## 1      -15317         -1656             -2851           -2883          13
## 2      -10754          -694             -2809           -2893           9
## 3      -15166          -223             -1891           -4783          22
## 4       -8872         -1350             -8851            -595           7
## 5       -9396         -1224             -1110           -1172          13
## 6       -8006          -478             -3120            -622           7
## 7      -18287          -824             -9736           -1840           6
## 8      -15852         -8079             -4170           -4356           0
## 9      -20908        365243            -13011           -4239          14
## 10      -8696         -1239             -1403           -1085           4
##    FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE
## 1           1              1               0                1          0
## 2           1              1               0                1          1
## 3           1              1               0                1          0
## 4           1              1               0                1          0
## 5           1              1               0                1          1
## 6           1              1               0                1          0
## 7           1              1               1                1          1
## 8           1              1               0                1          1
## 9           1              0               0                1          1
## 10          1              1               1                1          1
##    FLAG_EMAIL       OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT
## 1           0 High skill tech staff               2                    2
## 2           1              Managers               2                    1
## 3           0        Security staff               1                    2
## 4           0        Medicine staff               2                    2
## 5           0    Low-skill Laborers               2                    3
## 6           0              Laborers               2                    2
## 7           0              Laborers               2                    2
## 8           0              Laborers               3                    2
## 9           0                                     2                    2
## 10          0              Laborers               2                    2
##    REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START
## 1                            2                     SUNDAY
## 2                            1                     SUNDAY
## 3                            2                   SATURDAY
## 4                            2                   THURSDAY
## 5                            3                     SUNDAY
## 6                            2                  WEDNESDAY
## 7                            2                    TUESDAY
## 8                            2                  WEDNESDAY
## 9                            2                     MONDAY
## 10                           2                     FRIDAY
##    HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION
## 1                       11                          0
## 2                       11                          0
## 3                        3                          0
## 4                       16                          0
## 5                       11                          0
## 6                       10                          1
## 7                       10                          0
## 8                       17                          0
## 9                       11                          0
## 10                      12                          0
##    REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION
## 1                           0                           0
## 2                           0                           0
## 3                           0                           0
## 4                           0                           0
## 5                           0                           0
## 6                           1                           0
## 7                           0                           0
## 8                           0                           0
## 9                           0                           0
## 10                          0                           0
##    REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY
## 1                       0                      0                       0
## 2                       0                      0                       0
## 3                       0                      0                       0
## 4                       1                      1                       0
## 5                       0                      0                       0
## 6                       1                      1                       0
## 7                       0                      0                       0
## 8                       0                      0                       0
## 9                       0                      0                       0
## 10                      0                      0                       0
##         ORGANIZATION_TYPE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2
## 1  Business Entity Type 3                   -439               0
## 2  Business Entity Type 3                  -2207               0
## 3                Security                   -291               0
## 4                Medicine                   -419               0
## 5           Self-employed                  -1100               0
## 6  Business Entity Type 3                   -128               0
## 7  Business Entity Type 3                  -3003               0
## 8       Transport: type 2                  -1143               0
## 9                     XNA                  -1047               0
## 10          Self-employed                   -918               0
##    FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6
## 1                0               0               0               0
## 2                1               0               0               0
## 3                1               0               0               0
## 4                1               0               0               0
## 5                0               0               0               0
## 6                0               0               0               0
## 7                1               0               0               0
## 8                0               0               1               0
## 9                0               0               0               0
## 10               1               0               0               0
##    FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10
## 1                0               1               0                0
## 2                0               0               0                0
## 3                0               0               0                0
## 4                0               0               0                0
## 5                0               0               0                0
## 6                0               0               0                0
## 7                0               0               0                0
## 8                0               0               0                0
## 9                0               0               0                0
## 10               0               0               0                0
##    FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14
## 1                 0                0                0                0
## 2                 0                0                0                0
## 3                 0                0                0                0
## 4                 0                0                0                0
## 5                 0                0                0                0
## 6                 0                0                0                0
## 7                 0                0                0                0
## 8                 0                0                0                0
## 9                 0                0                0                0
## 10                0                0                0                0
##    FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18
## 1                 0                0                0                0
## 2                 0                0                0                0
## 3                 0                0                0                0
## 4                 0                0                0                0
## 5                 0                0                0                0
## 6                 0                0                0                0
## 7                 0                0                0                0
## 8                 0                0                0                0
## 9                 0                0                0                0
## 10                0                0                0                0
##    FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21
## 1                 0                0                0
## 2                 0                0                0
## 3                 0                0                0
## 4                 0                0                0
## 5                 0                0                0
## 6                 0                0                0
## 7                 0                0                0
## 8                 0                0                0
## 9                 0                0                0
## 10                0                0                0
##    AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY
## 1                           0                         0
## 2                           0                         0
## 3                           0                         0
## 4                           0                         0
## 5                           0                         0
## 6                           0                         0
## 7                           0                         0
## 8                           0                         0
## 9                           0                         0
## 10                          0                         0
##    AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON
## 1                           0                         0
## 2                           0                         0
## 3                           0                         0
## 4                           0                         0
## 5                           0                         0
## 6                           0                         0
## 7                           0                         1
## 8                           0                         4
## 9                           1                         0
## 10                          0                         0
##    AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR
## 1                          0                          3
## 2                          1                          4
## 3                          2                          0
## 4                          0                          1
## 5                          0                          0
## 6                          0                          4
## 7                          0                          0
## 8                          0                          0
## 9                          0                          1
## 10                         0                          2
str(cred)
## 'data.frame':    9036 obs. of  68 variables:
##  $ X                          : int  1 2 3 4 8 9 16 25 30 31 ...
##  $ SK_ID_CURR                 : int  264634 343697 169422 302321 126681 177072 397136 425686 392005 180988 ...
##  $ TARGET                     : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ NAME_CONTRACT_TYPE         : chr  "Cash loans" "Cash loans" "Cash loans" "Cash loans" ...
##  $ CODE_GENDER                : chr  "M" "M" "M" "F" ...
##  $ FLAG_OWN_CAR               : chr  "Y" "Y" "Y" "Y" ...
##  $ FLAG_OWN_REALTY            : chr  "Y" "N" "N" "Y" ...
##  $ CNT_CHILDREN               : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ AMT_INCOME_TOTAL           : num  225000 382500 157500 135000 94500 ...
##  $ AMT_CREDIT                 : num  594122 595454 534204 280170 180000 ...
##  $ AMT_ANNUITY                : num  32229 30532 31941 29547 9000 ...
##  $ AMT_GOODS_PRICE            : num  472500 445500 495000 247500 180000 ...
##  $ NAME_TYPE_SUITE            : chr  "Unaccompanied" "Unaccompanied" "Unaccompanied" "Unaccompanied" ...
##  $ NAME_INCOME_TYPE           : chr  "Working" "Commercial associate" "Working" "State servant" ...
##  $ NAME_EDUCATION_TYPE        : chr  "Higher education" "Higher education" "Secondary / secondary special" "Higher education" ...
##  $ NAME_FAMILY_STATUS         : chr  "Married" "Civil marriage" "Separated" "Civil marriage" ...
##  $ NAME_HOUSING_TYPE          : chr  "Rented apartment" "Municipal apartment" "Municipal apartment" "House / apartment" ...
##  $ DAYS_BIRTH                 : int  -15317 -10754 -15166 -8872 -9396 -8006 -18287 -15852 -20908 -8696 ...
##  $ DAYS_EMPLOYED              : int  -1656 -694 -223 -1350 -1224 -478 -824 -8079 365243 -1239 ...
##  $ DAYS_REGISTRATION          : int  -2851 -2809 -1891 -8851 -1110 -3120 -9736 -4170 -13011 -1403 ...
##  $ DAYS_ID_PUBLISH            : int  -2883 -2893 -4783 -595 -1172 -622 -1840 -4356 -4239 -1085 ...
##  $ OWN_CAR_AGE                : int  13 9 22 7 13 7 6 0 14 4 ...
##  $ FLAG_MOBIL                 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FLAG_EMP_PHONE             : int  1 1 1 1 1 1 1 1 0 1 ...
##  $ FLAG_WORK_PHONE            : int  0 0 0 0 0 0 1 0 0 1 ...
##  $ FLAG_CONT_MOBILE           : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ FLAG_PHONE                 : int  0 1 0 0 1 0 1 1 1 1 ...
##  $ FLAG_EMAIL                 : int  0 1 0 0 0 0 0 0 0 0 ...
##  $ OCCUPATION_TYPE            : chr  "High skill tech staff" "Managers" "Security staff" "Medicine staff" ...
##  $ CNT_FAM_MEMBERS            : int  2 2 1 2 2 2 2 3 2 2 ...
##  $ REGION_RATING_CLIENT       : int  2 1 2 2 3 2 2 2 2 2 ...
##  $ REGION_RATING_CLIENT_W_CITY: int  2 1 2 2 3 2 2 2 2 2 ...
##  $ WEEKDAY_APPR_PROCESS_START : chr  "SUNDAY" "SUNDAY" "SATURDAY" "THURSDAY" ...
##  $ HOUR_APPR_PROCESS_START    : int  11 11 3 16 11 10 10 17 11 12 ...
##  $ REG_REGION_NOT_LIVE_REGION : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ REG_REGION_NOT_WORK_REGION : int  0 0 0 0 0 1 0 0 0 0 ...
##  $ LIVE_REGION_NOT_WORK_REGION: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ REG_CITY_NOT_LIVE_CITY     : int  0 0 0 1 0 1 0 0 0 0 ...
##  $ REG_CITY_NOT_WORK_CITY     : int  0 0 0 1 0 1 0 0 0 0 ...
##  $ LIVE_CITY_NOT_WORK_CITY    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ ORGANIZATION_TYPE          : chr  "Business Entity Type 3" "Business Entity Type 3" "Security" "Medicine" ...
##  $ DAYS_LAST_PHONE_CHANGE     : int  -439 -2207 -291 -419 -1100 -128 -3003 -1143 -1047 -918 ...
##  $ FLAG_DOCUMENT_2            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_3            : int  0 1 1 1 0 0 1 0 0 1 ...
##  $ FLAG_DOCUMENT_4            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_5            : int  0 0 0 0 0 0 0 1 0 0 ...
##  $ FLAG_DOCUMENT_6            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_7            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_8            : int  1 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_9            : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_10           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_11           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_12           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_13           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_14           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_15           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_16           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_17           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_18           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_19           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_20           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG_DOCUMENT_21           : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AMT_REQ_CREDIT_BUREAU_HOUR : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AMT_REQ_CREDIT_BUREAU_DAY  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ AMT_REQ_CREDIT_BUREAU_WEEK : int  0 0 0 0 0 0 0 0 1 0 ...
##  $ AMT_REQ_CREDIT_BUREAU_MON  : int  0 0 0 0 0 0 1 4 0 0 ...
##  $ AMT_REQ_CREDIT_BUREAU_QRT  : int  0 1 2 0 0 0 0 0 0 0 ...
##  $ AMT_REQ_CREDIT_BUREAU_YEAR : int  3 4 0 1 0 4 0 0 1 2 ...
# Choosing to only keep columns that seem like they're good predictors
cred <- cred[ , c(3, 8:12, 19)]
names(cred)
## [1] "TARGET"           "CNT_CHILDREN"     "AMT_INCOME_TOTAL" "AMT_CREDIT"      
## [5] "AMT_ANNUITY"      "AMT_GOODS_PRICE"  "DAYS_EMPLOYED"
cred$cat_TARGET <- ifelse(cred$TARGET <= mean(cred$TARGET, na.rm = TRUE), 0, 1)
set.seed(1331)
train_index <-sample(1:nrow(cred), 0.6*nrow(cred))
valid_index <-setdiff(1:nrow(cred), train_index)
train_df <- cred[train_index, ]
valid_df <- cred[valid_index, ]
nrow(train_df)
## [1] 5421
nrow(valid_df)
## [1] 3615
#Balancing data
train_df$TARGET <- as.factor(train_df$TARGET)
train_df_balanced <- ROSE(TARGET ~  CNT_CHILDREN 
                          + AMT_INCOME_TOTAL + AMT_CREDIT 
                          + AMT_ANNUITY 
                          + AMT_GOODS_PRICE 
                          + DAYS_EMPLOYED,
                          data = train_df, seed = 1331)$data
table(train_df_balanced$TARGET)
## 
##    0    1 
## 2687 2734
#Classification tree
class_tr <- rpart(TARGET ~ CNT_CHILDREN 
                  + AMT_INCOME_TOTAL + AMT_CREDIT 
                  + AMT_ANNUITY 
                  + AMT_GOODS_PRICE 
                  + DAYS_EMPLOYED,
                  data = train_df_balanced, method = "class", maxdepth = 20)
prp(class_tr)

rpart.plot(class_tr, type = 5)

rpart.rules(class_tr, extra = 4)
##  TARGET    0   1                                                                                                                                                
##       0 [.70 .30] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE >= 1420500                                                                                     
##       0 [.63 .37] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE <  1420500 & CNT_CHILDREN >=          -0.45 & AMT_ANNUITY <  11086                             
##       0 [.60 .40] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE <  1420500 & CNT_CHILDREN >=           0.43 & AMT_ANNUITY >= 11086 & AMT_INCOME_TOTAL >= 313571
##       0 [.53 .47] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE <  1420500 & CNT_CHILDREN is -0.45 to  0.43 & AMT_ANNUITY >= 11086                             
##       1 [.43 .57] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE <  1420500 & CNT_CHILDREN >=           0.43 & AMT_ANNUITY >= 11086 & AMT_INCOME_TOTAL <  313571
##       1 [.35 .65] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE <  1420500 & CNT_CHILDREN <  -0.45                                                             
##       1 [.30 .70] when DAYS_EMPLOYED <  -80743
#Confusion matricies
class_tr_train_predict <- predict(class_tr, train_df_balanced,
                                  type = "class")
confusionMatrix(class_tr_train_predict, train_df_balanced$TARGET,
                positive = "1")
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    0    1
##          0 1760 1389
##          1  927 1345
##                                          
##                Accuracy : 0.5728         
##                  95% CI : (0.5595, 0.586)
##     No Information Rate : 0.5043         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.1467         
##                                          
##  Mcnemar's Test P-Value : < 2.2e-16      
##                                          
##             Sensitivity : 0.4920         
##             Specificity : 0.6550         
##          Pos Pred Value : 0.5920         
##          Neg Pred Value : 0.5589         
##              Prevalence : 0.5043         
##          Detection Rate : 0.2481         
##    Detection Prevalence : 0.4191         
##       Balanced Accuracy : 0.5735         
##                                          
##        'Positive' Class : 1              
## 
#class_tr_valid_predict <- predict(class_tr, valid_df,
#                                  type = "class")
#confusionMatrix(class_tr_valid_predict, valid_df$TARGET,
#                positive = "1")
#Probabilities
class_tr_valid_predict_prob <- predict(class_tr, valid_df,
                                       type = "prob")
head(class_tr_valid_predict_prob)
##            0         1
## 1  0.5289005 0.4710995
## 4  0.5289005 0.4710995
## 6  0.6301370 0.3698630
## 10 0.5289005 0.4710995
## 11 0.6952381 0.3047619
## 12 0.6050000 0.3950000
# Implementing new records
new_record_class <- data.frame(CNT_CHILDREN = 0,
                               AMT_INCOME_TOTAL = 180000,
                               AMT_CREDIT = 383760, 
                               AMT_ANNUITY = 40428, 
                               AMT_GOODS_PRICE = 360000, 
                               DAYS_EMPLOYED = -1304)
class_tr1 <- predict(class_tr, newdata = new_record_class)
class_tr1
##           0         1
## 1 0.5289005 0.4710995
new_record_class2 <- data.frame(CNT_CHILDREN = 0,
                               AMT_INCOME_TOTAL = 292500,
                               AMT_CREDIT = 675000, 
                               AMT_ANNUITY = 24376.5, 
                               AMT_GOODS_PRICE = 675000, 
                               DAYS_EMPLOYED = -1548)
class_tr2 <- predict(class_tr, newdata = new_record_class2)
class_tr2
##           0         1
## 1 0.5289005 0.4710995
new_record_class3 <- data.frame(CNT_CHILDREN = 0,
                               AMT_INCOME_TOTAL = 157500,
                               AMT_CREDIT = 761067, 
                               AMT_ANNUITY = 33655.5, 
                               AMT_GOODS_PRICE = 657000, 
                               DAYS_EMPLOYED = -2124)
class_tr3 <- predict(class_tr, newdata = new_record_class3)
class_tr3
##           0         1
## 1 0.5289005 0.4710995
new_record_class4 <- data.frame(CNT_CHILDREN = 0,
                               AMT_INCOME_TOTAL = 90000,
                               AMT_CREDIT = 67500, 
                               AMT_ANNUITY = 7047, 
                               AMT_GOODS_PRICE = 67500, 
                               DAYS_EMPLOYED = 365243)
class_tr4 <- predict(class_tr, newdata = new_record_class4)
class_tr4
##          0        1
## 1 0.630137 0.369863
new_record_class5 <- data.frame(CNT_CHILDREN = 3,
                               AMT_INCOME_TOTAL = 135000,
                               AMT_CREDIT = 301464, 
                               AMT_ANNUITY = 20277, 
                               AMT_GOODS_PRICE = 238500, 
                               DAYS_EMPLOYED = -989)
class_tr5 <- predict(class_tr, newdata = new_record_class5)
class_tr5
##           0         1
## 1 0.4336438 0.5663562

Final Model

As you can see, we have 5 predictions, one for each of our new records. We think our model is solid and accurate with it’s predictions. It’s also very informative and allows us to make good recommendations to Stark Enterprises.