We are attempting to predict whether a person is likely to be high risk when we lend them money.
We are analyzing a dataset with over 30,000 entries of previous customers and their risk level for Stark Enterprises. We are building a decision tree with that data and using it for our predictions.
This data comprises dozens of variables about previous customers who applied for credit. We narrowed the variables down to a small handful we thought would be useful. We chose CNT_CHILDREN, AMT_INCOME_TOTAL, AMT_CREDIT, AMT_ANNUITY, AMT_GOODS_PRICE, and DAYS_EMPLOYED as our variables.
library(rpart)
library(rpart.plot)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(tidyr)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(ROSE)
## Loaded ROSE 0.0-4
# Read the raw applicant data; the first row of the CSV holds the column names.
cred <- read.csv(file = "credit_8.csv", header = TRUE)
# List every available column so the predictors can be picked by name.
names(cred)
## [1] "X" "SK_ID_CURR"
## [3] "TARGET" "NAME_CONTRACT_TYPE"
## [5] "CODE_GENDER" "FLAG_OWN_CAR"
## [7] "FLAG_OWN_REALTY" "CNT_CHILDREN"
## [9] "AMT_INCOME_TOTAL" "AMT_CREDIT"
## [11] "AMT_ANNUITY" "AMT_GOODS_PRICE"
## [13] "NAME_TYPE_SUITE" "NAME_INCOME_TYPE"
## [15] "NAME_EDUCATION_TYPE" "NAME_FAMILY_STATUS"
## [17] "NAME_HOUSING_TYPE" "DAYS_BIRTH"
## [19] "DAYS_EMPLOYED" "DAYS_REGISTRATION"
## [21] "DAYS_ID_PUBLISH" "OWN_CAR_AGE"
## [23] "FLAG_MOBIL" "FLAG_EMP_PHONE"
## [25] "FLAG_WORK_PHONE" "FLAG_CONT_MOBILE"
## [27] "FLAG_PHONE" "FLAG_EMAIL"
## [29] "OCCUPATION_TYPE" "CNT_FAM_MEMBERS"
## [31] "REGION_RATING_CLIENT" "REGION_RATING_CLIENT_W_CITY"
## [33] "WEEKDAY_APPR_PROCESS_START" "HOUR_APPR_PROCESS_START"
## [35] "REG_REGION_NOT_LIVE_REGION" "REG_REGION_NOT_WORK_REGION"
## [37] "LIVE_REGION_NOT_WORK_REGION" "REG_CITY_NOT_LIVE_CITY"
## [39] "REG_CITY_NOT_WORK_CITY" "LIVE_CITY_NOT_WORK_CITY"
## [41] "ORGANIZATION_TYPE" "DAYS_LAST_PHONE_CHANGE"
## [43] "FLAG_DOCUMENT_2" "FLAG_DOCUMENT_3"
## [45] "FLAG_DOCUMENT_4" "FLAG_DOCUMENT_5"
## [47] "FLAG_DOCUMENT_6" "FLAG_DOCUMENT_7"
## [49] "FLAG_DOCUMENT_8" "FLAG_DOCUMENT_9"
## [51] "FLAG_DOCUMENT_10" "FLAG_DOCUMENT_11"
## [53] "FLAG_DOCUMENT_12" "FLAG_DOCUMENT_13"
## [55] "FLAG_DOCUMENT_14" "FLAG_DOCUMENT_15"
## [57] "FLAG_DOCUMENT_16" "FLAG_DOCUMENT_17"
## [59] "FLAG_DOCUMENT_18" "FLAG_DOCUMENT_19"
## [61] "FLAG_DOCUMENT_20" "FLAG_DOCUMENT_21"
## [63] "AMT_REQ_CREDIT_BUREAU_HOUR" "AMT_REQ_CREDIT_BUREAU_DAY"
## [65] "AMT_REQ_CREDIT_BUREAU_WEEK" "AMT_REQ_CREDIT_BUREAU_MON"
## [67] "AMT_REQ_CREDIT_BUREAU_QRT" "AMT_REQ_CREDIT_BUREAU_YEAR"
# Keep only the rows that have no missing value in ANY column.
# NOTE(review): this runs before the modelling columns are chosen, so a row is
# discarded even when its NA sits in a column the model never uses. Confirm
# that this is intended, or restrict drop_na() to the modelling columns.
cred <- tidyr::drop_na(cred)
# Preview the first ten surviving rows.
head(cred, n = 10)
## X SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR
## 1 1 264634 0 Cash loans M Y
## 2 2 343697 0 Cash loans M Y
## 3 3 169422 0 Cash loans M Y
## 4 4 302321 1 Cash loans F Y
## 5 8 126681 0 Revolving loans M Y
## 6 9 177072 0 Revolving loans M Y
## 7 16 397136 0 Cash loans M Y
## 8 25 425686 0 Revolving loans M Y
## 9 30 392005 0 Revolving loans F Y
## 10 31 180988 0 Cash loans F Y
## FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY
## 1 Y 0 225000 594121.5 32229.0
## 2 N 0 382500 595453.5 30532.5
## 3 N 0 157500 534204.0 31941.0
## 4 Y 0 135000 280170.0 29547.0
## 5 Y 0 94500 180000.0 9000.0
## 6 Y 0 180000 180000.0 9000.0
## 7 N 0 130500 687600.0 18135.0
## 8 Y 1 112500 270000.0 13500.0
## 9 Y 0 45000 135000.0 6750.0
## 10 Y 0 135000 1339884.0 39307.5
## AMT_GOODS_PRICE NAME_TYPE_SUITE NAME_INCOME_TYPE
## 1 472500 Unaccompanied Working
## 2 445500 Unaccompanied Commercial associate
## 3 495000 Unaccompanied Working
## 4 247500 Unaccompanied State servant
## 5 180000 Unaccompanied Working
## 6 180000 Unaccompanied Commercial associate
## 7 450000 Family Working
## 8 270000 Unaccompanied Working
## 9 135000 Unaccompanied Pensioner
## 10 1170000 Family Commercial associate
## NAME_EDUCATION_TYPE NAME_FAMILY_STATUS NAME_HOUSING_TYPE
## 1 Higher education Married Rented apartment
## 2 Higher education Civil marriage Municipal apartment
## 3 Secondary / secondary special Separated Municipal apartment
## 4 Higher education Civil marriage House / apartment
## 5 Secondary / secondary special Married House / apartment
## 6 Incomplete higher Civil marriage With parents
## 7 Secondary / secondary special Married House / apartment
## 8 Secondary / secondary special Married House / apartment
## 9 Secondary / secondary special Civil marriage House / apartment
## 10 Incomplete higher Married House / apartment
## DAYS_BIRTH DAYS_EMPLOYED DAYS_REGISTRATION DAYS_ID_PUBLISH OWN_CAR_AGE
## 1 -15317 -1656 -2851 -2883 13
## 2 -10754 -694 -2809 -2893 9
## 3 -15166 -223 -1891 -4783 22
## 4 -8872 -1350 -8851 -595 7
## 5 -9396 -1224 -1110 -1172 13
## 6 -8006 -478 -3120 -622 7
## 7 -18287 -824 -9736 -1840 6
## 8 -15852 -8079 -4170 -4356 0
## 9 -20908 365243 -13011 -4239 14
## 10 -8696 -1239 -1403 -1085 4
## FLAG_MOBIL FLAG_EMP_PHONE FLAG_WORK_PHONE FLAG_CONT_MOBILE FLAG_PHONE
## 1 1 1 0 1 0
## 2 1 1 0 1 1
## 3 1 1 0 1 0
## 4 1 1 0 1 0
## 5 1 1 0 1 1
## 6 1 1 0 1 0
## 7 1 1 1 1 1
## 8 1 1 0 1 1
## 9 1 0 0 1 1
## 10 1 1 1 1 1
## FLAG_EMAIL OCCUPATION_TYPE CNT_FAM_MEMBERS REGION_RATING_CLIENT
## 1 0 High skill tech staff 2 2
## 2 1 Managers 2 1
## 3 0 Security staff 1 2
## 4 0 Medicine staff 2 2
## 5 0 Low-skill Laborers 2 3
## 6 0 Laborers 2 2
## 7 0 Laborers 2 2
## 8 0 Laborers 3 2
## 9 0 2 2
## 10 0 Laborers 2 2
## REGION_RATING_CLIENT_W_CITY WEEKDAY_APPR_PROCESS_START
## 1 2 SUNDAY
## 2 1 SUNDAY
## 3 2 SATURDAY
## 4 2 THURSDAY
## 5 3 SUNDAY
## 6 2 WEDNESDAY
## 7 2 TUESDAY
## 8 2 WEDNESDAY
## 9 2 MONDAY
## 10 2 FRIDAY
## HOUR_APPR_PROCESS_START REG_REGION_NOT_LIVE_REGION
## 1 11 0
## 2 11 0
## 3 3 0
## 4 16 0
## 5 11 0
## 6 10 1
## 7 10 0
## 8 17 0
## 9 11 0
## 10 12 0
## REG_REGION_NOT_WORK_REGION LIVE_REGION_NOT_WORK_REGION
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 1 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
## REG_CITY_NOT_LIVE_CITY REG_CITY_NOT_WORK_CITY LIVE_CITY_NOT_WORK_CITY
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 1 0
## 5 0 0 0
## 6 1 1 0
## 7 0 0 0
## 8 0 0 0
## 9 0 0 0
## 10 0 0 0
## ORGANIZATION_TYPE DAYS_LAST_PHONE_CHANGE FLAG_DOCUMENT_2
## 1 Business Entity Type 3 -439 0
## 2 Business Entity Type 3 -2207 0
## 3 Security -291 0
## 4 Medicine -419 0
## 5 Self-employed -1100 0
## 6 Business Entity Type 3 -128 0
## 7 Business Entity Type 3 -3003 0
## 8 Transport: type 2 -1143 0
## 9 XNA -1047 0
## 10 Self-employed -918 0
## FLAG_DOCUMENT_3 FLAG_DOCUMENT_4 FLAG_DOCUMENT_5 FLAG_DOCUMENT_6
## 1 0 0 0 0
## 2 1 0 0 0
## 3 1 0 0 0
## 4 1 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 1 0 0 0
## 8 0 0 1 0
## 9 0 0 0 0
## 10 1 0 0 0
## FLAG_DOCUMENT_7 FLAG_DOCUMENT_8 FLAG_DOCUMENT_9 FLAG_DOCUMENT_10
## 1 0 1 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## FLAG_DOCUMENT_11 FLAG_DOCUMENT_12 FLAG_DOCUMENT_13 FLAG_DOCUMENT_14
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## FLAG_DOCUMENT_15 FLAG_DOCUMENT_16 FLAG_DOCUMENT_17 FLAG_DOCUMENT_18
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## 7 0 0 0 0
## 8 0 0 0 0
## 9 0 0 0 0
## 10 0 0 0 0
## FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## 7 0 0 0
## 8 0 0 0
## 9 0 0 0
## 10 0 0 0
## AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 0
## 8 0 0
## 9 0 0
## 10 0 0
## AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
## 7 0 1
## 8 0 4
## 9 1 0
## 10 0 0
## AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR
## 1 0 3
## 2 1 4
## 3 2 0
## 4 0 1
## 5 0 0
## 6 0 4
## 7 0 0
## 8 0 0
## 9 0 1
## 10 0 2
# Show the type and first few values of every column after the NA rows were dropped.
str(cred)
## 'data.frame': 9036 obs. of 68 variables:
## $ X : int 1 2 3 4 8 9 16 25 30 31 ...
## $ SK_ID_CURR : int 264634 343697 169422 302321 126681 177072 397136 425686 392005 180988 ...
## $ TARGET : int 0 0 0 1 0 0 0 0 0 0 ...
## $ NAME_CONTRACT_TYPE : chr "Cash loans" "Cash loans" "Cash loans" "Cash loans" ...
## $ CODE_GENDER : chr "M" "M" "M" "F" ...
## $ FLAG_OWN_CAR : chr "Y" "Y" "Y" "Y" ...
## $ FLAG_OWN_REALTY : chr "Y" "N" "N" "Y" ...
## $ CNT_CHILDREN : int 0 0 0 0 0 0 0 1 0 0 ...
## $ AMT_INCOME_TOTAL : num 225000 382500 157500 135000 94500 ...
## $ AMT_CREDIT : num 594122 595454 534204 280170 180000 ...
## $ AMT_ANNUITY : num 32229 30532 31941 29547 9000 ...
## $ AMT_GOODS_PRICE : num 472500 445500 495000 247500 180000 ...
## $ NAME_TYPE_SUITE : chr "Unaccompanied" "Unaccompanied" "Unaccompanied" "Unaccompanied" ...
## $ NAME_INCOME_TYPE : chr "Working" "Commercial associate" "Working" "State servant" ...
## $ NAME_EDUCATION_TYPE : chr "Higher education" "Higher education" "Secondary / secondary special" "Higher education" ...
## $ NAME_FAMILY_STATUS : chr "Married" "Civil marriage" "Separated" "Civil marriage" ...
## $ NAME_HOUSING_TYPE : chr "Rented apartment" "Municipal apartment" "Municipal apartment" "House / apartment" ...
## $ DAYS_BIRTH : int -15317 -10754 -15166 -8872 -9396 -8006 -18287 -15852 -20908 -8696 ...
## $ DAYS_EMPLOYED : int -1656 -694 -223 -1350 -1224 -478 -824 -8079 365243 -1239 ...
## $ DAYS_REGISTRATION : int -2851 -2809 -1891 -8851 -1110 -3120 -9736 -4170 -13011 -1403 ...
## $ DAYS_ID_PUBLISH : int -2883 -2893 -4783 -595 -1172 -622 -1840 -4356 -4239 -1085 ...
## $ OWN_CAR_AGE : int 13 9 22 7 13 7 6 0 14 4 ...
## $ FLAG_MOBIL : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FLAG_EMP_PHONE : int 1 1 1 1 1 1 1 1 0 1 ...
## $ FLAG_WORK_PHONE : int 0 0 0 0 0 0 1 0 0 1 ...
## $ FLAG_CONT_MOBILE : int 1 1 1 1 1 1 1 1 1 1 ...
## $ FLAG_PHONE : int 0 1 0 0 1 0 1 1 1 1 ...
## $ FLAG_EMAIL : int 0 1 0 0 0 0 0 0 0 0 ...
## $ OCCUPATION_TYPE : chr "High skill tech staff" "Managers" "Security staff" "Medicine staff" ...
## $ CNT_FAM_MEMBERS : int 2 2 1 2 2 2 2 3 2 2 ...
## $ REGION_RATING_CLIENT : int 2 1 2 2 3 2 2 2 2 2 ...
## $ REGION_RATING_CLIENT_W_CITY: int 2 1 2 2 3 2 2 2 2 2 ...
## $ WEEKDAY_APPR_PROCESS_START : chr "SUNDAY" "SUNDAY" "SATURDAY" "THURSDAY" ...
## $ HOUR_APPR_PROCESS_START : int 11 11 3 16 11 10 10 17 11 12 ...
## $ REG_REGION_NOT_LIVE_REGION : int 0 0 0 0 0 1 0 0 0 0 ...
## $ REG_REGION_NOT_WORK_REGION : int 0 0 0 0 0 1 0 0 0 0 ...
## $ LIVE_REGION_NOT_WORK_REGION: int 0 0 0 0 0 0 0 0 0 0 ...
## $ REG_CITY_NOT_LIVE_CITY : int 0 0 0 1 0 1 0 0 0 0 ...
## $ REG_CITY_NOT_WORK_CITY : int 0 0 0 1 0 1 0 0 0 0 ...
## $ LIVE_CITY_NOT_WORK_CITY : int 0 0 0 0 0 0 0 0 0 0 ...
## $ ORGANIZATION_TYPE : chr "Business Entity Type 3" "Business Entity Type 3" "Security" "Medicine" ...
## $ DAYS_LAST_PHONE_CHANGE : int -439 -2207 -291 -419 -1100 -128 -3003 -1143 -1047 -918 ...
## $ FLAG_DOCUMENT_2 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_3 : int 0 1 1 1 0 0 1 0 0 1 ...
## $ FLAG_DOCUMENT_4 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_5 : int 0 0 0 0 0 0 0 1 0 0 ...
## $ FLAG_DOCUMENT_6 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_7 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_8 : int 1 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_9 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_10 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_11 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_12 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_13 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_14 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_15 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_16 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_17 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_18 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_19 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_20 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG_DOCUMENT_21 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AMT_REQ_CREDIT_BUREAU_HOUR : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AMT_REQ_CREDIT_BUREAU_DAY : int 0 0 0 0 0 0 0 0 0 0 ...
## $ AMT_REQ_CREDIT_BUREAU_WEEK : int 0 0 0 0 0 0 0 0 1 0 ...
## $ AMT_REQ_CREDIT_BUREAU_MON : int 0 0 0 0 0 0 1 4 0 0 ...
## $ AMT_REQ_CREDIT_BUREAU_QRT : int 0 1 2 0 0 0 0 0 0 0 ...
## $ AMT_REQ_CREDIT_BUREAU_YEAR : int 3 4 0 1 0 4 0 0 1 2 ...
# Choosing to only keep columns that seem like they're good predictors.
# The response (TARGET) and the six predictors are selected by name rather than
# by position, so the selection still picks the right columns if the column
# order of the CSV ever changes.
model_cols <- c(
  "TARGET", "CNT_CHILDREN", "AMT_INCOME_TOTAL", "AMT_CREDIT",
  "AMT_ANNUITY", "AMT_GOODS_PRICE", "DAYS_EMPLOYED"
)
cred <- cred[, model_cols]
names(cred)
## [1] "TARGET" "CNT_CHILDREN" "AMT_INCOME_TOTAL" "AMT_CREDIT"
## [5] "AMT_ANNUITY" "AMT_GOODS_PRICE" "DAYS_EMPLOYED"
# Binary copy of TARGET: 0 when at or below the column mean, otherwise 1.
# NOTE(review): TARGET appears to be 0/1 already, so this column presumably
# duplicates it; the model formulas name their predictors explicitly and do not
# use it - confirm before removing.
cred$cat_TARGET <- ifelse(cred$TARGET <= mean(cred$TARGET, na.rm = TRUE), 0, 1)

# 60/40 train/validation split; the seed makes the partition reproducible.
set.seed(1331)
n_obs <- nrow(cred)
# seq_len() stays correct for an empty data frame (1:0 would give c(1, 0)),
# and floor() makes the truncation of the fractional training size explicit.
train_index <- sample(seq_len(n_obs), floor(0.6 * n_obs))
valid_index <- setdiff(seq_len(n_obs), train_index)
train_df <- cred[train_index, ]
valid_df <- cred[valid_index, ]
nrow(train_df)
## [1] 5421
nrow(valid_df)
## [1] 3615
# Balancing data
# TARGET is converted to a factor before the training rows are resampled.
train_df$TARGET <- as.factor(train_df$TARGET)
# ROSE() returns a fitted object; its $data element holds the resampled rows.
# NOTE(review): the tree rules printed later contain thresholds such as
# CNT_CHILDREN < -0.45, so the resampled predictors are presumably synthetic
# values rather than copies of real rows - confirm this is acceptable.
rose_fit <- ROSE(
  TARGET ~ CNT_CHILDREN + AMT_INCOME_TOTAL + AMT_CREDIT + AMT_ANNUITY +
    AMT_GOODS_PRICE + DAYS_EMPLOYED,
  data = train_df,
  seed = 1331
)
train_df_balanced <- rose_fit$data
# Class counts after balancing.
table(train_df_balanced$TARGET)
##
## 0 1
## 2687 2734
# Classification tree
# Fitted on the balanced training data with the same six predictors.
# maxdepth is an rpart.control() setting, supplied here through `control`.
class_tr <- rpart(
  TARGET ~ CNT_CHILDREN + AMT_INCOME_TOTAL + AMT_CREDIT + AMT_ANNUITY +
    AMT_GOODS_PRICE + DAYS_EMPLOYED,
  data = train_df_balanced,
  method = "class",
  control = rpart.control(maxdepth = 20)
)
# Two plots of the same tree, followed by its rules in text form.
prp(class_tr)
rpart.plot(class_tr, type = 5)
rpart.rules(class_tr, extra = 4)
## TARGET 0 1
## 0 [.70 .30] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE >= 1420500
## 0 [.63 .37] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE < 1420500 & CNT_CHILDREN >= -0.45 & AMT_ANNUITY < 11086
## 0 [.60 .40] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE < 1420500 & CNT_CHILDREN >= 0.43 & AMT_ANNUITY >= 11086 & AMT_INCOME_TOTAL >= 313571
## 0 [.53 .47] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE < 1420500 & CNT_CHILDREN is -0.45 to 0.43 & AMT_ANNUITY >= 11086
## 1 [.43 .57] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE < 1420500 & CNT_CHILDREN >= 0.43 & AMT_ANNUITY >= 11086 & AMT_INCOME_TOTAL < 313571
## 1 [.35 .65] when DAYS_EMPLOYED >= -80743 & AMT_GOODS_PRICE < 1420500 & CNT_CHILDREN < -0.45
## 1 [.30 .70] when DAYS_EMPLOYED < -80743
# Confusion matrices
# Predicted classes for the balanced training data (the rows the tree was
# fitted on), compared with their TARGET labels; "1" is the positive class.
class_tr_train_predict <- predict(
  class_tr,
  newdata = train_df_balanced,
  type = "class"
)
confusionMatrix(
  data = class_tr_train_predict,
  reference = train_df_balanced$TARGET,
  positive = "1"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 0 1
## 0 1760 1389
## 1 927 1345
##
## Accuracy : 0.5728
## 95% CI : (0.5595, 0.586)
## No Information Rate : 0.5043
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.1467
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.4920
## Specificity : 0.6550
## Pos Pred Value : 0.5920
## Neg Pred Value : 0.5589
## Prevalence : 0.5043
## Detection Rate : 0.2481
## Detection Prevalence : 0.4191
## Balanced Accuracy : 0.5735
##
## 'Positive' Class : 1
##
# Confusion matrix on the held-out validation data.
# valid_df$TARGET is still an integer column (only train_df$TARGET was converted
# to a factor), while confusionMatrix() needs two factors with the same levels,
# so the reference labels are converted here using the prediction's levels.
class_tr_valid_predict <- predict(class_tr, valid_df, type = "class")
confusionMatrix(
  class_tr_valid_predict,
  factor(valid_df$TARGET, levels = levels(class_tr_valid_predict)),
  positive = "1"
)
# Probabilities
# Per-class probabilities for every validation row; preview the first few.
class_tr_valid_predict_prob <- predict(class_tr, valid_df, type = "prob")
head(class_tr_valid_predict_prob)
## 0 1
## 1 0.5289005 0.4710995
## 4 0.5289005 0.4710995
## 6 0.6301370 0.3698630
## 10 0.5289005 0.4710995
## 11 0.6952381 0.3047619
## 12 0.6050000 0.3950000
# Implementing new records
# Score one hypothetical applicant. Class probabilities are what predict()
# returns by default for a classification tree; they are requested explicitly.
new_record_class <- data.frame(
  CNT_CHILDREN = 0,
  AMT_INCOME_TOTAL = 180000,
  AMT_CREDIT = 383760,
  AMT_ANNUITY = 40428,
  AMT_GOODS_PRICE = 360000,
  DAYS_EMPLOYED = -1304
)
class_tr1 <- predict(class_tr, newdata = new_record_class, type = "prob")
class_tr1
## 0 1
## 1 0.5289005 0.4710995
# Second hypothetical applicant: class probabilities from the fitted tree.
new_record_class2 <- data.frame(
  CNT_CHILDREN = 0,
  AMT_INCOME_TOTAL = 292500,
  AMT_CREDIT = 675000,
  AMT_ANNUITY = 24376.5,
  AMT_GOODS_PRICE = 675000,
  DAYS_EMPLOYED = -1548
)
class_tr2 <- predict(class_tr, newdata = new_record_class2, type = "prob")
class_tr2
## 0 1
## 1 0.5289005 0.4710995
# Third hypothetical applicant: class probabilities from the fitted tree.
new_record_class3 <- data.frame(
  CNT_CHILDREN = 0,
  AMT_INCOME_TOTAL = 157500,
  AMT_CREDIT = 761067,
  AMT_ANNUITY = 33655.5,
  AMT_GOODS_PRICE = 657000,
  DAYS_EMPLOYED = -2124
)
class_tr3 <- predict(class_tr, newdata = new_record_class3, type = "prob")
class_tr3
## 0 1
## 1 0.5289005 0.4710995
# Fourth hypothetical applicant: class probabilities from the fitted tree.
# NOTE(review): DAYS_EMPLOYED = 365243 is positive while the other records use
# negative day counts; presumably a placeholder for "not employed" - confirm
# how the model should treat it.
new_record_class4 <- data.frame(
  CNT_CHILDREN = 0,
  AMT_INCOME_TOTAL = 90000,
  AMT_CREDIT = 67500,
  AMT_ANNUITY = 7047,
  AMT_GOODS_PRICE = 67500,
  DAYS_EMPLOYED = 365243
)
class_tr4 <- predict(class_tr, newdata = new_record_class4, type = "prob")
class_tr4
## 0 1
## 1 0.630137 0.369863
# Fifth hypothetical applicant: class probabilities from the fitted tree.
new_record_class5 <- data.frame(
  CNT_CHILDREN = 3,
  AMT_INCOME_TOTAL = 135000,
  AMT_CREDIT = 301464,
  AMT_ANNUITY = 20277,
  AMT_GOODS_PRICE = 238500,
  DAYS_EMPLOYED = -989
)
class_tr5 <- predict(class_tr, newdata = new_record_class5, type = "prob")
class_tr5
## 0 1
## 1 0.4336438 0.5663562
As you can see, we have 5 predictions, one for each of our new records. We think our model is solid and accurate with its predictions. It’s also very informative and allows us to make good recommendations to Stark Enterprises.