# Analysis and summary comments are included at the end of each step.
# Packages Preparation
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(gbm)
## Loaded gbm 2.1.9
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(rpart)
library( rpart.plot )
library( ROCR )
library(MASS)
# Step 1 - Reading Data and Models from Week5
# Ask for the Week 5 CSV through an interactive file picker, load it into a
# data frame, and print the column structure as a quick sanity check.
wk5_path <- file.choose()
wk5 <- read.csv(wk5_path)
str(wk5)
## 'data.frame': 5960 obs. of 29 variables:
## $ TARGET_BAD_FLAG : int 1 1 1 1 0 1 1 1 1 1 ...
## $ TARGET_LOSS_AMT : int 641 1109 767 1425 0 335 1841 373 1217 1523 ...
## $ LOAN : int 1100 1300 1500 1500 1700 1700 1800 1800 2000 2000 ...
## $ IMP_MORTDUE : num 25860 70053 13500 65000 97800 ...
## $ M_MORTDUE : int 0 0 0 1 0 0 0 0 0 1 ...
## $ IMP_VALUE : num 39025 68400 16700 89000 112000 ...
## $ M_VALUE : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_YOJ : num 10.5 7 4 7 3 9 5 11 3 16 ...
## $ M_YOJ : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_DEROG : int 0 0 0 1 0 0 3 0 0 0 ...
## $ M_DEROG : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_DELINQ : int 0 2 0 1 0 0 2 0 2 0 ...
## $ M_DELINQ : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_CLAGE : num 94.4 121.8 149.5 174 93.3 ...
## $ M_CLAGE : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_NINQ : int 1 0 1 1 0 1 1 0 1 0 ...
## $ M_NINQ : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_CLNO : int 9 14 10 20 14 8 17 8 12 13 ...
## $ M_CLNO : int 0 0 0 1 0 0 0 0 0 0 ...
## $ IMP_DEBTINC : num 35 35 35 35 35 ...
## $ M_DEBTINC : int 1 1 1 1 1 0 1 0 1 1 ...
## $ FLAG.Job.Mgr : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG.Job.Office : int 0 0 0 0 1 0 0 0 0 0 ...
## $ FLAG.Job.Other : int 1 1 1 0 0 1 1 1 1 0 ...
## $ FLAG.Job.ProfExe : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG.Job.Sales : int 0 0 0 0 0 0 0 0 0 1 ...
## $ FLAG.Job.Self : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG.Reason.DebtCon: int 0 0 0 0 0 0 0 0 0 0 ...
## $ FLAG.Reason.HomeImp: int 1 1 1 0 1 1 1 1 1 1 ...
# Print per-column summary statistics (min, quartiles, mean, max) for wk5.
summary(wk5)
## TARGET_BAD_FLAG TARGET_LOSS_AMT LOAN IMP_MORTDUE
## Min. :0.0000 Min. : 0 Min. : 1100 Min. : 2063
## 1st Qu.:0.0000 1st Qu.: 0 1st Qu.:11100 1st Qu.: 48139
## Median :0.0000 Median : 0 Median :16300 Median : 65000
## Mean :0.1995 Mean : 2676 Mean :18608 Mean : 72999
## 3rd Qu.:0.0000 3rd Qu.: 0 3rd Qu.:23300 3rd Qu.: 88200
## Max. :1.0000 Max. :78987 Max. :89900 Max. :399550
## M_MORTDUE IMP_VALUE M_VALUE IMP_YOJ
## Min. :0.00000 Min. : 8000 Min. :0.00000 Min. : 0.000
## 1st Qu.:0.00000 1st Qu.: 66490 1st Qu.:0.00000 1st Qu.: 3.000
## Median :0.00000 Median : 89000 Median :0.00000 Median : 7.000
## Mean :0.08691 Mean :101536 Mean :0.01879 Mean : 8.756
## 3rd Qu.:0.00000 3rd Qu.:119005 3rd Qu.:0.00000 3rd Qu.:12.000
## Max. :1.00000 Max. :855909 Max. :1.00000 Max. :41.000
## M_YOJ IMP_DEROG M_DEROG IMP_DELINQ
## Min. :0.00000 Min. : 0.0000 Min. :0.0000 Min. : 0.000
## 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.: 0.000
## Median :0.00000 Median : 0.0000 Median :0.0000 Median : 0.000
## Mean :0.08641 Mean : 0.3431 Mean :0.1188 Mean : 0.503
## 3rd Qu.:0.00000 3rd Qu.: 0.0000 3rd Qu.:0.0000 3rd Qu.: 1.000
## Max. :1.00000 Max. :10.0000 Max. :1.0000 Max. :15.000
## M_DELINQ IMP_CLAGE M_CLAGE IMP_NINQ
## Min. :0.00000 Min. : 0.0 Min. :0.00000 Min. : 0.00
## 1st Qu.:0.00000 1st Qu.: 117.4 1st Qu.:0.00000 1st Qu.: 0.00
## Median :0.00000 Median : 174.0 Median :0.00000 Median : 1.00
## Mean :0.09732 Mean : 179.5 Mean :0.05168 Mean : 1.17
## 3rd Qu.:0.00000 3rd Qu.: 227.1 3rd Qu.:0.00000 3rd Qu.: 2.00
## Max. :1.00000 Max. :1168.2 Max. :1.00000 Max. :17.00
## M_NINQ IMP_CLNO M_CLNO IMP_DEBTINC
## Min. :0.00000 Min. : 0.00 Min. :0.00000 Min. : 0.5245
## 1st Qu.:0.00000 1st Qu.:15.00 1st Qu.:0.00000 1st Qu.: 30.7632
## Median :0.00000 Median :20.00 Median :0.00000 Median : 35.0000
## Mean :0.08557 Mean :21.25 Mean :0.03725 Mean : 34.0393
## 3rd Qu.:0.00000 3rd Qu.:26.00 3rd Qu.:0.00000 3rd Qu.: 37.9499
## Max. :1.00000 Max. :71.00 Max. :1.00000 Max. :203.3122
## M_DEBTINC FLAG.Job.Mgr FLAG.Job.Office FLAG.Job.Other
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2126 Mean :0.1287 Mean :0.1591 Mean :0.4007
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## FLAG.Job.ProfExe FLAG.Job.Sales FLAG.Job.Self FLAG.Reason.DebtCon
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :1.0000
## Mean :0.2141 Mean :0.01829 Mean :0.03238 Mean :0.6591
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## FLAG.Reason.HomeImp
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2987
## 3rd Qu.:1.0000
## Max. :1.0000
# Preview the first rows, then keep a working copy of the loaded data.
head(wk5)
copy_wk5 <- wk5

# rpart control object shared by the trees below: maximum depth of 10.
tree_depth <- rpart.control(maxdepth = 10)

# Fix the RNG seed, then draw one TRUE/FALSE flag per row with
# P(TRUE) = 0.7. Rows flagged TRUE form the training set and the remaining
# rows form the test set, so the split is roughly (not exactly) 70/30.
set.seed(1)
sample <- sample(
  x = c(TRUE, FALSE),
  size = nrow(copy_wk5),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- copy_wk5[sample, ]
test <- copy_wk5[!sample, ]
# Step 2 - Logistic Regression
# Models from Week5 -
# Tree
# Classification tree for the default flag, split on information gain and
# limited by `tree_depth`; the loss amount is excluded from the predictors.
tr_model <- rpart(
  formula = TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  data = train,
  method = "class",
  parms = list(split = "information"),
  control = tree_depth
)
rpart.plot(tr_model)

# Variable importance as recorded on the fitted tree.
tr_model[["variable.importance"]]
## M_DEBTINC IMP_DEBTINC IMP_DELINQ IMP_CLAGE LOAN M_VALUE
## 533.397481 134.588883 46.494397 30.749923 24.521888 22.199895
## IMP_VALUE IMP_MORTDUE IMP_CLNO IMP_YOJ
## 7.967967 5.783975 2.459994 2.090995
# Score the test set. Column 2 of the class-probability matrix is used as the
# ROC score (the flag takes values 0/1, so this is the column for class 1).
pt <- predict(tr_model, newdata = test, type = "prob")
pt2 <- prediction(predictions = pt[, 2], labels = test$TARGET_BAD_FLAG)
pt3 <- performance(pt2, measure = "tpr", x.measure = "fpr")
# Random Forest
# TARGET_BAD_FLAG is stored as an integer, and randomForest() fits a
# *regression* forest for any non-factor response (this is what triggered the
# "response has five or fewer unique values" warning). Converting the
# response to a factor fits the intended classification forest instead.
rf_train <- train
rf_train$TARGET_BAD_FLAG <- factor(rf_train$TARGET_BAD_FLAG, levels = c(0, 1))
rf_model <- randomForest(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  data = rf_train,
  ntree = 500,
  importance = TRUE
)
# NOTE: the importance table and head(pr) values that were pasted here came
# from the old regression fit and were removed; re-run to regenerate them
# (the RF AUC printed further down will change as well). For a
# classification forest importance() reports per-class columns plus
# MeanDecreaseAccuracy and MeanDecreaseGini.
importance(rf_model)
varImpPlot(rf_model)

# ROCR needs a numeric score, so use the predicted probability of class "1"
# rather than the default predicted class labels.
pr <- predict(rf_model, test, type = "prob")[, "1"]
head(pr)
pr2 <- prediction(pr, test$TARGET_BAD_FLAG)
pr3 <- performance(pr2, "tpr", "fpr")
# Gradient Boosting
# Gradient boosting with Bernoulli loss and 500 trees on the same predictors
# (loss amount excluded).
gb_model <- gbm(
  formula = TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  distribution = "bernoulli",
  data = train,
  n.trees = 500
)
# Relative-influence summary, plotting the top 10 bars.
summary(gb_model, cBars = 10)
# Predicted probabilities on the test set (type = "response").
pg <- predict(gb_model, newdata = test, type = "response")
## Using 500 trees...
head(pg)
## [1] 0.9666599 0.4323602 0.9940194 0.8066121 0.9980174 0.9721196
pg2 <- prediction(predictions = pg, labels = test$TARGET_BAD_FLAG)
pg3 <- performance(pg2, measure = "tpr", x.measure = "fpr")
# LOGISTIC REGRESSION Model
# Bounds for stepwise selection: the upper model uses every predictor except
# the loss amount; the lower model is intercept-only.
theUpper_LR <- glm(
  formula = TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  family = "binomial",
  data = train
)
theLower_LR <- glm(
  formula = TARGET_BAD_FLAG ~ 1,
  family = "binomial",
  data = train
)
summary(theUpper_LR)
##
## Call:
## glm(formula = TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT, family = "binomial",
## data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.817e+00 6.558e-01 -10.395 < 2e-16 ***
## LOAN -8.996e-06 5.694e-06 -1.580 0.114122
## IMP_MORTDUE -3.482e-06 2.092e-06 -1.664 0.096053 .
## M_MORTDUE 3.647e-01 2.491e-01 1.464 0.143231
## IMP_VALUE 4.356e-06 1.495e-06 2.914 0.003572 **
## M_VALUE 4.945e+00 6.167e-01 8.017 1.08e-15 ***
## IMP_YOJ -2.010e-02 8.271e-03 -2.430 0.015103 *
## M_YOJ -7.720e-01 2.427e-01 -3.181 0.001466 **
## IMP_DEROG 5.869e-01 7.617e-02 7.706 1.30e-14 ***
## M_DEROG -2.655e+00 3.603e-01 -7.367 1.74e-13 ***
## IMP_DELINQ 8.182e-01 6.264e-02 13.062 < 2e-16 ***
## M_DELINQ -1.101e+00 4.859e-01 -2.266 0.023465 *
## IMP_CLAGE -5.466e-03 8.103e-04 -6.746 1.52e-11 ***
## M_CLAGE 9.834e-01 4.403e-01 2.233 0.025529 *
## IMP_NINQ 1.528e-01 3.175e-02 4.813 1.48e-06 ***
## M_NINQ -2.494e-01 4.538e-01 -0.550 0.582542
## IMP_CLNO -1.600e-02 6.331e-03 -2.527 0.011513 *
## M_CLNO 2.693e+00 7.552e-01 3.565 0.000363 ***
## IMP_DEBTINC 9.231e-02 1.017e-02 9.075 < 2e-16 ***
## M_DEBTINC 2.679e+00 1.144e-01 23.425 < 2e-16 ***
## FLAG.Job.Mgr 2.091e+00 4.989e-01 4.191 2.77e-05 ***
## FLAG.Job.Office 1.414e+00 5.005e-01 2.824 0.004736 **
## FLAG.Job.Other 2.081e+00 4.825e-01 4.314 1.60e-05 ***
## FLAG.Job.ProfExe 1.972e+00 4.965e-01 3.971 7.16e-05 ***
## FLAG.Job.Sales 3.240e+00 5.794e-01 5.592 2.24e-08 ***
## FLAG.Job.Self 2.518e+00 5.650e-01 4.456 8.34e-06 ***
## FLAG.Reason.DebtCon -1.318e-01 3.661e-01 -0.360 0.718960
## FLAG.Reason.HomeImp -5.659e-02 3.728e-01 -0.152 0.879329
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4155.3 on 4141 degrees of freedom
## Residual deviance: 2248.2 on 4114 degrees of freedom
## AIC: 2304.2
##
## Number of Fisher Scoring iterations: 6
# Coefficient table and deviance for the intercept-only logistic model.
summary( theLower_LR )
##
## Call:
## glm(formula = TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.38087 0.03878 -35.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4155.3 on 4141 degrees of freedom
## Residual deviance: 4155.3 on 4141 degrees of freedom
## AIC: 4157.3
##
## Number of Fisher Scoring iterations: 4
# BACKWARD VARIABLE SELECTION
# Backward elimination by AIC: start from the full logistic model and drop
# terms, never going below the intercept-only model.
lr_model <- stepAIC(
  object = theUpper_LR,
  scope = list(lower = theLower_LR, upper = theUpper_LR),
  direction = "backward"
)
## Start: AIC=2304.21
## TARGET_BAD_FLAG ~ (TARGET_LOSS_AMT + LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG +
## IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ +
## M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr +
## FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp) -
## TARGET_LOSS_AMT
##
## Df Deviance AIC
## - FLAG.Reason.HomeImp 1 2248.2 2302.2
## - FLAG.Reason.DebtCon 1 2248.3 2302.3
## - M_NINQ 1 2248.5 2302.5
## <none> 2248.2 2304.2
## - M_MORTDUE 1 2250.3 2304.3
## - LOAN 1 2250.8 2304.8
## - IMP_MORTDUE 1 2251.2 2305.2
## - M_CLAGE 1 2253.0 2307.0
## - M_DELINQ 1 2253.7 2307.7
## - IMP_YOJ 1 2254.2 2308.2
## - IMP_CLNO 1 2254.7 2308.7
## - FLAG.Job.Office 1 2257.4 2311.4
## - IMP_VALUE 1 2258.2 2312.2
## - M_YOJ 1 2259.3 2313.3
## - M_CLNO 1 2261.9 2315.9
## - FLAG.Job.ProfExe 1 2267.8 2321.8
## - FLAG.Job.Mgr 1 2270.2 2324.2
## - FLAG.Job.Self 1 2271.0 2325.0
## - IMP_NINQ 1 2271.1 2325.1
## - FLAG.Job.Other 1 2272.4 2326.4
## - FLAG.Job.Sales 1 2284.4 2338.4
## - IMP_CLAGE 1 2298.9 2352.9
## - IMP_DEROG 1 2317.7 2371.7
## - M_DEROG 1 2320.9 2374.9
## - IMP_DEBTINC 1 2356.6 2410.6
## - M_VALUE 1 2359.2 2413.2
## - IMP_DELINQ 1 2483.2 2537.2
## - M_DEBTINC 1 2866.2 2920.2
##
## Step: AIC=2302.24
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO +
## M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.DebtCon
##
## Df Deviance AIC
## - M_NINQ 1 2248.5 2300.5
## - FLAG.Reason.DebtCon 1 2248.6 2300.6
## <none> 2248.2 2302.2
## - M_MORTDUE 1 2250.3 2302.3
## - LOAN 1 2250.8 2302.8
## - IMP_MORTDUE 1 2251.2 2303.2
## - M_CLAGE 1 2253.0 2305.0
## - M_DELINQ 1 2253.7 2305.7
## - IMP_YOJ 1 2254.3 2306.3
## - IMP_CLNO 1 2254.7 2306.7
## - FLAG.Job.Office 1 2257.8 2309.8
## - IMP_VALUE 1 2258.2 2310.2
## - M_YOJ 1 2259.3 2311.3
## - M_CLNO 1 2261.9 2313.9
## - FLAG.Job.ProfExe 1 2268.8 2320.8
## - FLAG.Job.Mgr 1 2271.1 2323.1
## - IMP_NINQ 1 2271.2 2323.2
## - FLAG.Job.Self 1 2271.8 2323.8
## - FLAG.Job.Other 1 2273.9 2325.9
## - FLAG.Job.Sales 1 2285.5 2337.5
## - IMP_CLAGE 1 2299.0 2351.0
## - IMP_DEROG 1 2317.7 2369.7
## - M_DEROG 1 2320.9 2372.9
## - IMP_DEBTINC 1 2356.6 2408.6
## - M_VALUE 1 2360.5 2412.5
## - IMP_DELINQ 1 2483.8 2535.8
## - M_DEBTINC 1 2866.3 2918.3
##
## Step: AIC=2300.54
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO +
## IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.DebtCon
##
## Df Deviance AIC
## - FLAG.Reason.DebtCon 1 2248.9 2298.9
## <none> 2248.5 2300.5
## - M_MORTDUE 1 2250.6 2300.6
## - LOAN 1 2251.0 2301.0
## - IMP_MORTDUE 1 2251.5 2301.5
## - M_CLAGE 1 2253.4 2303.4
## - IMP_YOJ 1 2254.6 2304.6
## - IMP_CLNO 1 2255.0 2305.0
## - FLAG.Job.Office 1 2258.1 2308.1
## - M_DELINQ 1 2258.1 2308.1
## - IMP_VALUE 1 2258.4 2308.4
## - M_YOJ 1 2260.5 2310.5
## - M_CLNO 1 2262.3 2312.3
## - FLAG.Job.ProfExe 1 2269.1 2319.1
## - FLAG.Job.Mgr 1 2271.4 2321.4
## - IMP_NINQ 1 2271.8 2321.8
## - FLAG.Job.Self 1 2271.8 2321.8
## - FLAG.Job.Other 1 2274.2 2324.2
## - FLAG.Job.Sales 1 2286.2 2336.2
## - IMP_CLAGE 1 2299.2 2349.2
## - IMP_DEROG 1 2317.7 2367.7
## - M_DEROG 1 2321.2 2371.2
## - IMP_DEBTINC 1 2359.1 2409.1
## - M_VALUE 1 2362.1 2412.1
## - IMP_DELINQ 1 2483.8 2533.8
## - M_DEBTINC 1 2868.3 2918.3
##
## Step: AIC=2298.92
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO +
## IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
##
## Df Deviance AIC
## <none> 2248.9 2298.9
## - M_MORTDUE 1 2251.4 2299.4
## - LOAN 1 2252.0 2300.0
## - IMP_MORTDUE 1 2252.0 2300.0
## - M_CLAGE 1 2253.6 2301.6
## - IMP_YOJ 1 2254.9 2302.9
## - IMP_CLNO 1 2255.9 2303.9
## - FLAG.Job.Office 1 2258.4 2306.4
## - M_DELINQ 1 2258.4 2306.4
## - IMP_VALUE 1 2259.2 2307.2
## - M_YOJ 1 2261.2 2309.2
## - M_CLNO 1 2263.1 2311.1
## - FLAG.Job.ProfExe 1 2269.5 2317.5
## - FLAG.Job.Mgr 1 2271.6 2319.6
## - IMP_NINQ 1 2271.8 2319.8
## - FLAG.Job.Self 1 2272.6 2320.6
## - FLAG.Job.Other 1 2274.5 2322.5
## - FLAG.Job.Sales 1 2286.2 2334.2
## - IMP_CLAGE 1 2299.3 2347.3
## - IMP_DEROG 1 2318.3 2366.3
## - M_DEROG 1 2322.1 2370.1
## - IMP_DEBTINC 1 2360.8 2408.8
## - M_VALUE 1 2363.3 2411.3
## - IMP_DELINQ 1 2484.9 2532.9
## - M_DEBTINC 1 2869.4 2917.4
# Coefficient table for the model kept by backward selection.
summary( lr_model )
##
## Call:
## glm(formula = TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG +
## IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ +
## IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr +
## FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self, family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -6.908e+00 6.124e-01 -11.280 < 2e-16 ***
## LOAN -9.617e-06 5.566e-06 -1.728 0.084025 .
## IMP_MORTDUE -3.545e-06 2.090e-06 -1.697 0.089768 .
## M_MORTDUE 3.857e-01 2.451e-01 1.574 0.115585
## IMP_VALUE 4.405e-06 1.486e-06 2.964 0.003041 **
## M_VALUE 4.999e+00 6.115e-01 8.176 2.93e-16 ***
## IMP_YOJ -2.005e-02 8.265e-03 -2.426 0.015280 *
## M_YOJ -7.974e-01 2.385e-01 -3.343 0.000829 ***
## IMP_DEROG 5.815e-01 7.534e-02 7.718 1.19e-14 ***
## M_DEROG -2.654e+00 3.589e-01 -7.397 1.40e-13 ***
## IMP_DELINQ 8.185e-01 6.249e-02 13.098 < 2e-16 ***
## M_DELINQ -1.227e+00 4.247e-01 -2.889 0.003870 **
## IMP_CLAGE -5.421e-03 8.065e-04 -6.722 1.79e-11 ***
## M_CLAGE 9.770e-01 4.397e-01 2.222 0.026279 *
## IMP_NINQ 1.520e-01 3.160e-02 4.811 1.50e-06 ***
## IMP_CLNO -1.646e-02 6.273e-03 -2.624 0.008697 **
## M_CLNO 2.599e+00 7.180e-01 3.620 0.000295 ***
## IMP_DEBTINC 9.295e-02 1.013e-02 9.179 < 2e-16 ***
## M_DEBTINC 2.682e+00 1.143e-01 23.456 < 2e-16 ***
## FLAG.Job.Mgr 2.061e+00 4.828e-01 4.269 1.97e-05 ***
## FLAG.Job.Office 1.387e+00 4.841e-01 2.864 0.004178 **
## FLAG.Job.Other 2.053e+00 4.642e-01 4.424 9.71e-06 ***
## FLAG.Job.ProfExe 1.947e+00 4.793e-01 4.061 4.89e-05 ***
## FLAG.Job.Sales 3.206e+00 5.627e-01 5.698 1.21e-08 ***
## FLAG.Job.Self 2.495e+00 5.469e-01 4.562 5.06e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4155.3 on 4141 degrees of freedom
## Residual deviance: 2248.9 on 4117 degrees of freedom
## AIC: 2298.9
##
## Number of Fisher Scoring iterations: 6
# Test-set probabilities from the backward-selected logistic model, wrapped
# for ROCR and turned into an ROC curve (TPR against FPR).
plr <- predict(lr_model, newdata = test, type = "response")
plr2 <- prediction(predictions = plr, labels = test$TARGET_BAD_FLAG)
plr3 <- performance(plr2, measure = "tpr", x.measure = "fpr")
# LR STEP TREE MODEL
# Names of the predictors that carry an importance score in the
# classification tree, in the order rpart reports them.
treeVars <- names(tr_model$variable.importance)
treeVars
## [1] "M_DEBTINC" "IMP_DEBTINC" "IMP_DELINQ" "IMP_CLAGE" "LOAN"
## [6] "M_VALUE" "IMP_VALUE" "IMP_MORTDUE" "IMP_CLNO" "IMP_YOJ"
# Build "TARGET_BAD_FLAG ~ var1+var2+..." from those names. The formula is
# stored in `tree_formula` rather than `F`: assigning to `F` masks R's
# built-in shorthand for FALSE for the rest of the session.
treeVarsPlus <- paste(treeVars, collapse = "+")
tree_formula <- as.formula(paste("TARGET_BAD_FLAG ~", treeVarsPlus))
# Upper bound for forward selection: logistic model on the tree's variables.
tree_LR <- glm(tree_formula, family = "binomial", data = train)
# Lower bound: intercept-only logistic model.
theLower_LR <- glm(TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
summary(tree_LR)
##
## Call:
## glm(formula = F, family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.697e+00 3.910e-01 -12.014 < 2e-16 ***
## M_DEBTINC 2.782e+00 1.066e-01 26.103 < 2e-16 ***
## IMP_DEBTINC 8.956e-02 9.640e-03 9.290 < 2e-16 ***
## IMP_DELINQ 6.802e-01 5.169e-02 13.159 < 2e-16 ***
## IMP_CLAGE -6.253e-03 7.569e-04 -8.261 < 2e-16 ***
## LOAN -7.340e-06 5.046e-06 -1.455 0.14577
## M_VALUE 3.887e+00 4.956e-01 7.842 4.43e-15 ***
## IMP_VALUE 4.458e-06 1.378e-06 3.236 0.00121 **
## IMP_MORTDUE -3.540e-06 1.932e-06 -1.832 0.06692 .
## IMP_CLNO -7.701e-03 5.556e-03 -1.386 0.16572
## IMP_YOJ -2.077e-02 7.871e-03 -2.639 0.00830 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4155.3 on 4141 degrees of freedom
## Residual deviance: 2533.8 on 4131 degrees of freedom
## AIC: 2555.8
##
## Number of Fisher Scoring iterations: 6
# Coefficient table and deviance for the intercept-only logistic model.
summary( theLower_LR )
##
## Call:
## glm(formula = TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.38087 0.03878 -35.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4155.3 on 4141 degrees of freedom
## Residual deviance: 4155.3 on 4141 degrees of freedom
## AIC: 4157.3
##
## Number of Fisher Scoring iterations: 4
# TREE FORWARD STEPWISE SELECTION
# Forward selection by AIC: start from the intercept-only model and add
# terms drawn from the tree-variable logistic model.
lrt_model <- stepAIC(
  object = theLower_LR,
  scope = list(lower = theLower_LR, upper = tree_LR),
  direction = "forward"
)
## Start: AIC=4157.3
## TARGET_BAD_FLAG ~ 1
##
## Df Deviance AIC
## + M_DEBTINC 1 3088.5 3092.5
## + IMP_DELINQ 1 3776.4 3780.4
## + M_VALUE 1 3942.8 3946.8
## + IMP_CLAGE 1 4020.9 4024.9
## + IMP_DEBTINC 1 4051.1 4055.1
## + LOAN 1 4133.3 4137.3
## + IMP_YOJ 1 4144.0 4148.0
## + IMP_MORTDUE 1 4150.4 4154.4
## + IMP_VALUE 1 4153.1 4157.1
## <none> 4155.3 4157.3
## + IMP_CLNO 1 4155.3 4159.3
##
## Step: AIC=3092.5
## TARGET_BAD_FLAG ~ M_DEBTINC
##
## Df Deviance AIC
## + IMP_DELINQ 1 2861.2 2867.2
## + M_VALUE 1 2972.0 2978.0
## + IMP_DEBTINC 1 2977.4 2983.4
## + IMP_CLAGE 1 3006.3 3012.3
## + IMP_YOJ 1 3075.2 3081.2
## <none> 3088.5 3092.5
## + LOAN 1 3086.7 3092.7
## + IMP_CLNO 1 3088.2 3094.2
## + IMP_VALUE 1 3088.3 3094.3
## + IMP_MORTDUE 1 3088.5 3094.5
##
## Step: AIC=2867.17
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ
##
## Df Deviance AIC
## + IMP_CLAGE 1 2751.2 2759.2
## + IMP_DEBTINC 1 2753.8 2761.8
## + M_VALUE 1 2778.9 2786.9
## + IMP_YOJ 1 2836.1 2844.1
## + IMP_CLNO 1 2850.8 2858.8
## <none> 2861.2 2867.2
## + LOAN 1 2859.8 2867.8
## + IMP_VALUE 1 2860.8 2868.8
## + IMP_MORTDUE 1 2861.1 2869.1
##
## Step: AIC=2759.17
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE
##
## Df Deviance AIC
## + IMP_DEBTINC 1 2650.4 2660.4
## + M_VALUE 1 2672.4 2682.4
## + IMP_YOJ 1 2741.4 2751.4
## + IMP_VALUE 1 2743.9 2753.9
## <none> 2751.2 2759.2
## + IMP_MORTDUE 1 2749.2 2759.2
## + IMP_CLNO 1 2751.2 2761.2
## + LOAN 1 2751.2 2761.2
##
## Step: AIC=2660.38
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC
##
## Df Deviance AIC
## + M_VALUE 1 2554.2 2566.2
## + IMP_YOJ 1 2644.2 2656.2
## + IMP_VALUE 1 2646.5 2658.5
## + IMP_CLNO 1 2648.2 2660.2
## <none> 2650.4 2660.4
## + LOAN 1 2650.0 2662.0
## + IMP_MORTDUE 1 2650.1 2662.1
##
## Step: AIC=2566.19
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC +
## M_VALUE
##
## Df Deviance AIC
## + IMP_YOJ 1 2547.7 2561.7
## + IMP_VALUE 1 2549.6 2563.6
## <none> 2554.2 2566.2
## + IMP_CLNO 1 2552.5 2566.5
## + LOAN 1 2553.3 2567.3
## + IMP_MORTDUE 1 2554.2 2568.2
##
## Step: AIC=2561.67
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC +
## M_VALUE + IMP_YOJ
##
## Df Deviance AIC
## + IMP_VALUE 1 2542.8 2558.8
## <none> 2547.7 2561.7
## + IMP_CLNO 1 2545.9 2561.9
## + LOAN 1 2547.2 2563.2
## + IMP_MORTDUE 1 2547.7 2563.7
##
## Step: AIC=2558.79
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC +
## M_VALUE + IMP_YOJ + IMP_VALUE
##
## Df Deviance AIC
## + IMP_MORTDUE 1 2538.0 2556.0
## + IMP_CLNO 1 2539.4 2557.4
## + LOAN 1 2540.5 2558.5
## <none> 2542.8 2558.8
##
## Step: AIC=2556.04
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC +
## M_VALUE + IMP_YOJ + IMP_VALUE + IMP_MORTDUE
##
## Df Deviance AIC
## + LOAN 1 2535.7 2555.7
## + IMP_CLNO 1 2536.0 2556.0
## <none> 2538.0 2556.0
##
## Step: AIC=2555.73
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC +
## M_VALUE + IMP_YOJ + IMP_VALUE + IMP_MORTDUE + LOAN
##
## Df Deviance AIC
## <none> 2535.7 2555.7
## + IMP_CLNO 1 2533.8 2555.8
# Coefficient table for the model kept by forward selection.
summary( lrt_model )
##
## Call:
## glm(formula = TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE +
## IMP_DEBTINC + M_VALUE + IMP_YOJ + IMP_VALUE + IMP_MORTDUE +
## LOAN, family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.723e+00 3.904e-01 -12.097 < 2e-16 ***
## M_DEBTINC 2.777e+00 1.064e-01 26.096 < 2e-16 ***
## IMP_DELINQ 6.697e-01 5.095e-02 13.145 < 2e-16 ***
## IMP_CLAGE -6.506e-03 7.373e-04 -8.823 < 2e-16 ***
## IMP_DEBTINC 8.820e-02 9.577e-03 9.210 < 2e-16 ***
## M_VALUE 3.897e+00 4.959e-01 7.858 3.91e-15 ***
## IMP_YOJ -2.093e-02 7.882e-03 -2.655 0.00793 **
## IMP_VALUE 4.432e-06 1.370e-06 3.236 0.00121 **
## IMP_MORTDUE -4.005e-06 1.890e-06 -2.119 0.03408 *
## LOAN -7.589e-06 5.054e-06 -1.502 0.13323
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4155.3 on 4141 degrees of freedom
## Residual deviance: 2535.7 on 4132 degrees of freedom
## AIC: 2555.7
##
## Number of Fisher Scoring iterations: 6
# Test-set probabilities from the forward-selected (tree-variable) logistic
# model, wrapped for ROCR and turned into an ROC curve (TPR against FPR).
plrt <- predict(lrt_model, newdata = test, type = "response")
plrt2 <- prediction(predictions = plrt, labels = test$TARGET_BAD_FLAG)
plrt3 <- performance(plrt2, measure = "tpr", x.measure = "fpr")
# ROC
# Overlay the test-set ROC curves of all five classifiers on one plot.
# Curves, labels and colours live in parallel vectors so the legend cannot
# drift out of sync with what is drawn.
roc_curves <- list(pt3, pr3, pg3, plr3, plrt3)
roc_labels <- c(
  "TREE", "RANDOM FOREST", "GRADIENT BOOSTING",
  "LOGIT REG BWD", "LOGIT REG TREE"
)
roc_colors <- c("green", "red", "blue", "gold", "gray")

for (i in seq_along(roc_curves)) {
  # The first curve opens the plot; the others are added on top of it.
  plot(roc_curves[[i]], col = roc_colors[i], add = i > 1)
}

# Dashed diagonal: the ROC curve of a random classifier.
abline(0, 1, lty = 2)

# `bty` only documents "o" (box) and "n" (no box). The previous value "y"
# drew a box merely because it was not "n", so state "o" explicitly.
legend(
  "bottomright",
  legend = roc_labels,
  col = roc_colors,
  lty = 1,
  bty = "o"
)

# Area under the ROC curve for a ROCR prediction object. The value is
# returned exactly as ROCR stores it in the `y.values` slot (a list).
auc_value <- function(pred) {
  performance(pred, "auc")@y.values
}

aucT <- auc_value(pt2)
aucR <- auc_value(pr2)
aucG <- auc_value(pg2)
aucLR <- auc_value(plr2)
aucLRT <- auc_value(plrt2)

# Report the test-set AUC of each model.
print(paste("TREE AUC=", aucT))
## [1] "TREE AUC= 0.826618121581281"
print(paste("RF AUC=", aucR))
## [1] "RF AUC= 0.953436405362943"
print(paste("GB AUC=", aucG))
## [1] "GB AUC= 0.920521802150007"
print(paste("LR AUC=", aucLR))
## [1] "LR AUC= 0.910356476608441"
print(paste("LRT AUC=", aucLRT))
## [1] "LRT AUC= 0.879705201724766"
# !!!SUMMARY & ANALYSIS!!!
# The Random Forest performed best among all the models: it has the largest
# test-set AUC, i.e. it is best at ranking bad loans above good ones.
# I would recommend the Random Forest method because it discriminates best
# and the model is easy to build without a significantly long running time.
# Step 3 - Linear Regression
# Create the data set as 70% of training and 30% test set
# Draw a fresh random split for the loss-amount models, using the same
# 0.7 / 0.3 row-flag scheme as before. The seed is not reset here, so this
# continues the existing RNG stream.
sample <- sample(
  x = c(TRUE, FALSE),
  size = nrow(copy_wk5),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- copy_wk5[sample, ]
test <- copy_wk5[!sample, ]
# Models from Week5 -
# Anova (Using ANOVA because it outperformed Poisson in previous weeks)
# Regression ("anova") tree for the loss amount, limited by `tree_depth`;
# the default flag is excluded from the predictors.
TreeAnova <- rpart(
  formula = TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  data = train,
  method = "anova",
  control = tree_depth
)
rpart.plot(TreeAnova, digits = -3, extra = 100)

# Variable importance as recorded on the fitted tree.
TreeAnova[["variable.importance"]]
## LOAN M_DEBTINC IMP_DEBTINC IMP_DELINQ
## 49690664826 42985254519 15939584553 15400438974
## IMP_VALUE IMP_MORTDUE IMP_DEROG IMP_CLAGE
## 9271157466 4145647445 3634659036 3469029852
## IMP_CLNO FLAG.Reason.HomeImp FLAG.Reason.DebtCon M_VALUE
## 3174251359 2820521327 2148907741 1958672130
## M_DEROG M_DELINQ M_NINQ M_MORTDUE
## 1889394893 1161439258 916925730 847333397
## M_YOJ FLAG.Job.Self IMP_YOJ
## 585657276 452877917 390438184
# Predicted loss amounts for the test set.
pt <- predict(TreeAnova, newdata = test)
head(pt)
## 2 4 8 17 18 22
## 3972.6331 3972.6331 428.8188 3972.6331 428.8188 3972.6331
# Root-mean-squared error of the tree on the test set.
tree_errors <- test$TARGET_LOSS_AMT - pt
RMSEt <- sqrt(mean(tree_errors^2))
# Random Forest
# Regression forest (500 trees) for the loss amount; the default flag is
# excluded from the predictors and importance measures are recorded.
rf_model <- randomForest(
  TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  data = train,
  ntree = 500,
  importance = TRUE
)
importance(rf_model)
## %IncMSE IncNodePurity
## LOAN 60.668132 46695625794
## IMP_MORTDUE 17.146165 10494379059
## M_MORTDUE 5.153892 747061976
## IMP_VALUE 18.735447 13778814510
## M_VALUE 28.190389 3749783439
## IMP_YOJ 15.606857 8223538954
## M_YOJ 8.348570 683350163
## IMP_DEROG 21.995255 5774511085
## M_DEROG 8.842225 1258662250
## IMP_DELINQ 42.033453 20407610426
## M_DELINQ 6.506493 536619144
## IMP_CLAGE 25.161710 13446209097
## M_CLAGE 7.670149 257964369
## IMP_NINQ 17.389315 5663088617
## M_NINQ 4.939194 354225193
## IMP_CLNO 24.784895 11960859784
## M_CLNO 11.163432 234115096
## IMP_DEBTINC 28.439878 23417631709
## M_DEBTINC 40.538449 32416365766
## FLAG.Job.Mgr 9.203716 1052569539
## FLAG.Job.Office 7.495568 1046994234
## FLAG.Job.Other 10.543253 1280275312
## FLAG.Job.ProfExe 6.506114 912898185
## FLAG.Job.Sales 13.315858 827523754
## FLAG.Job.Self 4.561926 1376867544
## FLAG.Reason.DebtCon 8.337061 1592444846
## FLAG.Reason.HomeImp 7.951433 1409168007
varImpPlot(rf_model)

# Predicted loss amounts from the forest for the test set.
pr <- predict(rf_model, newdata = test)
head(pr)
## 2 4 8 17 18 22
## 2866.231 2236.436 674.151 8233.899 1637.305 2565.278
# Root-mean-squared error of the forest on the test set.
forest_errors <- test$TARGET_LOSS_AMT - pr
RMSEr <- sqrt(mean(forest_errors^2))
print(RMSEr)
## [1] 4020.425
# GRADIENT BOOSTING
# Gradient boosting with Poisson loss and 500 trees for the loss amount
# (default flag excluded from the predictors).
gb_model <- gbm(
  formula = TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  distribution = "poisson",
  data = train,
  n.trees = 500
)
# Relative-influence summary, plotting the top 10 bars.
summary(gb_model, cBars = 10)
# Predictions on the response scale for the test set.
pg <- predict(gb_model, newdata = test, type = "response")
## Using 500 trees...
head(pg)
## [1] 4992.8362 4844.7070 246.7864 7432.4818 495.2246 2115.6312
# Root-mean-squared error of the boosted model on the test set.
boost_errors <- test$TARGET_LOSS_AMT - pg
RMSEg <- sqrt(mean(boost_errors^2))
# LINEAR Regression Model
# Bounds for stepwise selection on the loss amount: the upper model uses
# every predictor except the default flag; the lower model is intercept-only.
theUpper_LR <- lm(
  formula = TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  data = train
)
theLower_LR <- lm(
  formula = TARGET_LOSS_AMT ~ 1,
  data = train
)
summary(theUpper_LR)
##
## Call:
## lm(formula = TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15774 -2591 -343 1561 58518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.599e+03 7.482e+02 -8.821 < 2e-16 ***
## LOAN 1.411e-01 8.469e-03 16.666 < 2e-16 ***
## IMP_MORTDUE -1.252e-02 3.555e-03 -3.523 0.000432 ***
## M_MORTDUE 1.299e+03 3.491e+02 3.721 0.000201 ***
## IMP_VALUE 1.434e-02 2.792e-03 5.136 2.94e-07 ***
## M_VALUE 6.608e+03 6.596e+02 10.018 < 2e-16 ***
## IMP_YOJ -3.338e+01 1.223e+01 -2.730 0.006355 **
## M_YOJ -6.693e+02 3.452e+02 -1.939 0.052604 .
## IMP_DEROG 8.747e+02 1.131e+02 7.735 1.29e-14 ***
## M_DEROG -2.534e+03 4.219e+02 -6.005 2.07e-09 ***
## IMP_DELINQ 1.887e+03 8.994e+01 20.978 < 2e-16 ***
## M_DELINQ -2.059e+03 5.682e+02 -3.623 0.000294 ***
## IMP_CLAGE -1.013e+01 1.115e+00 -9.083 < 2e-16 ***
## M_CLAGE 5.781e+02 7.338e+02 0.788 0.430858
## IMP_NINQ 1.848e+02 5.543e+01 3.333 0.000865 ***
## M_NINQ 1.719e+02 5.350e+02 0.321 0.747929
## IMP_CLNO 4.706e+01 1.018e+01 4.624 3.88e-06 ***
## M_CLNO 2.670e+03 9.774e+02 2.731 0.006334 **
## IMP_DEBTINC 1.166e+02 1.149e+01 10.144 < 2e-16 ***
## M_DEBTINC 5.949e+03 2.230e+02 26.682 < 2e-16 ***
## FLAG.Job.Mgr 5.942e+02 5.502e+02 1.080 0.280178
## FLAG.Job.Office 4.610e+02 5.405e+02 0.853 0.393760
## FLAG.Job.Other 8.761e+02 5.082e+02 1.724 0.084791 .
## FLAG.Job.ProfExe 8.888e+02 5.351e+02 1.661 0.096780 .
## FLAG.Job.Sales 4.103e+03 8.270e+02 4.961 7.30e-07 ***
## FLAG.Job.Self 1.893e+03 7.021e+02 2.696 0.007055 **
## FLAG.Reason.DebtCon 5.688e+01 4.994e+02 0.114 0.909314
## FLAG.Reason.HomeImp -4.700e+02 5.052e+02 -0.930 0.352240
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5487 on 4140 degrees of freedom
## Multiple R-squared: 0.4272, Adjusted R-squared: 0.4235
## F-statistic: 114.4 on 27 and 4140 DF, p-value: < 2.2e-16
# Coefficient table and residual summary for the intercept-only linear model.
summary( theLower_LR )
##
## Call:
## lm(formula = TARGET_LOSS_AMT ~ 1, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2676 -2676 -2676 -2676 76311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2676.0 111.9 23.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7226 on 4167 degrees of freedom
# LINEAR BACKWARD VARIABLE SELECTION
# Backward elimination by AIC: start from the full linear model and drop
# terms, never going below the intercept-only model.
lr_model <- stepAIC(
  object = theUpper_LR,
  scope = list(lower = theLower_LR, upper = theUpper_LR),
  direction = "backward"
)
## Start: AIC=71801.44
## TARGET_LOSS_AMT ~ (TARGET_BAD_FLAG + LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG +
## IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ +
## M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr +
## FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp) -
## TARGET_BAD_FLAG
##
## Df Sum of Sq RSS AIC
## - FLAG.Reason.DebtCon 1 3.9061e+05 1.2463e+11 71799
## - M_NINQ 1 3.1095e+06 1.2463e+11 71800
## - M_CLAGE 1 1.8683e+07 1.2465e+11 71800
## - FLAG.Job.Office 1 2.1899e+07 1.2465e+11 71800
## - FLAG.Reason.HomeImp 1 2.6056e+07 1.2465e+11 71800
## - FLAG.Job.Mgr 1 3.5116e+07 1.2466e+11 71801
## <none> 1.2463e+11 71801
## - FLAG.Job.ProfExe 1 8.3057e+07 1.2471e+11 71802
## - FLAG.Job.Other 1 8.9466e+07 1.2472e+11 71802
## - M_YOJ 1 1.1315e+08 1.2474e+11 71803
## - FLAG.Job.Self 1 2.1874e+08 1.2485e+11 71807
## - IMP_YOJ 1 2.2440e+08 1.2485e+11 71807
## - M_CLNO 1 2.2458e+08 1.2485e+11 71807
## - IMP_NINQ 1 3.3451e+08 1.2496e+11 71811
## - IMP_MORTDUE 1 3.7359e+08 1.2500e+11 71812
## - M_DELINQ 1 3.9523e+08 1.2502e+11 71813
## - M_MORTDUE 1 4.1669e+08 1.2504e+11 71813
## - IMP_CLNO 1 6.4368e+08 1.2527e+11 71821
## - FLAG.Job.Sales 1 7.4082e+08 1.2537e+11 71824
## - IMP_VALUE 1 7.9406e+08 1.2542e+11 71826
## - M_DEROG 1 1.0856e+09 1.2571e+11 71836
## - IMP_DEROG 1 1.8013e+09 1.2643e+11 71859
## - IMP_CLAGE 1 2.4835e+09 1.2711e+11 71882
## - M_VALUE 1 3.0213e+09 1.2765e+11 71899
## - IMP_DEBTINC 1 3.0976e+09 1.2772e+11 71902
## - LOAN 1 8.3610e+09 1.3299e+11 72070
## - IMP_DELINQ 1 1.3248e+10 1.3787e+11 72221
## - M_DEBTINC 1 2.1432e+10 1.4606e+11 72461
##
## Step: AIC=71799.45
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO +
## M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - M_NINQ 1 3.1187e+06 1.2463e+11 71798
## - M_CLAGE 1 1.8370e+07 1.2465e+11 71798
## - FLAG.Job.Office 1 2.3806e+07 1.2465e+11 71798
## - FLAG.Job.Mgr 1 3.8011e+07 1.2467e+11 71799
## <none> 1.2463e+11 71799
## - FLAG.Job.ProfExe 1 8.9088e+07 1.2472e+11 71800
## - FLAG.Job.Other 1 9.6230e+07 1.2472e+11 71801
## - M_YOJ 1 1.1582e+08 1.2474e+11 71801
## - FLAG.Reason.HomeImp 1 2.1607e+08 1.2484e+11 71805
## - M_CLNO 1 2.2430e+08 1.2485e+11 71805
## - IMP_YOJ 1 2.2464e+08 1.2485e+11 71805
## - FLAG.Job.Self 1 2.2618e+08 1.2485e+11 71805
## - IMP_NINQ 1 3.3507e+08 1.2496e+11 71809
## - IMP_MORTDUE 1 3.7343e+08 1.2500e+11 71810
## - M_DELINQ 1 3.9578e+08 1.2502e+11 71811
## - M_MORTDUE 1 4.1782e+08 1.2504e+11 71811
## - IMP_CLNO 1 6.4345e+08 1.2527e+11 71819
## - FLAG.Job.Sales 1 7.6365e+08 1.2539e+11 71823
## - IMP_VALUE 1 7.9630e+08 1.2542e+11 71824
## - M_DEROG 1 1.0860e+09 1.2571e+11 71834
## - IMP_DEROG 1 1.8017e+09 1.2643e+11 71857
## - IMP_CLAGE 1 2.4944e+09 1.2712e+11 71880
## - M_VALUE 1 3.0318e+09 1.2766e+11 71898
## - IMP_DEBTINC 1 3.1039e+09 1.2773e+11 71900
## - LOAN 1 8.4305e+09 1.3306e+11 72070
## - IMP_DELINQ 1 1.3276e+10 1.3790e+11 72219
## - M_DEBTINC 1 2.1435e+10 1.4606e+11 72459
##
## Step: AIC=71797.56
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO +
## IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - M_CLAGE 1 1.7996e+07 1.2465e+11 71796
## - FLAG.Job.Office 1 2.3164e+07 1.2465e+11 71796
## - FLAG.Job.Mgr 1 3.7190e+07 1.2467e+11 71797
## <none> 1.2463e+11 71798
## - FLAG.Job.ProfExe 1 8.7912e+07 1.2472e+11 71798
## - FLAG.Job.Other 1 9.5054e+07 1.2473e+11 71799
## - M_YOJ 1 1.1367e+08 1.2474e+11 71799
## - FLAG.Reason.HomeImp 1 2.1532e+08 1.2485e+11 71803
## - IMP_YOJ 1 2.2566e+08 1.2486e+11 71803
## - FLAG.Job.Self 1 2.2811e+08 1.2486e+11 71803
## - M_CLNO 1 2.4512e+08 1.2488e+11 71804
## - IMP_NINQ 1 3.3211e+08 1.2496e+11 71807
## - IMP_MORTDUE 1 3.7432e+08 1.2500e+11 71808
## - M_MORTDUE 1 4.1859e+08 1.2505e+11 71810
## - M_DELINQ 1 4.8257e+08 1.2511e+11 71812
## - IMP_CLNO 1 6.4498e+08 1.2528e+11 71817
## - FLAG.Job.Sales 1 7.6075e+08 1.2539e+11 71821
## - IMP_VALUE 1 7.9770e+08 1.2543e+11 71822
## - M_DEROG 1 1.0833e+09 1.2571e+11 71832
## - IMP_DEROG 1 1.8198e+09 1.2645e+11 71856
## - IMP_CLAGE 1 2.4940e+09 1.2712e+11 71878
## - M_VALUE 1 3.0305e+09 1.2766e+11 71896
## - IMP_DEBTINC 1 3.1079e+09 1.2774e+11 71898
## - LOAN 1 8.4276e+09 1.3306e+11 72068
## - IMP_DELINQ 1 1.3306e+10 1.3794e+11 72218
## - M_DEBTINC 1 2.1432e+10 1.4606e+11 72457
##
## Step: AIC=71796.16
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC +
## M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other +
## FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - FLAG.Job.Office 1 1.8035e+07 1.2467e+11 71795
## - FLAG.Job.Mgr 1 3.1120e+07 1.2468e+11 71795
## <none> 1.2465e+11 71796
## - FLAG.Job.ProfExe 1 7.8370e+07 1.2473e+11 71797
## - FLAG.Job.Other 1 8.5843e+07 1.2473e+11 71797
## - M_YOJ 1 1.1270e+08 1.2476e+11 71798
## - FLAG.Job.Self 1 2.1710e+08 1.2487e+11 71801
## - FLAG.Reason.HomeImp 1 2.2215e+08 1.2487e+11 71802
## - IMP_YOJ 1 2.3573e+08 1.2488e+11 71802
## - IMP_NINQ 1 3.4004e+08 1.2499e+11 71806
## - IMP_MORTDUE 1 3.6431e+08 1.2501e+11 71806
## - M_MORTDUE 1 4.1927e+08 1.2507e+11 71808
## - M_DELINQ 1 4.8952e+08 1.2514e+11 71810
## - IMP_CLNO 1 6.2835e+08 1.2528e+11 71815
## - M_CLNO 1 7.1777e+08 1.2537e+11 71818
## - FLAG.Job.Sales 1 7.4541e+08 1.2539e+11 71819
## - IMP_VALUE 1 7.9229e+08 1.2544e+11 71821
## - M_DEROG 1 1.0839e+09 1.2573e+11 71830
## - IMP_DEROG 1 1.8303e+09 1.2648e+11 71855
## - IMP_CLAGE 1 2.4770e+09 1.2713e+11 71876
## - M_VALUE 1 3.0218e+09 1.2767e+11 71894
## - IMP_DEBTINC 1 3.0903e+09 1.2774e+11 71896
## - LOAN 1 8.4350e+09 1.3308e+11 72067
## - IMP_DELINQ 1 1.3294e+10 1.3794e+11 72217
## - M_DEBTINC 1 2.1478e+10 1.4613e+11 72457
##
## Step: AIC=71794.76
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC +
## M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Other + FLAG.Job.ProfExe +
## FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - FLAG.Job.Mgr 1 1.3293e+07 1.2468e+11 71793
## <none> 1.2467e+11 71795
## - FLAG.Job.ProfExe 1 9.4839e+07 1.2476e+11 71796
## - M_YOJ 1 1.1778e+08 1.2478e+11 71797
## - FLAG.Job.Other 1 1.2948e+08 1.2480e+11 71797
## - FLAG.Reason.HomeImp 1 2.1713e+08 1.2488e+11 71800
## - IMP_YOJ 1 2.4037e+08 1.2491e+11 71801
## - FLAG.Job.Self 1 2.4139e+08 1.2491e+11 71801
## - IMP_NINQ 1 3.3757e+08 1.2500e+11 71804
## - IMP_MORTDUE 1 3.6994e+08 1.2504e+11 71805
## - M_MORTDUE 1 4.0780e+08 1.2507e+11 71806
## - M_DELINQ 1 4.7914e+08 1.2515e+11 71809
## - IMP_CLNO 1 6.4364e+08 1.2531e+11 71814
## - M_CLNO 1 7.5484e+08 1.2542e+11 71818
## - IMP_VALUE 1 7.9611e+08 1.2546e+11 71819
## - FLAG.Job.Sales 1 8.8419e+08 1.2555e+11 71822
## - M_DEROG 1 1.0896e+09 1.2576e+11 71829
## - IMP_DEROG 1 1.8175e+09 1.2648e+11 71853
## - IMP_CLAGE 1 2.4623e+09 1.2713e+11 71874
## - M_VALUE 1 3.0170e+09 1.2768e+11 71892
## - IMP_DEBTINC 1 3.1689e+09 1.2784e+11 71897
## - LOAN 1 8.4190e+09 1.3309e+11 72065
## - IMP_DELINQ 1 1.3313e+10 1.3798e+11 72216
## - M_DEBTINC 1 2.1529e+10 1.4620e+11 72457
##
## Step: AIC=71793.21
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC +
## M_DEBTINC + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## <none> 1.2468e+11 71793
## - FLAG.Job.ProfExe 1 8.2589e+07 1.2476e+11 71794
## - M_YOJ 1 1.1736e+08 1.2480e+11 71795
## - FLAG.Job.Other 1 1.2304e+08 1.2480e+11 71795
## - FLAG.Reason.HomeImp 1 2.2024e+08 1.2490e+11 71799
## - FLAG.Job.Self 1 2.2813e+08 1.2491e+11 71799
## - IMP_YOJ 1 2.3572e+08 1.2492e+11 71799
## - IMP_NINQ 1 3.5005e+08 1.2503e+11 71803
## - IMP_MORTDUE 1 3.6206e+08 1.2504e+11 71803
## - M_MORTDUE 1 4.0040e+08 1.2508e+11 71805
## - M_DELINQ 1 4.7505e+08 1.2515e+11 71807
## - IMP_CLNO 1 6.4607e+08 1.2533e+11 71813
## - M_CLNO 1 7.4165e+08 1.2542e+11 71816
## - IMP_VALUE 1 7.9375e+08 1.2547e+11 71818
## - FLAG.Job.Sales 1 8.7488e+08 1.2555e+11 71820
## - M_DEROG 1 1.0934e+09 1.2577e+11 71828
## - IMP_DEROG 1 1.8214e+09 1.2650e+11 71852
## - IMP_CLAGE 1 2.4609e+09 1.2714e+11 71873
## - M_VALUE 1 3.0265e+09 1.2771e+11 71891
## - IMP_DEBTINC 1 3.1881e+09 1.2787e+11 71896
## - LOAN 1 8.4169e+09 1.3310e+11 72063
## - IMP_DELINQ 1 1.3348e+10 1.3803e+11 72215
## - M_DEBTINC 1 2.1595e+10 1.4627e+11 72457
# Coefficient table and fit statistics of the backward-selected model
summary(lr_model)
##
## Call:
## lm(formula = TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG +
## IMP_DELINQ + M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO +
## M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Other + FLAG.Job.ProfExe +
## FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.HomeImp, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16031 -2578 -324 1563 58518
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.072e+03 4.897e+02 -12.400 < 2e-16 ***
## LOAN 1.410e-01 8.429e-03 16.728 < 2e-16 ***
## IMP_MORTDUE -1.224e-02 3.527e-03 -3.469 0.000527 ***
## M_MORTDUE 1.261e+03 3.456e+02 3.648 0.000267 ***
## IMP_VALUE 1.428e-02 2.781e-03 5.137 2.92e-07 ***
## M_VALUE 6.583e+03 6.563e+02 10.031 < 2e-16 ***
## IMP_YOJ -3.407e+01 1.217e+01 -2.799 0.005144 **
## M_YOJ -6.751e+02 3.418e+02 -1.975 0.048305 *
## IMP_DEROG 8.763e+02 1.126e+02 7.782 8.98e-15 ***
## M_DEROG -2.533e+03 4.201e+02 -6.029 1.79e-09 ***
## IMP_DELINQ 1.889e+03 8.967e+01 21.066 < 2e-16 ***
## M_DELINQ -1.947e+03 4.898e+02 -3.974 7.19e-05 ***
## IMP_CLAGE -1.004e+01 1.109e+00 -9.045 < 2e-16 ***
## IMP_NINQ 1.877e+02 5.501e+01 3.411 0.000653 ***
## IMP_CLNO 4.598e+01 9.920e+00 4.635 3.69e-06 ***
## M_CLNO 3.000e+03 6.041e+02 4.966 7.13e-07 ***
## IMP_DEBTINC 1.167e+02 1.133e+01 10.295 < 2e-16 ***
## M_DEBTINC 5.963e+03 2.226e+02 26.794 < 2e-16 ***
## FLAG.Job.Other 4.159e+02 2.056e+02 2.023 0.043185 *
## FLAG.Job.ProfExe 4.117e+02 2.484e+02 1.657 0.097593 .
## FLAG.Job.Sales 3.619e+03 6.710e+02 5.393 7.31e-08 ***
## FLAG.Job.Self 1.430e+03 5.191e+02 2.754 0.005914 **
## FLAG.Reason.HomeImp -5.267e+02 1.947e+02 -2.706 0.006840 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5484 on 4145 degrees of freedom
## Multiple R-squared: 0.427, Adjusted R-squared: 0.424
## F-statistic: 140.4 on 22 and 4145 DF, p-value: < 2.2e-16
# Predict the loss amount for the rows of `test` with the backward-selected model
plr <- predict(lr_model, newdata = test)
head(plr)
## 2 4 8 17 18 22
## 7127.810 12239.854 -2256.998 16925.028 2243.138 3782.327
# Root-mean-squared error of those predictions against the observed loss amount
RMSElr <- sqrt(mean((test$TARGET_LOSS_AMT - plr)^2))
# LR STEP TREE
# Fit a linear model restricted to the predictors listed in the
# variable-importance vector of TreeAnova (presumably the rpart ANOVA tree
# fitted earlier in the script -- confirm against its definition).
treeVars <- names(TreeAnova$variable.importance)
treeVarsPlus <- paste(treeVars, collapse = "+")
# The formula is stored as `tree_formula` rather than `F`: assigning to `F`
# would mask R's built-in shorthand for FALSE for the rest of the session.
tree_formula <- as.formula(paste("TARGET_LOSS_AMT ~", treeVarsPlus))
tree_LR <- lm(tree_formula, data = train)
# Intercept-only model, refit here; it is the starting point and lower bound
# of the forward stepwise search run on tree_LR further down.
theLower_LR <- lm(TARGET_LOSS_AMT ~ 1, data = train)
summary(tree_LR)
##
## Call:
## lm(formula = tree_formula, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17048 -2487 -356 1547 58341
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.642e+03 6.470e+02 -8.720 < 2e-16 ***
## LOAN 1.379e-01 8.474e-03 16.275 < 2e-16 ***
## M_DEBTINC 6.197e+03 2.210e+02 28.047 < 2e-16 ***
## IMP_DEBTINC 1.218e+02 1.135e+01 10.727 < 2e-16 ***
## IMP_DELINQ 1.849e+03 9.021e+01 20.501 < 2e-16 ***
## IMP_VALUE 1.496e-02 2.785e-03 5.371 8.27e-08 ***
## IMP_MORTDUE -1.281e-02 3.533e-03 -3.626 0.000291 ***
## IMP_DEROG 9.097e+02 1.122e+02 8.109 6.65e-16 ***
## IMP_CLAGE -1.011e+01 1.111e+00 -9.095 < 2e-16 ***
## IMP_CLNO 5.043e+01 9.918e+00 5.085 3.85e-07 ***
## FLAG.Reason.HomeImp -7.578e+02 4.839e+02 -1.566 0.117434
## FLAG.Reason.DebtCon -1.360e+02 4.764e+02 -0.286 0.775215
## M_VALUE 6.679e+03 6.613e+02 10.099 < 2e-16 ***
## M_DEROG -2.275e+03 4.189e+02 -5.430 5.94e-08 ***
## M_DELINQ -1.613e+03 5.646e+02 -2.857 0.004301 **
## M_NINQ 6.313e+02 5.118e+02 1.234 0.217440
## M_MORTDUE 1.323e+03 3.488e+02 3.794 0.000151 ***
## M_YOJ -4.643e+02 3.416e+02 -1.359 0.174244
## FLAG.Job.Self 1.078e+03 5.073e+02 2.124 0.033707 *
## IMP_YOJ -3.595e+01 1.223e+01 -2.940 0.003297 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5522 on 4148 degrees of freedom
## Multiple R-squared: 0.4188, Adjusted R-squared: 0.4161
## F-statistic: 157.3 on 19 and 4148 DF, p-value: < 2.2e-16
# Intercept-only baseline used as the lower bound of the forward search below
summary(theLower_LR)
##
## Call:
## lm(formula = TARGET_LOSS_AMT ~ 1, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2676 -2676 -2676 -2676 76311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2676.0 111.9 23.91 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7226 on 4167 degrees of freedom
# Predict the loss amount for the rows of `test` with the tree-variable model
plr_tree <- predict(tree_LR, newdata = test)
head(plr_tree)
## 2 4 8 17 18 22
## 7285.6669 11561.0068 -2270.8502 17262.7927 897.4124 3818.7355
# Root-mean-squared error of those predictions against the observed loss amount
RMSElr_tree <- sqrt(mean((test$TARGET_LOSS_AMT - plr_tree)^2))
# TREE FORWARD STEPWISE SELECTION
# Grow from the intercept-only model, letting stepAIC add terms by AIC; the
# terms of tree_LR are the upper limit of what may be added.
lrt_model <- stepAIC(
  theLower_LR,
  scope = list(lower = theLower_LR, upper = tree_LR),
  direction = "forward"
)
## Start: AIC=74070.21
## TARGET_LOSS_AMT ~ 1
##
## Df Sum of Sq RSS AIC
## + M_DEBTINC 1 4.2985e+10 1.7460e+11 73155
## + IMP_DELINQ 1 2.8566e+10 1.8902e+11 73486
## + M_VALUE 1 1.1486e+10 2.0610e+11 73846
## + IMP_DEROG 1 1.0979e+10 2.0661e+11 73856
## + LOAN 1 9.3230e+09 2.0827e+11 73890
## + IMP_DEBTINC 1 8.6779e+09 2.0891e+11 73903
## + IMP_CLAGE 1 3.2843e+09 2.1431e+11 74009
## + IMP_CLNO 1 3.0963e+09 2.1449e+11 74012
## + IMP_VALUE 1 2.4769e+09 2.1511e+11 74024
## + FLAG.Job.Self 1 1.7376e+09 2.1585e+11 74039
## + IMP_MORTDUE 1 1.1685e+09 2.1642e+11 74050
## + FLAG.Reason.DebtCon 1 1.0899e+09 2.1650e+11 74051
## + FLAG.Reason.HomeImp 1 1.0621e+09 2.1653e+11 74052
## + M_DELINQ 1 6.4479e+08 2.1694e+11 74060
## + M_DEROG 1 5.0857e+08 2.1708e+11 74062
## + M_NINQ 1 4.1887e+08 2.1717e+11 74064
## + M_YOJ 1 3.0633e+08 2.1728e+11 74066
## + M_MORTDUE 1 1.9024e+08 2.1740e+11 74069
## + IMP_YOJ 1 1.6165e+08 2.1743e+11 74069
## <none> 2.1759e+11 74070
##
## Step: AIC=73154.88
## TARGET_LOSS_AMT ~ M_DEBTINC
##
## Df Sum of Sq RSS AIC
## + IMP_DELINQ 1 1.6104e+10 1.5850e+11 72754
## + LOAN 1 1.4591e+10 1.6001e+11 72793
## + IMP_DEBTINC 1 6.3106e+09 1.6829e+11 73003
## + M_VALUE 1 5.4902e+09 1.6911e+11 73024
## + IMP_DEROG 1 4.9960e+09 1.6961e+11 73036
## + IMP_VALUE 1 3.5787e+09 1.7103e+11 73071
## + IMP_CLNO 1 3.1968e+09 1.7141e+11 73080
## + IMP_MORTDUE 1 1.9240e+09 1.7268e+11 73111
## + FLAG.Job.Self 1 1.6453e+09 1.7296e+11 73117
## + IMP_CLAGE 1 1.4958e+09 1.7311e+11 73121
## + FLAG.Reason.DebtCon 1 1.4535e+09 1.7315e+11 73122
## + FLAG.Reason.HomeImp 1 1.4260e+09 1.7318e+11 73123
## + M_DELINQ 1 4.6854e+08 1.7414e+11 73146
## + M_DEROG 1 3.4032e+08 1.7426e+11 73149
## + M_NINQ 1 3.0266e+08 1.7430e+11 73150
## + M_MORTDUE 1 2.0576e+08 1.7440e+11 73152
## + M_YOJ 1 1.2972e+08 1.7447e+11 73154
## <none> 1.7460e+11 73155
## + IMP_YOJ 1 8.3110e+07 1.7452e+11 73155
##
## Step: AIC=72753.57
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ
##
## Df Sum of Sq RSS AIC
## + LOAN 1 1.4874e+10 1.4363e+11 72345
## + IMP_DEBTINC 1 5.6023e+09 1.5290e+11 72606
## + IMP_VALUE 1 4.0549e+09 1.5445e+11 72648
## + M_VALUE 1 3.4269e+09 1.5507e+11 72664
## + M_DEROG 1 2.8353e+09 1.5567e+11 72680
## + IMP_CLAGE 1 1.9153e+09 1.5659e+11 72705
## + IMP_MORTDUE 1 1.8874e+09 1.5661e+11 72706
## + M_DELINQ 1 1.8418e+09 1.5666e+11 72707
## + IMP_DEROG 1 1.7732e+09 1.5673e+11 72709
## + FLAG.Job.Self 1 1.5504e+09 1.5695e+11 72715
## + FLAG.Reason.DebtCon 1 1.4422e+09 1.5706e+11 72717
## + IMP_CLNO 1 1.4164e+09 1.5708e+11 72718
## + M_NINQ 1 1.4129e+09 1.5709e+11 72718
## + FLAG.Reason.HomeImp 1 1.1616e+09 1.5734e+11 72725
## + M_MORTDUE 1 2.4227e+08 1.5826e+11 72749
## + IMP_YOJ 1 2.2912e+08 1.5827e+11 72750
## + M_YOJ 1 1.9667e+08 1.5830e+11 72750
## <none> 1.5850e+11 72754
##
## Step: AIC=72344.85
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN
##
## Df Sum of Sq RSS AIC
## + IMP_DEBTINC 1 4382342554 1.3924e+11 72218
## + M_VALUE 1 3040572111 1.4059e+11 72258
## + IMP_CLAGE 1 2818079413 1.4081e+11 72264
## + M_DEROG 1 1840613969 1.4179e+11 72293
## + IMP_DEROG 1 1734283481 1.4189e+11 72296
## + M_DELINQ 1 1417152320 1.4221e+11 72306
## + M_NINQ 1 901508310 1.4272e+11 72321
## + IMP_YOJ 1 859705364 1.4277e+11 72322
## + IMP_CLNO 1 794603825 1.4283e+11 72324
## + IMP_VALUE 1 630648240 1.4300e+11 72329
## + FLAG.Reason.DebtCon 1 371236657 1.4326e+11 72336
## + FLAG.Job.Self 1 367115990 1.4326e+11 72336
## + M_YOJ 1 363655615 1.4326e+11 72336
## + IMP_MORTDUE 1 338903409 1.4329e+11 72337
## + FLAG.Reason.HomeImp 1 289762507 1.4334e+11 72338
## + M_MORTDUE 1 176392369 1.4345e+11 72342
## <none> 1.4363e+11 72345
##
## Step: AIC=72217.69
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC
##
## Df Sum of Sq RSS AIC
## + M_VALUE 1 3261573768 1.3598e+11 72121
## + IMP_CLAGE 1 2442457353 1.3680e+11 72146
## + M_DEROG 1 1871059034 1.3737e+11 72163
## + IMP_DEROG 1 1681490508 1.3756e+11 72169
## + M_DELINQ 1 1207409723 1.3804e+11 72183
## + IMP_YOJ 1 671007839 1.3857e+11 72200
## + M_NINQ 1 566586131 1.3868e+11 72203
## + IMP_VALUE 1 363744397 1.3888e+11 72209
## + IMP_CLNO 1 329813904 1.3891e+11 72210
## + FLAG.Job.Self 1 321592197 1.3892e+11 72210
## + M_YOJ 1 313832007 1.3893e+11 72210
## + FLAG.Reason.HomeImp 1 257845757 1.3899e+11 72212
## + FLAG.Reason.DebtCon 1 236493592 1.3901e+11 72213
## + IMP_MORTDUE 1 104153521 1.3914e+11 72217
## <none> 1.3924e+11 72218
## + M_MORTDUE 1 5870760 1.3924e+11 72220
##
## Step: AIC=72120.9
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE
##
## Df Sum of Sq RSS AIC
## + IMP_CLAGE 1 2417174554 1.3357e+11 72048
## + M_DEROG 1 2063724215 1.3392e+11 72059
## + M_DELINQ 1 1399521757 1.3458e+11 72080
## + IMP_DEROG 1 1324850098 1.3466e+11 72082
## + IMP_YOJ 1 644362288 1.3534e+11 72103
## + M_NINQ 1 620583975 1.3536e+11 72104
## + IMP_VALUE 1 425540654 1.3556e+11 72110
## + IMP_CLNO 1 349260052 1.3563e+11 72112
## + M_YOJ 1 334603026 1.3565e+11 72113
## + FLAG.Reason.DebtCon 1 317232133 1.3567e+11 72113
## + FLAG.Job.Self 1 279790362 1.3570e+11 72114
## + FLAG.Reason.HomeImp 1 271577259 1.3571e+11 72115
## + IMP_MORTDUE 1 82929578 1.3590e+11 72120
## <none> 1.3598e+11 72121
## + M_MORTDUE 1 33675071 1.3595e+11 72122
##
## Step: AIC=72048.15
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE
##
## Df Sum of Sq RSS AIC
## + M_DEROG 1 2038154395 1.3153e+11 71986
## + M_DELINQ 1 1598723672 1.3197e+11 72000
## + IMP_DEROG 1 1112720283 1.3245e+11 72015
## + IMP_CLNO 1 981805526 1.3258e+11 72019
## + IMP_VALUE 1 785067759 1.3278e+11 72026
## + M_NINQ 1 687476699 1.3288e+11 72029
## + IMP_YOJ 1 281389216 1.3328e+11 72041
## + FLAG.Job.Self 1 230543969 1.3333e+11 72043
## + M_YOJ 1 224083673 1.3334e+11 72043
## + IMP_MORTDUE 1 204621394 1.3336e+11 72044
## + FLAG.Reason.DebtCon 1 190532848 1.3337e+11 72044
## + FLAG.Reason.HomeImp 1 176483568 1.3339e+11 72045
## <none> 1.3357e+11 72048
## + M_MORTDUE 1 21665019 1.3354e+11 72049
##
## Step: AIC=71986.05
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG
##
## Df Sum of Sq RSS AIC
## + IMP_DEROG 1 2135460095 1.2939e+11 71920
## + IMP_CLNO 1 1050943433 1.3048e+11 71955
## + IMP_VALUE 1 758647495 1.3077e+11 71964
## + IMP_YOJ 1 339256390 1.3119e+11 71977
## + FLAG.Reason.HomeImp 1 251367594 1.3128e+11 71980
## + IMP_MORTDUE 1 233447781 1.3129e+11 71981
## + FLAG.Reason.DebtCon 1 186168086 1.3134e+11 71982
## + FLAG.Job.Self 1 179278280 1.3135e+11 71982
## + M_DELINQ 1 95610280 1.3143e+11 71985
## <none> 1.3153e+11 71986
## + M_YOJ 1 32656131 1.3149e+11 71987
## + M_MORTDUE 1 13140421 1.3151e+11 71988
## + M_NINQ 1 6036669 1.3152e+11 71988
##
## Step: AIC=71919.83
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG
##
## Df Sum of Sq RSS AIC
## + IMP_CLNO 1 930428675 1.2846e+11 71892
## + IMP_VALUE 1 841534830 1.2855e+11 71895
## + IMP_MORTDUE 1 286457586 1.2911e+11 71913
## + IMP_YOJ 1 252248930 1.2914e+11 71914
## + FLAG.Reason.HomeImp 1 238236907 1.2915e+11 71914
## + M_DELINQ 1 215786383 1.2918e+11 71915
## + FLAG.Job.Self 1 197705488 1.2919e+11 71915
## + FLAG.Reason.DebtCon 1 180065494 1.2921e+11 71916
## + M_YOJ 1 73072118 1.2932e+11 71919
## <none> 1.2939e+11 71920
## + M_NINQ 1 4155585 1.2939e+11 71922
## + M_MORTDUE 1 3874039 1.2939e+11 71922
##
## Step: AIC=71891.75
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO
##
## Df Sum of Sq RSS AIC
## + IMP_VALUE 1 520419939 1.2794e+11 71877
## + IMP_YOJ 1 246282855 1.2821e+11 71886
## + FLAG.Job.Self 1 187256646 1.2827e+11 71888
## + M_DELINQ 1 161400460 1.2830e+11 71889
## + FLAG.Reason.HomeImp 1 154305048 1.2831e+11 71889
## + FLAG.Reason.DebtCon 1 96549754 1.2836e+11 71891
## + M_MORTDUE 1 87637800 1.2837e+11 71891
## + IMP_MORTDUE 1 77176790 1.2838e+11 71891
## <none> 1.2846e+11 71892
## + M_YOJ 1 17344982 1.2844e+11 71893
## + M_NINQ 1 449465 1.2846e+11 71894
##
## Step: AIC=71876.83
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE
##
## Df Sum of Sq RSS AIC
## + IMP_YOJ 1 209903284 1.2773e+11 71872
## + IMP_MORTDUE 1 199669995 1.2774e+11 71872
## + FLAG.Reason.HomeImp 1 199449417 1.2774e+11 71872
## + M_MORTDUE 1 171310165 1.2777e+11 71873
## + FLAG.Reason.DebtCon 1 134929142 1.2781e+11 71874
## + M_DELINQ 1 131241955 1.2781e+11 71875
## + FLAG.Job.Self 1 125703535 1.2782e+11 71875
## <none> 1.2794e+11 71877
## + M_YOJ 1 5175937 1.2794e+11 71879
## + M_NINQ 1 40992 1.2794e+11 71879
##
## Step: AIC=71871.98
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE +
## IMP_YOJ
##
## Df Sum of Sq RSS AIC
## + IMP_MORTDUE 1 262961061 1.2747e+11 71865
## + FLAG.Reason.HomeImp 1 181555262 1.2755e+11 71868
## + M_DELINQ 1 167151797 1.2756e+11 71869
## + M_MORTDUE 1 151757897 1.2758e+11 71869
## + FLAG.Reason.DebtCon 1 122757177 1.2761e+11 71870
## + FLAG.Job.Self 1 107630208 1.2762e+11 71870
## <none> 1.2773e+11 71872
## + M_YOJ 1 13672561 1.2772e+11 71874
## + M_NINQ 1 1565056 1.2773e+11 71874
##
## Step: AIC=71865.4
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE +
## IMP_YOJ + IMP_MORTDUE
##
## Df Sum of Sq RSS AIC
## + M_MORTDUE 1 265350493 1.2720e+11 71859
## + FLAG.Reason.HomeImp 1 188691909 1.2728e+11 71861
## + M_DELINQ 1 157320316 1.2731e+11 71862
## + FLAG.Reason.DebtCon 1 132390092 1.2734e+11 71863
## + FLAG.Job.Self 1 102522707 1.2737e+11 71864
## <none> 1.2747e+11 71865
## + M_YOJ 1 11725976 1.2746e+11 71867
## + M_NINQ 1 1959077 1.2747e+11 71867
##
## Step: AIC=71858.71
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE +
## IMP_YOJ + IMP_MORTDUE + M_MORTDUE
##
## Df Sum of Sq RSS AIC
## + FLAG.Reason.HomeImp 1 258652854 1.2694e+11 71852
## + M_DELINQ 1 234328337 1.2697e+11 71853
## + FLAG.Reason.DebtCon 1 221215436 1.2698e+11 71853
## + FLAG.Job.Self 1 94363758 1.2711e+11 71858
## + M_YOJ 1 65602818 1.2714e+11 71859
## <none> 1.2720e+11 71859
## + M_NINQ 1 13784086 1.2719e+11 71860
##
## Step: AIC=71852.23
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE +
## IMP_YOJ + IMP_MORTDUE + M_MORTDUE + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## + M_DELINQ 1 229643185 1.2671e+11 71847
## + FLAG.Job.Self 1 146413335 1.2680e+11 71849
## + M_YOJ 1 74623693 1.2687e+11 71852
## <none> 1.2694e+11 71852
## + M_NINQ 1 10140778 1.2693e+11 71854
## + FLAG.Reason.DebtCon 1 178835 1.2694e+11 71854
##
## Step: AIC=71846.68
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE +
## IMP_YOJ + IMP_MORTDUE + M_MORTDUE + FLAG.Reason.HomeImp +
## M_DELINQ
##
## Df Sum of Sq RSS AIC
## + FLAG.Job.Self 1 153257181 1.2656e+11 71844
## <none> 1.2671e+11 71847
## + M_NINQ 1 47816102 1.2667e+11 71847
## + M_YOJ 1 43983881 1.2667e+11 71847
## + FLAG.Reason.DebtCon 1 1138654 1.2671e+11 71849
##
## Step: AIC=71843.64
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC +
## M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE +
## IMP_YOJ + IMP_MORTDUE + M_MORTDUE + FLAG.Reason.HomeImp +
## M_DELINQ + FLAG.Job.Self
##
## Df Sum of Sq RSS AIC
## <none> 1.2656e+11 71844
## + M_YOJ 1 41931294 1.2652e+11 71844
## + M_NINQ 1 36078051 1.2652e+11 71844
## + FLAG.Reason.DebtCon 1 1131206 1.2656e+11 71846
# Coefficient table and fit statistics of the forward-selected model
summary(lrt_model)
##
## Call:
## lm(formula = TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN +
## IMP_DEBTINC + M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG +
## IMP_CLNO + IMP_VALUE + IMP_YOJ + IMP_MORTDUE + M_MORTDUE +
## FLAG.Reason.HomeImp + M_DELINQ + FLAG.Job.Self, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17087 -2499 -374 1587 58426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.708e+03 4.783e+02 -11.934 < 2e-16 ***
## M_DEBTINC 6.200e+03 2.209e+02 28.064 < 2e-16 ***
## IMP_DELINQ 1.854e+03 9.011e+01 20.578 < 2e-16 ***
## LOAN 1.366e-01 8.414e-03 16.228 < 2e-16 ***
## IMP_DEBTINC 1.194e+02 1.125e+01 10.613 < 2e-16 ***
## M_VALUE 6.684e+03 6.585e+02 10.150 < 2e-16 ***
## IMP_CLAGE -1.024e+01 1.104e+00 -9.282 < 2e-16 ***
## M_DEROG -2.216e+03 4.160e+02 -5.328 1.05e-07 ***
## IMP_DEROG 9.099e+02 1.120e+02 8.122 5.99e-16 ***
## IMP_CLNO 5.133e+01 9.895e+00 5.187 2.24e-07 ***
## IMP_VALUE 1.511e-02 2.774e-03 5.447 5.42e-08 ***
## IMP_YOJ -3.490e+01 1.219e+01 -2.864 0.004207 **
## IMP_MORTDUE -1.283e-02 3.522e-03 -3.642 0.000274 ***
## M_MORTDUE 1.240e+03 3.356e+02 3.695 0.000223 ***
## FLAG.Reason.HomeImp -6.171e+02 1.945e+02 -3.173 0.001520 **
## M_DELINQ -1.278e+03 4.589e+02 -2.785 0.005377 **
## FLAG.Job.Self 1.134e+03 5.057e+02 2.242 0.025014 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5522 on 4151 degrees of freedom
## Multiple R-squared: 0.4183, Adjusted R-squared: 0.4161
## F-statistic: 186.6 on 16 and 4151 DF, p-value: < 2.2e-16
# Predict the loss amount for the rows of `test` with the forward-selected model
plr_tree_step <- predict(lrt_model, newdata = test)
head(plr_tree_step)
## 2 4 8 17 18 22
## 7300.5000 11579.4541 -2273.9803 17308.8567 619.5205 3801.3538
# Root-mean-squared error of those predictions against the observed loss amount
RMSElr_tree_step <- sqrt(mean((test$TARGET_LOSS_AMT - plr_tree_step)^2))
# RMSE comparison: tree, random forest, gradient boosting and the three
# linear-regression variants, one labelled line per model.
print(paste("TREE RMSE=", RMSEt))
## [1] "TREE RMSE= 4996.32928769835"
print(paste("RF RMSE=", RMSEr))
## [1] "RF RMSE= 4020.42485050438"
print(paste("GB RMSE=", RMSEg))
## [1] "GB RMSE= 5843.09744413598"
print(paste("LR BACK RMSE=", RMSElr))
## [1] "LR BACK RMSE= 5449.94944750639"
print(paste("LR TREE RMSE=", RMSElr_tree))
## [1] "LR TREE RMSE= 5504.81050975738"
print(paste("LR TREE FORWARD STEP RMSE=", RMSElr_tree_step))
## [1] "LR TREE FORWARD STEP RMSE= 5510.20534361442"
# !!!SUMMARY & ANALYSIS!!!
# Based on the RMSE, the Random Forest method is the best.
# The Random Forest RMSE is significantly lower ($800+) than that of the other methods.
# Random Forest is recommended because of its remarkable accuracy.
# Step 4 - Probability / Severity Model
# Split copy_wk5 into roughly 70% training and 30% test rows. Every row is
# drawn independently (TRUE with probability 0.7), so the proportions are
# approximate rather than exact.
# NOTE: the logical mask is stored under the name `sample`; calls to sample()
# still reach the base function because R skips non-function bindings when it
# resolves a function call.
sample <- sample(
  c(TRUE, FALSE),
  size = nrow(copy_wk5),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- copy_wk5[sample, ]
test <- copy_wk5[!sample, ]
# Predict Bad Flag Using Logistic Regression
# Full model: every column except the loss amount as a predictor of the flag.
theUpper_LR <- glm(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  family = "binomial",
  data = train
)
# Intercept-only model, the floor of the backward search.
theLower_LR <- glm(TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
# Backward elimination by AIC between the two models above.
LR_flag <- stepAIC(
  theUpper_LR,
  scope = list(lower = theLower_LR, upper = theUpper_LR),
  direction = "backward"
)
## Start: AIC=2251.09
## TARGET_BAD_FLAG ~ (TARGET_LOSS_AMT + LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG +
## IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ +
## M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr +
## FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp) -
## TARGET_LOSS_AMT
##
## Df Deviance AIC
## - FLAG.Reason.DebtCon 1 2195.1 2249.1
## - FLAG.Reason.HomeImp 1 2195.2 2249.2
## - M_NINQ 1 2195.4 2249.4
## - LOAN 1 2195.9 2249.9
## <none> 2195.1 2251.1
## - IMP_CLNO 1 2197.5 2251.5
## - IMP_YOJ 1 2198.1 2252.1
## - M_CLAGE 1 2199.2 2253.2
## - IMP_MORTDUE 1 2199.3 2253.3
## - M_YOJ 1 2200.1 2254.1
## - IMP_VALUE 1 2201.5 2255.5
## - M_MORTDUE 1 2202.0 2256.0
## - FLAG.Job.Office 1 2202.6 2256.6
## - M_DELINQ 1 2208.2 2262.2
## - FLAG.Job.ProfExe 1 2211.6 2265.6
## - M_CLNO 1 2215.2 2269.2
## - FLAG.Job.Mgr 1 2215.3 2269.3
## - FLAG.Job.Self 1 2216.4 2270.4
## - IMP_NINQ 1 2216.8 2270.8
## - FLAG.Job.Other 1 2218.2 2272.2
## - FLAG.Job.Sales 1 2219.5 2273.5
## - M_DEROG 1 2254.7 2308.7
## - IMP_CLAGE 1 2258.1 2312.1
## - IMP_DEROG 1 2259.4 2313.4
## - IMP_DEBTINC 1 2290.8 2344.8
## - M_VALUE 1 2316.4 2370.4
## - IMP_DELINQ 1 2408.4 2462.4
## - M_DEBTINC 1 2853.2 2907.2
##
## Step: AIC=2249.1
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO +
## M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.HomeImp
##
## Df Deviance AIC
## - FLAG.Reason.HomeImp 1 2195.3 2247.3
## - M_NINQ 1 2195.4 2247.4
## - LOAN 1 2195.9 2247.9
## <none> 2195.1 2249.1
## - IMP_CLNO 1 2197.5 2249.5
## - IMP_YOJ 1 2198.1 2250.1
## - M_CLAGE 1 2199.2 2251.2
## - IMP_MORTDUE 1 2199.3 2251.3
## - M_YOJ 1 2200.2 2252.2
## - IMP_VALUE 1 2201.5 2253.5
## - M_MORTDUE 1 2202.0 2254.0
## - FLAG.Job.Office 1 2203.3 2255.3
## - M_DELINQ 1 2208.2 2260.2
## - FLAG.Job.ProfExe 1 2212.9 2264.9
## - M_CLNO 1 2215.2 2267.2
## - IMP_NINQ 1 2216.8 2268.8
## - FLAG.Job.Mgr 1 2216.8 2268.8
## - FLAG.Job.Self 1 2217.4 2269.4
## - FLAG.Job.Other 1 2220.4 2272.4
## - FLAG.Job.Sales 1 2220.7 2272.7
## - M_DEROG 1 2254.7 2306.7
## - IMP_CLAGE 1 2258.4 2310.4
## - IMP_DEROG 1 2259.4 2311.4
## - IMP_DEBTINC 1 2290.9 2342.9
## - M_VALUE 1 2317.5 2369.5
## - IMP_DELINQ 1 2408.6 2460.6
## - M_DEBTINC 1 2853.7 2905.7
##
## Step: AIC=2247.29
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO +
## M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
##
## Df Deviance AIC
## - M_NINQ 1 2195.6 2245.6
## - LOAN 1 2196.3 2246.3
## <none> 2195.3 2247.3
## - IMP_CLNO 1 2197.9 2247.9
## - IMP_YOJ 1 2198.3 2248.3
## - M_CLAGE 1 2199.3 2249.3
## - IMP_MORTDUE 1 2199.7 2249.7
## - M_YOJ 1 2200.6 2250.6
## - IMP_VALUE 1 2202.0 2252.0
## - M_MORTDUE 1 2202.8 2252.8
## - FLAG.Job.Office 1 2203.7 2253.7
## - M_DELINQ 1 2208.4 2258.4
## - FLAG.Job.ProfExe 1 2213.6 2263.6
## - M_CLNO 1 2215.8 2265.8
## - IMP_NINQ 1 2216.8 2266.8
## - FLAG.Job.Mgr 1 2217.4 2267.4
## - FLAG.Job.Self 1 2218.6 2268.6
## - FLAG.Job.Other 1 2221.2 2271.2
## - FLAG.Job.Sales 1 2221.2 2271.2
## - M_DEROG 1 2255.5 2305.5
## - IMP_CLAGE 1 2258.4 2308.4
## - IMP_DEROG 1 2260.0 2310.0
## - IMP_DEBTINC 1 2291.3 2341.3
## - M_VALUE 1 2317.7 2367.7
## - IMP_DELINQ 1 2409.1 2459.1
## - M_DEBTINC 1 2855.8 2905.8
##
## Step: AIC=2245.57
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO +
## IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
##
## Df Deviance AIC
## - LOAN 1 2196.6 2244.6
## <none> 2195.6 2245.6
## - IMP_CLNO 1 2198.2 2246.2
## - IMP_YOJ 1 2198.6 2246.6
## - M_CLAGE 1 2199.6 2247.6
## - IMP_MORTDUE 1 2199.9 2247.9
## - M_YOJ 1 2201.3 2249.3
## - IMP_VALUE 1 2202.2 2250.2
## - M_MORTDUE 1 2202.9 2250.9
## - FLAG.Job.Office 1 2203.9 2251.9
## - FLAG.Job.ProfExe 1 2213.9 2261.9
## - M_DELINQ 1 2215.7 2263.7
## - M_CLNO 1 2216.8 2264.8
## - IMP_NINQ 1 2217.4 2265.4
## - FLAG.Job.Mgr 1 2217.7 2265.7
## - FLAG.Job.Self 1 2218.6 2266.6
## - FLAG.Job.Other 1 2221.5 2269.5
## - FLAG.Job.Sales 1 2221.6 2269.6
## - M_DEROG 1 2255.8 2303.8
## - IMP_CLAGE 1 2258.5 2306.5
## - IMP_DEROG 1 2260.1 2308.1
## - IMP_DEBTINC 1 2293.6 2341.6
## - M_VALUE 1 2320.0 2368.0
## - IMP_DELINQ 1 2409.1 2457.1
## - M_DEBTINC 1 2857.6 2905.6
##
## Step: AIC=2244.59
## TARGET_BAD_FLAG ~ IMP_MORTDUE + M_MORTDUE + IMP_VALUE + M_VALUE +
## IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + M_DELINQ +
## IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC +
## M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other +
## FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
##
## Df Deviance AIC
## <none> 2196.6 2244.6
## - IMP_CLNO 1 2199.5 2245.5
## - IMP_YOJ 1 2200.1 2246.1
## - M_CLAGE 1 2200.4 2246.4
## - IMP_MORTDUE 1 2200.9 2246.9
## - IMP_VALUE 1 2202.3 2248.3
## - M_YOJ 1 2202.9 2248.9
## - M_MORTDUE 1 2203.7 2249.7
## - FLAG.Job.Office 1 2204.8 2250.8
## - FLAG.Job.ProfExe 1 2214.9 2260.9
## - IMP_NINQ 1 2217.7 2263.7
## - M_DELINQ 1 2218.1 2264.1
## - FLAG.Job.Mgr 1 2218.5 2264.5
## - FLAG.Job.Self 1 2219.0 2265.0
## - M_CLNO 1 2219.3 2265.3
## - FLAG.Job.Other 1 2222.4 2268.4
## - FLAG.Job.Sales 1 2223.2 2269.2
## - M_DEROG 1 2256.1 2302.1
## - IMP_CLAGE 1 2260.4 2306.4
## - IMP_DEROG 1 2261.0 2307.0
## - IMP_DEBTINC 1 2293.9 2339.9
## - M_VALUE 1 2320.4 2366.4
## - IMP_DELINQ 1 2411.8 2457.8
## - M_DEBTINC 1 2875.7 2921.7
# Print the coefficient table, deviances and AIC of LR_flag, the binomial glm
# for TARGET_BAD_FLAG that remains at the end of the stepwise trace above.
summary(LR_flag)
##
## Call:
## glm(formula = TARGET_BAD_FLAG ~ IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO +
## IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self,
## family = "binomial", data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -7.102e+00 6.556e-01 -10.832 < 2e-16 ***
## IMP_MORTDUE -4.207e-06 2.075e-06 -2.027 0.042664 *
## M_MORTDUE 6.703e-01 2.481e-01 2.702 0.006888 **
## IMP_VALUE 3.346e-06 1.426e-06 2.347 0.018912 *
## M_VALUE 5.384e+00 6.393e-01 8.422 < 2e-16 ***
## IMP_YOJ -1.571e-02 8.484e-03 -1.851 0.064130 .
## M_YOJ -5.722e-01 2.333e-01 -2.453 0.014187 *
## IMP_DEROG 5.418e-01 7.453e-02 7.269 3.61e-13 ***
## M_DEROG -2.293e+00 3.367e-01 -6.810 9.75e-12 ***
## IMP_DELINQ 7.816e-01 6.226e-02 12.555 < 2e-16 ***
## M_DELINQ -1.908e+00 4.495e-01 -4.244 2.20e-05 ***
## IMP_CLAGE -6.288e-03 8.306e-04 -7.571 3.71e-14 ***
## M_CLAGE 8.359e-01 4.239e-01 1.972 0.048599 *
## IMP_NINQ 1.417e-01 3.068e-02 4.620 3.83e-06 ***
## IMP_CLNO -1.099e-02 6.424e-03 -1.711 0.087086 .
## M_CLNO 3.331e+00 7.307e-01 4.558 5.15e-06 ***
## IMP_DEBTINC 9.388e-02 1.079e-02 8.700 < 2e-16 ***
## M_DEBTINC 2.812e+00 1.154e-01 24.373 < 2e-16 ***
## FLAG.Job.Mgr 2.163e+00 5.179e-01 4.177 2.95e-05 ***
## FLAG.Job.Office 1.396e+00 5.234e-01 2.667 0.007643 **
## FLAG.Job.Other 2.214e+00 5.018e-01 4.412 1.02e-05 ***
## FLAG.Job.ProfExe 1.985e+00 5.172e-01 3.838 0.000124 ***
## FLAG.Job.Sales 3.068e+00 6.220e-01 4.933 8.08e-07 ***
## FLAG.Job.Self 2.552e+00 5.797e-01 4.401 1.08e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4168.3 on 4189 degrees of freedom
## Residual deviance: 2196.6 on 4166 degrees of freedom
## AIC: 2244.6
##
## Number of Fisher Scoring iterations: 6
# Predict LOSS Using Linear Regression
# The loss model is fit only on the training loans that actually defaulted.
train_subset <- subset(train, TARGET_BAD_FLAG == 1)
# Bounds of the stepwise search: an intercept-only model (lower) and a model
# using every remaining column as a predictor (upper). TARGET_BAD_FLAG is
# constant within this subset, so the search drops it in its first step.
theLowerloss_LR <- lm(TARGET_LOSS_AMT ~ 1, data = train_subset)
theUpperloss_LR <- lm(TARGET_LOSS_AMT ~ ., data = train_subset)
# Backward elimination by AIC, starting from the full (upper) model.
LR_Loss <- stepAIC(
  theUpperloss_LR,
  scope = list(upper = theUpperloss_LR, lower = theLowerloss_LR),
  direction = "backward"
)
## Start: AIC=13598.56
## TARGET_LOSS_AMT ~ TARGET_BAD_FLAG + LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG +
## IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ +
## M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr +
## FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
##
## Step: AIC=13598.56
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO +
## M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - IMP_NINQ 1 6.0976e+05 1.0309e+10 13597
## - M_DELINQ 1 8.4722e+05 1.0309e+10 13597
## - M_VALUE 1 2.9678e+06 1.0312e+10 13597
## - FLAG.Job.Office 1 3.1046e+06 1.0312e+10 13597
## - M_YOJ 1 3.8661e+06 1.0312e+10 13597
## - FLAG.Job.Other 1 4.5477e+06 1.0313e+10 13597
## - FLAG.Job.Mgr 1 6.9592e+06 1.0316e+10 13597
## - FLAG.Job.ProfExe 1 1.1109e+07 1.0320e+10 13598
## - M_NINQ 1 1.3113e+07 1.0322e+10 13598
## - FLAG.Job.Sales 1 2.1519e+07 1.0330e+10 13598
## - M_DEROG 1 2.4887e+07 1.0333e+10 13599
## <none> 1.0309e+10 13599
## - FLAG.Job.Self 1 2.5532e+07 1.0334e+10 13599
## - FLAG.Reason.HomeImp 1 6.0094e+07 1.0369e+10 13601
## - IMP_DEROG 1 9.4108e+07 1.0403e+10 13604
## - M_MORTDUE 1 1.0032e+08 1.0409e+10 13605
## - IMP_MORTDUE 1 1.0125e+08 1.0410e+10 13605
## - M_CLNO 1 1.5775e+08 1.0466e+10 13609
## - FLAG.Reason.DebtCon 1 2.3273e+08 1.0541e+10 13615
## - IMP_VALUE 1 2.3457e+08 1.0543e+10 13615
## - IMP_YOJ 1 4.0292e+08 1.0711e+10 13628
## - M_CLAGE 1 4.6109e+08 1.0770e+10 13633
## - IMP_DEBTINC 1 1.1473e+09 1.1456e+10 13684
## - IMP_DELINQ 1 1.6127e+09 1.1921e+10 13717
## - IMP_CLAGE 1 1.9883e+09 1.2297e+10 13743
## - IMP_CLNO 1 3.2159e+09 1.3524e+10 13822
## - M_DEBTINC 1 5.7999e+09 1.6108e+10 13967
## - LOAN 1 5.0514e+10 6.0822e+10 15068
##
## Step: AIC=13596.61
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## M_DELINQ + IMP_CLAGE + M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO +
## IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office +
## FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - M_DELINQ 1 8.1916e+05 1.0310e+10 13595
## - FLAG.Job.Office 1 3.1771e+06 1.0312e+10 13595
## - M_VALUE 1 3.1811e+06 1.0312e+10 13595
## - M_YOJ 1 3.7335e+06 1.0313e+10 13595
## - FLAG.Job.Other 1 4.6211e+06 1.0314e+10 13595
## - FLAG.Job.Mgr 1 7.1038e+06 1.0316e+10 13595
## - FLAG.Job.ProfExe 1 1.1259e+07 1.0320e+10 13596
## - M_NINQ 1 1.3370e+07 1.0323e+10 13596
## - FLAG.Job.Sales 1 2.1297e+07 1.0330e+10 13596
## <none> 1.0309e+10 13597
## - M_DEROG 1 2.4909e+07 1.0334e+10 13597
## - FLAG.Job.Self 1 2.5308e+07 1.0334e+10 13597
## - FLAG.Reason.HomeImp 1 5.9645e+07 1.0369e+10 13599
## - IMP_DEROG 1 9.6471e+07 1.0406e+10 13602
## - M_MORTDUE 1 9.9727e+07 1.0409e+10 13603
## - IMP_MORTDUE 1 1.0217e+08 1.0411e+10 13603
## - M_CLNO 1 1.5916e+08 1.0468e+10 13607
## - FLAG.Reason.DebtCon 1 2.3281e+08 1.0542e+10 13613
## - IMP_VALUE 1 2.3559e+08 1.0545e+10 13613
## - IMP_YOJ 1 4.0791e+08 1.0717e+10 13627
## - M_CLAGE 1 4.6816e+08 1.0777e+10 13631
## - IMP_DEBTINC 1 1.1467e+09 1.1456e+10 13682
## - IMP_DELINQ 1 1.6123e+09 1.1921e+10 13715
## - IMP_CLAGE 1 2.0135e+09 1.2323e+10 13742
## - IMP_CLNO 1 3.2323e+09 1.3541e+10 13821
## - M_DEBTINC 1 5.8622e+09 1.6171e+10 13968
## - LOAN 1 5.0645e+10 6.0954e+10 15068
##
## Step: AIC=13594.68
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## IMP_CLAGE + M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC +
## M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other +
## FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon +
## FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - M_VALUE 1 2.9232e+06 1.0313e+10 13593
## - FLAG.Job.Office 1 3.1345e+06 1.0313e+10 13593
## - M_YOJ 1 3.8521e+06 1.0314e+10 13593
## - FLAG.Job.Other 1 4.5520e+06 1.0315e+10 13593
## - FLAG.Job.Mgr 1 6.9977e+06 1.0317e+10 13593
## - FLAG.Job.ProfExe 1 1.1172e+07 1.0321e+10 13594
## - M_NINQ 1 1.3582e+07 1.0324e+10 13594
## - FLAG.Job.Sales 1 2.2234e+07 1.0332e+10 13594
## <none> 1.0310e+10 13595
## - FLAG.Job.Self 1 2.5351e+07 1.0335e+10 13595
## - M_DEROG 1 2.9633e+07 1.0340e+10 13595
## - FLAG.Reason.HomeImp 1 5.9738e+07 1.0370e+10 13598
## - IMP_DEROG 1 9.6785e+07 1.0407e+10 13600
## - M_MORTDUE 1 9.9236e+07 1.0409e+10 13601
## - IMP_MORTDUE 1 1.0186e+08 1.0412e+10 13601
## - M_CLNO 1 2.1909e+08 1.0529e+10 13610
## - FLAG.Reason.DebtCon 1 2.3299e+08 1.0543e+10 13611
## - IMP_VALUE 1 2.3478e+08 1.0545e+10 13611
## - IMP_YOJ 1 4.0805e+08 1.0718e+10 13625
## - M_CLAGE 1 4.6887e+08 1.0779e+10 13630
## - IMP_DEBTINC 1 1.1462e+09 1.1456e+10 13680
## - IMP_DELINQ 1 1.6131e+09 1.1923e+10 13713
## - IMP_CLAGE 1 2.0135e+09 1.2324e+10 13741
## - IMP_CLNO 1 3.2317e+09 1.3542e+10 13819
## - M_DEBTINC 1 5.8673e+09 1.6177e+10 13966
## - LOAN 1 5.0699e+10 6.1009e+10 15067
##
## Step: AIC=13592.91
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC +
## FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe +
## FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - FLAG.Job.Office 1 3.0359e+06 1.0316e+10 13591
## - M_YOJ 1 4.0598e+06 1.0317e+10 13591
## - FLAG.Job.Other 1 4.3675e+06 1.0317e+10 13591
## - FLAG.Job.Mgr 1 6.8565e+06 1.0320e+10 13592
## - FLAG.Job.ProfExe 1 1.0893e+07 1.0324e+10 13592
## - M_NINQ 1 1.3094e+07 1.0326e+10 13592
## - FLAG.Job.Sales 1 2.2678e+07 1.0336e+10 13593
## <none> 1.0313e+10 13593
## - FLAG.Job.Self 1 2.5772e+07 1.0339e+10 13593
## - M_DEROG 1 2.7925e+07 1.0341e+10 13593
## - FLAG.Reason.HomeImp 1 6.2237e+07 1.0375e+10 13596
## - IMP_DEROG 1 9.5082e+07 1.0408e+10 13598
## - IMP_MORTDUE 1 9.9413e+07 1.0412e+10 13599
## - M_MORTDUE 1 1.0060e+08 1.0414e+10 13599
## - M_CLNO 1 2.1826e+08 1.0531e+10 13608
## - IMP_VALUE 1 2.3188e+08 1.0545e+10 13609
## - FLAG.Reason.DebtCon 1 2.3900e+08 1.0552e+10 13610
## - IMP_YOJ 1 4.1145e+08 1.0724e+10 13623
## - M_CLAGE 1 4.6637e+08 1.0779e+10 13628
## - IMP_DEBTINC 1 1.1610e+09 1.1474e+10 13679
## - IMP_DELINQ 1 1.6111e+09 1.1924e+10 13711
## - IMP_CLAGE 1 2.0217e+09 1.2335e+10 13739
## - IMP_CLNO 1 3.2478e+09 1.3561e+10 13818
## - M_DEBTINC 1 5.8670e+09 1.6180e+10 13964
## - LOAN 1 5.1144e+10 6.1457e+10 15071
##
## Step: AIC=13591.15
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC +
## FLAG.Job.Mgr + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - FLAG.Job.Other 1 1.5954e+06 1.0318e+10 13589
## - M_YOJ 1 5.4362e+06 1.0321e+10 13590
## - FLAG.Job.Mgr 1 5.9400e+06 1.0322e+10 13590
## - M_NINQ 1 1.3705e+07 1.0330e+10 13590
## - FLAG.Job.ProfExe 1 1.6310e+07 1.0332e+10 13590
## <none> 1.0316e+10 13591
## - M_DEROG 1 2.7752e+07 1.0344e+10 13591
## - FLAG.Reason.HomeImp 1 5.9323e+07 1.0375e+10 13594
## - FLAG.Job.Sales 1 8.6207e+07 1.0402e+10 13596
## - M_MORTDUE 1 9.8109e+07 1.0414e+10 13597
## - IMP_DEROG 1 9.9756e+07 1.0416e+10 13597
## - IMP_MORTDUE 1 9.9906e+07 1.0416e+10 13597
## - FLAG.Job.Self 1 1.3831e+08 1.0454e+10 13600
## - M_CLNO 1 2.2521e+08 1.0541e+10 13607
## - IMP_VALUE 1 2.3068e+08 1.0547e+10 13608
## - FLAG.Reason.DebtCon 1 2.4079e+08 1.0557e+10 13608
## - IMP_YOJ 1 4.2274e+08 1.0739e+10 13622
## - M_CLAGE 1 4.6692e+08 1.0783e+10 13626
## - IMP_DEBTINC 1 1.1583e+09 1.1474e+10 13677
## - IMP_DELINQ 1 1.6084e+09 1.1924e+10 13709
## - IMP_CLAGE 1 2.0192e+09 1.2335e+10 13737
## - IMP_CLNO 1 3.2461e+09 1.3562e+10 13816
## - M_DEBTINC 1 5.8766e+09 1.6193e+10 13963
## - LOAN 1 5.1170e+10 6.1486e+10 15069
##
## Step: AIC=13589.28
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC +
## FLAG.Job.Mgr + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self +
## FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - FLAG.Job.Mgr 1 4.5908e+06 1.0322e+10 13588
## - M_YOJ 1 5.1221e+06 1.0323e+10 13588
## - M_NINQ 1 1.3716e+07 1.0331e+10 13588
## - FLAG.Job.ProfExe 1 1.8408e+07 1.0336e+10 13589
## <none> 1.0318e+10 13589
## - M_DEROG 1 2.7930e+07 1.0345e+10 13590
## - FLAG.Reason.HomeImp 1 5.8585e+07 1.0376e+10 13592
## - M_MORTDUE 1 9.9712e+07 1.0417e+10 13595
## - IMP_DEROG 1 1.0093e+08 1.0418e+10 13595
## - IMP_MORTDUE 1 1.0151e+08 1.0419e+10 13595
## - FLAG.Job.Sales 1 1.1004e+08 1.0428e+10 13596
## - FLAG.Job.Self 1 1.9710e+08 1.0515e+10 13603
## - M_CLNO 1 2.2781e+08 1.0545e+10 13605
## - IMP_VALUE 1 2.3084e+08 1.0548e+10 13606
## - FLAG.Reason.DebtCon 1 2.4022e+08 1.0558e+10 13606
## - IMP_YOJ 1 4.2336e+08 1.0741e+10 13621
## - M_CLAGE 1 4.6830e+08 1.0786e+10 13624
## - IMP_DEBTINC 1 1.1592e+09 1.1477e+10 13676
## - IMP_DELINQ 1 1.6268e+09 1.1944e+10 13709
## - IMP_CLAGE 1 2.0189e+09 1.2336e+10 13735
## - IMP_CLNO 1 3.2474e+09 1.3565e+10 13814
## - M_DEBTINC 1 5.8750e+09 1.6193e+10 13961
## - LOAN 1 5.1250e+10 6.1568e+10 15068
##
## Step: AIC=13587.65
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC +
## FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon +
## FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - M_YOJ 1 6.0563e+06 1.0328e+10 13586
## - FLAG.Job.ProfExe 1 1.4775e+07 1.0337e+10 13587
## - M_NINQ 1 1.4799e+07 1.0337e+10 13587
## <none> 1.0322e+10 13588
## - M_DEROG 1 2.9931e+07 1.0352e+10 13588
## - FLAG.Reason.HomeImp 1 5.8002e+07 1.0380e+10 13590
## - M_MORTDUE 1 9.7574e+07 1.0420e+10 13594
## - IMP_MORTDUE 1 9.8132e+07 1.0420e+10 13594
## - IMP_DEROG 1 9.8651e+07 1.0421e+10 13594
## - FLAG.Job.Sales 1 1.1705e+08 1.0439e+10 13595
## - FLAG.Job.Self 1 2.1021e+08 1.0532e+10 13602
## - M_CLNO 1 2.2937e+08 1.0552e+10 13604
## - IMP_VALUE 1 2.3161e+08 1.0554e+10 13604
## - FLAG.Reason.DebtCon 1 2.3908e+08 1.0561e+10 13605
## - IMP_YOJ 1 4.2604e+08 1.0748e+10 13619
## - M_CLAGE 1 4.7005e+08 1.0792e+10 13623
## - IMP_DEBTINC 1 1.1592e+09 1.1481e+10 13674
## - IMP_DELINQ 1 1.6269e+09 1.1949e+10 13707
## - IMP_CLAGE 1 2.0149e+09 1.2337e+10 13734
## - IMP_CLNO 1 3.2535e+09 1.3576e+10 13813
## - M_DEBTINC 1 5.8748e+09 1.6197e+10 13959
## - LOAN 1 5.1322e+10 6.1645e+10 15067
##
## Step: AIC=13586.14
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC +
## FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon +
## FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - M_NINQ 1 1.1730e+07 1.0340e+10 13585
## - FLAG.Job.ProfExe 1 1.6347e+07 1.0345e+10 13585
## <none> 1.0328e+10 13586
## - M_DEROG 1 2.9684e+07 1.0358e+10 13586
## - FLAG.Reason.HomeImp 1 5.6273e+07 1.0384e+10 13589
## - M_MORTDUE 1 9.2571e+07 1.0421e+10 13592
## - IMP_MORTDUE 1 9.6484e+07 1.0425e+10 13592
## - IMP_DEROG 1 9.6980e+07 1.0425e+10 13592
## - FLAG.Job.Sales 1 1.1532e+08 1.0444e+10 13593
## - FLAG.Job.Self 1 2.1104e+08 1.0539e+10 13601
## - M_CLNO 1 2.2619e+08 1.0554e+10 13602
## - IMP_VALUE 1 2.3377e+08 1.0562e+10 13603
## - FLAG.Reason.DebtCon 1 2.3652e+08 1.0565e+10 13603
## - IMP_YOJ 1 4.2889e+08 1.0757e+10 13618
## - M_CLAGE 1 4.7533e+08 1.0804e+10 13621
## - IMP_DEBTINC 1 1.1598e+09 1.1488e+10 13672
## - IMP_DELINQ 1 1.6224e+09 1.1951e+10 13705
## - IMP_CLAGE 1 2.0091e+09 1.2337e+10 13732
## - IMP_CLNO 1 3.2511e+09 1.3579e+10 13811
## - M_DEBTINC 1 5.8732e+09 1.6201e+10 13957
## - LOAN 1 5.1444e+10 6.1772e+10 15067
##
## Step: AIC=13585.08
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.ProfExe +
## FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## - FLAG.Job.ProfExe 1 1.6402e+07 1.0356e+10 13584
## <none> 1.0340e+10 13585
## - M_DEROG 1 2.8174e+07 1.0368e+10 13585
## - FLAG.Reason.HomeImp 1 5.6391e+07 1.0396e+10 13588
## - M_MORTDUE 1 8.9116e+07 1.0429e+10 13590
## - IMP_MORTDUE 1 9.5453e+07 1.0435e+10 13591
## - IMP_DEROG 1 9.6820e+07 1.0437e+10 13591
## - FLAG.Job.Sales 1 1.1628e+08 1.0456e+10 13592
## - FLAG.Job.Self 1 2.1606e+08 1.0556e+10 13600
## - IMP_VALUE 1 2.3838e+08 1.0578e+10 13602
## - FLAG.Reason.DebtCon 1 2.4090e+08 1.0581e+10 13602
## - M_CLNO 1 2.6088e+08 1.0601e+10 13604
## - IMP_YOJ 1 4.2197e+08 1.0762e+10 13616
## - M_CLAGE 1 4.7183e+08 1.0812e+10 13620
## - IMP_DEBTINC 1 1.1693e+09 1.1509e+10 13672
## - IMP_DELINQ 1 1.6163e+09 1.1956e+10 13704
## - IMP_CLAGE 1 1.9995e+09 1.2339e+10 13730
## - IMP_CLNO 1 3.2710e+09 1.3611e+10 13811
## - M_DEBTINC 1 5.8850e+09 1.6225e+10 13957
## - LOAN 1 5.1499e+10 6.1839e+10 15066
##
## Step: AIC=13584.39
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE +
## IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE +
## M_CLAGE + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Sales +
## FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
##
## Df Sum of Sq RSS AIC
## <none> 1.0356e+10 13584
## - M_DEROG 1 3.0998e+07 1.0387e+10 13585
## - FLAG.Reason.HomeImp 1 5.3866e+07 1.0410e+10 13587
## - M_MORTDUE 1 8.3528e+07 1.0440e+10 13589
## - IMP_MORTDUE 1 8.5303e+07 1.0442e+10 13589
## - IMP_DEROG 1 9.5960e+07 1.0452e+10 13590
## - FLAG.Job.Sales 1 1.2538e+08 1.0482e+10 13592
## - FLAG.Job.Self 1 2.3628e+08 1.0593e+10 13601
## - FLAG.Reason.DebtCon 1 2.4112e+08 1.0597e+10 13602
## - IMP_VALUE 1 2.4391e+08 1.0600e+10 13602
## - M_CLNO 1 2.5435e+08 1.0611e+10 13602
## - IMP_YOJ 1 4.3694e+08 1.0793e+10 13617
## - M_CLAGE 1 4.6531e+08 1.0822e+10 13619
## - IMP_DEBTINC 1 1.1604e+09 1.1517e+10 13670
## - IMP_DELINQ 1 1.6242e+09 1.1980e+10 13703
## - IMP_CLAGE 1 2.0759e+09 1.2432e+10 13734
## - IMP_CLNO 1 3.2580e+09 1.3614e+10 13809
## - M_DEBTINC 1 5.8692e+09 1.6225e+10 13955
## - LOAN 1 5.1573e+10 6.1929e+10 15065
# Print the residual summary, coefficient table and R-squared of LR_Loss, the
# linear model for TARGET_LOSS_AMT selected by the backward stepAIC search.
summary(LR_Loss)
##
## Call:
## lm(formula = TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE +
## IMP_VALUE + IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ +
## IMP_CLAGE + M_CLAGE + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC +
## FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp,
## data = train_subset)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22411.0 -1149.3 121.5 1711.2 13821.3
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.202e+04 9.388e+02 -12.804 < 2e-16 ***
## LOAN 8.057e-01 1.269e-02 63.511 < 2e-16 ***
## IMP_MORTDUE 9.825e-03 3.804e-03 2.583 0.00997 **
## M_MORTDUE -1.224e+03 4.789e+02 -2.556 0.01077 *
## IMP_VALUE -1.173e-02 2.686e-03 -4.368 1.42e-05 ***
## IMP_YOJ -1.094e+02 1.871e+01 -5.846 7.30e-09 ***
## IMP_DEROG 2.377e+02 8.677e+01 2.740 0.00629 **
## M_DEROG 1.159e+03 7.444e+02 1.557 0.11985
## IMP_DELINQ 7.942e+02 7.046e+01 11.271 < 2e-16 ***
## IMP_CLAGE -2.130e+01 1.672e+00 -12.742 < 2e-16 ***
## M_CLAGE -5.753e+03 9.536e+02 -6.033 2.45e-09 ***
## IMP_CLNO 2.032e+02 1.273e+01 15.963 < 2e-16 ***
## M_CLNO 6.065e+03 1.360e+03 4.460 9.34e-06 ***
## IMP_DEBTINC 1.342e+02 1.409e+01 9.527 < 2e-16 ***
## M_DEBTINC 5.868e+03 2.739e+02 21.425 < 2e-16 ***
## FLAG.Job.Sales 2.567e+03 8.196e+02 3.131 0.00180 **
## FLAG.Job.Self 2.505e+03 5.826e+02 4.299 1.93e-05 ***
## FLAG.Reason.DebtCon 2.718e+03 6.260e+02 4.343 1.59e-05 ***
## FLAG.Reason.HomeImp 1.339e+03 6.522e+02 2.053 0.04043 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3576 on 810 degrees of freedom
## Multiple R-squared: 0.9033, Adjusted R-squared: 0.9011
## F-statistic: 420.1 on 18 and 810 DF, p-value: < 2.2e-16
# Predictions
# predict() on a binomial glm returns the linear predictor (log-odds) unless
# type = "response" is given; the probability scale is needed both for an RMSE
# against the 0/1 flag and for the probability * severity product below.
P_Badflag <- predict(LR_flag, newdata = test, type = "response")
P_Loss <- predict(LR_Loss, newdata = test)
# Calculate RMSE for individual predictions
RMSE_Badflag <- sqrt(mean((test$TARGET_BAD_FLAG - P_Badflag)^2))
# NOTE(review): LR_Loss was fit on train_subset (TARGET_BAD_FLAG == 1 only),
# but this RMSE is taken over every row of test, not just the defaulted ones.
RMSE_Loss <- sqrt(mean((test$TARGET_LOSS_AMT - P_Loss)^2))
print(paste("Bad Flag Prediction RMSE =", RMSE_Badflag))
## (output not refreshed: the 3.29460273953523 recorded earlier was computed from log-odds predictions)
print(paste("Loss Prediction RMSE =", RMSE_Loss))
## [1] "Loss Prediction RMSE = 12227.0972186223"
# Multiply predictions: predicted probability of default * predicted loss
P_Multiply <- P_Badflag * P_Loss
# Calculate RMSE for multiplied predictions
RMSE_Multiply <- sqrt(mean((test$TARGET_LOSS_AMT - P_Multiply)^2))
print(paste("Regression Method Multiplied RMSE =", RMSE_Multiply))
## (output not refreshed: the 45142.2425164878 recorded earlier multiplied log-odds, not probabilities, by the loss)
# !!! SUMMARY & ANALYSIS !!!
# The loss-prediction RMSE is very large, which indicates substantial error.
# The regular regression method was recommended for simplicity and accuracy.
# NOTE(review): that recommendation was based on RMSE values obtained from
# log-odds flag predictions; re-run this step and confirm it still holds.