# Analysis and Summary are included at the end of each step!
# Analysis and Summary are included at the end of each step!
# Analysis and Summary are included at the end of each step!

# Packages Preparation
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
library(gbm)
## Loaded gbm 2.1.9
## This version of gbm is no longer under development. Consider transitioning to gbm3, https://github.com/gbm-developers/gbm3
library(rpart)
library( rpart.plot )
library( ROCR )
library(MASS)

# Step 1 - Reading Data and Models from Week5
# The CSV location may be supplied through the WK5_DATA_PATH environment
# variable so the script can also run unattended (Rscript, knitr). When the
# variable is unset or empty, fall back to the interactive file picker.
wk5_path <- Sys.getenv("WK5_DATA_PATH", unset = "")
if (!nzchar(wk5_path)) {
  wk5_path <- file.choose()
}
wk5 <- read.csv(wk5_path)
# Show the column types and a preview of the first values of every column.
str(wk5)
## 'data.frame':    5960 obs. of  29 variables:
##  $ TARGET_BAD_FLAG    : int  1 1 1 1 0 1 1 1 1 1 ...
##  $ TARGET_LOSS_AMT    : int  641 1109 767 1425 0 335 1841 373 1217 1523 ...
##  $ LOAN               : int  1100 1300 1500 1500 1700 1700 1800 1800 2000 2000 ...
##  $ IMP_MORTDUE        : num  25860 70053 13500 65000 97800 ...
##  $ M_MORTDUE          : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ IMP_VALUE          : num  39025 68400 16700 89000 112000 ...
##  $ M_VALUE            : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_YOJ            : num  10.5 7 4 7 3 9 5 11 3 16 ...
##  $ M_YOJ              : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_DEROG          : int  0 0 0 1 0 0 3 0 0 0 ...
##  $ M_DEROG            : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_DELINQ         : int  0 2 0 1 0 0 2 0 2 0 ...
##  $ M_DELINQ           : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_CLAGE          : num  94.4 121.8 149.5 174 93.3 ...
##  $ M_CLAGE            : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_NINQ           : int  1 0 1 1 0 1 1 0 1 0 ...
##  $ M_NINQ             : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_CLNO           : int  9 14 10 20 14 8 17 8 12 13 ...
##  $ M_CLNO             : int  0 0 0 1 0 0 0 0 0 0 ...
##  $ IMP_DEBTINC        : num  35 35 35 35 35 ...
##  $ M_DEBTINC          : int  1 1 1 1 1 0 1 0 1 1 ...
##  $ FLAG.Job.Mgr       : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG.Job.Office    : int  0 0 0 0 1 0 0 0 0 0 ...
##  $ FLAG.Job.Other     : int  1 1 1 0 0 1 1 1 1 0 ...
##  $ FLAG.Job.ProfExe   : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG.Job.Sales     : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ FLAG.Job.Self      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG.Reason.DebtCon: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FLAG.Reason.HomeImp: int  1 1 1 0 1 1 1 1 1 1 ...
# Per-column min / quartiles / median / mean / max as a quick sanity check.
summary(wk5)
##  TARGET_BAD_FLAG  TARGET_LOSS_AMT      LOAN        IMP_MORTDUE    
##  Min.   :0.0000   Min.   :    0   Min.   : 1100   Min.   :  2063  
##  1st Qu.:0.0000   1st Qu.:    0   1st Qu.:11100   1st Qu.: 48139  
##  Median :0.0000   Median :    0   Median :16300   Median : 65000  
##  Mean   :0.1995   Mean   : 2676   Mean   :18608   Mean   : 72999  
##  3rd Qu.:0.0000   3rd Qu.:    0   3rd Qu.:23300   3rd Qu.: 88200  
##  Max.   :1.0000   Max.   :78987   Max.   :89900   Max.   :399550  
##    M_MORTDUE         IMP_VALUE         M_VALUE           IMP_YOJ      
##  Min.   :0.00000   Min.   :  8000   Min.   :0.00000   Min.   : 0.000  
##  1st Qu.:0.00000   1st Qu.: 66490   1st Qu.:0.00000   1st Qu.: 3.000  
##  Median :0.00000   Median : 89000   Median :0.00000   Median : 7.000  
##  Mean   :0.08691   Mean   :101536   Mean   :0.01879   Mean   : 8.756  
##  3rd Qu.:0.00000   3rd Qu.:119005   3rd Qu.:0.00000   3rd Qu.:12.000  
##  Max.   :1.00000   Max.   :855909   Max.   :1.00000   Max.   :41.000  
##      M_YOJ           IMP_DEROG          M_DEROG         IMP_DELINQ    
##  Min.   :0.00000   Min.   : 0.0000   Min.   :0.0000   Min.   : 0.000  
##  1st Qu.:0.00000   1st Qu.: 0.0000   1st Qu.:0.0000   1st Qu.: 0.000  
##  Median :0.00000   Median : 0.0000   Median :0.0000   Median : 0.000  
##  Mean   :0.08641   Mean   : 0.3431   Mean   :0.1188   Mean   : 0.503  
##  3rd Qu.:0.00000   3rd Qu.: 0.0000   3rd Qu.:0.0000   3rd Qu.: 1.000  
##  Max.   :1.00000   Max.   :10.0000   Max.   :1.0000   Max.   :15.000  
##     M_DELINQ         IMP_CLAGE         M_CLAGE           IMP_NINQ    
##  Min.   :0.00000   Min.   :   0.0   Min.   :0.00000   Min.   : 0.00  
##  1st Qu.:0.00000   1st Qu.: 117.4   1st Qu.:0.00000   1st Qu.: 0.00  
##  Median :0.00000   Median : 174.0   Median :0.00000   Median : 1.00  
##  Mean   :0.09732   Mean   : 179.5   Mean   :0.05168   Mean   : 1.17  
##  3rd Qu.:0.00000   3rd Qu.: 227.1   3rd Qu.:0.00000   3rd Qu.: 2.00  
##  Max.   :1.00000   Max.   :1168.2   Max.   :1.00000   Max.   :17.00  
##      M_NINQ           IMP_CLNO         M_CLNO         IMP_DEBTINC      
##  Min.   :0.00000   Min.   : 0.00   Min.   :0.00000   Min.   :  0.5245  
##  1st Qu.:0.00000   1st Qu.:15.00   1st Qu.:0.00000   1st Qu.: 30.7632  
##  Median :0.00000   Median :20.00   Median :0.00000   Median : 35.0000  
##  Mean   :0.08557   Mean   :21.25   Mean   :0.03725   Mean   : 34.0393  
##  3rd Qu.:0.00000   3rd Qu.:26.00   3rd Qu.:0.00000   3rd Qu.: 37.9499  
##  Max.   :1.00000   Max.   :71.00   Max.   :1.00000   Max.   :203.3122  
##    M_DEBTINC       FLAG.Job.Mgr    FLAG.Job.Office  FLAG.Job.Other  
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.2126   Mean   :0.1287   Mean   :0.1591   Mean   :0.4007  
##  3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##  FLAG.Job.ProfExe FLAG.Job.Sales    FLAG.Job.Self     FLAG.Reason.DebtCon
##  Min.   :0.0000   Min.   :0.00000   Min.   :0.00000   Min.   :0.0000     
##  1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.0000     
##  Median :0.0000   Median :0.00000   Median :0.00000   Median :1.0000     
##  Mean   :0.2141   Mean   :0.01829   Mean   :0.03238   Mean   :0.6591     
##  3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:1.0000     
##  Max.   :1.0000   Max.   :1.00000   Max.   :1.00000   Max.   :1.0000     
##  FLAG.Reason.HomeImp
##  Min.   :0.0000     
##  1st Qu.:0.0000     
##  Median :0.0000     
##  Mean   :0.2987     
##  3rd Qu.:1.0000     
##  Max.   :1.0000
# Peek at the first rows, then continue on a copy of the loaded data frame.
head(wk5)
copy_wk5 <- wk5

# Cap every rpart tree at a maximum depth of 10.
tree_depth <- rpart.control(maxdepth = 10)
# Fix the RNG seed so the random split below can be reproduced.
set.seed(1)

# Flag each row independently as training (TRUE, probability 0.7) or test
# (FALSE, probability 0.3); the split is therefore only approximately 70/30.
sample <- sample(
  c(TRUE, FALSE),
  nrow(copy_wk5),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- copy_wk5[sample, ]
test <- copy_wk5[!sample, ]


# Step 2 - Logistic Regression

# Models from Week5 -

# Tree
# Classification tree for TARGET_BAD_FLAG on every column except
# TARGET_LOSS_AMT, splitting on the information criterion.
tr_model <- rpart(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  data = train,
  method = "class",
  parms = list(split = "information"),
  control = tree_depth
)
rpart.plot(tr_model)

# Importance scores of the variables reported by the fitted tree.
tr_model$variable.importance
##   M_DEBTINC IMP_DEBTINC  IMP_DELINQ   IMP_CLAGE        LOAN     M_VALUE 
##  533.397481  134.588883   46.494397   30.749923   24.521888   22.199895 
##   IMP_VALUE IMP_MORTDUE    IMP_CLNO     IMP_YOJ 
##    7.967967    5.783975    2.459994    2.090995
# Score the test set with the tree and build its ROC curve; column 2 of the
# class-probability matrix is used as the score.
pt <- predict(tr_model, newdata = test, type = "prob")
pt2 <- prediction(pt[, 2], test$TARGET_BAD_FLAG)
pt3 <- performance(pt2, measure = "tpr", x.measure = "fpr")

# Random Forest
# NOTE(review): TARGET_BAD_FLAG is stored as an integer, so randomForest fits
# a regression forest here (hence the warning below and the %IncMSE column in
# the importance table). Its numeric predictions are used as scores for the
# ROC curve further down.
rf_model <- randomForest(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  data = train,
  ntree = 500,
  importance = TRUE
)
## Warning in randomForest.default(m, y, ...): The response has five or fewer
## unique values.  Are you sure you want to do regression?
importance(rf_model)
##                       %IncMSE IncNodePurity
## LOAN                44.967305     38.993106
## IMP_MORTDUE         41.789344     35.266432
## M_MORTDUE           13.172790      2.349943
## IMP_VALUE           42.571780     43.056864
## M_VALUE             59.462869     18.739138
## IMP_YOJ             40.432069     31.086101
## M_YOJ               14.199750      2.944031
## IMP_DEROG           45.717324     21.754538
## M_DEROG             18.473874      4.951353
## IMP_DELINQ          81.884902     48.238429
## M_DELINQ             8.547209      2.107158
## IMP_CLAGE           62.739262     55.559885
## M_CLAGE             11.726470      1.840701
## IMP_NINQ            34.906103     17.852157
## M_NINQ              12.162285      2.082765
## IMP_CLNO            53.442201     37.694496
## M_CLNO              11.739013      1.103966
## IMP_DEBTINC         35.982961    104.953372
## M_DEBTINC           38.536277    130.591066
## FLAG.Job.Mgr        17.040006      3.765830
## FLAG.Job.Office     17.467948      5.148490
## FLAG.Job.Other      18.895132      5.102794
## FLAG.Job.ProfExe    15.455256      3.935220
## FLAG.Job.Sales      23.675630      3.364372
## FLAG.Job.Self       12.098948      1.883075
## FLAG.Reason.DebtCon 15.811019      3.814255
## FLAG.Reason.HomeImp 14.617020      3.908644
varImpPlot(rf_model)

# Score the test set with the forest and build its ROC curve.
pr <- predict(rf_model, newdata = test)
head(pr)
##         4         6         7        15        17        18 
## 0.7966524 0.8313333 0.9241667 0.7746333 0.9212667 0.3462000
pr2 <- prediction(pr, test$TARGET_BAD_FLAG)
pr3 <- performance(pr2, measure = "tpr", x.measure = "fpr")

# Gradient Boosting
# Boosted model with a Bernoulli distribution and 500 trees for the flag.
gb_model <- gbm(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  distribution = "bernoulli",
  data = train,
  n.trees = 500
)
# Generic summary() dispatches to the gbm method; cBars limits the plot to
# 10 bars.
summary(gb_model, cBars = 10)
# Predicted probabilities on the test set, then the ROC curve.
pg <- predict(gb_model, newdata = test, type = "response")
## Using 500 trees...
head(pg)
## [1] 0.9666599 0.4323602 0.9940194 0.8066121 0.9980174 0.9721196
pg2 <- prediction(pg, test$TARGET_BAD_FLAG)
pg3 <- performance(pg2, measure = "tpr", x.measure = "fpr")

# LOGISTIC REGRESSION Model

# Full model: every column except TARGET_LOSS_AMT as a predictor.
theUpper_LR <- glm(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  family = "binomial",
  data = train
)
# Intercept-only model, used as the lower bound for stepwise selection.
theLower_LR <- glm(TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
summary(theUpper_LR)
## 
## Call:
## glm(formula = TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT, family = "binomial", 
##     data = train)
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -6.817e+00  6.558e-01 -10.395  < 2e-16 ***
## LOAN                -8.996e-06  5.694e-06  -1.580 0.114122    
## IMP_MORTDUE         -3.482e-06  2.092e-06  -1.664 0.096053 .  
## M_MORTDUE            3.647e-01  2.491e-01   1.464 0.143231    
## IMP_VALUE            4.356e-06  1.495e-06   2.914 0.003572 ** 
## M_VALUE              4.945e+00  6.167e-01   8.017 1.08e-15 ***
## IMP_YOJ             -2.010e-02  8.271e-03  -2.430 0.015103 *  
## M_YOJ               -7.720e-01  2.427e-01  -3.181 0.001466 ** 
## IMP_DEROG            5.869e-01  7.617e-02   7.706 1.30e-14 ***
## M_DEROG             -2.655e+00  3.603e-01  -7.367 1.74e-13 ***
## IMP_DELINQ           8.182e-01  6.264e-02  13.062  < 2e-16 ***
## M_DELINQ            -1.101e+00  4.859e-01  -2.266 0.023465 *  
## IMP_CLAGE           -5.466e-03  8.103e-04  -6.746 1.52e-11 ***
## M_CLAGE              9.834e-01  4.403e-01   2.233 0.025529 *  
## IMP_NINQ             1.528e-01  3.175e-02   4.813 1.48e-06 ***
## M_NINQ              -2.494e-01  4.538e-01  -0.550 0.582542    
## IMP_CLNO            -1.600e-02  6.331e-03  -2.527 0.011513 *  
## M_CLNO               2.693e+00  7.552e-01   3.565 0.000363 ***
## IMP_DEBTINC          9.231e-02  1.017e-02   9.075  < 2e-16 ***
## M_DEBTINC            2.679e+00  1.144e-01  23.425  < 2e-16 ***
## FLAG.Job.Mgr         2.091e+00  4.989e-01   4.191 2.77e-05 ***
## FLAG.Job.Office      1.414e+00  5.005e-01   2.824 0.004736 ** 
## FLAG.Job.Other       2.081e+00  4.825e-01   4.314 1.60e-05 ***
## FLAG.Job.ProfExe     1.972e+00  4.965e-01   3.971 7.16e-05 ***
## FLAG.Job.Sales       3.240e+00  5.794e-01   5.592 2.24e-08 ***
## FLAG.Job.Self        2.518e+00  5.650e-01   4.456 8.34e-06 ***
## FLAG.Reason.DebtCon -1.318e-01  3.661e-01  -0.360 0.718960    
## FLAG.Reason.HomeImp -5.659e-02  3.728e-01  -0.152 0.879329    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4155.3  on 4141  degrees of freedom
## Residual deviance: 2248.2  on 4114  degrees of freedom
## AIC: 2304.2
## 
## Number of Fisher Scoring iterations: 6
# Coefficient table and fit statistics for the intercept-only model.
summary( theLower_LR )
## 
## Call:
## glm(formula = TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.38087    0.03878  -35.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4155.3  on 4141  degrees of freedom
## Residual deviance: 4155.3  on 4141  degrees of freedom
## AIC: 4157.3
## 
## Number of Fisher Scoring iterations: 4
# BACKWARD VARIABLE SELECTION
# Start from the full model and drop terms for as long as AIC improves.
lr_model <- stepAIC(
  theUpper_LR,
  scope = list(lower = theLower_LR, upper = theUpper_LR),
  direction = "backward"
)
## Start:  AIC=2304.21
## TARGET_BAD_FLAG ~ (TARGET_LOSS_AMT + LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + 
##     IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + 
##     M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + 
##     FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp) - 
##     TARGET_LOSS_AMT
## 
##                       Df Deviance    AIC
## - FLAG.Reason.HomeImp  1   2248.2 2302.2
## - FLAG.Reason.DebtCon  1   2248.3 2302.3
## - M_NINQ               1   2248.5 2302.5
## <none>                     2248.2 2304.2
## - M_MORTDUE            1   2250.3 2304.3
## - LOAN                 1   2250.8 2304.8
## - IMP_MORTDUE          1   2251.2 2305.2
## - M_CLAGE              1   2253.0 2307.0
## - M_DELINQ             1   2253.7 2307.7
## - IMP_YOJ              1   2254.2 2308.2
## - IMP_CLNO             1   2254.7 2308.7
## - FLAG.Job.Office      1   2257.4 2311.4
## - IMP_VALUE            1   2258.2 2312.2
## - M_YOJ                1   2259.3 2313.3
## - M_CLNO               1   2261.9 2315.9
## - FLAG.Job.ProfExe     1   2267.8 2321.8
## - FLAG.Job.Mgr         1   2270.2 2324.2
## - FLAG.Job.Self        1   2271.0 2325.0
## - IMP_NINQ             1   2271.1 2325.1
## - FLAG.Job.Other       1   2272.4 2326.4
## - FLAG.Job.Sales       1   2284.4 2338.4
## - IMP_CLAGE            1   2298.9 2352.9
## - IMP_DEROG            1   2317.7 2371.7
## - M_DEROG              1   2320.9 2374.9
## - IMP_DEBTINC          1   2356.6 2410.6
## - M_VALUE              1   2359.2 2413.2
## - IMP_DELINQ           1   2483.2 2537.2
## - M_DEBTINC            1   2866.2 2920.2
## 
## Step:  AIC=2302.24
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO + 
##     M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.DebtCon
## 
##                       Df Deviance    AIC
## - M_NINQ               1   2248.5 2300.5
## - FLAG.Reason.DebtCon  1   2248.6 2300.6
## <none>                     2248.2 2302.2
## - M_MORTDUE            1   2250.3 2302.3
## - LOAN                 1   2250.8 2302.8
## - IMP_MORTDUE          1   2251.2 2303.2
## - M_CLAGE              1   2253.0 2305.0
## - M_DELINQ             1   2253.7 2305.7
## - IMP_YOJ              1   2254.3 2306.3
## - IMP_CLNO             1   2254.7 2306.7
## - FLAG.Job.Office      1   2257.8 2309.8
## - IMP_VALUE            1   2258.2 2310.2
## - M_YOJ                1   2259.3 2311.3
## - M_CLNO               1   2261.9 2313.9
## - FLAG.Job.ProfExe     1   2268.8 2320.8
## - FLAG.Job.Mgr         1   2271.1 2323.1
## - IMP_NINQ             1   2271.2 2323.2
## - FLAG.Job.Self        1   2271.8 2323.8
## - FLAG.Job.Other       1   2273.9 2325.9
## - FLAG.Job.Sales       1   2285.5 2337.5
## - IMP_CLAGE            1   2299.0 2351.0
## - IMP_DEROG            1   2317.7 2369.7
## - M_DEROG              1   2320.9 2372.9
## - IMP_DEBTINC          1   2356.6 2408.6
## - M_VALUE              1   2360.5 2412.5
## - IMP_DELINQ           1   2483.8 2535.8
## - M_DEBTINC            1   2866.3 2918.3
## 
## Step:  AIC=2300.54
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + 
##     IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.DebtCon
## 
##                       Df Deviance    AIC
## - FLAG.Reason.DebtCon  1   2248.9 2298.9
## <none>                     2248.5 2300.5
## - M_MORTDUE            1   2250.6 2300.6
## - LOAN                 1   2251.0 2301.0
## - IMP_MORTDUE          1   2251.5 2301.5
## - M_CLAGE              1   2253.4 2303.4
## - IMP_YOJ              1   2254.6 2304.6
## - IMP_CLNO             1   2255.0 2305.0
## - FLAG.Job.Office      1   2258.1 2308.1
## - M_DELINQ             1   2258.1 2308.1
## - IMP_VALUE            1   2258.4 2308.4
## - M_YOJ                1   2260.5 2310.5
## - M_CLNO               1   2262.3 2312.3
## - FLAG.Job.ProfExe     1   2269.1 2319.1
## - FLAG.Job.Mgr         1   2271.4 2321.4
## - IMP_NINQ             1   2271.8 2321.8
## - FLAG.Job.Self        1   2271.8 2321.8
## - FLAG.Job.Other       1   2274.2 2324.2
## - FLAG.Job.Sales       1   2286.2 2336.2
## - IMP_CLAGE            1   2299.2 2349.2
## - IMP_DEROG            1   2317.7 2367.7
## - M_DEROG              1   2321.2 2371.2
## - IMP_DEBTINC          1   2359.1 2409.1
## - M_VALUE              1   2362.1 2412.1
## - IMP_DELINQ           1   2483.8 2533.8
## - M_DEBTINC            1   2868.3 2918.3
## 
## Step:  AIC=2298.92
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + 
##     IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
## 
##                    Df Deviance    AIC
## <none>                  2248.9 2298.9
## - M_MORTDUE         1   2251.4 2299.4
## - LOAN              1   2252.0 2300.0
## - IMP_MORTDUE       1   2252.0 2300.0
## - M_CLAGE           1   2253.6 2301.6
## - IMP_YOJ           1   2254.9 2302.9
## - IMP_CLNO          1   2255.9 2303.9
## - FLAG.Job.Office   1   2258.4 2306.4
## - M_DELINQ          1   2258.4 2306.4
## - IMP_VALUE         1   2259.2 2307.2
## - M_YOJ             1   2261.2 2309.2
## - M_CLNO            1   2263.1 2311.1
## - FLAG.Job.ProfExe  1   2269.5 2317.5
## - FLAG.Job.Mgr      1   2271.6 2319.6
## - IMP_NINQ          1   2271.8 2319.8
## - FLAG.Job.Self     1   2272.6 2320.6
## - FLAG.Job.Other    1   2274.5 2322.5
## - FLAG.Job.Sales    1   2286.2 2334.2
## - IMP_CLAGE         1   2299.3 2347.3
## - IMP_DEROG         1   2318.3 2366.3
## - M_DEROG           1   2322.1 2370.1
## - IMP_DEBTINC       1   2360.8 2408.8
## - M_VALUE           1   2363.3 2411.3
## - IMP_DELINQ        1   2484.9 2532.9
## - M_DEBTINC         1   2869.4 2917.4
# Inspect the model kept by backward selection.
summary( lr_model )
## 
## Call:
## glm(formula = TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + 
##     IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + 
##     IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + 
##     FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self, family = "binomial", data = train)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -6.908e+00  6.124e-01 -11.280  < 2e-16 ***
## LOAN             -9.617e-06  5.566e-06  -1.728 0.084025 .  
## IMP_MORTDUE      -3.545e-06  2.090e-06  -1.697 0.089768 .  
## M_MORTDUE         3.857e-01  2.451e-01   1.574 0.115585    
## IMP_VALUE         4.405e-06  1.486e-06   2.964 0.003041 ** 
## M_VALUE           4.999e+00  6.115e-01   8.176 2.93e-16 ***
## IMP_YOJ          -2.005e-02  8.265e-03  -2.426 0.015280 *  
## M_YOJ            -7.974e-01  2.385e-01  -3.343 0.000829 ***
## IMP_DEROG         5.815e-01  7.534e-02   7.718 1.19e-14 ***
## M_DEROG          -2.654e+00  3.589e-01  -7.397 1.40e-13 ***
## IMP_DELINQ        8.185e-01  6.249e-02  13.098  < 2e-16 ***
## M_DELINQ         -1.227e+00  4.247e-01  -2.889 0.003870 ** 
## IMP_CLAGE        -5.421e-03  8.065e-04  -6.722 1.79e-11 ***
## M_CLAGE           9.770e-01  4.397e-01   2.222 0.026279 *  
## IMP_NINQ          1.520e-01  3.160e-02   4.811 1.50e-06 ***
## IMP_CLNO         -1.646e-02  6.273e-03  -2.624 0.008697 ** 
## M_CLNO            2.599e+00  7.180e-01   3.620 0.000295 ***
## IMP_DEBTINC       9.295e-02  1.013e-02   9.179  < 2e-16 ***
## M_DEBTINC         2.682e+00  1.143e-01  23.456  < 2e-16 ***
## FLAG.Job.Mgr      2.061e+00  4.828e-01   4.269 1.97e-05 ***
## FLAG.Job.Office   1.387e+00  4.841e-01   2.864 0.004178 ** 
## FLAG.Job.Other    2.053e+00  4.642e-01   4.424 9.71e-06 ***
## FLAG.Job.ProfExe  1.947e+00  4.793e-01   4.061 4.89e-05 ***
## FLAG.Job.Sales    3.206e+00  5.627e-01   5.698 1.21e-08 ***
## FLAG.Job.Self     2.495e+00  5.469e-01   4.562 5.06e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4155.3  on 4141  degrees of freedom
## Residual deviance: 2248.9  on 4117  degrees of freedom
## AIC: 2298.9
## 
## Number of Fisher Scoring iterations: 6
# Score the test set with the backward-selected model and build its ROC curve.
plr <- predict(lr_model, newdata = test, type = "response")
plr2 <- prediction(plr, test$TARGET_BAD_FLAG)
plr3 <- performance(plr2, measure = "tpr", x.measure = "fpr")

# LR STEP TREE MODEL
# Restrict the candidate predictors to the variables the tree reported in
# its variable-importance vector.
treeVars <- names(tr_model$variable.importance)
treeVars
##  [1] "M_DEBTINC"   "IMP_DEBTINC" "IMP_DELINQ"  "IMP_CLAGE"   "LOAN"       
##  [6] "M_VALUE"     "IMP_VALUE"   "IMP_MORTDUE" "IMP_CLNO"    "IMP_YOJ"
treeVarsPlus <- paste(treeVars, collapse = "+")
# Named `tree_formula` rather than `F` so the built-in shorthand for FALSE is
# not masked for the rest of the session.
tree_formula <- as.formula(paste("TARGET_BAD_FLAG ~", treeVarsPlus))
tree_LR <- glm(tree_formula, family = "binomial", data = train)
# Intercept-only model refitted on the same training data; it is the lower
# bound for the forward selection that follows.
theLower_LR <- glm(TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
summary(tree_LR)
## 
## Call:
## glm(formula = tree_formula, family = "binomial", data = train)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.697e+00  3.910e-01 -12.014  < 2e-16 ***
## M_DEBTINC    2.782e+00  1.066e-01  26.103  < 2e-16 ***
## IMP_DEBTINC  8.956e-02  9.640e-03   9.290  < 2e-16 ***
## IMP_DELINQ   6.802e-01  5.169e-02  13.159  < 2e-16 ***
## IMP_CLAGE   -6.253e-03  7.569e-04  -8.261  < 2e-16 ***
## LOAN        -7.340e-06  5.046e-06  -1.455  0.14577    
## M_VALUE      3.887e+00  4.956e-01   7.842 4.43e-15 ***
## IMP_VALUE    4.458e-06  1.378e-06   3.236  0.00121 ** 
## IMP_MORTDUE -3.540e-06  1.932e-06  -1.832  0.06692 .  
## IMP_CLNO    -7.701e-03  5.556e-03  -1.386  0.16572    
## IMP_YOJ     -2.077e-02  7.871e-03  -2.639  0.00830 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4155.3  on 4141  degrees of freedom
## Residual deviance: 2533.8  on 4131  degrees of freedom
## AIC: 2555.8
## 
## Number of Fisher Scoring iterations: 6
# Intercept-only baseline, shown for comparison with the tree-variable model.
summary( theLower_LR )
## 
## Call:
## glm(formula = TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.38087    0.03878  -35.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4155.3  on 4141  degrees of freedom
## Residual deviance: 4155.3  on 4141  degrees of freedom
## AIC: 4157.3
## 
## Number of Fisher Scoring iterations: 4
# TREE FORWARD STEPWISE SELECTION
# Grow from the intercept-only model towards the tree-variable model, adding
# terms for as long as AIC improves.
lrt_model <- stepAIC(
  theLower_LR,
  scope = list(lower = theLower_LR, upper = tree_LR),
  direction = "forward"
)
## Start:  AIC=4157.3
## TARGET_BAD_FLAG ~ 1
## 
##               Df Deviance    AIC
## + M_DEBTINC    1   3088.5 3092.5
## + IMP_DELINQ   1   3776.4 3780.4
## + M_VALUE      1   3942.8 3946.8
## + IMP_CLAGE    1   4020.9 4024.9
## + IMP_DEBTINC  1   4051.1 4055.1
## + LOAN         1   4133.3 4137.3
## + IMP_YOJ      1   4144.0 4148.0
## + IMP_MORTDUE  1   4150.4 4154.4
## + IMP_VALUE    1   4153.1 4157.1
## <none>             4155.3 4157.3
## + IMP_CLNO     1   4155.3 4159.3
## 
## Step:  AIC=3092.5
## TARGET_BAD_FLAG ~ M_DEBTINC
## 
##               Df Deviance    AIC
## + IMP_DELINQ   1   2861.2 2867.2
## + M_VALUE      1   2972.0 2978.0
## + IMP_DEBTINC  1   2977.4 2983.4
## + IMP_CLAGE    1   3006.3 3012.3
## + IMP_YOJ      1   3075.2 3081.2
## <none>             3088.5 3092.5
## + LOAN         1   3086.7 3092.7
## + IMP_CLNO     1   3088.2 3094.2
## + IMP_VALUE    1   3088.3 3094.3
## + IMP_MORTDUE  1   3088.5 3094.5
## 
## Step:  AIC=2867.17
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ
## 
##               Df Deviance    AIC
## + IMP_CLAGE    1   2751.2 2759.2
## + IMP_DEBTINC  1   2753.8 2761.8
## + M_VALUE      1   2778.9 2786.9
## + IMP_YOJ      1   2836.1 2844.1
## + IMP_CLNO     1   2850.8 2858.8
## <none>             2861.2 2867.2
## + LOAN         1   2859.8 2867.8
## + IMP_VALUE    1   2860.8 2868.8
## + IMP_MORTDUE  1   2861.1 2869.1
## 
## Step:  AIC=2759.17
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE
## 
##               Df Deviance    AIC
## + IMP_DEBTINC  1   2650.4 2660.4
## + M_VALUE      1   2672.4 2682.4
## + IMP_YOJ      1   2741.4 2751.4
## + IMP_VALUE    1   2743.9 2753.9
## <none>             2751.2 2759.2
## + IMP_MORTDUE  1   2749.2 2759.2
## + IMP_CLNO     1   2751.2 2761.2
## + LOAN         1   2751.2 2761.2
## 
## Step:  AIC=2660.38
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC
## 
##               Df Deviance    AIC
## + M_VALUE      1   2554.2 2566.2
## + IMP_YOJ      1   2644.2 2656.2
## + IMP_VALUE    1   2646.5 2658.5
## + IMP_CLNO     1   2648.2 2660.2
## <none>             2650.4 2660.4
## + LOAN         1   2650.0 2662.0
## + IMP_MORTDUE  1   2650.1 2662.1
## 
## Step:  AIC=2566.19
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC + 
##     M_VALUE
## 
##               Df Deviance    AIC
## + IMP_YOJ      1   2547.7 2561.7
## + IMP_VALUE    1   2549.6 2563.6
## <none>             2554.2 2566.2
## + IMP_CLNO     1   2552.5 2566.5
## + LOAN         1   2553.3 2567.3
## + IMP_MORTDUE  1   2554.2 2568.2
## 
## Step:  AIC=2561.67
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC + 
##     M_VALUE + IMP_YOJ
## 
##               Df Deviance    AIC
## + IMP_VALUE    1   2542.8 2558.8
## <none>             2547.7 2561.7
## + IMP_CLNO     1   2545.9 2561.9
## + LOAN         1   2547.2 2563.2
## + IMP_MORTDUE  1   2547.7 2563.7
## 
## Step:  AIC=2558.79
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC + 
##     M_VALUE + IMP_YOJ + IMP_VALUE
## 
##               Df Deviance    AIC
## + IMP_MORTDUE  1   2538.0 2556.0
## + IMP_CLNO     1   2539.4 2557.4
## + LOAN         1   2540.5 2558.5
## <none>             2542.8 2558.8
## 
## Step:  AIC=2556.04
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC + 
##     M_VALUE + IMP_YOJ + IMP_VALUE + IMP_MORTDUE
## 
##            Df Deviance    AIC
## + LOAN      1   2535.7 2555.7
## + IMP_CLNO  1   2536.0 2556.0
## <none>          2538.0 2556.0
## 
## Step:  AIC=2555.73
## TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + IMP_DEBTINC + 
##     M_VALUE + IMP_YOJ + IMP_VALUE + IMP_MORTDUE + LOAN
## 
##            Df Deviance    AIC
## <none>          2535.7 2555.7
## + IMP_CLNO  1   2533.8 2555.8
# Inspect the model kept by forward selection.
summary( lrt_model )
## 
## Call:
## glm(formula = TARGET_BAD_FLAG ~ M_DEBTINC + IMP_DELINQ + IMP_CLAGE + 
##     IMP_DEBTINC + M_VALUE + IMP_YOJ + IMP_VALUE + IMP_MORTDUE + 
##     LOAN, family = "binomial", data = train)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.723e+00  3.904e-01 -12.097  < 2e-16 ***
## M_DEBTINC    2.777e+00  1.064e-01  26.096  < 2e-16 ***
## IMP_DELINQ   6.697e-01  5.095e-02  13.145  < 2e-16 ***
## IMP_CLAGE   -6.506e-03  7.373e-04  -8.823  < 2e-16 ***
## IMP_DEBTINC  8.820e-02  9.577e-03   9.210  < 2e-16 ***
## M_VALUE      3.897e+00  4.959e-01   7.858 3.91e-15 ***
## IMP_YOJ     -2.093e-02  7.882e-03  -2.655  0.00793 ** 
## IMP_VALUE    4.432e-06  1.370e-06   3.236  0.00121 ** 
## IMP_MORTDUE -4.005e-06  1.890e-06  -2.119  0.03408 *  
## LOAN        -7.589e-06  5.054e-06  -1.502  0.13323    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4155.3  on 4141  degrees of freedom
## Residual deviance: 2535.7  on 4132  degrees of freedom
## AIC: 2555.7
## 
## Number of Fisher Scoring iterations: 6
# Score the test set with the forward-selected model and build its ROC curve.
plrt <- predict(lrt_model, newdata = test, type = "response")
plrt2 <- prediction(plrt, test$TARGET_BAD_FLAG)
plrt3 <- performance(plrt2, measure = "tpr", x.measure = "fpr")

# ROC

# Overlay the test-set ROC curves of all five classifiers on one plot.
plot(pt3, col = "green")
plot(pr3, col = "red", add = TRUE)
plot(pg3, col = "blue", add = TRUE)
plot(plr3, col = "gold", add = TRUE)
plot(plrt3, col = "gray", add = TRUE)
# Dashed diagonal (intercept 0, slope 1) as the reference line.
abline(0, 1, lty = 2)
# `bty` only documents "o" (box) and "n" (no box); "o" keeps the box that the
# previous, undocumented value "y" happened to draw.
legend(
  "bottomright",
  legend = c("TREE", "RANDOM FOREST", "GRADIENT BOOSTING",
             "LOGIT REG BWD", "LOGIT REG TREE"),
  col = c("green", "red", "blue", "gold", "gray"),
  bty = "o",
  lty = 1
)

# Area under each ROC curve. The `y.values` slot is a list, so take its first
# element to store a plain number in each `auc*` variable instead of a
# one-element list (lists break later arithmetic and comparisons).
aucT <- performance(pt2, "auc")@y.values[[1]]
aucR <- performance(pr2, "auc")@y.values[[1]]
aucG <- performance(pg2, "auc")@y.values[[1]]
aucLR <- performance(plr2, "auc")@y.values[[1]]
aucLRT <- performance(plrt2, "auc")@y.values[[1]]

print(paste("TREE AUC=", aucT))
## [1] "TREE AUC= 0.826618121581281"
print(paste("RF AUC=", aucR))
## [1] "RF AUC= 0.953436405362943"
print(paste("GB AUC=", aucG))
## [1] "GB AUC= 0.920521802150007"
print(paste("LR AUC=", aucLR))
## [1] "LR AUC= 0.910356476608441"
print(paste("LRT AUC=", aucLRT))
## [1] "LRT AUC= 0.879705201724766"
# !!!SUMMARY & ANALYSIS!!!

# The Random Forest performed best among all the models: it has the
# largest test-set AUC (0.953), i.e. it ranks bad loans above good ones
# most reliably (AUC measures discrimination, not accuracy at a threshold).
# I would recommend the Random Forest method because it discriminates
# best and is easy to build without a significantly long running time.


# Step 3 - Linear Regression 

# Draw a fresh random split for the regression step: each row goes to the
# training set with probability 0.7 and to the test set otherwise.
# No seed is set at this point, so the draw continues from the RNG state
# left by the preceding steps.
sample <- sample(
  c(TRUE, FALSE),
  nrow(copy_wk5),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- copy_wk5[sample, ]
test <- copy_wk5[!sample, ]

# Models from Week5 -

# Anova (Using ANOVA because it outperformed Poisson in previous weeks)
# Regression tree for TARGET_LOSS_AMT on every column except TARGET_BAD_FLAG.
TreeAnova <- rpart(
  TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  data = train,
  method = "anova",
  control = tree_depth
)
rpart.plot(TreeAnova, digits = -3, extra = 100)

# Importance scores of the variables reported by the fitted tree.
TreeAnova$variable.importance
##                LOAN           M_DEBTINC         IMP_DEBTINC          IMP_DELINQ 
##         49690664826         42985254519         15939584553         15400438974 
##           IMP_VALUE         IMP_MORTDUE           IMP_DEROG           IMP_CLAGE 
##          9271157466          4145647445          3634659036          3469029852 
##            IMP_CLNO FLAG.Reason.HomeImp FLAG.Reason.DebtCon             M_VALUE 
##          3174251359          2820521327          2148907741          1958672130 
##             M_DEROG            M_DELINQ              M_NINQ           M_MORTDUE 
##          1889394893          1161439258           916925730           847333397 
##               M_YOJ       FLAG.Job.Self             IMP_YOJ 
##           585657276           452877917           390438184
# Predict test-set losses with the regression tree, then compute the root
# mean squared error against the observed loss amounts.
pt <- predict(TreeAnova, newdata = test)
head(pt)
##         2         4         8        17        18        22 
## 3972.6331 3972.6331  428.8188 3972.6331  428.8188 3972.6331
RMSEt <- sqrt(mean((test$TARGET_LOSS_AMT - pt)^2))

# Random Forest
# 500-tree forest for TARGET_LOSS_AMT with importance measures enabled.
rf_model <- randomForest(
  TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  data = train,
  ntree = 500,
  importance = TRUE
)
importance(rf_model)
##                       %IncMSE IncNodePurity
## LOAN                60.668132   46695625794
## IMP_MORTDUE         17.146165   10494379059
## M_MORTDUE            5.153892     747061976
## IMP_VALUE           18.735447   13778814510
## M_VALUE             28.190389    3749783439
## IMP_YOJ             15.606857    8223538954
## M_YOJ                8.348570     683350163
## IMP_DEROG           21.995255    5774511085
## M_DEROG              8.842225    1258662250
## IMP_DELINQ          42.033453   20407610426
## M_DELINQ             6.506493     536619144
## IMP_CLAGE           25.161710   13446209097
## M_CLAGE              7.670149     257964369
## IMP_NINQ            17.389315    5663088617
## M_NINQ               4.939194     354225193
## IMP_CLNO            24.784895   11960859784
## M_CLNO              11.163432     234115096
## IMP_DEBTINC         28.439878   23417631709
## M_DEBTINC           40.538449   32416365766
## FLAG.Job.Mgr         9.203716    1052569539
## FLAG.Job.Office      7.495568    1046994234
## FLAG.Job.Other      10.543253    1280275312
## FLAG.Job.ProfExe     6.506114     912898185
## FLAG.Job.Sales      13.315858     827523754
## FLAG.Job.Self        4.561926    1376867544
## FLAG.Reason.DebtCon  8.337061    1592444846
## FLAG.Reason.HomeImp  7.951433    1409168007
varImpPlot(rf_model)

# Test-set predictions and RMSE for the random forest
pr <- predict(rf_model, newdata = test)
head(pr)
##        2        4        8       17       18       22 
## 2866.231 2236.436  674.151 8233.899 1637.305 2565.278
RMSEr <- sqrt(mean((pr - test$TARGET_LOSS_AMT)^2))
print(RMSEr)
## [1] 4020.425
# Gradient boosting on the same formula: 500 trees, Poisson distribution
gb_model <- gbm(
  TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  distribution = "poisson",
  data = train,
  n.trees = 500
)
# The summary() generic dispatches to the gbm method for this object
summary(gb_model, cBars = 10)

# Predictions on the response scale, followed by the test-set RMSE
pg <- predict(gb_model, newdata = test, type = "response")
## Using 500 trees...
head(pg)
## [1] 4992.8362 4844.7070  246.7864 7432.4818  495.2246 2115.6312
RMSEg <- sqrt(mean((pg - test$TARGET_LOSS_AMT)^2))

# Linear regression bounds used for variable selection:
#   theLower_LR - intercept-only model
#   theUpper_LR - every column except TARGET_BAD_FLAG as a predictor
theLower_LR <- lm(TARGET_LOSS_AMT ~ 1, data = train)
theUpper_LR <- lm(TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG, data = train)
summary(theUpper_LR)
## 
## Call:
## lm(formula = TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -15774  -2591   -343   1561  58518 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -6.599e+03  7.482e+02  -8.821  < 2e-16 ***
## LOAN                 1.411e-01  8.469e-03  16.666  < 2e-16 ***
## IMP_MORTDUE         -1.252e-02  3.555e-03  -3.523 0.000432 ***
## M_MORTDUE            1.299e+03  3.491e+02   3.721 0.000201 ***
## IMP_VALUE            1.434e-02  2.792e-03   5.136 2.94e-07 ***
## M_VALUE              6.608e+03  6.596e+02  10.018  < 2e-16 ***
## IMP_YOJ             -3.338e+01  1.223e+01  -2.730 0.006355 ** 
## M_YOJ               -6.693e+02  3.452e+02  -1.939 0.052604 .  
## IMP_DEROG            8.747e+02  1.131e+02   7.735 1.29e-14 ***
## M_DEROG             -2.534e+03  4.219e+02  -6.005 2.07e-09 ***
## IMP_DELINQ           1.887e+03  8.994e+01  20.978  < 2e-16 ***
## M_DELINQ            -2.059e+03  5.682e+02  -3.623 0.000294 ***
## IMP_CLAGE           -1.013e+01  1.115e+00  -9.083  < 2e-16 ***
## M_CLAGE              5.781e+02  7.338e+02   0.788 0.430858    
## IMP_NINQ             1.848e+02  5.543e+01   3.333 0.000865 ***
## M_NINQ               1.719e+02  5.350e+02   0.321 0.747929    
## IMP_CLNO             4.706e+01  1.018e+01   4.624 3.88e-06 ***
## M_CLNO               2.670e+03  9.774e+02   2.731 0.006334 ** 
## IMP_DEBTINC          1.166e+02  1.149e+01  10.144  < 2e-16 ***
## M_DEBTINC            5.949e+03  2.230e+02  26.682  < 2e-16 ***
## FLAG.Job.Mgr         5.942e+02  5.502e+02   1.080 0.280178    
## FLAG.Job.Office      4.610e+02  5.405e+02   0.853 0.393760    
## FLAG.Job.Other       8.761e+02  5.082e+02   1.724 0.084791 .  
## FLAG.Job.ProfExe     8.888e+02  5.351e+02   1.661 0.096780 .  
## FLAG.Job.Sales       4.103e+03  8.270e+02   4.961 7.30e-07 ***
## FLAG.Job.Self        1.893e+03  7.021e+02   2.696 0.007055 ** 
## FLAG.Reason.DebtCon  5.688e+01  4.994e+02   0.114 0.909314    
## FLAG.Reason.HomeImp -4.700e+02  5.052e+02  -0.930 0.352240    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5487 on 4140 degrees of freedom
## Multiple R-squared:  0.4272, Adjusted R-squared:  0.4235 
## F-statistic: 114.4 on 27 and 4140 DF,  p-value: < 2.2e-16
# Intercept-only baseline, shown for comparison with the full model
summary(theLower_LR)
## 
## Call:
## lm(formula = TARGET_LOSS_AMT ~ 1, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -2676  -2676  -2676  -2676  76311 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2676.0      111.9   23.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7226 on 4167 degrees of freedom
# Backward elimination by AIC: start from the full linear model and drop
# terms, bounded below by the intercept-only model
lr_model <- stepAIC(
  theUpper_LR,
  scope = list(lower = theLower_LR, upper = theUpper_LR),
  direction = "backward"
)
## Start:  AIC=71801.44
## TARGET_LOSS_AMT ~ (TARGET_BAD_FLAG + LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + 
##     IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + 
##     M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + 
##     FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp) - 
##     TARGET_BAD_FLAG
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Reason.DebtCon  1 3.9061e+05 1.2463e+11 71799
## - M_NINQ               1 3.1095e+06 1.2463e+11 71800
## - M_CLAGE              1 1.8683e+07 1.2465e+11 71800
## - FLAG.Job.Office      1 2.1899e+07 1.2465e+11 71800
## - FLAG.Reason.HomeImp  1 2.6056e+07 1.2465e+11 71800
## - FLAG.Job.Mgr         1 3.5116e+07 1.2466e+11 71801
## <none>                              1.2463e+11 71801
## - FLAG.Job.ProfExe     1 8.3057e+07 1.2471e+11 71802
## - FLAG.Job.Other       1 8.9466e+07 1.2472e+11 71802
## - M_YOJ                1 1.1315e+08 1.2474e+11 71803
## - FLAG.Job.Self        1 2.1874e+08 1.2485e+11 71807
## - IMP_YOJ              1 2.2440e+08 1.2485e+11 71807
## - M_CLNO               1 2.2458e+08 1.2485e+11 71807
## - IMP_NINQ             1 3.3451e+08 1.2496e+11 71811
## - IMP_MORTDUE          1 3.7359e+08 1.2500e+11 71812
## - M_DELINQ             1 3.9523e+08 1.2502e+11 71813
## - M_MORTDUE            1 4.1669e+08 1.2504e+11 71813
## - IMP_CLNO             1 6.4368e+08 1.2527e+11 71821
## - FLAG.Job.Sales       1 7.4082e+08 1.2537e+11 71824
## - IMP_VALUE            1 7.9406e+08 1.2542e+11 71826
## - M_DEROG              1 1.0856e+09 1.2571e+11 71836
## - IMP_DEROG            1 1.8013e+09 1.2643e+11 71859
## - IMP_CLAGE            1 2.4835e+09 1.2711e+11 71882
## - M_VALUE              1 3.0213e+09 1.2765e+11 71899
## - IMP_DEBTINC          1 3.0976e+09 1.2772e+11 71902
## - LOAN                 1 8.3610e+09 1.3299e+11 72070
## - IMP_DELINQ           1 1.3248e+10 1.3787e+11 72221
## - M_DEBTINC            1 2.1432e+10 1.4606e+11 72461
## 
## Step:  AIC=71799.45
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO + 
##     M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - M_NINQ               1 3.1187e+06 1.2463e+11 71798
## - M_CLAGE              1 1.8370e+07 1.2465e+11 71798
## - FLAG.Job.Office      1 2.3806e+07 1.2465e+11 71798
## - FLAG.Job.Mgr         1 3.8011e+07 1.2467e+11 71799
## <none>                              1.2463e+11 71799
## - FLAG.Job.ProfExe     1 8.9088e+07 1.2472e+11 71800
## - FLAG.Job.Other       1 9.6230e+07 1.2472e+11 71801
## - M_YOJ                1 1.1582e+08 1.2474e+11 71801
## - FLAG.Reason.HomeImp  1 2.1607e+08 1.2484e+11 71805
## - M_CLNO               1 2.2430e+08 1.2485e+11 71805
## - IMP_YOJ              1 2.2464e+08 1.2485e+11 71805
## - FLAG.Job.Self        1 2.2618e+08 1.2485e+11 71805
## - IMP_NINQ             1 3.3507e+08 1.2496e+11 71809
## - IMP_MORTDUE          1 3.7343e+08 1.2500e+11 71810
## - M_DELINQ             1 3.9578e+08 1.2502e+11 71811
## - M_MORTDUE            1 4.1782e+08 1.2504e+11 71811
## - IMP_CLNO             1 6.4345e+08 1.2527e+11 71819
## - FLAG.Job.Sales       1 7.6365e+08 1.2539e+11 71823
## - IMP_VALUE            1 7.9630e+08 1.2542e+11 71824
## - M_DEROG              1 1.0860e+09 1.2571e+11 71834
## - IMP_DEROG            1 1.8017e+09 1.2643e+11 71857
## - IMP_CLAGE            1 2.4944e+09 1.2712e+11 71880
## - M_VALUE              1 3.0318e+09 1.2766e+11 71898
## - IMP_DEBTINC          1 3.1039e+09 1.2773e+11 71900
## - LOAN                 1 8.4305e+09 1.3306e+11 72070
## - IMP_DELINQ           1 1.3276e+10 1.3790e+11 72219
## - M_DEBTINC            1 2.1435e+10 1.4606e+11 72459
## 
## Step:  AIC=71797.56
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + 
##     IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - M_CLAGE              1 1.7996e+07 1.2465e+11 71796
## - FLAG.Job.Office      1 2.3164e+07 1.2465e+11 71796
## - FLAG.Job.Mgr         1 3.7190e+07 1.2467e+11 71797
## <none>                              1.2463e+11 71798
## - FLAG.Job.ProfExe     1 8.7912e+07 1.2472e+11 71798
## - FLAG.Job.Other       1 9.5054e+07 1.2473e+11 71799
## - M_YOJ                1 1.1367e+08 1.2474e+11 71799
## - FLAG.Reason.HomeImp  1 2.1532e+08 1.2485e+11 71803
## - IMP_YOJ              1 2.2566e+08 1.2486e+11 71803
## - FLAG.Job.Self        1 2.2811e+08 1.2486e+11 71803
## - M_CLNO               1 2.4512e+08 1.2488e+11 71804
## - IMP_NINQ             1 3.3211e+08 1.2496e+11 71807
## - IMP_MORTDUE          1 3.7432e+08 1.2500e+11 71808
## - M_MORTDUE            1 4.1859e+08 1.2505e+11 71810
## - M_DELINQ             1 4.8257e+08 1.2511e+11 71812
## - IMP_CLNO             1 6.4498e+08 1.2528e+11 71817
## - FLAG.Job.Sales       1 7.6075e+08 1.2539e+11 71821
## - IMP_VALUE            1 7.9770e+08 1.2543e+11 71822
## - M_DEROG              1 1.0833e+09 1.2571e+11 71832
## - IMP_DEROG            1 1.8198e+09 1.2645e+11 71856
## - IMP_CLAGE            1 2.4940e+09 1.2712e+11 71878
## - M_VALUE              1 3.0305e+09 1.2766e+11 71896
## - IMP_DEBTINC          1 3.1079e+09 1.2774e+11 71898
## - LOAN                 1 8.4276e+09 1.3306e+11 72068
## - IMP_DELINQ           1 1.3306e+10 1.3794e+11 72218
## - M_DEBTINC            1 2.1432e+10 1.4606e+11 72457
## 
## Step:  AIC=71796.16
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + 
##     M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other + 
##     FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Job.Office      1 1.8035e+07 1.2467e+11 71795
## - FLAG.Job.Mgr         1 3.1120e+07 1.2468e+11 71795
## <none>                              1.2465e+11 71796
## - FLAG.Job.ProfExe     1 7.8370e+07 1.2473e+11 71797
## - FLAG.Job.Other       1 8.5843e+07 1.2473e+11 71797
## - M_YOJ                1 1.1270e+08 1.2476e+11 71798
## - FLAG.Job.Self        1 2.1710e+08 1.2487e+11 71801
## - FLAG.Reason.HomeImp  1 2.2215e+08 1.2487e+11 71802
## - IMP_YOJ              1 2.3573e+08 1.2488e+11 71802
## - IMP_NINQ             1 3.4004e+08 1.2499e+11 71806
## - IMP_MORTDUE          1 3.6431e+08 1.2501e+11 71806
## - M_MORTDUE            1 4.1927e+08 1.2507e+11 71808
## - M_DELINQ             1 4.8952e+08 1.2514e+11 71810
## - IMP_CLNO             1 6.2835e+08 1.2528e+11 71815
## - M_CLNO               1 7.1777e+08 1.2537e+11 71818
## - FLAG.Job.Sales       1 7.4541e+08 1.2539e+11 71819
## - IMP_VALUE            1 7.9229e+08 1.2544e+11 71821
## - M_DEROG              1 1.0839e+09 1.2573e+11 71830
## - IMP_DEROG            1 1.8303e+09 1.2648e+11 71855
## - IMP_CLAGE            1 2.4770e+09 1.2713e+11 71876
## - M_VALUE              1 3.0218e+09 1.2767e+11 71894
## - IMP_DEBTINC          1 3.0903e+09 1.2774e+11 71896
## - LOAN                 1 8.4350e+09 1.3308e+11 72067
## - IMP_DELINQ           1 1.3294e+10 1.3794e+11 72217
## - M_DEBTINC            1 2.1478e+10 1.4613e+11 72457
## 
## Step:  AIC=71794.76
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + 
##     M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Other + FLAG.Job.ProfExe + 
##     FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Job.Mgr         1 1.3293e+07 1.2468e+11 71793
## <none>                              1.2467e+11 71795
## - FLAG.Job.ProfExe     1 9.4839e+07 1.2476e+11 71796
## - M_YOJ                1 1.1778e+08 1.2478e+11 71797
## - FLAG.Job.Other       1 1.2948e+08 1.2480e+11 71797
## - FLAG.Reason.HomeImp  1 2.1713e+08 1.2488e+11 71800
## - IMP_YOJ              1 2.4037e+08 1.2491e+11 71801
## - FLAG.Job.Self        1 2.4139e+08 1.2491e+11 71801
## - IMP_NINQ             1 3.3757e+08 1.2500e+11 71804
## - IMP_MORTDUE          1 3.6994e+08 1.2504e+11 71805
## - M_MORTDUE            1 4.0780e+08 1.2507e+11 71806
## - M_DELINQ             1 4.7914e+08 1.2515e+11 71809
## - IMP_CLNO             1 6.4364e+08 1.2531e+11 71814
## - M_CLNO               1 7.5484e+08 1.2542e+11 71818
## - IMP_VALUE            1 7.9611e+08 1.2546e+11 71819
## - FLAG.Job.Sales       1 8.8419e+08 1.2555e+11 71822
## - M_DEROG              1 1.0896e+09 1.2576e+11 71829
## - IMP_DEROG            1 1.8175e+09 1.2648e+11 71853
## - IMP_CLAGE            1 2.4623e+09 1.2713e+11 71874
## - M_VALUE              1 3.0170e+09 1.2768e+11 71892
## - IMP_DEBTINC          1 3.1689e+09 1.2784e+11 71897
## - LOAN                 1 8.4190e+09 1.3309e+11 72065
## - IMP_DELINQ           1 1.3313e+10 1.3798e+11 72216
## - M_DEBTINC            1 2.1529e+10 1.4620e+11 72457
## 
## Step:  AIC=71793.21
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + 
##     M_DEBTINC + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## <none>                              1.2468e+11 71793
## - FLAG.Job.ProfExe     1 8.2589e+07 1.2476e+11 71794
## - M_YOJ                1 1.1736e+08 1.2480e+11 71795
## - FLAG.Job.Other       1 1.2304e+08 1.2480e+11 71795
## - FLAG.Reason.HomeImp  1 2.2024e+08 1.2490e+11 71799
## - FLAG.Job.Self        1 2.2813e+08 1.2491e+11 71799
## - IMP_YOJ              1 2.3572e+08 1.2492e+11 71799
## - IMP_NINQ             1 3.5005e+08 1.2503e+11 71803
## - IMP_MORTDUE          1 3.6206e+08 1.2504e+11 71803
## - M_MORTDUE            1 4.0040e+08 1.2508e+11 71805
## - M_DELINQ             1 4.7505e+08 1.2515e+11 71807
## - IMP_CLNO             1 6.4607e+08 1.2533e+11 71813
## - M_CLNO               1 7.4165e+08 1.2542e+11 71816
## - IMP_VALUE            1 7.9375e+08 1.2547e+11 71818
## - FLAG.Job.Sales       1 8.7488e+08 1.2555e+11 71820
## - M_DEROG              1 1.0934e+09 1.2577e+11 71828
## - IMP_DEROG            1 1.8214e+09 1.2650e+11 71852
## - IMP_CLAGE            1 2.4609e+09 1.2714e+11 71873
## - M_VALUE              1 3.0265e+09 1.2771e+11 71891
## - IMP_DEBTINC          1 3.1881e+09 1.2787e+11 71896
## - LOAN                 1 8.4169e+09 1.3310e+11 72063
## - IMP_DELINQ           1 1.3348e+10 1.3803e+11 72215
## - M_DEBTINC            1 2.1595e+10 1.4627e+11 72457
# Coefficients and fit statistics of the model kept by backward selection
summary(lr_model)
## 
## Call:
## lm(formula = TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + 
##     IMP_DELINQ + M_DELINQ + IMP_CLAGE + IMP_NINQ + IMP_CLNO + 
##     M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Other + FLAG.Job.ProfExe + 
##     FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.HomeImp, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -16031  -2578   -324   1563  58518 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -6.072e+03  4.897e+02 -12.400  < 2e-16 ***
## LOAN                 1.410e-01  8.429e-03  16.728  < 2e-16 ***
## IMP_MORTDUE         -1.224e-02  3.527e-03  -3.469 0.000527 ***
## M_MORTDUE            1.261e+03  3.456e+02   3.648 0.000267 ***
## IMP_VALUE            1.428e-02  2.781e-03   5.137 2.92e-07 ***
## M_VALUE              6.583e+03  6.563e+02  10.031  < 2e-16 ***
## IMP_YOJ             -3.407e+01  1.217e+01  -2.799 0.005144 ** 
## M_YOJ               -6.751e+02  3.418e+02  -1.975 0.048305 *  
## IMP_DEROG            8.763e+02  1.126e+02   7.782 8.98e-15 ***
## M_DEROG             -2.533e+03  4.201e+02  -6.029 1.79e-09 ***
## IMP_DELINQ           1.889e+03  8.967e+01  21.066  < 2e-16 ***
## M_DELINQ            -1.947e+03  4.898e+02  -3.974 7.19e-05 ***
## IMP_CLAGE           -1.004e+01  1.109e+00  -9.045  < 2e-16 ***
## IMP_NINQ             1.877e+02  5.501e+01   3.411 0.000653 ***
## IMP_CLNO             4.598e+01  9.920e+00   4.635 3.69e-06 ***
## M_CLNO               3.000e+03  6.041e+02   4.966 7.13e-07 ***
## IMP_DEBTINC          1.167e+02  1.133e+01  10.295  < 2e-16 ***
## M_DEBTINC            5.963e+03  2.226e+02  26.794  < 2e-16 ***
## FLAG.Job.Other       4.159e+02  2.056e+02   2.023 0.043185 *  
## FLAG.Job.ProfExe     4.117e+02  2.484e+02   1.657 0.097593 .  
## FLAG.Job.Sales       3.619e+03  6.710e+02   5.393 7.31e-08 ***
## FLAG.Job.Self        1.430e+03  5.191e+02   2.754 0.005914 ** 
## FLAG.Reason.HomeImp -5.267e+02  1.947e+02  -2.706 0.006840 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5484 on 4145 degrees of freedom
## Multiple R-squared:  0.427,  Adjusted R-squared:  0.424 
## F-statistic: 140.4 on 22 and 4145 DF,  p-value: < 2.2e-16
# Test-set predictions and RMSE for the backward-selected linear model
plr <- predict(lr_model, newdata = test)
head(plr)
##         2         4         8        17        18        22 
##  7127.810 12239.854 -2256.998 16925.028  2243.138  3782.327
RMSElr <- sqrt(mean((plr - test$TARGET_LOSS_AMT)^2))

# Linear regression restricted to the variables the regression tree used:
# take the names from the tree's variable importance and join them into the
# right-hand side of a formula.
treeVars <- names(TreeAnova$variable.importance)
treeVarsPlus <- paste(treeVars, collapse = "+")
# NOTE(review): assigning to `F` masks R's built-in alias for FALSE; the name
# is kept here because other parts of the script may read it.
F <- as.formula(paste("TARGET_LOSS_AMT ~", treeVarsPlus))

# Intercept-only lower bound and the tree-variable model
theLower_LR <- lm(TARGET_LOSS_AMT ~ 1, data = train)
tree_LR <- lm(F, data = train)

summary(tree_LR)
## 
## Call:
## lm(formula = F, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17048  -2487   -356   1547  58341 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -5.642e+03  6.470e+02  -8.720  < 2e-16 ***
## LOAN                 1.379e-01  8.474e-03  16.275  < 2e-16 ***
## M_DEBTINC            6.197e+03  2.210e+02  28.047  < 2e-16 ***
## IMP_DEBTINC          1.218e+02  1.135e+01  10.727  < 2e-16 ***
## IMP_DELINQ           1.849e+03  9.021e+01  20.501  < 2e-16 ***
## IMP_VALUE            1.496e-02  2.785e-03   5.371 8.27e-08 ***
## IMP_MORTDUE         -1.281e-02  3.533e-03  -3.626 0.000291 ***
## IMP_DEROG            9.097e+02  1.122e+02   8.109 6.65e-16 ***
## IMP_CLAGE           -1.011e+01  1.111e+00  -9.095  < 2e-16 ***
## IMP_CLNO             5.043e+01  9.918e+00   5.085 3.85e-07 ***
## FLAG.Reason.HomeImp -7.578e+02  4.839e+02  -1.566 0.117434    
## FLAG.Reason.DebtCon -1.360e+02  4.764e+02  -0.286 0.775215    
## M_VALUE              6.679e+03  6.613e+02  10.099  < 2e-16 ***
## M_DEROG             -2.275e+03  4.189e+02  -5.430 5.94e-08 ***
## M_DELINQ            -1.613e+03  5.646e+02  -2.857 0.004301 ** 
## M_NINQ               6.313e+02  5.118e+02   1.234 0.217440    
## M_MORTDUE            1.323e+03  3.488e+02   3.794 0.000151 ***
## M_YOJ               -4.643e+02  3.416e+02  -1.359 0.174244    
## FLAG.Job.Self        1.078e+03  5.073e+02   2.124 0.033707 *  
## IMP_YOJ             -3.595e+01  1.223e+01  -2.940 0.003297 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5522 on 4148 degrees of freedom
## Multiple R-squared:  0.4188, Adjusted R-squared:  0.4161 
## F-statistic: 157.3 on 19 and 4148 DF,  p-value: < 2.2e-16
# Intercept-only baseline, shown for comparison with the tree-variable model
summary(theLower_LR)
## 
## Call:
## lm(formula = TARGET_LOSS_AMT ~ 1, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -2676  -2676  -2676  -2676  76311 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2676.0      111.9   23.91   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7226 on 4167 degrees of freedom
# Test-set predictions and RMSE for the tree-variable linear model
plr_tree <- predict(tree_LR, newdata = test)
head(plr_tree)
##          2          4          8         17         18         22 
##  7285.6669 11561.0068 -2270.8502 17262.7927   897.4124  3818.7355
RMSElr_tree <- sqrt(mean((plr_tree - test$TARGET_LOSS_AMT)^2))

# Forward selection by AIC: start from the intercept-only model and add
# terms, bounded above by the tree-variable linear model
lrt_model <- stepAIC(
  theLower_LR,
  scope = list(lower = theLower_LR, upper = tree_LR),
  direction = "forward"
)
## Start:  AIC=74070.21
## TARGET_LOSS_AMT ~ 1
## 
##                       Df  Sum of Sq        RSS   AIC
## + M_DEBTINC            1 4.2985e+10 1.7460e+11 73155
## + IMP_DELINQ           1 2.8566e+10 1.8902e+11 73486
## + M_VALUE              1 1.1486e+10 2.0610e+11 73846
## + IMP_DEROG            1 1.0979e+10 2.0661e+11 73856
## + LOAN                 1 9.3230e+09 2.0827e+11 73890
## + IMP_DEBTINC          1 8.6779e+09 2.0891e+11 73903
## + IMP_CLAGE            1 3.2843e+09 2.1431e+11 74009
## + IMP_CLNO             1 3.0963e+09 2.1449e+11 74012
## + IMP_VALUE            1 2.4769e+09 2.1511e+11 74024
## + FLAG.Job.Self        1 1.7376e+09 2.1585e+11 74039
## + IMP_MORTDUE          1 1.1685e+09 2.1642e+11 74050
## + FLAG.Reason.DebtCon  1 1.0899e+09 2.1650e+11 74051
## + FLAG.Reason.HomeImp  1 1.0621e+09 2.1653e+11 74052
## + M_DELINQ             1 6.4479e+08 2.1694e+11 74060
## + M_DEROG              1 5.0857e+08 2.1708e+11 74062
## + M_NINQ               1 4.1887e+08 2.1717e+11 74064
## + M_YOJ                1 3.0633e+08 2.1728e+11 74066
## + M_MORTDUE            1 1.9024e+08 2.1740e+11 74069
## + IMP_YOJ              1 1.6165e+08 2.1743e+11 74069
## <none>                              2.1759e+11 74070
## 
## Step:  AIC=73154.88
## TARGET_LOSS_AMT ~ M_DEBTINC
## 
##                       Df  Sum of Sq        RSS   AIC
## + IMP_DELINQ           1 1.6104e+10 1.5850e+11 72754
## + LOAN                 1 1.4591e+10 1.6001e+11 72793
## + IMP_DEBTINC          1 6.3106e+09 1.6829e+11 73003
## + M_VALUE              1 5.4902e+09 1.6911e+11 73024
## + IMP_DEROG            1 4.9960e+09 1.6961e+11 73036
## + IMP_VALUE            1 3.5787e+09 1.7103e+11 73071
## + IMP_CLNO             1 3.1968e+09 1.7141e+11 73080
## + IMP_MORTDUE          1 1.9240e+09 1.7268e+11 73111
## + FLAG.Job.Self        1 1.6453e+09 1.7296e+11 73117
## + IMP_CLAGE            1 1.4958e+09 1.7311e+11 73121
## + FLAG.Reason.DebtCon  1 1.4535e+09 1.7315e+11 73122
## + FLAG.Reason.HomeImp  1 1.4260e+09 1.7318e+11 73123
## + M_DELINQ             1 4.6854e+08 1.7414e+11 73146
## + M_DEROG              1 3.4032e+08 1.7426e+11 73149
## + M_NINQ               1 3.0266e+08 1.7430e+11 73150
## + M_MORTDUE            1 2.0576e+08 1.7440e+11 73152
## + M_YOJ                1 1.2972e+08 1.7447e+11 73154
## <none>                              1.7460e+11 73155
## + IMP_YOJ              1 8.3110e+07 1.7452e+11 73155
## 
## Step:  AIC=72753.57
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ
## 
##                       Df  Sum of Sq        RSS   AIC
## + LOAN                 1 1.4874e+10 1.4363e+11 72345
## + IMP_DEBTINC          1 5.6023e+09 1.5290e+11 72606
## + IMP_VALUE            1 4.0549e+09 1.5445e+11 72648
## + M_VALUE              1 3.4269e+09 1.5507e+11 72664
## + M_DEROG              1 2.8353e+09 1.5567e+11 72680
## + IMP_CLAGE            1 1.9153e+09 1.5659e+11 72705
## + IMP_MORTDUE          1 1.8874e+09 1.5661e+11 72706
## + M_DELINQ             1 1.8418e+09 1.5666e+11 72707
## + IMP_DEROG            1 1.7732e+09 1.5673e+11 72709
## + FLAG.Job.Self        1 1.5504e+09 1.5695e+11 72715
## + FLAG.Reason.DebtCon  1 1.4422e+09 1.5706e+11 72717
## + IMP_CLNO             1 1.4164e+09 1.5708e+11 72718
## + M_NINQ               1 1.4129e+09 1.5709e+11 72718
## + FLAG.Reason.HomeImp  1 1.1616e+09 1.5734e+11 72725
## + M_MORTDUE            1 2.4227e+08 1.5826e+11 72749
## + IMP_YOJ              1 2.2912e+08 1.5827e+11 72750
## + M_YOJ                1 1.9667e+08 1.5830e+11 72750
## <none>                              1.5850e+11 72754
## 
## Step:  AIC=72344.85
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN
## 
##                       Df  Sum of Sq        RSS   AIC
## + IMP_DEBTINC          1 4382342554 1.3924e+11 72218
## + M_VALUE              1 3040572111 1.4059e+11 72258
## + IMP_CLAGE            1 2818079413 1.4081e+11 72264
## + M_DEROG              1 1840613969 1.4179e+11 72293
## + IMP_DEROG            1 1734283481 1.4189e+11 72296
## + M_DELINQ             1 1417152320 1.4221e+11 72306
## + M_NINQ               1  901508310 1.4272e+11 72321
## + IMP_YOJ              1  859705364 1.4277e+11 72322
## + IMP_CLNO             1  794603825 1.4283e+11 72324
## + IMP_VALUE            1  630648240 1.4300e+11 72329
## + FLAG.Reason.DebtCon  1  371236657 1.4326e+11 72336
## + FLAG.Job.Self        1  367115990 1.4326e+11 72336
## + M_YOJ                1  363655615 1.4326e+11 72336
## + IMP_MORTDUE          1  338903409 1.4329e+11 72337
## + FLAG.Reason.HomeImp  1  289762507 1.4334e+11 72338
## + M_MORTDUE            1  176392369 1.4345e+11 72342
## <none>                              1.4363e+11 72345
## 
## Step:  AIC=72217.69
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC
## 
##                       Df  Sum of Sq        RSS   AIC
## + M_VALUE              1 3261573768 1.3598e+11 72121
## + IMP_CLAGE            1 2442457353 1.3680e+11 72146
## + M_DEROG              1 1871059034 1.3737e+11 72163
## + IMP_DEROG            1 1681490508 1.3756e+11 72169
## + M_DELINQ             1 1207409723 1.3804e+11 72183
## + IMP_YOJ              1  671007839 1.3857e+11 72200
## + M_NINQ               1  566586131 1.3868e+11 72203
## + IMP_VALUE            1  363744397 1.3888e+11 72209
## + IMP_CLNO             1  329813904 1.3891e+11 72210
## + FLAG.Job.Self        1  321592197 1.3892e+11 72210
## + M_YOJ                1  313832007 1.3893e+11 72210
## + FLAG.Reason.HomeImp  1  257845757 1.3899e+11 72212
## + FLAG.Reason.DebtCon  1  236493592 1.3901e+11 72213
## + IMP_MORTDUE          1  104153521 1.3914e+11 72217
## <none>                              1.3924e+11 72218
## + M_MORTDUE            1    5870760 1.3924e+11 72220
## 
## Step:  AIC=72120.9
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE
## 
##                       Df  Sum of Sq        RSS   AIC
## + IMP_CLAGE            1 2417174554 1.3357e+11 72048
## + M_DEROG              1 2063724215 1.3392e+11 72059
## + M_DELINQ             1 1399521757 1.3458e+11 72080
## + IMP_DEROG            1 1324850098 1.3466e+11 72082
## + IMP_YOJ              1  644362288 1.3534e+11 72103
## + M_NINQ               1  620583975 1.3536e+11 72104
## + IMP_VALUE            1  425540654 1.3556e+11 72110
## + IMP_CLNO             1  349260052 1.3563e+11 72112
## + M_YOJ                1  334603026 1.3565e+11 72113
## + FLAG.Reason.DebtCon  1  317232133 1.3567e+11 72113
## + FLAG.Job.Self        1  279790362 1.3570e+11 72114
## + FLAG.Reason.HomeImp  1  271577259 1.3571e+11 72115
## + IMP_MORTDUE          1   82929578 1.3590e+11 72120
## <none>                              1.3598e+11 72121
## + M_MORTDUE            1   33675071 1.3595e+11 72122
## 
## Step:  AIC=72048.15
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE
## 
##                       Df  Sum of Sq        RSS   AIC
## + M_DEROG              1 2038154395 1.3153e+11 71986
## + M_DELINQ             1 1598723672 1.3197e+11 72000
## + IMP_DEROG            1 1112720283 1.3245e+11 72015
## + IMP_CLNO             1  981805526 1.3258e+11 72019
## + IMP_VALUE            1  785067759 1.3278e+11 72026
## + M_NINQ               1  687476699 1.3288e+11 72029
## + IMP_YOJ              1  281389216 1.3328e+11 72041
## + FLAG.Job.Self        1  230543969 1.3333e+11 72043
## + M_YOJ                1  224083673 1.3334e+11 72043
## + IMP_MORTDUE          1  204621394 1.3336e+11 72044
## + FLAG.Reason.DebtCon  1  190532848 1.3337e+11 72044
## + FLAG.Reason.HomeImp  1  176483568 1.3339e+11 72045
## <none>                              1.3357e+11 72048
## + M_MORTDUE            1   21665019 1.3354e+11 72049
## 
## Step:  AIC=71986.05
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG
## 
##                       Df  Sum of Sq        RSS   AIC
## + IMP_DEROG            1 2135460095 1.2939e+11 71920
## + IMP_CLNO             1 1050943433 1.3048e+11 71955
## + IMP_VALUE            1  758647495 1.3077e+11 71964
## + IMP_YOJ              1  339256390 1.3119e+11 71977
## + FLAG.Reason.HomeImp  1  251367594 1.3128e+11 71980
## + IMP_MORTDUE          1  233447781 1.3129e+11 71981
## + FLAG.Reason.DebtCon  1  186168086 1.3134e+11 71982
## + FLAG.Job.Self        1  179278280 1.3135e+11 71982
## + M_DELINQ             1   95610280 1.3143e+11 71985
## <none>                              1.3153e+11 71986
## + M_YOJ                1   32656131 1.3149e+11 71987
## + M_MORTDUE            1   13140421 1.3151e+11 71988
## + M_NINQ               1    6036669 1.3152e+11 71988
## 
## Step:  AIC=71919.83
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG
## 
##                       Df Sum of Sq        RSS   AIC
## + IMP_CLNO             1 930428675 1.2846e+11 71892
## + IMP_VALUE            1 841534830 1.2855e+11 71895
## + IMP_MORTDUE          1 286457586 1.2911e+11 71913
## + IMP_YOJ              1 252248930 1.2914e+11 71914
## + FLAG.Reason.HomeImp  1 238236907 1.2915e+11 71914
## + M_DELINQ             1 215786383 1.2918e+11 71915
## + FLAG.Job.Self        1 197705488 1.2919e+11 71915
## + FLAG.Reason.DebtCon  1 180065494 1.2921e+11 71916
## + M_YOJ                1  73072118 1.2932e+11 71919
## <none>                             1.2939e+11 71920
## + M_NINQ               1   4155585 1.2939e+11 71922
## + M_MORTDUE            1   3874039 1.2939e+11 71922
## 
## Step:  AIC=71891.75
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO
## 
##                       Df Sum of Sq        RSS   AIC
## + IMP_VALUE            1 520419939 1.2794e+11 71877
## + IMP_YOJ              1 246282855 1.2821e+11 71886
## + FLAG.Job.Self        1 187256646 1.2827e+11 71888
## + M_DELINQ             1 161400460 1.2830e+11 71889
## + FLAG.Reason.HomeImp  1 154305048 1.2831e+11 71889
## + FLAG.Reason.DebtCon  1  96549754 1.2836e+11 71891
## + M_MORTDUE            1  87637800 1.2837e+11 71891
## + IMP_MORTDUE          1  77176790 1.2838e+11 71891
## <none>                             1.2846e+11 71892
## + M_YOJ                1  17344982 1.2844e+11 71893
## + M_NINQ               1    449465 1.2846e+11 71894
## 
## Step:  AIC=71876.83
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE
## 
##                       Df Sum of Sq        RSS   AIC
## + IMP_YOJ              1 209903284 1.2773e+11 71872
## + IMP_MORTDUE          1 199669995 1.2774e+11 71872
## + FLAG.Reason.HomeImp  1 199449417 1.2774e+11 71872
## + M_MORTDUE            1 171310165 1.2777e+11 71873
## + FLAG.Reason.DebtCon  1 134929142 1.2781e+11 71874
## + M_DELINQ             1 131241955 1.2781e+11 71875
## + FLAG.Job.Self        1 125703535 1.2782e+11 71875
## <none>                             1.2794e+11 71877
## + M_YOJ                1   5175937 1.2794e+11 71879
## + M_NINQ               1     40992 1.2794e+11 71879
## 
## Step:  AIC=71871.98
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE + 
##     IMP_YOJ
## 
##                       Df Sum of Sq        RSS   AIC
## + IMP_MORTDUE          1 262961061 1.2747e+11 71865
## + FLAG.Reason.HomeImp  1 181555262 1.2755e+11 71868
## + M_DELINQ             1 167151797 1.2756e+11 71869
## + M_MORTDUE            1 151757897 1.2758e+11 71869
## + FLAG.Reason.DebtCon  1 122757177 1.2761e+11 71870
## + FLAG.Job.Self        1 107630208 1.2762e+11 71870
## <none>                             1.2773e+11 71872
## + M_YOJ                1  13672561 1.2772e+11 71874
## + M_NINQ               1   1565056 1.2773e+11 71874
## 
## Step:  AIC=71865.4
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE + 
##     IMP_YOJ + IMP_MORTDUE
## 
##                       Df Sum of Sq        RSS   AIC
## + M_MORTDUE            1 265350493 1.2720e+11 71859
## + FLAG.Reason.HomeImp  1 188691909 1.2728e+11 71861
## + M_DELINQ             1 157320316 1.2731e+11 71862
## + FLAG.Reason.DebtCon  1 132390092 1.2734e+11 71863
## + FLAG.Job.Self        1 102522707 1.2737e+11 71864
## <none>                             1.2747e+11 71865
## + M_YOJ                1  11725976 1.2746e+11 71867
## + M_NINQ               1   1959077 1.2747e+11 71867
## 
## Step:  AIC=71858.71
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE + 
##     IMP_YOJ + IMP_MORTDUE + M_MORTDUE
## 
##                       Df Sum of Sq        RSS   AIC
## + FLAG.Reason.HomeImp  1 258652854 1.2694e+11 71852
## + M_DELINQ             1 234328337 1.2697e+11 71853
## + FLAG.Reason.DebtCon  1 221215436 1.2698e+11 71853
## + FLAG.Job.Self        1  94363758 1.2711e+11 71858
## + M_YOJ                1  65602818 1.2714e+11 71859
## <none>                             1.2720e+11 71859
## + M_NINQ               1  13784086 1.2719e+11 71860
## 
## Step:  AIC=71852.23
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE + 
##     IMP_YOJ + IMP_MORTDUE + M_MORTDUE + FLAG.Reason.HomeImp
## 
##                       Df Sum of Sq        RSS   AIC
## + M_DELINQ             1 229643185 1.2671e+11 71847
## + FLAG.Job.Self        1 146413335 1.2680e+11 71849
## + M_YOJ                1  74623693 1.2687e+11 71852
## <none>                             1.2694e+11 71852
## + M_NINQ               1  10140778 1.2693e+11 71854
## + FLAG.Reason.DebtCon  1    178835 1.2694e+11 71854
## 
## Step:  AIC=71846.68
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE + 
##     IMP_YOJ + IMP_MORTDUE + M_MORTDUE + FLAG.Reason.HomeImp + 
##     M_DELINQ
## 
##                       Df Sum of Sq        RSS   AIC
## + FLAG.Job.Self        1 153257181 1.2656e+11 71844
## <none>                             1.2671e+11 71847
## + M_NINQ               1  47816102 1.2667e+11 71847
## + M_YOJ                1  43983881 1.2667e+11 71847
## + FLAG.Reason.DebtCon  1   1138654 1.2671e+11 71849
## 
## Step:  AIC=71843.64
## TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + IMP_DEBTINC + 
##     M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + IMP_CLNO + IMP_VALUE + 
##     IMP_YOJ + IMP_MORTDUE + M_MORTDUE + FLAG.Reason.HomeImp + 
##     M_DELINQ + FLAG.Job.Self
## 
##                       Df Sum of Sq        RSS   AIC
## <none>                             1.2656e+11 71844
## + M_YOJ                1  41931294 1.2652e+11 71844
## + M_NINQ               1  36078051 1.2652e+11 71844
## + FLAG.Reason.DebtCon  1   1131206 1.2656e+11 71846
# Show the coefficient table and fit statistics of lrt_model, the linear
# model left at the end of the stepwise trace printed above.
summary( lrt_model )
## 
## Call:
## lm(formula = TARGET_LOSS_AMT ~ M_DEBTINC + IMP_DELINQ + LOAN + 
##     IMP_DEBTINC + M_VALUE + IMP_CLAGE + M_DEROG + IMP_DEROG + 
##     IMP_CLNO + IMP_VALUE + IMP_YOJ + IMP_MORTDUE + M_MORTDUE + 
##     FLAG.Reason.HomeImp + M_DELINQ + FLAG.Job.Self, data = train)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17087  -2499   -374   1587  58426 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -5.708e+03  4.783e+02 -11.934  < 2e-16 ***
## M_DEBTINC            6.200e+03  2.209e+02  28.064  < 2e-16 ***
## IMP_DELINQ           1.854e+03  9.011e+01  20.578  < 2e-16 ***
## LOAN                 1.366e-01  8.414e-03  16.228  < 2e-16 ***
## IMP_DEBTINC          1.194e+02  1.125e+01  10.613  < 2e-16 ***
## M_VALUE              6.684e+03  6.585e+02  10.150  < 2e-16 ***
## IMP_CLAGE           -1.024e+01  1.104e+00  -9.282  < 2e-16 ***
## M_DEROG             -2.216e+03  4.160e+02  -5.328 1.05e-07 ***
## IMP_DEROG            9.099e+02  1.120e+02   8.122 5.99e-16 ***
## IMP_CLNO             5.133e+01  9.895e+00   5.187 2.24e-07 ***
## IMP_VALUE            1.511e-02  2.774e-03   5.447 5.42e-08 ***
## IMP_YOJ             -3.490e+01  1.219e+01  -2.864 0.004207 ** 
## IMP_MORTDUE         -1.283e-02  3.522e-03  -3.642 0.000274 ***
## M_MORTDUE            1.240e+03  3.356e+02   3.695 0.000223 ***
## FLAG.Reason.HomeImp -6.171e+02  1.945e+02  -3.173 0.001520 ** 
## M_DELINQ            -1.278e+03  4.589e+02  -2.785 0.005377 ** 
## FLAG.Job.Self        1.134e+03  5.057e+02   2.242 0.025014 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5522 on 4151 degrees of freedom
## Multiple R-squared:  0.4183, Adjusted R-squared:  0.4161 
## F-statistic: 186.6 on 16 and 4151 DF,  p-value: < 2.2e-16
# Score the test rows with the stepwise linear model and preview the first
# few predicted loss amounts.
plr_tree_step <- predict(lrt_model, newdata = test)
head(plr_tree_step)
##          2          4          8         17         18         22 
##  7300.5000 11579.4541 -2273.9803 17308.8567   619.5205  3801.3538
# Root-mean-squared error of the predictions against the observed
# TARGET_LOSS_AMT on the test set.
RMSElr_tree_step <- sqrt(mean((plr_tree_step - test[["TARGET_LOSS_AMT"]])^2))


# RMSE report: one line per loss-amount model, printed in the same order
# and with the same labels so the runs can be compared side by side.
print(paste("TREE RMSE=", RMSEt, sep = " "))
## [1] "TREE RMSE= 4996.32928769835"
print(paste("RF RMSE=", RMSEr, sep = " "))
## [1] "RF RMSE= 4020.42485050438"
print(paste("GB RMSE=", RMSEg, sep = " "))
## [1] "GB RMSE= 5843.09744413598"
print(paste("LR BACK RMSE=", RMSElr, sep = " "))
## [1] "LR BACK RMSE= 5449.94944750639"
print(paste("LR TREE RMSE=", RMSElr_tree, sep = " "))
## [1] "LR TREE RMSE= 5504.81050975738"
print(paste("LR TREE FORWARD STEP RMSE=", RMSElr_tree_step, sep = " "))
## [1] "LR TREE FORWARD STEP RMSE= 5510.20534361442"
# !!!SUMMARY & ANALYSIS!!!
# Based on RMSE, the Random Forest model performs best.
# Its RMSE is significantly lower ($800+) than that of every other method,
# so Random Forest is recommended because of its accuracy.


# Step 4 - Probability / Severity Model

# Randomly split copy_wk5 into training and test rows. Each row is drawn
# independently with probability 0.7 of going to the training set, so the
# split is roughly (not exactly) 70% / 30%.
sample <- sample(
  x = c(TRUE, FALSE),
  size = nrow(copy_wk5),
  replace = TRUE,
  prob = c(0.7, 0.3)
)
train <- copy_wk5[sample, ]
test <- copy_wk5[!sample, ]

# Predict Bad Flag Using Logistic Regression
# Backward stepwise selection by AIC, bounded below by the intercept-only
# model and above by the model with every column except TARGET_LOSS_AMT.
theLower_LR <- glm(TARGET_BAD_FLAG ~ 1, family = "binomial", data = train)
theUpper_LR <- glm(
  TARGET_BAD_FLAG ~ . - TARGET_LOSS_AMT,
  family = "binomial",
  data = train
)
LR_flag <- stepAIC(
  theUpper_LR,
  direction = "backward",
  scope = list(lower = theLower_LR, upper = theUpper_LR)
)
## Start:  AIC=2251.09
## TARGET_BAD_FLAG ~ (TARGET_LOSS_AMT + LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + 
##     IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + 
##     M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + 
##     FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp) - 
##     TARGET_LOSS_AMT
## 
##                       Df Deviance    AIC
## - FLAG.Reason.DebtCon  1   2195.1 2249.1
## - FLAG.Reason.HomeImp  1   2195.2 2249.2
## - M_NINQ               1   2195.4 2249.4
## - LOAN                 1   2195.9 2249.9
## <none>                     2195.1 2251.1
## - IMP_CLNO             1   2197.5 2251.5
## - IMP_YOJ              1   2198.1 2252.1
## - M_CLAGE              1   2199.2 2253.2
## - IMP_MORTDUE          1   2199.3 2253.3
## - M_YOJ                1   2200.1 2254.1
## - IMP_VALUE            1   2201.5 2255.5
## - M_MORTDUE            1   2202.0 2256.0
## - FLAG.Job.Office      1   2202.6 2256.6
## - M_DELINQ             1   2208.2 2262.2
## - FLAG.Job.ProfExe     1   2211.6 2265.6
## - M_CLNO               1   2215.2 2269.2
## - FLAG.Job.Mgr         1   2215.3 2269.3
## - FLAG.Job.Self        1   2216.4 2270.4
## - IMP_NINQ             1   2216.8 2270.8
## - FLAG.Job.Other       1   2218.2 2272.2
## - FLAG.Job.Sales       1   2219.5 2273.5
## - M_DEROG              1   2254.7 2308.7
## - IMP_CLAGE            1   2258.1 2312.1
## - IMP_DEROG            1   2259.4 2313.4
## - IMP_DEBTINC          1   2290.8 2344.8
## - M_VALUE              1   2316.4 2370.4
## - IMP_DELINQ           1   2408.4 2462.4
## - M_DEBTINC            1   2853.2 2907.2
## 
## Step:  AIC=2249.1
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO + 
##     M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.HomeImp
## 
##                       Df Deviance    AIC
## - FLAG.Reason.HomeImp  1   2195.3 2247.3
## - M_NINQ               1   2195.4 2247.4
## - LOAN                 1   2195.9 2247.9
## <none>                     2195.1 2249.1
## - IMP_CLNO             1   2197.5 2249.5
## - IMP_YOJ              1   2198.1 2250.1
## - M_CLAGE              1   2199.2 2251.2
## - IMP_MORTDUE          1   2199.3 2251.3
## - M_YOJ                1   2200.2 2252.2
## - IMP_VALUE            1   2201.5 2253.5
## - M_MORTDUE            1   2202.0 2254.0
## - FLAG.Job.Office      1   2203.3 2255.3
## - M_DELINQ             1   2208.2 2260.2
## - FLAG.Job.ProfExe     1   2212.9 2264.9
## - M_CLNO               1   2215.2 2267.2
## - IMP_NINQ             1   2216.8 2268.8
## - FLAG.Job.Mgr         1   2216.8 2268.8
## - FLAG.Job.Self        1   2217.4 2269.4
## - FLAG.Job.Other       1   2220.4 2272.4
## - FLAG.Job.Sales       1   2220.7 2272.7
## - M_DEROG              1   2254.7 2306.7
## - IMP_CLAGE            1   2258.4 2310.4
## - IMP_DEROG            1   2259.4 2311.4
## - IMP_DEBTINC          1   2290.9 2342.9
## - M_VALUE              1   2317.5 2369.5
## - IMP_DELINQ           1   2408.6 2460.6
## - M_DEBTINC            1   2853.7 2905.7
## 
## Step:  AIC=2247.29
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO + 
##     M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
## 
##                    Df Deviance    AIC
## - M_NINQ            1   2195.6 2245.6
## - LOAN              1   2196.3 2246.3
## <none>                  2195.3 2247.3
## - IMP_CLNO          1   2197.9 2247.9
## - IMP_YOJ           1   2198.3 2248.3
## - M_CLAGE           1   2199.3 2249.3
## - IMP_MORTDUE       1   2199.7 2249.7
## - M_YOJ             1   2200.6 2250.6
## - IMP_VALUE         1   2202.0 2252.0
## - M_MORTDUE         1   2202.8 2252.8
## - FLAG.Job.Office   1   2203.7 2253.7
## - M_DELINQ          1   2208.4 2258.4
## - FLAG.Job.ProfExe  1   2213.6 2263.6
## - M_CLNO            1   2215.8 2265.8
## - IMP_NINQ          1   2216.8 2266.8
## - FLAG.Job.Mgr      1   2217.4 2267.4
## - FLAG.Job.Self     1   2218.6 2268.6
## - FLAG.Job.Other    1   2221.2 2271.2
## - FLAG.Job.Sales    1   2221.2 2271.2
## - M_DEROG           1   2255.5 2305.5
## - IMP_CLAGE         1   2258.4 2308.4
## - IMP_DEROG         1   2260.0 2310.0
## - IMP_DEBTINC       1   2291.3 2341.3
## - M_VALUE           1   2317.7 2367.7
## - IMP_DELINQ        1   2409.1 2459.1
## - M_DEBTINC         1   2855.8 2905.8
## 
## Step:  AIC=2245.57
## TARGET_BAD_FLAG ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + 
##     IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
## 
##                    Df Deviance    AIC
## - LOAN              1   2196.6 2244.6
## <none>                  2195.6 2245.6
## - IMP_CLNO          1   2198.2 2246.2
## - IMP_YOJ           1   2198.6 2246.6
## - M_CLAGE           1   2199.6 2247.6
## - IMP_MORTDUE       1   2199.9 2247.9
## - M_YOJ             1   2201.3 2249.3
## - IMP_VALUE         1   2202.2 2250.2
## - M_MORTDUE         1   2202.9 2250.9
## - FLAG.Job.Office   1   2203.9 2251.9
## - FLAG.Job.ProfExe  1   2213.9 2261.9
## - M_DELINQ          1   2215.7 2263.7
## - M_CLNO            1   2216.8 2264.8
## - IMP_NINQ          1   2217.4 2265.4
## - FLAG.Job.Mgr      1   2217.7 2265.7
## - FLAG.Job.Self     1   2218.6 2266.6
## - FLAG.Job.Other    1   2221.5 2269.5
## - FLAG.Job.Sales    1   2221.6 2269.6
## - M_DEROG           1   2255.8 2303.8
## - IMP_CLAGE         1   2258.5 2306.5
## - IMP_DEROG         1   2260.1 2308.1
## - IMP_DEBTINC       1   2293.6 2341.6
## - M_VALUE           1   2320.0 2368.0
## - IMP_DELINQ        1   2409.1 2457.1
## - M_DEBTINC         1   2857.6 2905.6
## 
## Step:  AIC=2244.59
## TARGET_BAD_FLAG ~ IMP_MORTDUE + M_MORTDUE + IMP_VALUE + M_VALUE + 
##     IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + M_DELINQ + 
##     IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + 
##     M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other + 
##     FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self
## 
##                    Df Deviance    AIC
## <none>                  2196.6 2244.6
## - IMP_CLNO          1   2199.5 2245.5
## - IMP_YOJ           1   2200.1 2246.1
## - M_CLAGE           1   2200.4 2246.4
## - IMP_MORTDUE       1   2200.9 2246.9
## - IMP_VALUE         1   2202.3 2248.3
## - M_YOJ             1   2202.9 2248.9
## - M_MORTDUE         1   2203.7 2249.7
## - FLAG.Job.Office   1   2204.8 2250.8
## - FLAG.Job.ProfExe  1   2214.9 2260.9
## - IMP_NINQ          1   2217.7 2263.7
## - M_DELINQ          1   2218.1 2264.1
## - FLAG.Job.Mgr      1   2218.5 2264.5
## - FLAG.Job.Self     1   2219.0 2265.0
## - M_CLNO            1   2219.3 2265.3
## - FLAG.Job.Other    1   2222.4 2268.4
## - FLAG.Job.Sales    1   2223.2 2269.2
## - M_DEROG           1   2256.1 2302.1
## - IMP_CLAGE         1   2260.4 2306.4
## - IMP_DEROG         1   2261.0 2307.0
## - IMP_DEBTINC       1   2293.9 2339.9
## - M_VALUE           1   2320.4 2366.4
## - IMP_DELINQ        1   2411.8 2457.8
## - M_DEBTINC         1   2875.7 2921.7
# Show coefficients, deviances and AIC of LR_flag, the logistic model kept
# by the backward stepAIC search above.
summary(LR_flag)
## 
## Call:
## glm(formula = TARGET_BAD_FLAG ~ IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + IMP_CLNO + M_CLNO + 
##     IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self, 
##     family = "binomial", data = train)
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -7.102e+00  6.556e-01 -10.832  < 2e-16 ***
## IMP_MORTDUE      -4.207e-06  2.075e-06  -2.027 0.042664 *  
## M_MORTDUE         6.703e-01  2.481e-01   2.702 0.006888 ** 
## IMP_VALUE         3.346e-06  1.426e-06   2.347 0.018912 *  
## M_VALUE           5.384e+00  6.393e-01   8.422  < 2e-16 ***
## IMP_YOJ          -1.571e-02  8.484e-03  -1.851 0.064130 .  
## M_YOJ            -5.722e-01  2.333e-01  -2.453 0.014187 *  
## IMP_DEROG         5.418e-01  7.453e-02   7.269 3.61e-13 ***
## M_DEROG          -2.293e+00  3.367e-01  -6.810 9.75e-12 ***
## IMP_DELINQ        7.816e-01  6.226e-02  12.555  < 2e-16 ***
## M_DELINQ         -1.908e+00  4.495e-01  -4.244 2.20e-05 ***
## IMP_CLAGE        -6.288e-03  8.306e-04  -7.571 3.71e-14 ***
## M_CLAGE           8.359e-01  4.239e-01   1.972 0.048599 *  
## IMP_NINQ          1.417e-01  3.068e-02   4.620 3.83e-06 ***
## IMP_CLNO         -1.099e-02  6.424e-03  -1.711 0.087086 .  
## M_CLNO            3.331e+00  7.307e-01   4.558 5.15e-06 ***
## IMP_DEBTINC       9.388e-02  1.079e-02   8.700  < 2e-16 ***
## M_DEBTINC         2.812e+00  1.154e-01  24.373  < 2e-16 ***
## FLAG.Job.Mgr      2.163e+00  5.179e-01   4.177 2.95e-05 ***
## FLAG.Job.Office   1.396e+00  5.234e-01   2.667 0.007643 ** 
## FLAG.Job.Other    2.214e+00  5.018e-01   4.412 1.02e-05 ***
## FLAG.Job.ProfExe  1.985e+00  5.172e-01   3.838 0.000124 ***
## FLAG.Job.Sales    3.068e+00  6.220e-01   4.933 8.08e-07 ***
## FLAG.Job.Self     2.552e+00  5.797e-01   4.401 1.08e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4168.3  on 4189  degrees of freedom
## Residual deviance: 2196.6  on 4166  degrees of freedom
## AIC: 2244.6
## 
## Number of Fisher Scoring iterations: 6
# Predict LOSS Using Linear Regression
# Severity model: fit only on the training rows with TARGET_BAD_FLAG == 1.
train_subset <- subset(train, TARGET_BAD_FLAG == 1)

# TARGET_BAD_FLAG is 1 on every row of train_subset, so as a predictor it is
# constant and makes the full model rank-deficient. Exclude it explicitly
# (the same way the logistic model excludes TARGET_LOSS_AMT) rather than
# relying on stepAIC to discard the zero-df term in its first step.
theUpperloss_LR <- lm(
  TARGET_LOSS_AMT ~ . - TARGET_BAD_FLAG,
  data = train_subset
)
theLowerloss_LR <- lm(TARGET_LOSS_AMT ~ 1, data = train_subset)

# Backward stepwise selection by AIC between the intercept-only model and
# the full model defined above.
LR_Loss <- stepAIC(
  theUpperloss_LR,
  direction = "backward",
  scope = list(lower = theLowerloss_LR, upper = theUpperloss_LR)
)
## Start:  AIC=13598.56
## TARGET_LOSS_AMT ~ TARGET_BAD_FLAG + LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + 
##     IMP_DELINQ + M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + 
##     M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + 
##     FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
## 
## Step:  AIC=13598.56
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + IMP_NINQ + M_NINQ + IMP_CLNO + 
##     M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - IMP_NINQ             1 6.0976e+05 1.0309e+10 13597
## - M_DELINQ             1 8.4722e+05 1.0309e+10 13597
## - M_VALUE              1 2.9678e+06 1.0312e+10 13597
## - FLAG.Job.Office      1 3.1046e+06 1.0312e+10 13597
## - M_YOJ                1 3.8661e+06 1.0312e+10 13597
## - FLAG.Job.Other       1 4.5477e+06 1.0313e+10 13597
## - FLAG.Job.Mgr         1 6.9592e+06 1.0316e+10 13597
## - FLAG.Job.ProfExe     1 1.1109e+07 1.0320e+10 13598
## - M_NINQ               1 1.3113e+07 1.0322e+10 13598
## - FLAG.Job.Sales       1 2.1519e+07 1.0330e+10 13598
## - M_DEROG              1 2.4887e+07 1.0333e+10 13599
## <none>                              1.0309e+10 13599
## - FLAG.Job.Self        1 2.5532e+07 1.0334e+10 13599
## - FLAG.Reason.HomeImp  1 6.0094e+07 1.0369e+10 13601
## - IMP_DEROG            1 9.4108e+07 1.0403e+10 13604
## - M_MORTDUE            1 1.0032e+08 1.0409e+10 13605
## - IMP_MORTDUE          1 1.0125e+08 1.0410e+10 13605
## - M_CLNO               1 1.5775e+08 1.0466e+10 13609
## - FLAG.Reason.DebtCon  1 2.3273e+08 1.0541e+10 13615
## - IMP_VALUE            1 2.3457e+08 1.0543e+10 13615
## - IMP_YOJ              1 4.0292e+08 1.0711e+10 13628
## - M_CLAGE              1 4.6109e+08 1.0770e+10 13633
## - IMP_DEBTINC          1 1.1473e+09 1.1456e+10 13684
## - IMP_DELINQ           1 1.6127e+09 1.1921e+10 13717
## - IMP_CLAGE            1 1.9883e+09 1.2297e+10 13743
## - IMP_CLNO             1 3.2159e+09 1.3524e+10 13822
## - M_DEBTINC            1 5.7999e+09 1.6108e+10 13967
## - LOAN                 1 5.0514e+10 6.0822e+10 15068
## 
## Step:  AIC=13596.61
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     M_DELINQ + IMP_CLAGE + M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + 
##     IMP_DEBTINC + M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + 
##     FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - M_DELINQ             1 8.1916e+05 1.0310e+10 13595
## - FLAG.Job.Office      1 3.1771e+06 1.0312e+10 13595
## - M_VALUE              1 3.1811e+06 1.0312e+10 13595
## - M_YOJ                1 3.7335e+06 1.0313e+10 13595
## - FLAG.Job.Other       1 4.6211e+06 1.0314e+10 13595
## - FLAG.Job.Mgr         1 7.1038e+06 1.0316e+10 13595
## - FLAG.Job.ProfExe     1 1.1259e+07 1.0320e+10 13596
## - M_NINQ               1 1.3370e+07 1.0323e+10 13596
## - FLAG.Job.Sales       1 2.1297e+07 1.0330e+10 13596
## <none>                              1.0309e+10 13597
## - M_DEROG              1 2.4909e+07 1.0334e+10 13597
## - FLAG.Job.Self        1 2.5308e+07 1.0334e+10 13597
## - FLAG.Reason.HomeImp  1 5.9645e+07 1.0369e+10 13599
## - IMP_DEROG            1 9.6471e+07 1.0406e+10 13602
## - M_MORTDUE            1 9.9727e+07 1.0409e+10 13603
## - IMP_MORTDUE          1 1.0217e+08 1.0411e+10 13603
## - M_CLNO               1 1.5916e+08 1.0468e+10 13607
## - FLAG.Reason.DebtCon  1 2.3281e+08 1.0542e+10 13613
## - IMP_VALUE            1 2.3559e+08 1.0545e+10 13613
## - IMP_YOJ              1 4.0791e+08 1.0717e+10 13627
## - M_CLAGE              1 4.6816e+08 1.0777e+10 13631
## - IMP_DEBTINC          1 1.1467e+09 1.1456e+10 13682
## - IMP_DELINQ           1 1.6123e+09 1.1921e+10 13715
## - IMP_CLAGE            1 2.0135e+09 1.2323e+10 13742
## - IMP_CLNO             1 3.2323e+09 1.3541e+10 13821
## - M_DEBTINC            1 5.8622e+09 1.6171e+10 13968
## - LOAN                 1 5.0645e+10 6.0954e+10 15068
## 
## Step:  AIC=13594.68
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     M_VALUE + IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     IMP_CLAGE + M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + 
##     M_DEBTINC + FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other + 
##     FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + 
##     FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - M_VALUE              1 2.9232e+06 1.0313e+10 13593
## - FLAG.Job.Office      1 3.1345e+06 1.0313e+10 13593
## - M_YOJ                1 3.8521e+06 1.0314e+10 13593
## - FLAG.Job.Other       1 4.5520e+06 1.0315e+10 13593
## - FLAG.Job.Mgr         1 6.9977e+06 1.0317e+10 13593
## - FLAG.Job.ProfExe     1 1.1172e+07 1.0321e+10 13594
## - M_NINQ               1 1.3582e+07 1.0324e+10 13594
## - FLAG.Job.Sales       1 2.2234e+07 1.0332e+10 13594
## <none>                              1.0310e+10 13595
## - FLAG.Job.Self        1 2.5351e+07 1.0335e+10 13595
## - M_DEROG              1 2.9633e+07 1.0340e+10 13595
## - FLAG.Reason.HomeImp  1 5.9738e+07 1.0370e+10 13598
## - IMP_DEROG            1 9.6785e+07 1.0407e+10 13600
## - M_MORTDUE            1 9.9236e+07 1.0409e+10 13601
## - IMP_MORTDUE          1 1.0186e+08 1.0412e+10 13601
## - M_CLNO               1 2.1909e+08 1.0529e+10 13610
## - FLAG.Reason.DebtCon  1 2.3299e+08 1.0543e+10 13611
## - IMP_VALUE            1 2.3478e+08 1.0545e+10 13611
## - IMP_YOJ              1 4.0805e+08 1.0718e+10 13625
## - M_CLAGE              1 4.6887e+08 1.0779e+10 13630
## - IMP_DEBTINC          1 1.1462e+09 1.1456e+10 13680
## - IMP_DELINQ           1 1.6131e+09 1.1923e+10 13713
## - IMP_CLAGE            1 2.0135e+09 1.2324e+10 13741
## - IMP_CLNO             1 3.2317e+09 1.3542e+10 13819
## - M_DEBTINC            1 5.8673e+09 1.6177e+10 13966
## - LOAN                 1 5.0699e+10 6.1009e+10 15067
## 
## Step:  AIC=13592.91
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + 
##     FLAG.Job.Mgr + FLAG.Job.Office + FLAG.Job.Other + FLAG.Job.ProfExe + 
##     FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Job.Office      1 3.0359e+06 1.0316e+10 13591
## - M_YOJ                1 4.0598e+06 1.0317e+10 13591
## - FLAG.Job.Other       1 4.3675e+06 1.0317e+10 13591
## - FLAG.Job.Mgr         1 6.8565e+06 1.0320e+10 13592
## - FLAG.Job.ProfExe     1 1.0893e+07 1.0324e+10 13592
## - M_NINQ               1 1.3094e+07 1.0326e+10 13592
## - FLAG.Job.Sales       1 2.2678e+07 1.0336e+10 13593
## <none>                              1.0313e+10 13593
## - FLAG.Job.Self        1 2.5772e+07 1.0339e+10 13593
## - M_DEROG              1 2.7925e+07 1.0341e+10 13593
## - FLAG.Reason.HomeImp  1 6.2237e+07 1.0375e+10 13596
## - IMP_DEROG            1 9.5082e+07 1.0408e+10 13598
## - IMP_MORTDUE          1 9.9413e+07 1.0412e+10 13599
## - M_MORTDUE            1 1.0060e+08 1.0414e+10 13599
## - M_CLNO               1 2.1826e+08 1.0531e+10 13608
## - IMP_VALUE            1 2.3188e+08 1.0545e+10 13609
## - FLAG.Reason.DebtCon  1 2.3900e+08 1.0552e+10 13610
## - IMP_YOJ              1 4.1145e+08 1.0724e+10 13623
## - M_CLAGE              1 4.6637e+08 1.0779e+10 13628
## - IMP_DEBTINC          1 1.1610e+09 1.1474e+10 13679
## - IMP_DELINQ           1 1.6111e+09 1.1924e+10 13711
## - IMP_CLAGE            1 2.0217e+09 1.2335e+10 13739
## - IMP_CLNO             1 3.2478e+09 1.3561e+10 13818
## - M_DEBTINC            1 5.8670e+09 1.6180e+10 13964
## - LOAN                 1 5.1144e+10 6.1457e+10 15071
## 
## Step:  AIC=13591.15
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + 
##     FLAG.Job.Mgr + FLAG.Job.Other + FLAG.Job.ProfExe + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Job.Other       1 1.5954e+06 1.0318e+10 13589
## - M_YOJ                1 5.4362e+06 1.0321e+10 13590
## - FLAG.Job.Mgr         1 5.9400e+06 1.0322e+10 13590
## - M_NINQ               1 1.3705e+07 1.0330e+10 13590
## - FLAG.Job.ProfExe     1 1.6310e+07 1.0332e+10 13590
## <none>                              1.0316e+10 13591
## - M_DEROG              1 2.7752e+07 1.0344e+10 13591
## - FLAG.Reason.HomeImp  1 5.9323e+07 1.0375e+10 13594
## - FLAG.Job.Sales       1 8.6207e+07 1.0402e+10 13596
## - M_MORTDUE            1 9.8109e+07 1.0414e+10 13597
## - IMP_DEROG            1 9.9756e+07 1.0416e+10 13597
## - IMP_MORTDUE          1 9.9906e+07 1.0416e+10 13597
## - FLAG.Job.Self        1 1.3831e+08 1.0454e+10 13600
## - M_CLNO               1 2.2521e+08 1.0541e+10 13607
## - IMP_VALUE            1 2.3068e+08 1.0547e+10 13608
## - FLAG.Reason.DebtCon  1 2.4079e+08 1.0557e+10 13608
## - IMP_YOJ              1 4.2274e+08 1.0739e+10 13622
## - M_CLAGE              1 4.6692e+08 1.0783e+10 13626
## - IMP_DEBTINC          1 1.1583e+09 1.1474e+10 13677
## - IMP_DELINQ           1 1.6084e+09 1.1924e+10 13709
## - IMP_CLAGE            1 2.0192e+09 1.2335e+10 13737
## - IMP_CLNO             1 3.2461e+09 1.3562e+10 13816
## - M_DEBTINC            1 5.8766e+09 1.6193e+10 13963
## - LOAN                 1 5.1170e+10 6.1486e+10 15069
## 
## Step:  AIC=13589.28
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + 
##     FLAG.Job.Mgr + FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + 
##     FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Job.Mgr         1 4.5908e+06 1.0322e+10 13588
## - M_YOJ                1 5.1221e+06 1.0323e+10 13588
## - M_NINQ               1 1.3716e+07 1.0331e+10 13588
## - FLAG.Job.ProfExe     1 1.8408e+07 1.0336e+10 13589
## <none>                              1.0318e+10 13589
## - M_DEROG              1 2.7930e+07 1.0345e+10 13590
## - FLAG.Reason.HomeImp  1 5.8585e+07 1.0376e+10 13592
## - M_MORTDUE            1 9.9712e+07 1.0417e+10 13595
## - IMP_DEROG            1 1.0093e+08 1.0418e+10 13595
## - IMP_MORTDUE          1 1.0151e+08 1.0419e+10 13595
## - FLAG.Job.Sales       1 1.1004e+08 1.0428e+10 13596
## - FLAG.Job.Self        1 1.9710e+08 1.0515e+10 13603
## - M_CLNO               1 2.2781e+08 1.0545e+10 13605
## - IMP_VALUE            1 2.3084e+08 1.0548e+10 13606
## - FLAG.Reason.DebtCon  1 2.4022e+08 1.0558e+10 13606
## - IMP_YOJ              1 4.2336e+08 1.0741e+10 13621
## - M_CLAGE              1 4.6830e+08 1.0786e+10 13624
## - IMP_DEBTINC          1 1.1592e+09 1.1477e+10 13676
## - IMP_DELINQ           1 1.6268e+09 1.1944e+10 13709
## - IMP_CLAGE            1 2.0189e+09 1.2336e+10 13735
## - IMP_CLNO             1 3.2474e+09 1.3565e+10 13814
## - M_DEBTINC            1 5.8750e+09 1.6193e+10 13961
## - LOAN                 1 5.1250e+10 6.1568e+10 15068
## 
## Step:  AIC=13587.65
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + M_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + 
##     FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + 
##     FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - M_YOJ                1 6.0563e+06 1.0328e+10 13586
## - FLAG.Job.ProfExe     1 1.4775e+07 1.0337e+10 13587
## - M_NINQ               1 1.4799e+07 1.0337e+10 13587
## <none>                              1.0322e+10 13588
## - M_DEROG              1 2.9931e+07 1.0352e+10 13588
## - FLAG.Reason.HomeImp  1 5.8002e+07 1.0380e+10 13590
## - M_MORTDUE            1 9.7574e+07 1.0420e+10 13594
## - IMP_MORTDUE          1 9.8132e+07 1.0420e+10 13594
## - IMP_DEROG            1 9.8651e+07 1.0421e+10 13594
## - FLAG.Job.Sales       1 1.1705e+08 1.0439e+10 13595
## - FLAG.Job.Self        1 2.1021e+08 1.0532e+10 13602
## - M_CLNO               1 2.2937e+08 1.0552e+10 13604
## - IMP_VALUE            1 2.3161e+08 1.0554e+10 13604
## - FLAG.Reason.DebtCon  1 2.3908e+08 1.0561e+10 13605
## - IMP_YOJ              1 4.2604e+08 1.0748e+10 13619
## - M_CLAGE              1 4.7005e+08 1.0792e+10 13623
## - IMP_DEBTINC          1 1.1592e+09 1.1481e+10 13674
## - IMP_DELINQ           1 1.6269e+09 1.1949e+10 13707
## - IMP_CLAGE            1 2.0149e+09 1.2337e+10 13734
## - IMP_CLNO             1 3.2535e+09 1.3576e+10 13813
## - M_DEBTINC            1 5.8748e+09 1.6197e+10 13959
## - LOAN                 1 5.1322e+10 6.1645e+10 15067
## 
## Step:  AIC=13586.14
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + M_NINQ + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + 
##     FLAG.Job.ProfExe + FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + 
##     FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - M_NINQ               1 1.1730e+07 1.0340e+10 13585
## - FLAG.Job.ProfExe     1 1.6347e+07 1.0345e+10 13585
## <none>                              1.0328e+10 13586
## - M_DEROG              1 2.9684e+07 1.0358e+10 13586
## - FLAG.Reason.HomeImp  1 5.6273e+07 1.0384e+10 13589
## - M_MORTDUE            1 9.2571e+07 1.0421e+10 13592
## - IMP_MORTDUE          1 9.6484e+07 1.0425e+10 13592
## - IMP_DEROG            1 9.6980e+07 1.0425e+10 13592
## - FLAG.Job.Sales       1 1.1532e+08 1.0444e+10 13593
## - FLAG.Job.Self        1 2.1104e+08 1.0539e+10 13601
## - M_CLNO               1 2.2619e+08 1.0554e+10 13602
## - IMP_VALUE            1 2.3377e+08 1.0562e+10 13603
## - FLAG.Reason.DebtCon  1 2.3652e+08 1.0565e+10 13603
## - IMP_YOJ              1 4.2889e+08 1.0757e+10 13618
## - M_CLAGE              1 4.7533e+08 1.0804e+10 13621
## - IMP_DEBTINC          1 1.1598e+09 1.1488e+10 13672
## - IMP_DELINQ           1 1.6224e+09 1.1951e+10 13705
## - IMP_CLAGE            1 2.0091e+09 1.2337e+10 13732
## - IMP_CLNO             1 3.2511e+09 1.3579e+10 13811
## - M_DEBTINC            1 5.8732e+09 1.6201e+10 13957
## - LOAN                 1 5.1444e+10 6.1772e+10 15067
## 
## Step:  AIC=13585.08
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.ProfExe + 
##     FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## - FLAG.Job.ProfExe     1 1.6402e+07 1.0356e+10 13584
## <none>                              1.0340e+10 13585
## - M_DEROG              1 2.8174e+07 1.0368e+10 13585
## - FLAG.Reason.HomeImp  1 5.6391e+07 1.0396e+10 13588
## - M_MORTDUE            1 8.9116e+07 1.0429e+10 13590
## - IMP_MORTDUE          1 9.5453e+07 1.0435e+10 13591
## - IMP_DEROG            1 9.6820e+07 1.0437e+10 13591
## - FLAG.Job.Sales       1 1.1628e+08 1.0456e+10 13592
## - FLAG.Job.Self        1 2.1606e+08 1.0556e+10 13600
## - IMP_VALUE            1 2.3838e+08 1.0578e+10 13602
## - FLAG.Reason.DebtCon  1 2.4090e+08 1.0581e+10 13602
## - M_CLNO               1 2.6088e+08 1.0601e+10 13604
## - IMP_YOJ              1 4.2197e+08 1.0762e+10 13616
## - M_CLAGE              1 4.7183e+08 1.0812e+10 13620
## - IMP_DEBTINC          1 1.1693e+09 1.1509e+10 13672
## - IMP_DELINQ           1 1.6163e+09 1.1956e+10 13704
## - IMP_CLAGE            1 1.9995e+09 1.2339e+10 13730
## - IMP_CLNO             1 3.2710e+09 1.3611e+10 13811
## - M_DEBTINC            1 5.8850e+09 1.6225e+10 13957
## - LOAN                 1 5.1499e+10 6.1839e+10 15066
## 
## Step:  AIC=13584.39
## TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + IMP_VALUE + 
##     IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + IMP_CLAGE + 
##     M_CLAGE + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + FLAG.Job.Sales + 
##     FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp
## 
##                       Df  Sum of Sq        RSS   AIC
## <none>                              1.0356e+10 13584
## - M_DEROG              1 3.0998e+07 1.0387e+10 13585
## - FLAG.Reason.HomeImp  1 5.3866e+07 1.0410e+10 13587
## - M_MORTDUE            1 8.3528e+07 1.0440e+10 13589
## - IMP_MORTDUE          1 8.5303e+07 1.0442e+10 13589
## - IMP_DEROG            1 9.5960e+07 1.0452e+10 13590
## - FLAG.Job.Sales       1 1.2538e+08 1.0482e+10 13592
## - FLAG.Job.Self        1 2.3628e+08 1.0593e+10 13601
## - FLAG.Reason.DebtCon  1 2.4112e+08 1.0597e+10 13602
## - IMP_VALUE            1 2.4391e+08 1.0600e+10 13602
## - M_CLNO               1 2.5435e+08 1.0611e+10 13602
## - IMP_YOJ              1 4.3694e+08 1.0793e+10 13617
## - M_CLAGE              1 4.6531e+08 1.0822e+10 13619
## - IMP_DEBTINC          1 1.1604e+09 1.1517e+10 13670
## - IMP_DELINQ           1 1.6242e+09 1.1980e+10 13703
## - IMP_CLAGE            1 2.0759e+09 1.2432e+10 13734
## - IMP_CLNO             1 3.2580e+09 1.3614e+10 13809
## - M_DEBTINC            1 5.8692e+09 1.6225e+10 13955
## - LOAN                 1 5.1573e+10 6.1929e+10 15065
# Print the coefficient table, residual quantiles and fit statistics of the
# loss-amount linear model LR_Loss (recorded output follows).
summary(LR_Loss)
## 
## Call:
## lm(formula = TARGET_LOSS_AMT ~ LOAN + IMP_MORTDUE + M_MORTDUE + 
##     IMP_VALUE + IMP_YOJ + IMP_DEROG + M_DEROG + IMP_DELINQ + 
##     IMP_CLAGE + M_CLAGE + IMP_CLNO + M_CLNO + IMP_DEBTINC + M_DEBTINC + 
##     FLAG.Job.Sales + FLAG.Job.Self + FLAG.Reason.DebtCon + FLAG.Reason.HomeImp, 
##     data = train_subset)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22411.0  -1149.3    121.5   1711.2  13821.3 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.202e+04  9.388e+02 -12.804  < 2e-16 ***
## LOAN                 8.057e-01  1.269e-02  63.511  < 2e-16 ***
## IMP_MORTDUE          9.825e-03  3.804e-03   2.583  0.00997 ** 
## M_MORTDUE           -1.224e+03  4.789e+02  -2.556  0.01077 *  
## IMP_VALUE           -1.173e-02  2.686e-03  -4.368 1.42e-05 ***
## IMP_YOJ             -1.094e+02  1.871e+01  -5.846 7.30e-09 ***
## IMP_DEROG            2.377e+02  8.677e+01   2.740  0.00629 ** 
## M_DEROG              1.159e+03  7.444e+02   1.557  0.11985    
## IMP_DELINQ           7.942e+02  7.046e+01  11.271  < 2e-16 ***
## IMP_CLAGE           -2.130e+01  1.672e+00 -12.742  < 2e-16 ***
## M_CLAGE             -5.753e+03  9.536e+02  -6.033 2.45e-09 ***
## IMP_CLNO             2.032e+02  1.273e+01  15.963  < 2e-16 ***
## M_CLNO               6.065e+03  1.360e+03   4.460 9.34e-06 ***
## IMP_DEBTINC          1.342e+02  1.409e+01   9.527  < 2e-16 ***
## M_DEBTINC            5.868e+03  2.739e+02  21.425  < 2e-16 ***
## FLAG.Job.Sales       2.567e+03  8.196e+02   3.131  0.00180 ** 
## FLAG.Job.Self        2.505e+03  5.826e+02   4.299 1.93e-05 ***
## FLAG.Reason.DebtCon  2.718e+03  6.260e+02   4.343 1.59e-05 ***
## FLAG.Reason.HomeImp  1.339e+03  6.522e+02   2.053  0.04043 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3576 on 810 degrees of freedom
## Multiple R-squared:  0.9033, Adjusted R-squared:  0.9011 
## F-statistic: 420.1 on 18 and 810 DF,  p-value: < 2.2e-16
# Predictions on the hold-out set `test`.
# predict() on a glm defaults to the link scale (log-odds for a logistic
# model). Log-odds are not comparable to the 0/1 TARGET_BAD_FLAG and must not
# be multiplied by a dollar amount, so request the response scale instead.
# NOTE(review): assumes LR_flag is the logistic model for TARGET_BAD_FLAG
# -- confirm against its definition earlier in the file.
P_Badflag <- predict(LR_flag, newdata = test, type = "response")
# LR_Loss was fitted on train_subset (see its summary output), but is scored
# here on every row of `test`.
P_Loss <- predict(LR_Loss, newdata = test)

# Calculate RMSE for individual predictions
RMSE_Badflag <- sqrt(mean((test$TARGET_BAD_FLAG - P_Badflag)^2))
RMSE_Loss <- sqrt(mean((test$TARGET_LOSS_AMT - P_Loss)^2))

print(paste("Bad Flag Prediction RMSE =", RMSE_Badflag))
## (recorded before the type = "response" fix; re-run to refresh)
## [1] "Bad Flag Prediction RMSE = 3.29460273953523"
print(paste("Loss Prediction RMSE =", RMSE_Loss))
## [1] "Loss Prediction RMSE = 12227.0972186223"
# Multiply predictions: predicted bad-flag value times predicted loss amount
P_Multiply <- P_Badflag * P_Loss

# Calculate RMSE for multiplied predictions
RMSE_Multiply <- sqrt(mean((test$TARGET_LOSS_AMT - P_Multiply)^2))
print(paste("Regression Method Multiplied RMSE =", RMSE_Multiply))
## (recorded before the type = "response" fix; re-run to refresh)
## [1] "Regression Method Multiplied RMSE = 45142.2425164878"
# !!! SUMMARY & ANALYSIS !!!
# The loss-prediction RMSE is far too large, indicating substantial prediction error.
# Recommendation: use the regular regression method for its simplicity and accuracy.