Lending Club Loans Data Analysis: Complete Analysis Code

Programming in R: Independent Study

J Herdmann

Winter 2013

R Session Information

sessionInfo()
## R version 2.15.1 (2012-06-22)
## Platform: x86_64-apple-darwin9.8.0/x86_64 (64-bit)
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] knitr_1.1
## 
## loaded via a namespace (and not attached):
## [1] digest_0.6.3   evaluate_0.4.3 formatR_0.7    stringr_0.6.2 
## [5] tools_2.15.1

Packages

library(gbm)
## Loading required package: survival
## Loading required package: splines
## Loading required package: lattice
## Loaded gbm 2.0-8
library(stringr)
library(Hmisc)
## Hmisc library by Frank E Harrell Jr
## 
## Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall
## documentation.
## 
## NOTE:Hmisc no longer redefines [.factor to drop unused levels when
## subsetting.  To get the old behavior of Hmisc type dropUnusedLevels().
## Attaching package: 'Hmisc'
## The following object(s) are masked from 'package:survival':
## 
## untangle.specials
## The following object(s) are masked from 'package:base':
## 
## format.pval, round.POSIXt, trunc.POSIXt, units

Analysis Setup and Raw Data

Setting Directory and Loading Data

# Show the current working directory before changing it.
getwd()
## [1] "/Users/user/Desktop/loansData"
# NOTE(review): both paths below are hard-coded to one user's Desktop, so the
# analysis only runs unmodified on a machine with the same directory layout.
setwd("/Users/user/Desktop/loansData")
# Restore the objects stored in the .rda file; presumably this is what creates
# the `loansData` data frame used below -- confirm against the file's contents.
load("~/Desktop/loansData/loansData.rda")

Data Preview

# Alias the data as `x` for the preview calls that follow.
x <- loansData
# Number of rows and columns.
dim(x)
## [1] 2500   14

Dimensions

# List the column names of the data set.
names(x)
##  [1] "Amount.Requested"               "Amount.Funded.By.Investors"    
##  [3] "Interest.Rate"                  "Loan.Length"                   
##  [5] "Loan.Purpose"                   "Debt.To.Income.Ratio"          
##  [7] "State"                          "Home.Ownership"                
##  [9] "Monthly.Income"                 "FICO.Range"                    
## [11] "Open.CREDIT.Lines"              "Revolving.CREDIT.Balance"      
## [13] "Inquiries.in.the.Last.6.Months" "Employment.Length"

Variable Names

# Compact overview of every column: its type and its first few values.
str(x)
## 'data.frame':    2500 obs. of  14 variables:
##  $ Amount.Requested              : int  20000 19200 35000 10000 12000 6000 10000 33500 14675 7000 ...
##  $ Amount.Funded.By.Investors    : num  20000 19200 35000 9975 12000 ...
##  $ Interest.Rate                 : Factor w/ 430 levels "","10.00%","10.01%",..: 409 60 335 430 49 166 398 220 130 379 ...
##  $ Loan.Length                   : Factor w/ 3 levels "","36 months",..: 2 2 3 2 2 2 2 3 2 2 ...
##  $ Loan.Purpose                  : Factor w/ 15 levels "","car","credit_card",..: 4 4 4 4 3 11 4 3 3 3 ...
##  $ Debt.To.Income.Ratio          : Factor w/ 3500 levels "","0.01%","0.02%",..: 690 2136 1681 630 1078 1305 1909 670 1992 3210 ...
##  $ State                         : Factor w/ 51 levels "","AK","AL","AR",..: 41 44 6 18 32 8 21 20 6 6 ...
##  $ Home.Ownership                : Factor w/ 6 levels "","MORTGAGE",..: 2 2 2 2 6 5 6 2 6 6 ...
##  $ Monthly.Income                : num  6542 4583 11500 3833 3195 ...
##  $ FICO.Range                    : Factor w/ 43 levels "","640-644","645-649",..: 21 17 12 13 13 8 18 15 11 17 ...
##  $ Open.CREDIT.Lines             : int  14 12 14 10 11 17 10 12 9 8 ...
##  $ Revolving.CREDIT.Balance      : int  14272 11140 21977 9346 14469 10391 15957 27874 7246 7612 ...
##  $ Inquiries.in.the.Last.6.Months: int  2 1 1 0 0 2 0 0 1 0 ...
##  $ Employment.Length             : Factor w/ 13 levels "","< 1 year",..: 2 5 5 8 12 6 4 4 11 6 ...
# Print the first six rows of the data set.
head(x)
##       Amount.Requested Amount.Funded.By.Investors Interest.Rate
## 81174            20000                      20000         8.90%
## 99592            19200                      19200        12.12%
## 80059            35000                      35000        21.98%
## 15825            10000                       9975         9.99%
## 33182            12000                      12000        11.71%
## 62403             6000                       6000        15.31%
##       Loan.Length       Loan.Purpose Debt.To.Income.Ratio State
## 81174   36 months debt_consolidation               14.90%    SC
## 99592   36 months debt_consolidation               28.36%    TX
## 80059   60 months debt_consolidation               23.81%    CA
## 15825   36 months debt_consolidation               14.30%    KS
## 33182   36 months        credit_card               18.78%    NJ
## 62403   36 months              other               20.05%    CT
##       Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines
## 81174       MORTGAGE           6542    735-739                14
## 99592       MORTGAGE           4583    715-719                12
## 80059       MORTGAGE          11500    690-694                14
## 15825       MORTGAGE           3833    695-699                10
## 33182           RENT           3195    695-699                11
## 62403            OWN           4892    670-674                17
##       Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
## 81174                    14272                              2
## 99592                    11140                              1
## 80059                    21977                              1
## 15825                     9346                              0
## 33182                    14469                              0
## 62403                    10391                              2
##       Employment.Length
## 81174          < 1 year
## 99592           2 years
## 80059           2 years
## 15825           5 years
## 33182           9 years
## 62403           3 years

Summarized Previews of Data

# Report the class of every column. A column's class does not depend on which
# rows are selected, so the columns are inspected directly.
sapply(x, class)
##               Amount.Requested     Amount.Funded.By.Investors 
##                      "integer"                      "numeric" 
##                  Interest.Rate                    Loan.Length 
##                       "factor"                       "factor" 
##                   Loan.Purpose           Debt.To.Income.Ratio 
##                       "factor"                       "factor" 
##                          State                 Home.Ownership 
##                       "factor"                       "factor" 
##                 Monthly.Income                     FICO.Range 
##                      "numeric"                       "factor" 
##              Open.CREDIT.Lines       Revolving.CREDIT.Balance 
##                      "integer"                      "integer" 
## Inquiries.in.the.Last.6.Months              Employment.Length 
##                      "integer"                       "factor"

Specifies Variable's Data Classes

Data Transformations For Analysis

# Strip the percent sign from each debt-to-income value and parse what is left
# as a number, so the ratio can be used as a continuous variable.
loansData$Debt.To.Income.Ratio <- as.numeric(
    gsub("%", "", loansData$Debt.To.Income.Ratio)
)

Drops the “%” character and converts the observations from “factors” to “numeric”

# Same clean-up for the interest rate: remove the "%" suffix and store the
# remaining text as a numeric value.
loansData[["Interest.Rate"]] <- as.numeric(
    gsub("%", "", loansData[["Interest.Rate"]])
)

Drops the “%” character and converts the observations from “factors” to “numeric”

# Total number of missing cells in the whole data frame.
sum(is.na(loansData))
## [1] 7
# Names of the columns that hold at least one NA. Counting NAs per column
# avoids transposing the data frame (which coerces every value to character)
# and still returns a name when exactly one column has missing values, a case
# in which `loansData[, mask]` collapses to a bare vector and names() is NULL.
names(loansData)[colSums(is.na(loansData)) > 0]
## [1] "Monthly.Income"                 "Open.CREDIT.Lines"             
## [3] "Revolving.CREDIT.Balance"       "Inquiries.in.the.Last.6.Months"

Checks for missing values (NA's) and identifies how many there are and the names of the columns they are found in

# Mean-impute the four columns that contain missing values: every NA is
# replaced by the mean of the observed values in the same column.
na.columns <- c("Monthly.Income", "Open.CREDIT.Lines", "Revolving.CREDIT.Balance", 
    "Inquiries.in.the.Last.6.Months")
loansData[na.columns] <- lapply(loansData[na.columns], function(column) {
    column[is.na(column)] <- mean(column, na.rm = TRUE)
    column
})

Replaces the NA's with mean values of their corresponding columns

# Re-count the missing cells to confirm that none are left after imputation.
sum(is.na(loansData))
## [1] 0

No missing values remain

# Break each "low-high" FICO range into its two bounds, turn both bounds into
# numbers, and take their midpoint as a single numeric score per loan.
SplitFICO <- data.frame(str_split_fixed(loansData$FICO.Range, "-", 2))
SplitFICO[] <- lapply(SplitFICO, function(bound) as.numeric(as.character(bound)))
FICO.Mean <- rowMeans(SplitFICO)
# Append the midpoint to the data set as the column FICO.Mean.
loansData <- data.frame(loansData, FICO.Mean = FICO.Mean)

To use FICO score data for analysis the range was split and an average value was calculated for each

# Discard the raw FICO range and the amount funded by investors, keeping every
# other column in its original order.
unused.columns <- c("FICO.Range", "Amount.Funded.By.Investors")
loansData <- loansData[setdiff(names(loansData), unused.columns)]

Removes the 'FICO.Range' column (now summarized by 'FICO.Mean') and the 'Amount.Funded.By.Investors' column, since the latter will have no effect on Interest Rates

Exploratory Analysis

Training and Testing Data Sets

# Hold out a quarter of the loans for evaluation: draw 75% of the row indices
# at random for training and use the remaining rows for testing.
positions <- sample(nrow(loansData), size = floor((nrow(loansData)/4) * 3))
training <- loansData[positions, ]
# Negative indexing selects the rows NOT sampled above. Indexing with
# `positions` again would make the test set identical to the training set, so
# any error computed on it would measure fit to already-seen data rather than
# predictive accuracy.
testing <- loansData[-positions, ]

Randomly samples three quarters of the row positions (1,875 of 2,500) and uses them to build the training and testing data sets

# Keep the observed rates aside for scoring, then drop the response column
# from the test set.
actualValues <- testing$Interest.Rate
testing$Interest.Rate <- NULL
# Fit a gradient boosted regression of interest rate on every other column,
# with 5-fold cross-validation. Arguments are spelled out in full: `cv.folds`
# (the abbreviated `cv.fold` only works through partial argument matching) and
# `data` (an unnamed `training` reaches it only by position).
gbmMod <- gbm(Interest.Rate ~ ., data = training, distribution = "gaussian", 
    n.trees = 1000, shrinkage = 0.01, interaction.depth = 7, bag.fraction = 0.9, 
    n.minobsinnode = 50, cv.folds = 5)
## CV: 1 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1       17.7221         16.0974     0.0100    0.2785
##      2       17.4737         15.8915     0.0100    0.2429
##      3       17.2291         15.6899     0.0100    0.2310
##      4       16.9929         15.4908     0.0100    0.2296
##      5       16.7619         15.2850     0.0100    0.2176
##      6       16.5339         15.0974     0.0100    0.2089
##      7       16.3111         14.9128     0.0100    0.2279
##      8       16.0898         14.7274     0.0100    0.2823
##      9       15.8756         14.5395     0.0100    0.2115
##     10       15.6646         14.3663     0.0100    0.2216
##     20       13.7544         12.7711     0.0100    0.1845
##     40       10.8274         10.2924     0.0100    0.0927
##     60        8.7604          8.5596     0.0100    0.0816
##     80        7.2983          7.3134     0.0100    0.0521
##    100        6.2425          6.4290     0.0100    0.0362
##    120        5.4682          5.7541     0.0100    0.0313
##    140        4.8854          5.2471     0.0100    0.0201
##    160        4.4300          4.8594     0.0100    0.0211
##    180        4.0692          4.5592     0.0100    0.0075
##    200        3.7802          4.3258     0.0100    0.0125
##    220        3.5452          4.1443     0.0100    0.0092
##    240        3.3568          3.9979     0.0100    0.0007
##    260        3.1952          3.8799     0.0100    0.0019
##    280        3.0623          3.7890     0.0100    0.0026
##    300        2.9428          3.7135     0.0100    0.0023
##    320        2.8416          3.6513     0.0100   -0.0002
##    340        2.7533          3.6073     0.0100   -0.0014
##    360        2.6737          3.5700     0.0100    0.0006
##    380        2.5999          3.5409     0.0100   -0.0016
##    400        2.5344          3.5227     0.0100    0.0003
##    420        2.4760          3.5071     0.0100   -0.0013
##    440        2.4230          3.4946     0.0100   -0.0014
##    460        2.3742          3.4855     0.0100   -0.0020
##    480        2.3287          3.4762     0.0100   -0.0027
##    500        2.2845          3.4742     0.0100   -0.0027
##    520        2.2471          3.4739     0.0100   -0.0007
##    540        2.2110          3.4734     0.0100    0.0003
##    560        2.1753          3.4718     0.0100   -0.0013
##    580        2.1421          3.4776     0.0100   -0.0004
##    600        2.1099          3.4792     0.0100   -0.0022
##    620        2.0816          3.4797     0.0100   -0.0014
##    640        2.0544          3.4806     0.0100   -0.0005
##    660        2.0259          3.4816     0.0100   -0.0031
##    680        2.0001          3.4804     0.0100   -0.0009
##    700        1.9744          3.4846     0.0100   -0.0010
##    720        1.9506          3.4917     0.0100   -0.0006
##    740        1.9261          3.4952     0.0100   -0.0006
##    760        1.9060          3.4978     0.0100   -0.0006
##    780        1.8850          3.5004     0.0100   -0.0024
##    800        1.8638          3.5042     0.0100    0.0001
##    820        1.8442          3.5122     0.0100   -0.0010
##    840        1.8258          3.5122     0.0100   -0.0010
##    860        1.8081          3.5139     0.0100   -0.0019
##    880        1.7901          3.5200     0.0100   -0.0011
##    900        1.7722          3.5254     0.0100   -0.0020
##    920        1.7549          3.5281     0.0100   -0.0011
##    940        1.7370          3.5324     0.0100   -0.0018
##    960        1.7212          3.5395     0.0100   -0.0007
##    980        1.7051          3.5450     0.0100   -0.0019
##   1000        1.6890          3.5500     0.0100   -0.0010
## 
## CV: 2 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1       17.2418         17.9806     0.0100    0.2671
##      2       16.9979         17.7297     0.0100    0.2053
##      3       16.7606         17.4829     0.0100    0.2253
##      4       16.5304         17.2436     0.0100    0.2171
##      5       16.3036         17.0082     0.0100    0.2033
##      6       16.0797         16.7755     0.0100    0.2208
##      7       15.8620         16.5411     0.0100    0.1948
##      8       15.6478         16.3191     0.0100    0.2015
##      9       15.4353         16.1018     0.0100    0.2151
##     10       15.2294         15.8898     0.0100    0.2136
##     20       13.3673         13.9710     0.0100    0.1658
##     40       10.5388         11.1184     0.0100    0.1329
##     60        8.5504          9.1511     0.0100    0.0749
##     80        7.1352          7.7615     0.0100    0.0408
##    100        6.1209          6.7520     0.0100    0.0366
##    120        5.3661          6.0265     0.0100    0.0225
##    140        4.7922          5.4937     0.0100    0.0177
##    160        4.3561          5.1158     0.0100    0.0174
##    180        3.9960          4.7724     0.0100    0.0133
##    200        3.7141          4.5249     0.0100    0.0058
##    220        3.4820          4.3401     0.0100    0.0054
##    240        3.2998          4.2026     0.0100    0.0036
##    260        3.1414          4.0978     0.0100    0.0033
##    280        3.0053          3.9990     0.0100   -0.0006
##    300        2.8920          3.9215     0.0100   -0.0011
##    320        2.7975          3.8567     0.0100   -0.0003
##    340        2.7155          3.8111     0.0100    0.0006
##    360        2.6433          3.7762     0.0100    0.0002
##    380        2.5778          3.7346     0.0100   -0.0014
##    400        2.5151          3.7034     0.0100   -0.0025
##    420        2.4612          3.6866     0.0100   -0.0009
##    440        2.4111          3.6616     0.0100   -0.0028
##    460        2.3646          3.6441     0.0100    0.0005
##    480        2.3224          3.6307     0.0100   -0.0030
##    500        2.2796          3.6183     0.0100   -0.0016
##    520        2.2417          3.6103     0.0100   -0.0018
##    540        2.2042          3.6003     0.0100   -0.0004
##    560        2.1690          3.5898     0.0100    0.0003
##    580        2.1373          3.5860     0.0100   -0.0002
##    600        2.1062          3.5823     0.0100   -0.0022
##    620        2.0760          3.5796     0.0100   -0.0002
##    640        2.0482          3.5736     0.0100   -0.0008
##    660        2.0227          3.5723     0.0100   -0.0001
##    680        1.9979          3.5705     0.0100   -0.0005
##    700        1.9737          3.5634     0.0100   -0.0012
##    720        1.9514          3.5610     0.0100   -0.0006
##    740        1.9305          3.5600     0.0100   -0.0014
##    760        1.9095          3.5601     0.0100   -0.0017
##    780        1.8889          3.5611     0.0100   -0.0010
##    800        1.8709          3.5588     0.0100   -0.0020
##    820        1.8505          3.5592     0.0100   -0.0011
##    840        1.8290          3.5597     0.0100   -0.0023
##    860        1.8121          3.5617     0.0100   -0.0012
##    880        1.7934          3.5604     0.0100   -0.0020
##    900        1.7748          3.5625     0.0100   -0.0011
##    920        1.7558          3.5617     0.0100   -0.0025
##    940        1.7389          3.5651     0.0100   -0.0010
##    960        1.7227          3.5647     0.0100   -0.0014
##    980        1.7065          3.5672     0.0100   -0.0020
##   1000        1.6908          3.5696     0.0100   -0.0009
## 
## CV: 3 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1       17.7269         16.0833     0.0100    0.2659
##      2       17.4825         15.8504     0.0100    0.2449
##      3       17.2443         15.6303     0.0100    0.2228
##      4       17.0094         15.4072     0.0100    0.2209
##      5       16.7800         15.1940     0.0100    0.2583
##      6       16.5555         14.9818     0.0100    0.2257
##      7       16.3350         14.7820     0.0100    0.1857
##      8       16.1204         14.5781     0.0100    0.1925
##      9       15.9100         14.3778     0.0100    0.2594
##     10       15.7001         14.1785     0.0100    0.1841
##     20       13.8029         12.4300     0.0100    0.1682
##     40       10.9111          9.8491     0.0100    0.1173
##     60        8.8683          8.0417     0.0100    0.1023
##     80        7.4091          6.7797     0.0100    0.0687
##    100        6.3498          5.8582     0.0100    0.0353
##    120        5.5677          5.2113     0.0100    0.0233
##    140        4.9736          4.7320     0.0100    0.0163
##    160        4.5015          4.3541     0.0100    0.0135
##    180        4.1405          4.0783     0.0100    0.0123
##    200        3.8553          3.8535     0.0100    0.0096
##    220        3.6248          3.6922     0.0100    0.0074
##    240        3.4320          3.5625     0.0100    0.0038
##    260        3.2691          3.4645     0.0100    0.0049
##    280        3.1307          3.3825     0.0100   -0.0000
##    300        3.0133          3.3270     0.0100    0.0047
##    320        2.9094          3.2834     0.0100    0.0009
##    340        2.8175          3.2524     0.0100   -0.0003
##    360        2.7316          3.2324     0.0100   -0.0008
##    380        2.6549          3.2136     0.0100    0.0000
##    400        2.5858          3.2038     0.0100   -0.0005
##    420        2.5258          3.1952     0.0100   -0.0022
##    440        2.4698          3.1935     0.0100   -0.0009
##    460        2.4181          3.1890     0.0100   -0.0016
##    480        2.3696          3.1872     0.0100   -0.0002
##    500        2.3239          3.1870     0.0100   -0.0020
##    520        2.2840          3.1882     0.0100   -0.0022
##    540        2.2454          3.1889     0.0100   -0.0002
##    560        2.2103          3.1982     0.0100   -0.0007
##    580        2.1754          3.2027     0.0100   -0.0007
##    600        2.1423          3.2075     0.0100   -0.0018
##    620        2.1119          3.2143     0.0100   -0.0008
##    640        2.0838          3.2174     0.0100   -0.0003
##    660        2.0554          3.2197     0.0100   -0.0018
##    680        2.0282          3.2245     0.0100   -0.0012
##    700        2.0030          3.2345     0.0100   -0.0020
##    720        1.9791          3.2390     0.0100   -0.0015
##    740        1.9556          3.2469     0.0100   -0.0009
##    760        1.9330          3.2551     0.0100   -0.0007
##    780        1.9122          3.2618     0.0100   -0.0024
##    800        1.8903          3.2682     0.0100   -0.0014
##    820        1.8699          3.2703     0.0100   -0.0012
##    840        1.8492          3.2774     0.0100   -0.0007
##    860        1.8308          3.2824     0.0100   -0.0008
##    880        1.8110          3.2863     0.0100   -0.0004
##    900        1.7921          3.2892     0.0100   -0.0016
##    920        1.7753          3.2959     0.0100   -0.0018
##    940        1.7579          3.3029     0.0100   -0.0010
##    960        1.7403          3.3061     0.0100   -0.0012
##    980        1.7241          3.3140     0.0100   -0.0016
##   1000        1.7086          3.3167     0.0100   -0.0009
## 
## CV: 4 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1       17.2978         17.7847     0.0100    0.1887
##      2       17.0576         17.5617     0.0100    0.2381
##      3       16.8226         17.3350     0.0100    0.2540
##      4       16.5919         17.1128     0.0100    0.2229
##      5       16.3646         16.8928     0.0100    0.2646
##      6       16.1405         16.6762     0.0100    0.2331
##      7       15.9195         16.4695     0.0100    0.1889
##      8       15.7069         16.2706     0.0100    0.2025
##      9       15.4970         16.0639     0.0100    0.1941
##     10       15.2908         15.8654     0.0100    0.2367
##     20       13.4306         14.1062     0.0100    0.2035
##     40       10.6036         11.3857     0.0100    0.1152
##     60        8.6231          9.4207     0.0100    0.0815
##     80        7.2183          7.9651     0.0100    0.0316
##    100        6.2000          6.8955     0.0100    0.0421
##    120        5.4426          6.0906     0.0100    0.0282
##    140        4.8678          5.4978     0.0100    0.0269
##    160        4.4244          5.0563     0.0100    0.0105
##    180        4.0803          4.7079     0.0100    0.0093
##    200        3.7994          4.4269     0.0100    0.0065
##    220        3.5694          4.2135     0.0100    0.0066
##    240        3.3789          4.0478     0.0100    0.0048
##    260        3.2252          3.9152     0.0100    0.0056
##    280        3.0885          3.8004     0.0100    0.0013
##    300        2.9710          3.7135     0.0100   -0.0008
##    320        2.8712          3.6552     0.0100    0.0001
##    340        2.7858          3.6046     0.0100    0.0008
##    360        2.7080          3.5758     0.0100    0.0024
##    380        2.6365          3.5502     0.0100   -0.0029
##    400        2.5674          3.5229     0.0100    0.0016
##    420        2.5072          3.5058     0.0100   -0.0035
##    440        2.4521          3.4920     0.0100    0.0007
##    460        2.4030          3.4812     0.0100   -0.0010
##    480        2.3558          3.4722     0.0100   -0.0017
##    500        2.3121          3.4702     0.0100   -0.0027
##    520        2.2717          3.4698     0.0100   -0.0018
##    540        2.2344          3.4597     0.0100   -0.0013
##    560        2.1979          3.4581     0.0100   -0.0006
##    580        2.1631          3.4549     0.0100   -0.0008
##    600        2.1321          3.4533     0.0100   -0.0005
##    620        2.1023          3.4517     0.0100   -0.0027
##    640        2.0745          3.4502     0.0100   -0.0021
##    660        2.0463          3.4547     0.0100   -0.0006
##    680        2.0199          3.4540     0.0100   -0.0020
##    700        1.9940          3.4589     0.0100   -0.0026
##    720        1.9694          3.4601     0.0100   -0.0027
##    740        1.9465          3.4652     0.0100   -0.0007
##    760        1.9238          3.4640     0.0100   -0.0010
##    780        1.9027          3.4625     0.0100   -0.0017
##    800        1.8822          3.4699     0.0100   -0.0010
##    820        1.8611          3.4740     0.0100   -0.0005
##    840        1.8404          3.4757     0.0100   -0.0011
##    860        1.8216          3.4787     0.0100   -0.0006
##    880        1.8044          3.4810     0.0100   -0.0016
##    900        1.7859          3.4866     0.0100   -0.0016
##    920        1.7693          3.4910     0.0100   -0.0008
##    940        1.7522          3.4941     0.0100   -0.0023
##    960        1.7357          3.4962     0.0100   -0.0017
##    980        1.7201          3.4934     0.0100   -0.0009
##   1000        1.7036          3.4944     0.0100   -0.0006
## 
## CV: 5 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1       16.9606         19.1592     0.0100    0.2492
##      2       16.7328         18.9087     0.0100    0.2254
##      3       16.5085         18.6607     0.0100    0.1917
##      4       16.2906         18.4106     0.0100    0.2723
##      5       16.0758         18.1666     0.0100    0.2025
##      6       15.8650         17.9309     0.0100    0.2454
##      7       15.6561         17.6980     0.0100    0.2308
##      8       15.4528         17.4671     0.0100    0.2034
##      9       15.2542         17.2468     0.0100    0.2040
##     10       15.0570         17.0252     0.0100    0.1962
##     20       13.2830         14.9910     0.0100    0.1526
##     40       10.5647         11.8655     0.0100    0.0872
##     60        8.6469          9.6551     0.0100    0.0773
##     80        7.2685          8.0642     0.0100    0.0484
##    100        6.2652          6.9085     0.0100    0.0340
##    120        5.5237          6.0475     0.0100    0.0281
##    140        4.9512          5.4200     0.0100    0.0134
##    160        4.5120          4.9377     0.0100    0.0118
##    180        4.1610          4.5496     0.0100    0.0051
##    200        3.8828          4.2561     0.0100    0.0061
##    220        3.6521          4.0204     0.0100    0.0069
##    240        3.4661          3.8587     0.0100    0.0040
##    260        3.3049          3.7167     0.0100    0.0018
##    280        3.1641          3.6004     0.0100    0.0024
##    300        3.0419          3.5115     0.0100   -0.0004
##    320        2.9388          3.4419     0.0100   -0.0020
##    340        2.8488          3.3818     0.0100   -0.0010
##    360        2.7678          3.3314     0.0100    0.0007
##    380        2.6962          3.2822     0.0100   -0.0030
##    400        2.6285          3.2433     0.0100    0.0009
##    420        2.5677          3.2164     0.0100   -0.0013
##    440        2.5081          3.1885     0.0100   -0.0011
##    460        2.4576          3.1626     0.0100   -0.0030
##    480        2.4106          3.1417     0.0100    0.0009
##    500        2.3656          3.1184     0.0100   -0.0024
##    520        2.3223          3.0999     0.0100   -0.0007
##    540        2.2818          3.0874     0.0100   -0.0020
##    560        2.2450          3.0762     0.0100   -0.0016
##    580        2.2088          3.0650     0.0100   -0.0011
##    600        2.1762          3.0571     0.0100   -0.0006
##    620        2.1432          3.0447     0.0100   -0.0020
##    640        2.1103          3.0364     0.0100    0.0001
##    660        2.0815          3.0352     0.0100   -0.0013
##    680        2.0528          3.0358     0.0100   -0.0005
##    700        2.0265          3.0347     0.0100   -0.0005
##    720        2.0010          3.0374     0.0100   -0.0007
##    740        1.9762          3.0387     0.0100   -0.0021
##    760        1.9505          3.0375     0.0100   -0.0017
##    780        1.9263          3.0423     0.0100   -0.0005
##    800        1.9045          3.0440     0.0100   -0.0001
##    820        1.8827          3.0445     0.0100   -0.0012
##    840        1.8629          3.0411     0.0100   -0.0012
##    860        1.8412          3.0431     0.0100   -0.0008
##    880        1.8225          3.0467     0.0100   -0.0011
##    900        1.8033          3.0520     0.0100   -0.0019
##    920        1.7859          3.0554     0.0100   -0.0012
##    940        1.7681          3.0554     0.0100   -0.0014
##    960        1.7508          3.0592     0.0100   -0.0024
##    980        1.7347          3.0589     0.0100   -0.0002
##   1000        1.7173          3.0636     0.0100   -0.0021
## 
## Iter   TrainDeviance   ValidDeviance   StepSize   Improve
##      1       17.3923             nan     0.0100    0.2434
##      2       17.1551             nan     0.0100    0.2336
##      3       16.9214             nan     0.0100    0.2401
##      4       16.6918             nan     0.0100    0.2184
##      5       16.4656             nan     0.0100    0.2161
##      6       16.2442             nan     0.0100    0.2459
##      7       16.0272             nan     0.0100    0.2285
##      8       15.8154             nan     0.0100    0.2060
##      9       15.6041             nan     0.0100    0.1864
##     10       15.3997             nan     0.0100    0.1966
##     20       13.5472             nan     0.0100    0.1978
##     40       10.7152             nan     0.0100    0.1270
##     60        8.7159             nan     0.0100    0.0558
##     80        7.2870             nan     0.0100    0.0423
##    100        6.2562             nan     0.0100    0.0435
##    120        5.4929             nan     0.0100    0.0248
##    140        4.9199             nan     0.0100    0.0204
##    160        4.4792             nan     0.0100    0.0144
##    180        4.1366             nan     0.0100    0.0146
##    200        3.8537             nan     0.0100    0.0135
##    220        3.6344             nan     0.0100    0.0054
##    240        3.4514             nan     0.0100    0.0036
##    260        3.2988             nan     0.0100    0.0011
##    280        3.1712             nan     0.0100    0.0022
##    300        3.0563             nan     0.0100   -0.0009
##    320        2.9586             nan     0.0100   -0.0016
##    340        2.8697             nan     0.0100    0.0014
##    360        2.7945             nan     0.0100   -0.0006
##    380        2.7272             nan     0.0100   -0.0008
##    400        2.6654             nan     0.0100    0.0018
##    420        2.6104             nan     0.0100    0.0004
##    440        2.5596             nan     0.0100   -0.0011
##    460        2.5087             nan     0.0100   -0.0017
##    480        2.4627             nan     0.0100   -0.0017
##    500        2.4234             nan     0.0100   -0.0013
##    520        2.3850             nan     0.0100   -0.0004
##    540        2.3483             nan     0.0100   -0.0013
##    560        2.3137             nan     0.0100   -0.0007
##    580        2.2813             nan     0.0100   -0.0006
##    600        2.2515             nan     0.0100   -0.0011
##    620        2.2226             nan     0.0100   -0.0007
##    640        2.1935             nan     0.0100   -0.0015
##    660        2.1677             nan     0.0100   -0.0009
##    680        2.1420             nan     0.0100   -0.0009
##    700        2.1166             nan     0.0100   -0.0003
##    720        2.0946             nan     0.0100   -0.0009
##    740        2.0730             nan     0.0100   -0.0017
##    760        2.0514             nan     0.0100   -0.0007
##    780        2.0312             nan     0.0100   -0.0004
##    800        2.0109             nan     0.0100   -0.0010
##    820        1.9917             nan     0.0100    0.0003
##    840        1.9732             nan     0.0100   -0.0005
##    860        1.9565             nan     0.0100   -0.0018
##    880        1.9358             nan     0.0100   -0.0010
##    900        1.9203             nan     0.0100   -0.0003
##    920        1.9031             nan     0.0100   -0.0003
##    940        1.8879             nan     0.0100   -0.0016
##    960        1.8732             nan     0.0100   -0.0004
##    980        1.8562             nan     0.0100   -0.0010
##   1000        1.8401             nan     0.0100   -0.0018

Applies gradient boosting using a generalized boosted regression model (gbm), which is better suited to handling non-linear, multivariate data

# Estimate the best number of boosting iterations from the cross-validation
# error; the call also produces the performance plot shown below.
best.iter <- gbm.perf(gbmMod, method = "cv")

plot of chunk unnamed-chunk-18

Finds the best iteration

# Relative influence of each predictor, computed from the first `best.iter`
# trees. The count must be passed as `n.trees`: the second positional argument
# of summary.gbm is `cBars` (how many bars to plot), so an unnamed `best.iter`
# would only limit the plot and leave the importances based on all trees.
ListScores <- summary(gbmMod, n.trees = best.iter)

plot of chunk unnamed-chunk-19

Shows a summary of the importance of each attribute at the best iteration

# Give the first two summary columns descriptive headers, then print the table.
names(ListScores)[1:2] <- c("attribute", "importance")
ListScores
##                                                     attribute importance
## FICO.Mean                                           FICO.Mean   60.89937
## Loan.Length                                       Loan.Length   15.92223
## Amount.Requested                             Amount.Requested   12.02739
## State                                                   State    4.42039
## Open.CREDIT.Lines                           Open.CREDIT.Lines    2.12498
## Inquiries.in.the.Last.6.Months Inquiries.in.the.Last.6.Months    2.12495
## Employment.Length                           Employment.Length    0.87534
## Loan.Purpose                                     Loan.Purpose    0.65035
## Debt.To.Income.Ratio                     Debt.To.Income.Ratio    0.36767
## Monthly.Income                                 Monthly.Income    0.28913
## Revolving.CREDIT.Balance             Revolving.CREDIT.Balance    0.23217
## Home.Ownership                                 Home.Ownership    0.06604

Variable contribution table showing the contribution (%) to predicting interest rates. Based on this table, FICO Mean, Loan Length and Amount Requested are the major contributors and will be further analyzed.

# Predict on the `testing` data frame using best.iter trees, on the response
# scale.
result <- predict(
  gbmMod,
  newdata = testing,
  n.trees = best.iter,
  type = "response"
)
# Root-mean-squared prediction error expressed as a percentage of the mean
# of actualValues.
rMSEpercent <- sqrt(mean((result - actualValues)^2)) / mean(actualValues) * 100
rMSEpercent
## [1] 11.29

Performs a root-mean-squared error (RMSE) calculation using the training and test data sets randomly sampled for the model. Comparing the predicted interest rates with the actual observed rates, the RMSE, expressed as a percentage of the mean observed rate, was found to be approximately 11%. This fairly high margin of error may be attributed to the amount of data used in the test sampling in the training set, but because the three attributes showed such a significantly higher importance (combined was almost 90%) than the others, there should not be any major errors factored into the concluding analysis.

Final Analysis

Plotting Final Analysis

# Send the final figures to an 800 x 800 pixel PNG file.
png(filename = "FinalPlot.png", width = 800, height = 800)
# Lay panels out on a 2 x 2 grid with tight inner margins and extra outer
# margin at the top and right.
par(
  mfrow = c(2, 2),
  mar = c(3, 3, 2, 1),
  oma = c(0, 0, 3, 1)
)
# Bin interest rate into 7 groups and amount requested into 5 groups (cut2's
# `g` argument); the resulting factors are used as point colors below.
interestCut <- cut2(loansData$Interest.Rate, g = 7)
amtRequestedCut <- cut2(loansData$Amount.Requested, g = 5)

Interest Rate Plot

# Figure A: interest rate against mean FICO score, colored by interestCut.
with(
  loansData,
  plot(FICO.Mean, Interest.Rate,
    pch = 19, col = interestCut, xlab = "", ylab = "",
    main = "Figure A: Color grouped by interest rate"
  )
)
# Overlay the simple least-squares fit of interest rate on FICO score.
lmInterest <- lm(loansData$Interest.Rate ~ loansData$FICO.Mean)
abline(lmInterest, lwd = 3, col = "blue")
# Axis titles are positioned by hand because xlab/ylab are blanked above.
mtext("Interest Rate",
  side = 2, line = -3, las = 1,
  at = 1.05 * max(loansData$Interest.Rate)
)
mtext("FICO Mean", at = max(loansData$FICO.Mean), side = 1, line = 2)

plot of chunk unnamed-chunk-23

Displays the seven levels of interest rates defined by the Lending Club in colored grouping that shows that higher FICO scores correspond with lower interest rates, while lower FICO scores correspond with higher interest rates

Loan Length Plot

# Figure B: interest rate against mean FICO score, colored by loan length.
# The earlier call carried a stray empty argument after `col`, which R bound
# silently to the next positional parameter of the plot method; it is dropped.
plot(loansData$FICO.Mean, loansData$Interest.Rate,
  pch = 19, col = loansData$Loan.Length,
  main = "Figure B: Color grouped by loan length", ylab = "", xlab = ""
)
# Interest rate modeled on FICO score, loan length and their interaction.
lmLength <- lm(loansData$Interest.Rate ~ loansData$FICO.Mean * loansData$Loan.Length)
# coef() is used instead of `$coeff`, which only worked through partial
# matching of the `coefficients` element.
# Intercept = coef 1 + coef 3, slope = coef 2 + coef 4. Assuming the usual
# coefficient order (intercept, FICO, 60-month offset, FICO:60-month
# interaction), this is the fitted line for the 60-month loans -- TODO confirm.
abline(coef(lmLength)[c(1, 2)] + coef(lmLength)[c(3, 4)],
  col = "blue", lwd = 3
)
# Axis titles are positioned by hand because xlab/ylab are blanked above.
mtext("Interest Rate", side = 2, las = 1, line = -3, at = 1.05 * max(loansData$Interest.Rate))
mtext("FICO Mean", side = 1, line = 2, at = max(loansData$FICO.Mean))

plot of chunk unnamed-chunk-24

Displays the two levels of loan length (green is 60 months and red is 36 months) versus FICO mean and interest rate. In general, those who apply for a longer loan length (green) will have a higher interest rate than those whose loan length is shorter (red).

Amount Requested Plot

# Figure C: interest rate against mean FICO score, colored by the
# amtRequestedCut grouping of amount requested.
# The earlier call carried a stray empty argument after `col`, which R bound
# silently to the next positional parameter of the plot method; it is dropped.
plot(loansData$FICO.Mean, loansData$Interest.Rate,
  pch = 19, col = amtRequestedCut,
  main = "Figure C: Color grouped by amount requested", ylab = "", xlab = ""
)
# Full three-way interaction model; it is fitted here but not drawn on the
# panel. The formula is left exactly as written so the model's printed call
# stays the same.
lmAmt <- lm(loansData$Interest.Rate ~ loansData$FICO.Mean * loansData$Loan.Length *
  loansData$Amount.Requested)
# Axis titles are positioned by hand because xlab/ylab are blanked above.
mtext("Interest Rate", side = 2, las = 1, line = -3, at = 1.05 * max(loansData$Interest.Rate))
mtext("FICO Mean", side = 1, line = 2, at = max(loansData$FICO.Mean))

plot of chunk unnamed-chunk-25

Displays the five groupings of amount requested with the highest amounts shown in light blue and the lowest amounts in black versus FICO mean and interest rate. Generally those who requested higher amounts did so at a higher interest rate than those who applied for lesser amounts.

Summary Statistics

# Print the coefficient table and fit statistics for lmAmt, the model of
# interest rate on FICO mean, loan length, amount requested and all of their
# interactions.
summary(lmAmt)
## 
## Call:
## lm(formula = loansData$Interest.Rate ~ loansData$FICO.Mean * 
##     loansData$Loan.Length * loansData$Amount.Requested)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.433 -1.404 -0.122  1.222  9.963 
## 
## Coefficients:
##                                                                                Estimate
## (Intercept)                                                                    6.81e+01
## loansData$FICO.Mean                                                           -8.10e-02
## loansData$Loan.Length60 months                                                 3.15e+00
## loansData$Amount.Requested                                                     3.60e-04
## loansData$FICO.Mean:loansData$Loan.Length60 months                            -6.90e-04
## loansData$FICO.Mean:loansData$Amount.Requested                                -3.28e-07
## loansData$Loan.Length60 months:loansData$Amount.Requested                      4.79e-04
## loansData$FICO.Mean:loansData$Loan.Length60 months:loansData$Amount.Requested -6.22e-07
##                                                                               Std. Error
## (Intercept)                                                                     1.70e+00
## loansData$FICO.Mean                                                             2.40e-03
## loansData$Loan.Length60 months                                                  4.72e+00
## loansData$Amount.Requested                                                      1.31e-04
## loansData$FICO.Mean:loansData$Loan.Length60 months                              6.65e-03
## loansData$FICO.Mean:loansData$Amount.Requested                                  1.83e-07
## loansData$Loan.Length60 months:loansData$Amount.Requested                       2.64e-04
## loansData$FICO.Mean:loansData$Loan.Length60 months:loansData$Amount.Requested   3.71e-07
##                                                                               t value
## (Intercept)                                                                     40.03
## loansData$FICO.Mean                                                            -33.78
## loansData$Loan.Length60 months                                                   0.67
## loansData$Amount.Requested                                                       2.75
## loansData$FICO.Mean:loansData$Loan.Length60 months                              -0.10
## loansData$FICO.Mean:loansData$Amount.Requested                                  -1.79
## loansData$Loan.Length60 months:loansData$Amount.Requested                        1.82
## loansData$FICO.Mean:loansData$Loan.Length60 months:loansData$Amount.Requested   -1.68
##                                                                               Pr(>|t|)
## (Intercept)                                                                     <2e-16
## loansData$FICO.Mean                                                             <2e-16
## loansData$Loan.Length60 months                                                   0.505
## loansData$Amount.Requested                                                       0.006
## loansData$FICO.Mean:loansData$Loan.Length60 months                               0.917
## loansData$FICO.Mean:loansData$Amount.Requested                                   0.073
## loansData$Loan.Length60 months:loansData$Amount.Requested                        0.069
## loansData$FICO.Mean:loansData$Loan.Length60 months:loansData$Amount.Requested    0.093
##                                                                                  
## (Intercept)                                                                   ***
## loansData$FICO.Mean                                                           ***
## loansData$Loan.Length60 months                                                   
## loansData$Amount.Requested                                                    ** 
## loansData$FICO.Mean:loansData$Loan.Length60 months                               
## loansData$FICO.Mean:loansData$Amount.Requested                                .  
## loansData$Loan.Length60 months:loansData$Amount.Requested                     .  
## loansData$FICO.Mean:loansData$Loan.Length60 months:loansData$Amount.Requested .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 2.1 on 2492 degrees of freedom
## Multiple R-squared: 0.749,   Adjusted R-squared: 0.748 
## F-statistic: 1.06e+03 on 7 and 2492 DF,  p-value: <2e-16
# ANOVA table for the linear model regressing FICO.Mean on every other
# column of loansData.
anova(lm(FICO.Mean ~ ., data = loansData))
## Analysis of Variance Table
## 
## Response: FICO.Mean
##                                  Df  Sum Sq Mean Sq F value  Pr(>F)    
## Amount.Requested                  1   20858   20858   57.60 4.6e-14 ***
## Interest.Rate                     1 1870365 1870365 5164.89 < 2e-16 ***
## Loan.Length                       1  193158  193158  533.39 < 2e-16 ***
## Loan.Purpose                     13   53168    4090   11.29 < 2e-16 ***
## Debt.To.Income.Ratio              1   13269   13269   36.64 1.6e-09 ***
## State                            45   17890     398    1.10  0.3040    
## Home.Ownership                    4    4689    1172    3.24  0.0117 *  
## Monthly.Income                    1    1237    1237    3.42  0.0647 .  
## Open.CREDIT.Lines                 1    3793    3793   10.47  0.0012 ** 
## Revolving.CREDIT.Balance          1     341     341    0.94  0.3320    
## Inquiries.in.the.Last.6.Months    1   10458   10458   28.88 8.4e-08 ***
## Employment.Length                11    2221     202    0.56  0.8641    
## Residuals                      2418  875632     362                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA table for the linear model regressing Interest.Rate on every other
# column of loansData.
anova(lm(Interest.Rate ~ ., data = loansData))
## Analysis of Variance Table
## 
## Response: Interest.Rate
##                                  Df Sum Sq Mean Sq F value  Pr(>F)    
## Amount.Requested                  1   4819    4819 1160.28 < 2e-16 ***
## Loan.Length                       1   4318    4318 1039.71 < 2e-16 ***
## Loan.Purpose                     13   1338     103   24.78 < 2e-16 ***
## Debt.To.Income.Ratio              1    650     650  156.59 < 2e-16 ***
## State                            45    705      16    3.77 1.1e-15 ***
## Home.Ownership                    4    559     140   33.66 < 2e-16 ***
## Monthly.Income                    1     45      45   10.81   0.001 ** 
## Open.CREDIT.Lines                 1      2       2    0.50   0.480    
## Revolving.CREDIT.Balance          1      8       8    1.98   0.160    
## Inquiries.in.the.Last.6.Months    1   1396    1396  336.10 < 2e-16 ***
## Employment.Length                11    107      10    2.35   0.007 ** 
## FICO.Mean                         1  19636   19636 4727.84 < 2e-16 ***
## Residuals                      2418  10042       4                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1