Loans Analysis


Download data

# Fetch the raw loans CSV into the working directory using the "curl" method.
download.file(
    url = "https://spark-public.s3.amazonaws.com/dataanalysis/loansData.csv",
    destfile = "./loansData.csv",
    method = "curl"
)

Save data

# Read the downloaded CSV into a data frame.
# Text columns are requested as factors explicitly: later steps call levels()
# on them, and R >= 4.0 would otherwise read them as character vectors.
loansData <- read.csv("./loansData.csv", stringsAsFactors = TRUE)

Save Workspace

# Snapshot every object in the workspace (including dot-prefixed names) to disk.
# TRUE is spelled out because T can be reassigned, and all.names is written in
# full rather than relying on partial argument matching.
save(list = ls(all.names = TRUE), file = "loansData.rda")

Download CodeBook

# Fetch the PDF codebook describing the loan variables, using the "curl" method.
download.file(
    url = "https://spark-public.s3.amazonaws.com/dataanalysis/loansCodebook.pdf",
    destfile = "./loansCodebook.pdf",
    method = "curl"
)

Load libraries

library(car)
## Loading required package: MASS
## Loading required package: nnet
library(stats)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 2.15.2

Processing

Data already converted to .csv file. Entered into Workspace.

Exploratory Analysis

# List the column names of the loans data frame.
names(loansData)
##  [1] "Amount.Requested"               "Amount.Funded.By.Investors"    
##  [3] "Interest.Rate"                  "Loan.Length"                   
##  [5] "Loan.Purpose"                   "Debt.To.Income.Ratio"          
##  [7] "State"                          "Home.Ownership"                
##  [9] "Monthly.Income"                 "FICO.Range"                    
## [11] "Open.CREDIT.Lines"              "Revolving.CREDIT.Balance"      
## [13] "Inquiries.in.the.Last.6.Months" "Employment.Length"
# Show the first rows to see how each column is formatted.
head(loansData)
##       Amount.Requested Amount.Funded.By.Investors Interest.Rate
## 81174            20000                      20000         8.90%
## 99592            19200                      19200        12.12%
## 80059            35000                      35000        21.98%
## 15825            10000                       9975         9.99%
## 33182            12000                      12000        11.71%
## 62403             6000                       6000        15.31%
##       Loan.Length       Loan.Purpose Debt.To.Income.Ratio State
## 81174   36 months debt_consolidation               14.90%    SC
## 99592   36 months debt_consolidation               28.36%    TX
## 80059   60 months debt_consolidation               23.81%    CA
## 15825   36 months debt_consolidation               14.30%    KS
## 33182   36 months        credit_card               18.78%    NJ
## 62403   36 months              other               20.05%    CT
##       Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines
## 81174       MORTGAGE           6542    735-739                14
## 99592       MORTGAGE           4583    715-719                12
## 80059       MORTGAGE          11500    690-694                14
## 15825       MORTGAGE           3833    695-699                10
## 33182           RENT           3195    695-699                11
## 62403            OWN           4892    670-674                17
##       Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
## 81174                    14272                              2
## 99592                    11140                              1
## 80059                    21977                              1
## 15825                     9346                              0
## 33182                    14469                              0
## 62403                    10391                              2
##       Employment.Length
## 81174          < 1 year
## 99592           2 years
## 80059           2 years
## 15825           5 years
## 33182           9 years
## 62403           3 years
# Per-column summary: quantiles for numeric columns, level counts for factors.
summary(loansData)
##  Amount.Requested Amount.Funded.By.Investors Interest.Rate 
##  Min.   : 1000    Min.   :    0              12.12% : 122  
##  1st Qu.: 6000    1st Qu.: 6000              7.90%  : 119  
##  Median :10000    Median :10000              13.11% : 115  
##  Mean   :12406    Mean   :12002              15.31% :  76  
##  3rd Qu.:17000    3rd Qu.:16000              14.09% :  72  
##  Max.   :35000    Max.   :35000              14.33% :  69  
##                                              (Other):1927  
##     Loan.Length               Loan.Purpose  Debt.To.Income.Ratio
##  36 months:1952   debt_consolidation:1307   0%     :   8        
##  60 months: 548   credit_card       : 444   12.54% :   6        
##                   other             : 201   12.20% :   5        
##                   home_improvement  : 152   12.85% :   5        
##                   major_purchase    : 101   14.22% :   5        
##                   small_business    :  87   14.66% :   5        
##                   (Other)           : 208   (Other):2466        
##      State       Home.Ownership Monthly.Income     FICO.Range  
##  CA     : 433   MORTGAGE:1148   Min.   :   588   670-674: 171  
##  NY     : 255   NONE    :   1   1st Qu.:  3500   675-679: 166  
##  TX     : 174   OTHER   :   5   Median :  5000   680-684: 157  
##  FL     : 169   OWN     : 200   Mean   :  5689   695-699: 153  
##  IL     : 101   RENT    :1146   3rd Qu.:  6800   665-669: 145  
##  GA     :  98                   Max.   :102750   690-694: 140  
##  (Other):1270                   NA's   :1        (Other):1568  
##  Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
##  Min.   : 2.0      Min.   :     0           Min.   :0.000                 
##  1st Qu.: 7.0      1st Qu.:  5586           1st Qu.:0.000                 
##  Median : 9.0      Median : 10962           Median :0.000                 
##  Mean   :10.1      Mean   : 15245           Mean   :0.906                 
##  3rd Qu.:13.0      3rd Qu.: 18889           3rd Qu.:1.000                 
##  Max.   :38.0      Max.   :270800           Max.   :9.000                 
##  NA's   :2         NA's   :2                NA's   :2                     
##  Employment.Length
##  10+ years:653    
##  < 1 year :250    
##  2 years  :244    
##  3 years  :235    
##  5 years  :202    
##  4 years  :192    
##  (Other)  :724
# Report the class of every column as a named character vector.
# vapply() guarantees one string per column, and the columns are inspected
# directly instead of through a one-row subset.
vapply(loansData, function(column) class(column)[1], character(1))
##               Amount.Requested     Amount.Funded.By.Investors 
##                      "integer"                      "numeric" 
##                  Interest.Rate                    Loan.Length 
##                       "factor"                       "factor" 
##                   Loan.Purpose           Debt.To.Income.Ratio 
##                       "factor"                       "factor" 
##                          State                 Home.Ownership 
##                       "factor"                       "factor" 
##                 Monthly.Income                     FICO.Range 
##                      "numeric"                       "factor" 
##              Open.CREDIT.Lines       Revolving.CREDIT.Balance 
##                      "integer"                      "integer" 
## Inquiries.in.the.Last.6.Months              Employment.Length 
##                      "integer"                       "factor"

Find missing values

# Total number of missing cells across the whole data frame.
sum(is.na(loansData))
## [1] 7
# Row/column positions of every missing cell.
# is.na() on a data frame already returns a logical matrix, so no dimensions
# need to be hard-coded; unname() drops the dimnames so the result is indexed
# by position rather than by the data frame's row labels.
which(unname(is.na(loansData)), arr.ind = TRUE)
##       row col
## [1,]  367   9
## [2,]  367  11
## [3,] 1595  11
## [4,]  367  12
## [5,] 1595  12
## [6,]  367  13
## [7,] 1595  13

We notice that rows 367 and 1595 have missing values. For now we keep these rows in the data set, and will revisit that decision only if the analysis shows they should be removed.

To begin, recall the purpose of the analysis: to identify and quantify any associations between the interest rate of a loan and the other variables in the data set. In particular, we ask whether other variables remain important in explaining the interest rate after taking the applicant's FICO score into account. In other words: if two people have the same FICO score, can the other variables explain a difference in interest rate between them?

# Frequency of each distinct interest-rate value (the column is a factor).
# The full column name is used rather than relying on `$` partial matching.
summary(loansData$Interest.Rate)
##  12.12%   7.90%  13.11%  15.31%  14.09%  14.33%   8.90%  11.14%   6.03% 
##     122     119     115      76      72      69      64      58      57 
##  17.27%  16.29%   6.62%  10.16%  15.80%  17.77%  11.71%   7.62%  18.49% 
##      56      51      49      48      39      38      36      34      33 
##  13.99%  14.65%  19.05%  13.49%  10.74%  13.67%   7.49%  11.49%  12.69% 
##      29      29      26      25      24      23      23      21      21 
##   7.51%   9.76%  10.99%  18.25%  19.72%  14.27%  20.49%  12.42%  18.75% 
##      21      21      20      19      19      18      18      17      17 
##   7.88%  22.47%  10.65%  11.99%   5.79%  10.59%   9.91%  10.38%  17.99% 
##      17      15      14      14      14      13      13      12      12 
##  21.00%  21.49%  10.75%  16.77%   5.42%   9.63%   9.99%  12.53%  15.27% 
##      12      12      11      11      11      11      11      10      10 
##  15.81%  15.96%  17.49%  21.98%   5.99%   7.29%   8.49%  11.89%  16.89% 
##      10      10      10      10      10      10      10       9       9 
##  19.22%   6.91%   6.99%  10.00%  10.25%  10.36%  10.37%  11.12%  11.83% 
##       9       9       9       8       8       8       8       8       8 
##  13.06%  13.98%  16.32%  16.49%  18.64%  22.95%   7.66%  11.86%  12.68% 
##       8       8       8       8       8       8       8       7       7 
##  14.11%  15.23%  15.99%  18.55%  19.03%  19.99%  23.28%  23.76%   7.14% 
##       7       7       7       7       7       7       7       7       7 
##   8.59%   8.94%  11.11%  11.48%  12.99%  14.96%  17.58%  20.50%   6.54% 
##       7       7       6       6       6       6       6       6       6 
## (Other) 
##     398
# Show the factor levels of the interest rate in their stored order.
# The full column name is used rather than relying on `$` partial matching.
levels(loansData$Interest.Rate)
##   [1] "10.00%" "10.08%" "10.16%" "10.20%" "10.25%" "10.28%" "10.36%"
##   [8] "10.37%" "10.38%" "10.46%" "10.59%" "10.62%" "10.65%" "10.74%"
##  [15] "10.75%" "10.83%" "10.91%" "10.95%" "10.99%" "11.03%" "11.09%"
##  [22] "11.11%" "11.12%" "11.14%" "11.26%" "11.36%" "11.48%" "11.49%"
##  [29] "11.54%" "11.58%" "11.59%" "11.66%" "11.71%" "11.78%" "11.83%"
##  [36] "11.86%" "11.89%" "11.97%" "11.99%" "12.12%" "12.18%" "12.21%"
##  [43] "12.23%" "12.29%" "12.41%" "12.42%" "12.49%" "12.53%" "12.61%"
##  [50] "12.68%" "12.69%" "12.73%" "12.84%" "12.86%" "12.87%" "12.92%"
##  [57] "12.98%" "12.99%" "13.06%" "13.11%" "13.12%" "13.16%" "13.17%"
##  [64] "13.22%" "13.23%" "13.24%" "13.30%" "13.35%" "13.43%" "13.47%"
##  [71] "13.48%" "13.49%" "13.55%" "13.57%" "13.61%" "13.67%" "13.72%"
##  [78] "13.75%" "13.79%" "13.80%" "13.85%" "13.87%" "13.92%" "13.93%"
##  [85] "13.98%" "13.99%" "14.07%" "14.09%" "14.11%" "14.12%" "14.17%"
##  [92] "14.18%" "14.22%" "14.26%" "14.27%" "14.33%" "14.35%" "14.42%"
##  [99] "14.46%" "14.50%" "14.59%" "14.61%" "14.65%" "14.70%" "14.72%"
## [106] "14.74%" "14.79%" "14.82%" "14.83%" "14.84%" "14.91%" "14.96%"
## [113] "15.01%" "15.05%" "15.13%" "15.20%" "15.21%" "15.23%" "15.27%"
## [120] "15.28%" "15.31%" "15.33%" "15.37%" "15.45%" "15.57%" "15.58%"
## [127] "15.62%" "15.65%" "15.68%" "15.70%" "15.80%" "15.81%" "15.95%"
## [134] "15.96%" "15.99%" "16.00%" "16.02%" "16.07%" "16.29%" "16.32%"
## [141] "16.35%" "16.40%" "16.45%" "16.49%" "16.63%" "16.69%" "16.70%"
## [148] "16.71%" "16.77%" "16.82%" "16.83%" "16.89%" "17.04%" "17.14%"
## [155] "17.15%" "17.19%" "17.27%" "17.43%" "17.44%" "17.49%" "17.51%"
## [162] "17.54%" "17.56%" "17.58%" "17.77%" "17.80%" "17.88%" "17.90%"
## [169] "17.93%" "17.99%" "18.17%" "18.25%" "18.29%" "18.30%" "18.39%"
## [176] "18.49%" "18.55%" "18.62%" "18.64%" "18.67%" "18.75%" "18.79%"
## [183] "19.03%" "19.04%" "19.05%" "19.13%" "19.22%" "19.41%" "19.42%"
## [190] "19.47%" "19.69%" "19.72%" "19.74%" "19.91%" "19.99%" "20.25%"
## [197] "20.30%" "20.49%" "20.50%" "20.52%" "20.53%" "20.77%" "20.89%"
## [204] "20.99%" "21.00%" "21.14%" "21.27%" "21.28%" "21.48%" "21.49%"
## [211] "21.67%" "21.74%" "21.97%" "21.98%" "22.11%" "22.45%" "22.47%"
## [218] "22.78%" "22.95%" "23.28%" "23.33%" "23.63%" "23.76%" "23.83%"
## [225] "23.91%" "24.20%" "24.33%" "24.70%" "24.89%" "5.42%"  "5.79%" 
## [232] "5.99%"  "6.00%"  "6.03%"  "6.17%"  "6.54%"  "6.62%"  "6.76%" 
## [239] "6.91%"  "6.92%"  "6.99%"  "7.14%"  "7.29%"  "7.40%"  "7.43%" 
## [246] "7.49%"  "7.51%"  "7.62%"  "7.66%"  "7.68%"  "7.74%"  "7.75%" 
## [253] "7.88%"  "7.90%"  "7.91%"  "8.00%"  "8.07%"  "8.32%"  "8.49%" 
## [260] "8.59%"  "8.63%"  "8.88%"  "8.90%"  "8.94%"  "9.07%"  "9.20%" 
## [267] "9.25%"  "9.32%"  "9.33%"  "9.62%"  "9.63%"  "9.76%"  "9.88%" 
## [274] "9.91%"  "9.99%"

We notice that the levels of the loansData$Interest column are not in numeric order: they are sorted as text, so the single-digit rates ("5.42%" to "9.99%") come after "24.89%". This can be corrected by re-ordering the levels.

# Re-level Interest.Rate so that its levels run in increasing numeric order.
# The stored levels are sorted as strings, which puts "10.00%" before "5.42%".
Interest <- loansData$Interest.Rate
levelsI <- levels(Interest)
# Strip the trailing "%" and order by numeric value, instead of relying on the
# hard-coded index positions of the single-digit rates.
levelsN <- levelsI[order(as.numeric(sub("%", "", levelsI, fixed = TRUE)))]
InterestN <- factor(Interest, levels = levelsN)
# Work on a copy so the original data frame stays available for comparison.
loansDataCorrection <- loansData
loansDataCorrection$Interest.Rate <- InterestN

# Side-by-side bar charts of rate frequencies: the re-levelled factor (left)
# against the original string-sorted factor (right).
# Columns are referenced by their full names instead of `$` partial matching.
par(mfrow = c(1, 2))
barplot(table(loansDataCorrection$Interest.Rate), col = "blue",
    main = "loansDataCorrection$Interest")
barplot(table(loansData$Interest.Rate), col = "blue", main = "loansData$Interest")

plot of chunk unnamed-chunk-10

With the levels of the Interest column now corrected, we can view the relationship between interest rate and FICO score.

# Scatter of interest rate against FICO range on a single panel.
# Both columns are factors, so as.numeric() plots their integer level codes
# (the position of each value among the ordered levels), not the raw numbers.
# Columns are referenced by their full names instead of `$` partial matching.
par(mfrow = c(1, 1))
plot(as.numeric(loansDataCorrection$FICO.Range),
    as.numeric(loansDataCorrection$Interest.Rate),
    xlab = "FICO RANGE", ylab = "Interest RATE", pch = 1, cex = 0.8,
    main = "Interest Rate Vs. FICO Range")

plot of chunk unnamed-chunk-11

The Interest rate and FICO score seem to be linearly correlated, with some variance. Let us further examine differences between the length of the loan term. Perhaps there will be a clear 'boundary separation'.

# Left panel: interest rate vs. FICO range, coloured by loan length.
# Right panel: box plot of interest rate for each loan length.
# The factor columns are converted with as.numeric(), so the axes show integer
# level codes; the legend position (27, 275) is given in those coordinates.
# Columns are referenced by their full names instead of `$` partial matching.
par(mfrow = c(1, 2))
plot(as.numeric(loansDataCorrection$FICO.Range),
    as.numeric(loansDataCorrection$Interest.Rate),
    xlab = "FICO range", ylab = "Interest Rate", cex = 0.5,
    main = "Interest Rate Vs. FICO Range, Colored by Loan Length",
    cex.main = 0.75, pch = 19,
    col = as.numeric(loansDataCorrection$Loan.Length))
# Colours and labels are both derived with unique() so they stay paired.
legend(27, 275, col = unique(as.numeric(loansDataCorrection$Loan.Length)),
    legend = unique(loansDataCorrection$Loan.Length), pch = 19, cex = 0.5)
boxplot(as.numeric(loansDataCorrection$Interest.Rate) ~ loansDataCorrection$Loan.Length,
    xlab = "Loan Length, 36 or 60 month Loan", ylab = "Interest Rate")

plot of chunk unnamed-chunk-12

From the scatter-plot and box plot, we can see that longer loan terms have a higher interest. We take a Two Sample t-test.

# One-sided two-sample t-test (99% level) of interest rate by loan length.
# NOTE(review): as.numeric() on the Interest.Rate factor yields integer level
# codes, so the group means reported by this test are mean level positions,
# not percentages -- confirm this is what is intended.
t.test(as.numeric(loansDataCorrection$Interest) ~ as.factor(loansDataCorrection$Loan.L), 
    alternative = "less", conf.level = 0.99)
## 
##  Welch Two Sample t-test
## 
## data:  as.numeric(loansDataCorrection$Interest) by as.factor(loansDataCorrection$Loan.L) 
## t = -21.72, df = 832.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is less than 0 
## 99 percent confidence interval:
##    -Inf -67.54 
## sample estimates:
## mean in group 36 months mean in group 60 months 
##                   97.69                  173.34

The p-value shows that the difference between the two loan lengths is highly significant:

p-value < 2.2e-16

# Box plot of interest rate for each number of recent credit inquiries.
# The data frame is passed once through `data =` and the formula uses the full
# column names, instead of mixing a positional data argument with `$` partial
# matching. The typo in the plot title ("Ineterest") is corrected.
# Interest.Rate is a factor, so as.numeric() plots its integer level codes.
boxplot(as.numeric(Interest.Rate) ~ Inquiries.in.the.Last.6.Months,
    data = loansDataCorrection,
    xlab = "Number of Inquiries in the past 6 Months",
    ylab = "Interest Rate", main = "Interest Rate Vs. Inquiries")

plot of chunk unnamed-chunk-14

# Scatter of interest rate against the amount requested.
# Interest.Rate is a factor, so as.numeric() plots its integer level codes.
# Columns are referenced by their full names instead of `$` partial matching.
plot(as.numeric(loansDataCorrection$Amount.Requested),
    as.numeric(loansDataCorrection$Interest.Rate),
    xlab = "Amount Requested", ylab = "Interest Rate",
    main = "Interest Rate vs. Amount Requested")

plot of chunk unnamed-chunk-15

The scatter-plot suggests turning the amount requested into a more manageable factor variable. We use the cut2() function from the Hmisc package.

library(Hmisc)
## Warning: package 'Hmisc' was built under R version 2.15.1
## Loading required package: survival
## Loading required package: splines
## Hmisc library by Frank E Harrell Jr
## 
## Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall
## documentation.
## 
## NOTE:Hmisc no longer redefines [.factor to drop unused levels when
## subsetting.  To get the old behavior of Hmisc type dropUnusedLevels().
## Attaching package: 'Hmisc'
## The following object(s) are masked from 'package:survival':
## 
## untangle.specials
## The following object(s) are masked from 'package:car':
## 
## recode
## The following object(s) are masked from 'package:base':
## 
## format.pval, round.POSIXt, trunc.POSIXt, units

What we will do is break the Amount requested into four groups allowing a clear visual representation of the Interest rate and range amount.

# Bin the requested amount into four quantile groups (Hmisc::cut2, g = 4).
# The full column name is used rather than relying on `$` partial matching.
AmountRange <- cut2(loansDataCorrection$Amount.Requested, g = 4)

# Box plot of interest rate within each of the four requested-amount groups.
# Interest.Rate is a factor, so as.numeric() plots its integer level codes.
# The full column name is used rather than relying on `$` partial matching.
boxplot(as.numeric(loansDataCorrection$Interest.Rate) ~ AmountRange,
    xlab = "Amount Requested, in 4 Groupings",
    ylab = "Interest Rate", main = "Interest Rate vs. Amount Requested")

plot of chunk unnamed-chunk-17

Let us perform a multiple comparison procedure and statistical test in order to find a significant difference in means from each group. We do this by the TukeyHSD function.

# One-way ANOVA of interest rate on the amount groups, followed by Tukey's HSD
# for all pairwise group comparisons.
# NOTE(review): as.numeric() on the Interest.Rate factor yields integer level
# codes, so the reported differences are in level positions, not percentage
# points -- confirm this is what is intended.
TukeyHSD(aov(as.numeric(loansDataCorrection$Interest) ~ AmountRange))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = as.numeric(loansDataCorrection$Interest) ~ AmountRange)
## 
## $AmountRange
##                              diff     lwr   upr  p adj
## [ 6075,10050)-[ 1000, 6075) 10.10 -0.3290 20.52 0.0618
## [10050,17200)-[ 1000, 6075) 20.26  9.7892 30.73 0.0000
## [17200,35000]-[ 1000, 6075) 62.16 51.6438 72.67 0.0000
## [10050,17200)-[ 6075,10050) 10.16 -0.3524 20.68 0.0626
## [17200,35000]-[ 6075,10050) 52.06 41.5024 62.61 0.0000
## [17200,35000]-[10050,17200) 41.89 31.2915 52.50 0.0000

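The group with the largest requested amounts, [17200, 35000], has a mean interest rate that differs significantly from every other group, and the [10050, 17200) group also differs significantly from the smallest group. The remaining adjacent-group comparisons are not significant at the 5% level (adjusted p-values of about 0.06). Let us go further and examine the particular purpose for the loans against the interest rates.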

# Box plot of interest rate for each stated loan purpose.
# Interest.Rate is a factor, so as.numeric() plots its integer level codes.
# Columns are referenced by their full names instead of `$` partial matching.
boxplot(as.numeric(loansDataCorrection$Interest.Rate) ~ loansDataCorrection$Loan.Purpose,
    xlab = "Loan Purpose", ylab = "Interest Rate",
    main = "Interest Rate vs. Loan Purpose")

plot of chunk unnamed-chunk-19

Next, we look at monthly income.

# Histogram of raw monthly income using 50 bins.
# The full column name is used rather than relying on `$` partial matching.
hist(loansDataCorrection$Monthly.Income, breaks = 50, col = "blue",
    xlab = "Monthly Income", main = "Monthly Income")

plot of chunk unnamed-chunk-20

The raw histogram is hard to read because a few very large incomes stretch the scale, so on its own it suggests that monthly income does not play much of a role. So, let's transform the data by taking the log. This makes the distribution closer to symmetric and easier to grasp, and also lets us plot it against the interest rate.

# Left panel: histogram of log monthly income.
# Right panel: interest rate against log monthly income.
# Interest.Rate is a factor, so as.numeric() plots its integer level codes.
# Columns are referenced by their full names instead of `$` partial matching,
# and the missing closing parenthesis in the scatter-plot title is restored.
par(mfrow = c(1, 2))
hist(log(loansDataCorrection$Monthly.Income), breaks = 50, col = "blue",
    xlab = "log(Monthly Income)", main = "monthly Income, log")
plot(log(loansDataCorrection$Monthly.Income),
    as.numeric(loansDataCorrection$Interest.Rate),
    xlab = "log(Monthly Income)", ylab = "Interest Rate", cex = 0.6,
    main = "Interest Rate vs. log(Monthly Income)")

plot of chunk unnamed-chunk-21

Let us look at the Debt variable for uniformity.

# Show the factor levels of the debt-to-income ratio in their stored order.
# The full column name is used rather than relying on `$` partial matching.
levels(loansDataCorrection$Debt.To.Income.Ratio)
##    [1] "0%"     "0.04%"  "0.17%"  "0.18%"  "0.24%"  "0.26%"  "0.33%" 
##    [8] "0.34%"  "0.47%"  "0.48%"  "0.51%"  "0.52%"  "0.53%"  "0.56%" 
##   [15] "0.58%"  "0.60%"  "0.62%"  "0.72%"  "0.94%"  "1%"     "1.02%" 
##   [22] "1.03%"  "1.05%"  "1.06%"  "1.08%"  "1.20%"  "1.23%"  "1.24%" 
##   [29] "1.28%"  "1.30%"  "1.31%"  "1.34%"  "1.37%"  "1.41%"  "1.43%" 
##   [36] "1.49%"  "1.50%"  "1.51%"  "1.58%"  "1.59%"  "1.62%"  "1.63%" 
##   [43] "1.68%"  "1.69%"  "1.70%"  "1.73%"  "1.78%"  "1.79%"  "1.82%" 
##   [50] "1.83%"  "1.84%"  "1.88%"  "1.92%"  "1.97%"  "1.98%"  "1.99%" 
##   [57] "10%"    "10.01%" "10.02%" "10.05%" "10.08%" "10.10%" "10.11%"
##   [64] "10.12%" "10.16%" "10.17%" "10.19%" "10.20%" "10.21%" "10.22%"
##   [71] "10.23%" "10.25%" "10.27%" "10.28%" "10.29%" "10.30%" "10.32%"
##   [78] "10.34%" "10.35%" "10.36%" "10.37%" "10.38%" "10.39%" "10.40%"
##   [85] "10.41%" "10.42%" "10.43%" "10.45%" "10.46%" "10.47%" "10.53%"
##   [92] "10.55%" "10.56%" "10.58%" "10.59%" "10.61%" "10.63%" "10.65%"
##   [99] "10.67%" "10.68%" "10.69%" "10.71%" "10.72%" "10.73%" "10.74%"
##  [106] "10.75%" "10.76%" "10.77%" "10.79%" "10.80%" "10.81%" "10.84%"
##  [113] "10.86%" "10.87%" "10.88%" "10.89%" "10.93%" "10.94%" "10.96%"
##  [120] "10.97%" "10.98%" "10.99%" "11.01%" "11.02%" "11.03%" "11.04%"
##  [127] "11.05%" "11.07%" "11.08%" "11.12%" "11.15%" "11.16%" "11.17%"
##  [134] "11.18%" "11.19%" "11.20%" "11.21%" "11.22%" "11.23%" "11.24%"
##  [141] "11.27%" "11.29%" "11.31%" "11.32%" "11.34%" "11.35%" "11.37%"
##  [148] "11.38%" "11.39%" "11.40%" "11.41%" "11.42%" "11.44%" "11.45%"
##  [155] "11.46%" "11.48%" "11.50%" "11.52%" "11.53%" "11.55%" "11.59%"
##  [162] "11.61%" "11.63%" "11.64%" "11.66%" "11.67%" "11.68%" "11.70%"
##  [169] "11.71%" "11.72%" "11.73%" "11.74%" "11.77%" "11.78%" "11.79%"
##  [176] "11.80%" "11.81%" "11.84%" "11.85%" "11.86%" "11.89%" "11.91%"
##  [183] "11.92%" "11.93%" "11.94%" "11.95%" "11.96%" "11.98%" "11.99%"
##  [190] "12%"    "12.02%" "12.03%" "12.04%" "12.05%" "12.06%" "12.07%"
##  [197] "12.10%" "12.11%" "12.13%" "12.15%" "12.19%" "12.20%" "12.21%"
##  [204] "12.23%" "12.24%" "12.26%" "12.27%" "12.29%" "12.30%" "12.32%"
##  [211] "12.33%" "12.35%" "12.36%" "12.39%" "12.40%" "12.41%" "12.42%"
##  [218] "12.44%" "12.46%" "12.47%" "12.49%" "12.50%" "12.51%" "12.52%"
##  [225] "12.54%" "12.56%" "12.58%" "12.59%" "12.60%" "12.61%" "12.63%"
##  [232] "12.67%" "12.68%" "12.69%" "12.70%" "12.72%" "12.73%" "12.77%"
##  [239] "12.78%" "12.79%" "12.80%" "12.81%" "12.82%" "12.83%" "12.85%"
##  [246] "12.86%" "12.89%" "12.90%" "12.92%" "12.93%" "12.94%" "12.96%"
##  [253] "12.98%" "12.99%" "13%"    "13.02%" "13.03%" "13.06%" "13.07%"
##  [260] "13.08%" "13.09%" "13.10%" "13.11%" "13.13%" "13.14%" "13.16%"
##  [267] "13.17%" "13.18%" "13.19%" "13.26%" "13.27%" "13.28%" "13.29%"
##  [274] "13.30%" "13.31%" "13.32%" "13.34%" "13.37%" "13.38%" "13.39%"
##  [281] "13.40%" "13.41%" "13.42%" "13.44%" "13.45%" "13.48%" "13.49%"
##  [288] "13.50%" "13.52%" "13.53%" "13.55%" "13.56%" "13.57%" "13.58%"
##  [295] "13.59%" "13.61%" "13.62%" "13.64%" "13.65%" "13.67%" "13.69%"
##  [302] "13.71%" "13.72%" "13.73%" "13.74%" "13.75%" "13.76%" "13.77%"
##  [309] "13.78%" "13.79%" "13.80%" "13.81%" "13.82%" "13.83%" "13.85%"
##  [316] "13.86%" "13.87%" "13.88%" "13.89%" "13.90%" "13.91%" "13.93%"
##  [323] "13.94%" "13.96%" "13.98%" "13.99%" "14.01%" "14.02%" "14.05%"
##  [330] "14.06%" "14.07%" "14.08%" "14.11%" "14.12%" "14.13%" "14.15%"
##  [337] "14.16%" "14.18%" "14.19%" "14.20%" "14.22%" "14.25%" "14.26%"
##  [344] "14.27%" "14.29%" "14.30%" "14.31%" "14.32%" "14.33%" "14.34%"
##  [351] "14.36%" "14.37%" "14.40%" "14.41%" "14.43%" "14.44%" "14.45%"
##  [358] "14.48%" "14.49%" "14.50%" "14.51%" "14.52%" "14.57%" "14.59%"
##  [365] "14.60%" "14.61%" "14.62%" "14.63%" "14.65%" "14.66%" "14.67%"
##  [372] "14.68%" "14.69%" "14.70%" "14.72%" "14.74%" "14.75%" "14.76%"
##  [379] "14.77%" "14.78%" "14.79%" "14.80%" "14.82%" "14.84%" "14.85%"
##  [386] "14.86%" "14.87%" "14.88%" "14.89%" "14.90%" "14.92%" "14.93%"
##  [393] "14.95%" "14.96%" "14.98%" "14.99%" "15%"    "15.02%" "15.03%"
##  [400] "15.04%" "15.05%" "15.07%" "15.08%" "15.09%" "15.10%" "15.11%"
##  [407] "15.12%" "15.13%" "15.14%" "15.16%" "15.17%" "15.19%" "15.20%"
##  [414] "15.21%" "15.22%" "15.23%" "15.24%" "15.28%" "15.29%" "15.30%"
##  [421] "15.32%" "15.33%" "15.34%" "15.35%" "15.36%" "15.37%" "15.38%"
##  [428] "15.41%" "15.42%" "15.43%" "15.45%" "15.46%" "15.47%" "15.50%"
##  [435] "15.51%" "15.53%" "15.54%" "15.55%" "15.56%" "15.58%" "15.60%"
##  [442] "15.61%" "15.64%" "15.66%" "15.68%" "15.69%" "15.70%" "15.71%"
##  [449] "15.72%" "15.74%" "15.75%" "15.76%" "15.79%" "15.80%" "15.82%"
##  [456] "15.83%" "15.86%" "15.87%" "15.88%" "15.89%" "15.90%" "15.91%"
##  [463] "15.92%" "15.93%" "15.94%" "15.95%" "15.96%" "15.97%" "15.98%"
##  [470] "16%"    "16.01%" "16.02%" "16.03%" "16.04%" "16.06%" "16.07%"
##  [477] "16.08%" "16.09%" "16.14%" "16.15%" "16.16%" "16.17%" "16.18%"
##  [484] "16.20%" "16.21%" "16.22%" "16.23%" "16.24%" "16.26%" "16.28%"
##  [491] "16.29%" "16.32%" "16.33%" "16.35%" "16.36%" "16.38%" "16.39%"
##  [498] "16.40%" "16.41%" "16.42%" "16.43%" "16.44%" "16.45%" "16.46%"
##  [505] "16.47%" "16.48%" "16.49%" "16.52%" "16.53%" "16.54%" "16.55%"
##  [512] "16.56%" "16.57%" "16.58%" "16.59%" "16.61%" "16.62%" "16.64%"
##  [519] "16.66%" "16.67%" "16.68%" "16.69%" "16.70%" "16.71%" "16.72%"
##  [526] "16.73%" "16.74%" "16.75%" "16.76%" "16.77%" "16.78%" "16.80%"
##  [533] "16.84%" "16.85%" "16.87%" "16.89%" "16.92%" "16.93%" "16.94%"
##  [540] "16.97%" "16.98%" "16.99%" "17%"    "17.01%" "17.04%" "17.05%"
##  [547] "17.07%" "17.08%" "17.10%" "17.11%" "17.12%" "17.15%" "17.16%"
##  [554] "17.17%" "17.19%" "17.21%" "17.22%" "17.27%" "17.29%" "17.31%"
##  [561] "17.32%" "17.34%" "17.35%" "17.36%" "17.37%" "17.39%" "17.40%"
##  [568] "17.41%" "17.43%" "17.44%" "17.45%" "17.46%" "17.47%" "17.48%"
##  [575] "17.49%" "17.50%" "17.51%" "17.55%" "17.58%" "17.59%" "17.61%"
##  [582] "17.62%" "17.63%" "17.65%" "17.67%" "17.68%" "17.70%" "17.71%"
##  [589] "17.72%" "17.73%" "17.76%" "17.77%" "17.78%" "17.79%" "17.80%"
##  [596] "17.82%" "17.86%" "17.88%" "17.89%" "17.90%" "17.91%" "17.92%"
##  [603] "17.93%" "17.94%" "17.95%" "17.96%" "17.97%" "17.98%" "17.99%"
##  [610] "18%"    "18.02%" "18.04%" "18.05%" "18.07%" "18.08%" "18.10%"
##  [617] "18.14%" "18.15%" "18.16%" "18.17%" "18.18%" "18.20%" "18.21%"
##  [624] "18.23%" "18.24%" "18.28%" "18.30%" "18.31%" "18.32%" "18.33%"
##  [631] "18.35%" "18.36%" "18.37%" "18.38%" "18.39%" "18.40%" "18.41%"
##  [638] "18.42%" "18.44%" "18.45%" "18.46%" "18.48%" "18.50%" "18.52%"
##  [645] "18.55%" "18.57%" "18.60%" "18.61%" "18.62%" "18.63%" "18.66%"
##  [652] "18.67%" "18.72%" "18.73%" "18.74%" "18.75%" "18.78%" "18.81%"
##  [659] "18.82%" "18.83%" "18.84%" "18.85%" "18.86%" "18.89%" "18.90%"
##  [666] "18.91%" "18.92%" "18.93%" "18.95%" "18.96%" "18.99%" "19%"   
##  [673] "19.01%" "19.02%" "19.05%" "19.06%" "19.07%" "19.10%" "19.11%"
##  [680] "19.12%" "19.13%" "19.15%" "19.16%" "19.17%" "19.20%" "19.21%"
##  [687] "19.22%" "19.23%" "19.24%" "19.25%" "19.26%" "19.27%" "19.28%"
##  [694] "19.29%" "19.30%" "19.32%" "19.33%" "19.35%" "19.36%" "19.37%"
##  [701] "19.38%" "19.41%" "19.43%" "19.45%" "19.46%" "19.47%" "19.48%"
##  [708] "19.49%" "19.52%" "19.53%" "19.55%" "19.56%" "19.57%" "19.58%"
##  [715] "19.59%" "19.60%" "19.61%" "19.62%" "19.63%" "19.64%" "19.65%"
##  [722] "19.68%" "19.70%" "19.71%" "19.77%" "19.78%" "19.81%" "19.82%"
##  [729] "19.83%" "19.85%" "19.86%" "19.88%" "19.89%" "19.90%" "19.92%"
##  [736] "19.93%" "19.94%" "19.97%" "19.98%" "2%"     "2.13%"  "2.14%" 
##  [743] "2.15%"  "2.18%"  "2.19%"  "2.21%"  "2.23%"  "2.25%"  "2.33%" 
##  [750] "2.37%"  "2.38%"  "2.44%"  "2.48%"  "2.50%"  "2.51%"  "2.52%" 
##  [757] "2.54%"  "2.55%"  "2.63%"  "2.67%"  "2.68%"  "2.77%"  "2.83%" 
##  [764] "2.84%"  "2.85%"  "2.86%"  "2.88%"  "2.89%"  "2.92%"  "2.93%" 
##  [771] "2.95%"  "2.98%"  "20.03%" "20.04%" "20.05%" "20.06%" "20.07%"
##  [778] "20.09%" "20.10%" "20.11%" "20.13%" "20.14%" "20.15%" "20.16%"
##  [785] "20.17%" "20.18%" "20.21%" "20.23%" "20.24%" "20.25%" "20.26%"
##  [792] "20.27%" "20.28%" "20.30%" "20.31%" "20.32%" "20.34%" "20.35%"
##  [799] "20.37%" "20.38%" "20.40%" "20.44%" "20.46%" "20.48%" "20.49%"
##  [806] "20.50%" "20.51%" "20.52%" "20.54%" "20.55%" "20.56%" "20.57%"
##  [813] "20.58%" "20.59%" "20.60%" "20.62%" "20.67%" "20.68%" "20.69%"
##  [820] "20.70%" "20.71%" "20.72%" "20.73%" "20.74%" "20.75%" "20.76%"
##  [827] "20.77%" "20.80%" "20.81%" "20.82%" "20.83%" "20.85%" "20.87%"
##  [834] "20.89%" "20.91%" "20.92%" "20.93%" "20.94%" "20.95%" "20.96%"
##  [841] "20.98%" "20.99%" "21%"    "21.03%" "21.04%" "21.11%" "21.12%"
##  [848] "21.16%" "21.17%" "21.18%" "21.20%" "21.21%" "21.22%" "21.23%"
##  [855] "21.24%" "21.25%" "21.28%" "21.30%" "21.31%" "21.32%" "21.33%"
##  [862] "21.34%" "21.35%" "21.36%" "21.42%" "21.43%" "21.44%" "21.45%"
##  [869] "21.46%" "21.47%" "21.49%" "21.50%" "21.51%" "21.52%" "21.53%"
##  [876] "21.54%" "21.58%" "21.59%" "21.61%" "21.63%" "21.66%" "21.67%"
##  [883] "21.69%" "21.70%" "21.73%" "21.74%" "21.75%" "21.77%" "21.80%"
##  [890] "21.81%" "21.82%" "21.83%" "21.84%" "21.88%" "21.89%" "21.90%"
##  [897] "21.92%" "21.93%" "21.94%" "21.95%" "21.98%" "21.99%" "22%"   
##  [904] "22.01%" "22.03%" "22.04%" "22.06%" "22.08%" "22.09%" "22.10%"
##  [911] "22.13%" "22.14%" "22.15%" "22.19%" "22.20%" "22.21%" "22.22%"
##  [918] "22.24%" "22.26%" "22.29%" "22.30%" "22.31%" "22.32%" "22.34%"
##  [925] "22.35%" "22.36%" "22.38%" "22.39%" "22.41%" "22.42%" "22.45%"
##  [932] "22.47%" "22.48%" "22.49%" "22.50%" "22.52%" "22.53%" "22.55%"
##  [939] "22.57%" "22.58%" "22.60%" "22.61%" "22.63%" "22.64%" "22.65%"
##  [946] "22.66%" "22.67%" "22.69%" "22.70%" "22.72%" "22.74%" "22.76%"
##  [953] "22.81%" "22.83%" "22.84%" "22.87%" "22.93%" "22.95%" "22.98%"
##  [960] "22.99%" "23.02%" "23.03%" "23.04%" "23.05%" "23.06%" "23.07%"
##  [967] "23.12%" "23.15%" "23.18%" "23.24%" "23.25%" "23.27%" "23.29%"
##  [974] "23.30%" "23.34%" "23.35%" "23.36%" "23.37%" "23.38%" "23.40%"
##  [981] "23.41%" "23.44%" "23.48%" "23.50%" "23.51%" "23.52%" "23.54%"
##  [988] "23.56%" "23.58%" "23.59%" "23.60%" "23.62%" "23.63%" "23.64%"
##  [995] "23.69%" "23.70%" "23.75%" "23.79%" "23.80%" "23.81%" "23.84%"
## [1002] "23.85%" "23.88%" "23.89%" "23.93%" "23.94%" "23.95%" "23.97%"
## [1009] "23.99%" "24.01%" "24.02%" "24.04%" "24.05%" "24.06%" "24.07%"
## [1016] "24.09%" "24.11%" "24.12%" "24.14%" "24.15%" "24.16%" "24.17%"
## [1023] "24.19%" "24.20%" "24.21%" "24.22%" "24.23%" "24.24%" "24.25%"
## [1030] "24.29%" "24.30%" "24.36%" "24.40%" "24.41%" "24.42%" "24.43%"
## [1037] "24.45%" "24.47%" "24.48%" "24.50%" "24.51%" "24.53%" "24.57%"
## [1044] "24.58%" "24.59%" "24.63%" "24.64%" "24.65%" "24.66%" "24.68%"
## [1051] "24.69%" "24.71%" "24.74%" "24.75%" "24.77%" "24.78%" "24.80%"
## [1058] "24.82%" "24.85%" "24.88%" "24.90%" "24.93%" "24.95%" "24.96%"
## [1065] "24.97%" "24.98%" "25.01%" "25.03%" "25.07%" "25.13%" "25.16%"
## [1072] "25.22%" "25.23%" "25.29%" "25.32%" "25.33%" "25.35%" "25.36%"
## [1079] "25.40%" "25.41%" "25.48%" "25.49%" "25.53%" "25.56%" "25.58%"
## [1086] "25.59%" "25.67%" "25.70%" "25.76%" "25.77%" "25.80%" "25.84%"
## [1093] "25.86%" "25.87%" "25.89%" "25.91%" "25.95%" "26.02%" "26.03%"
## [1100] "26.06%" "26.08%" "26.09%" "26.11%" "26.14%" "26.18%" "26.19%"
## [1107] "26.21%" "26.29%" "26.32%" "26.33%" "26.35%" "26.36%" "26.38%"
## [1114] "26.42%" "26.44%" "26.50%" "26.53%" "26.54%" "26.56%" "26.62%"
## [1121] "26.65%" "26.68%" "26.70%" "26.72%" "26.74%" "26.75%" "26.84%"
## [1128] "26.87%" "26.92%" "26.95%" "26.96%" "26.98%" "27.06%" "27.07%"
## [1135] "27.13%" "27.14%" "27.15%" "27.16%" "27.23%" "27.25%" "27.28%"
## [1142] "27.32%" "27.34%" "27.35%" "27.41%" "27.47%" "27.48%" "27.50%"
## [1149] "27.53%" "27.54%" "27.56%" "27.59%" "27.60%" "27.63%" "27.64%"
## [1156] "27.65%" "27.69%" "27.71%" "27.72%" "27.74%" "27.76%" "27.78%"
## [1163] "27.80%" "27.83%" "27.88%" "27.89%" "27.92%" "27.96%" "28.01%"
## [1170] "28.08%" "28.15%" "28.20%" "28.25%" "28.27%" "28.28%" "28.29%"
## [1177] "28.35%" "28.36%" "28.39%" "28.46%" "28.51%" "28.54%" "28.56%"
## [1184] "28.61%" "28.63%" "28.64%" "28.74%" "28.75%" "28.76%" "28.77%"
## [1191] "28.80%" "28.81%" "28.82%" "28.84%" "28.87%" "28.89%" "28.91%"
## [1198] "29.01%" "29.02%" "29.03%" "29.05%" "29.10%" "29.11%" "29.13%"
## [1205] "29.18%" "29.19%" "29.25%" "29.26%" "29.38%" "29.40%" "29.45%"
## [1212] "29.46%" "29.48%" "29.51%" "29.55%" "29.58%" "29.62%" "29.63%"
## [1219] "29.82%" "29.83%" "29.86%" "29.94%" "3%"     "3.01%"  "3.04%" 
## [1226] "3.05%"  "3.07%"  "3.09%"  "3.11%"  "3.12%"  "3.16%"  "3.18%" 
## [1233] "3.23%"  "3.25%"  "3.27%"  "3.30%"  "3.31%"  "3.33%"  "3.34%" 
## [1240] "3.37%"  "3.38%"  "3.40%"  "3.41%"  "3.44%"  "3.45%"  "3.47%" 
## [1247] "3.48%"  "3.50%"  "3.58%"  "3.63%"  "3.65%"  "3.66%"  "3.67%" 
## [1254] "3.72%"  "3.78%"  "3.80%"  "3.82%"  "3.83%"  "3.87%"  "3.88%" 
## [1261] "3.89%"  "3.90%"  "3.94%"  "3.96%"  "3.98%"  "30.15%" "30.24%"
## [1268] "30.30%" "30.36%" "30.40%" "30.46%" "30.56%" "30.58%" "30.61%"
## [1275] "30.62%" "30.71%" "30.77%" "30.82%" "30.95%" "30.96%" "30.97%"
## [1282] "31%"    "31.02%" "31.12%" "31.20%" "31.34%" "31.45%" "31.46%"
## [1289] "31.48%" "31.70%" "31.71%" "31.79%" "31.84%" "32%"    "32.10%"
## [1296] "32.13%" "32.16%" "32.19%" "32.20%" "32.21%" "32.25%" "32.49%"
## [1303] "32.56%" "32.64%" "32.76%" "32.91%" "32.95%" "33.12%" "33.15%"
## [1310] "33.23%" "33.24%" "33.28%" "33.30%" "33.37%" "33.42%" "33.43%"
## [1317] "33.47%" "33.56%" "33.62%" "33.64%" "33.67%" "33.75%" "33.80%"
## [1324] "33.90%" "33.93%" "34.04%" "34.06%" "34.20%" "34.26%" "34.41%"
## [1331] "34.56%" "34.74%" "34.88%" "34.91%" "4%"     "4.02%"  "4.04%" 
## [1338] "4.05%"  "4.08%"  "4.10%"  "4.14%"  "4.16%"  "4.17%"  "4.19%" 
## [1345] "4.25%"  "4.26%"  "4.29%"  "4.30%"  "4.32%"  "4.34%"  "4.36%" 
## [1352] "4.37%"  "4.39%"  "4.40%"  "4.43%"  "4.47%"  "4.48%"  "4.51%" 
## [1359] "4.53%"  "4.56%"  "4.58%"  "4.61%"  "4.62%"  "4.68%"  "4.70%" 
## [1366] "4.73%"  "4.74%"  "4.75%"  "4.76%"  "4.79%"  "4.80%"  "4.81%" 
## [1373] "4.85%"  "4.86%"  "4.89%"  "4.90%"  "4.91%"  "4.92%"  "4.93%" 
## [1380] "4.94%"  "4.95%"  "4.99%"  "5.01%"  "5.02%"  "5.07%"  "5.10%" 
## [1387] "5.16%"  "5.20%"  "5.21%"  "5.24%"  "5.26%"  "5.27%"  "5.28%" 
## [1394] "5.30%"  "5.31%"  "5.33%"  "5.36%"  "5.42%"  "5.43%"  "5.44%" 
## [1401] "5.46%"  "5.47%"  "5.49%"  "5.50%"  "5.52%"  "5.56%"  "5.58%" 
## [1408] "5.59%"  "5.60%"  "5.62%"  "5.63%"  "5.64%"  "5.65%"  "5.70%" 
## [1415] "5.72%"  "5.74%"  "5.76%"  "5.80%"  "5.81%"  "5.82%"  "5.83%" 
## [1422] "5.84%"  "5.85%"  "5.86%"  "5.87%"  "5.88%"  "5.90%"  "5.91%" 
## [1429] "5.93%"  "5.95%"  "5.98%"  "6.01%"  "6.04%"  "6.07%"  "6.10%" 
## [1436] "6.11%"  "6.16%"  "6.19%"  "6.24%"  "6.25%"  "6.26%"  "6.27%" 
## [1443] "6.28%"  "6.29%"  "6.30%"  "6.31%"  "6.36%"  "6.37%"  "6.38%" 
## [1450] "6.39%"  "6.41%"  "6.43%"  "6.46%"  "6.48%"  "6.49%"  "6.51%" 
## [1457] "6.52%"  "6.56%"  "6.58%"  "6.60%"  "6.61%"  "6.62%"  "6.64%" 
## [1464] "6.65%"  "6.68%"  "6.71%"  "6.72%"  "6.74%"  "6.75%"  "6.76%" 
## [1471] "6.79%"  "6.80%"  "6.81%"  "6.82%"  "6.83%"  "6.84%"  "6.86%" 
## [1478] "6.91%"  "6.92%"  "6.94%"  "6.98%"  "6.99%"  "7.02%"  "7.04%" 
## [1485] "7.06%"  "7.07%"  "7.08%"  "7.10%"  "7.13%"  "7.14%"  "7.15%" 
## [1492] "7.17%"  "7.23%"  "7.24%"  "7.25%"  "7.26%"  "7.28%"  "7.29%" 
## [1499] "7.32%"  "7.33%"  "7.36%"  "7.38%"  "7.39%"  "7.40%"  "7.41%" 
## [1506] "7.43%"  "7.45%"  "7.49%"  "7.50%"  "7.51%"  "7.52%"  "7.53%" 
## [1513] "7.55%"  "7.56%"  "7.57%"  "7.59%"  "7.60%"  "7.64%"  "7.65%" 
## [1520] "7.66%"  "7.67%"  "7.68%"  "7.72%"  "7.73%"  "7.75%"  "7.77%" 
## [1527] "7.78%"  "7.80%"  "7.82%"  "7.84%"  "7.85%"  "7.88%"  "7.89%" 
## [1534] "7.90%"  "7.92%"  "7.94%"  "7.95%"  "7.96%"  "7.97%"  "7.98%" 
## [1541] "8%"     "8.01%"  "8.02%"  "8.03%"  "8.04%"  "8.05%"  "8.06%" 
## [1548] "8.07%"  "8.08%"  "8.10%"  "8.11%"  "8.12%"  "8.14%"  "8.15%" 
## [1555] "8.16%"  "8.20%"  "8.21%"  "8.23%"  "8.26%"  "8.28%"  "8.29%" 
## [1562] "8.31%"  "8.33%"  "8.34%"  "8.36%"  "8.37%"  "8.38%"  "8.40%" 
## [1569] "8.43%"  "8.44%"  "8.45%"  "8.46%"  "8.47%"  "8.48%"  "8.49%" 
## [1576] "8.51%"  "8.52%"  "8.56%"  "8.57%"  "8.60%"  "8.61%"  "8.62%" 
## [1583] "8.64%"  "8.69%"  "8.70%"  "8.72%"  "8.74%"  "8.76%"  "8.77%" 
## [1590] "8.80%"  "8.81%"  "8.82%"  "8.83%"  "8.84%"  "8.86%"  "8.89%" 
## [1597] "8.90%"  "8.91%"  "8.92%"  "8.93%"  "8.95%"  "8.96%"  "8.99%" 
## [1604] "9%"     "9.02%"  "9.03%"  "9.04%"  "9.05%"  "9.06%"  "9.07%" 
## [1611] "9.09%"  "9.10%"  "9.12%"  "9.14%"  "9.15%"  "9.16%"  "9.17%" 
## [1618] "9.18%"  "9.19%"  "9.20%"  "9.22%"  "9.26%"  "9.28%"  "9.29%" 
## [1625] "9.30%"  "9.31%"  "9.33%"  "9.35%"  "9.37%"  "9.40%"  "9.41%" 
## [1632] "9.44%"  "9.46%"  "9.47%"  "9.49%"  "9.51%"  "9.52%"  "9.53%" 
## [1639] "9.54%"  "9.55%"  "9.57%"  "9.58%"  "9.59%"  "9.60%"  "9.61%" 
## [1646] "9.64%"  "9.66%"  "9.67%"  "9.68%"  "9.70%"  "9.71%"  "9.72%" 
## [1653] "9.73%"  "9.74%"  "9.75%"  "9.76%"  "9.77%"  "9.79%"  "9.81%" 
## [1660] "9.84%"  "9.86%"  "9.89%"  "9.90%"  "9.92%"  "9.93%"  "9.95%" 
## [1667] "9.96%"  "9.98%"  "9.99%"

Again, we need to organize the levels in consecutive order. We can then compare bar plots of the debt variable before and after this correction. With the corrected ordering, the debt graph shows a nice symmetric shape, which will help with the following step: identifying linearity.

# Put the Debt-to-Income factor levels into numeric order. The levels are
# percentage strings, so their default (alphabetical) order is not numeric:
# "3.98%" sorts directly before "30.15%". Ordering on the parsed numeric value
# replaces hand-counted index ranges, which silently go wrong whenever the set
# of levels changes.
Debt <- loansDataCorrection$Debt
levelsD <- levels(Debt)
levelsDN <- levelsD[order(as.numeric(sub("%", "", levelsD, fixed = TRUE)))]
DebtN <- factor(Debt, levels = levelsDN)
# Work on a copy so the data with the original level order stays available
# for the before/after bar plots.
loansDataCorrectionCn <- loansDataCorrection
loansDataCorrectionCn$Debt.To.Income.Ratio <- DebtN

# Two panels side by side: counts per Debt level with the original level order
# (left) and with the re-ordered levels (right).
par(mfrow = c(1, 2))
debtCountsOriginal <- table(loansDataCorrection$Debt)
debtCountsReordered <- table(loansDataCorrectionCn$Debt)
barplot(debtCountsOriginal, main = "loansDataCorrection$Debt", col = "blue")
barplot(debtCountsReordered, main = "loansDataCorrectionCn$Debt", col = "blue")

plot of chunk unnamed-chunk-23

# Side-by-side scatterplots of Interest Rate (left) and FICO (right) against
# Debt. NOTE(review): assuming `$Debt` partially matches the re-levelled
# Debt.To.Income.Ratio factor, as.numeric() yields each row's position in the
# level order, so the x axis is the rank of the ratio, not the percentage.
par(mfrow = c(1, 2))
plot(as.numeric(loansDataCorrectionCn$Debt), as.numeric(loansDataCorrectionCn$Interest), 
    xlab = "Debt-to-Income Ratio", ylab = "Interest Rate", main = "Interest Rate vs. Debt")
plot(as.numeric(loansDataCorrectionCn$Debt), as.numeric(loansDataCorrectionCn$FICO), 
    xlab = "Debt-to-Income Ratio", ylab = "FICO Score Range", main = "FICO vs. Debt")

plot of chunk unnamed-chunk-24

As mentioned before, we see a fairly linear pattern in the Interest Rate vs. FICO score plot. For this, we can use a linear regression model (basic least squares). The variables we will take into consideration are: monthly income, FICO score, amount requested, loan length, open credit lines, revolving credit balance, combined factors, debt, and inquiries. We will exclude loan purpose for the sake of not overloading the model.

# Fit the first least-squares model of Interest on FICO, loan length, the
# AmountRange bins, the AmountRange x FICO interaction, Debt, Inquiries and the
# Open.C x Debt interaction, then print its coefficient summary.
# - `a * b` in a formula expands to a + b + a:b, so the separately listed
#   as.factor(AmountRange) and Debt main effects are redundant with the `*` terms.
# - No `data =` argument is given: every variable is looked up directly from the
#   vectors named in the formula.
# - The result is stored in a variable called `lm`; R still finds the lm()
#   function when it is called, but the name is shared.
# NOTE(review): Debt is a factor here (assuming `$Debt` partially matches
# Debt.To.Income.Ratio), so as.numeric() gives its level code. If Interest and
# FICO are factors too, they enter as level codes rather than values -- confirm.
lm <- lm(as.numeric(loansDataCorrectionCn$Interest) ~ as.numeric(loansDataCorrectionCn$FICO) + 
    as.factor(loansDataCorrectionCn$Loan.L) + as.factor(AmountRange) + as.factor(AmountRange) * 
    as.numeric(loansDataCorrectionCn$FICO) + as.numeric(loansDataCorrectionCn$Debt) + 
    as.numeric(loansDataCorrectionCn$Inquiries) + as.numeric(loansDataCorrectionCn$Open.C) * 
    as.numeric(loansDataCorrectionCn$Debt))
summary(lm)
## 
## Call:
## lm(formula = as.numeric(loansDataCorrectionCn$Interest) ~ as.numeric(loansDataCorrectionCn$FICO) + 
##     as.factor(loansDataCorrectionCn$Loan.L) + as.factor(AmountRange) + 
##     as.factor(AmountRange) * as.numeric(loansDataCorrectionCn$FICO) + 
##     as.numeric(loansDataCorrectionCn$Debt) + as.numeric(loansDataCorrectionCn$Inquiries) + 
##     as.numeric(loansDataCorrectionCn$Open.C) * as.numeric(loansDataCorrectionCn$Debt))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -171.36  -28.39   -2.84   25.14  183.64 
## 
## Coefficients:
##                                                                                  Estimate
## (Intercept)                                                                      2.04e+02
## as.numeric(loansDataCorrectionCn$FICO)                                          -6.88e+00
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 5.88e+01
## as.factor(AmountRange)[ 6075,10050)                                              2.18e+01
## as.factor(AmountRange)[10050,17200)                                              3.82e+01
## as.factor(AmountRange)[17200,35000]                                              7.10e+01
## as.numeric(loansDataCorrectionCn$Debt)                                          -2.69e-02
## as.numeric(loansDataCorrectionCn$Inquiries)                                      6.60e+00
## as.numeric(loansDataCorrectionCn$Open.C)                                        -3.28e+00
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)      -7.39e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)      -1.28e+00
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]      -1.58e+00
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)  2.76e-03
##                                                                                 Std. Error
## (Intercept)                                                                       5.11e+00
## as.numeric(loansDataCorrectionCn$FICO)                                            2.20e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                  2.12e+00
## as.factor(AmountRange)[ 6075,10050)                                               4.83e+00
## as.factor(AmountRange)[10050,17200)                                               5.03e+00
## as.factor(AmountRange)[17200,35000]                                               5.31e+00
## as.numeric(loansDataCorrectionCn$Debt)                                            4.16e-03
## as.numeric(loansDataCorrectionCn$Inquiries)                                       6.53e-01
## as.numeric(loansDataCorrectionCn$Open.C)                                          4.06e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)        3.12e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)        3.19e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]        3.22e-01
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   3.88e-04
##                                                                                 t value
## (Intercept)                                                                       39.91
## as.numeric(loansDataCorrectionCn$FICO)                                           -31.27
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                  27.75
## as.factor(AmountRange)[ 6075,10050)                                                4.52
## as.factor(AmountRange)[10050,17200)                                                7.58
## as.factor(AmountRange)[17200,35000]                                               13.38
## as.numeric(loansDataCorrectionCn$Debt)                                            -6.47
## as.numeric(loansDataCorrectionCn$Inquiries)                                       10.10
## as.numeric(loansDataCorrectionCn$Open.C)                                          -8.07
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)        -2.37
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)        -4.01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]        -4.91
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)    7.11
##                                                                                 Pr(>|t|)
## (Intercept)                                                                      < 2e-16
## as.numeric(loansDataCorrectionCn$FICO)                                           < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 < 2e-16
## as.factor(AmountRange)[ 6075,10050)                                              6.3e-06
## as.factor(AmountRange)[10050,17200)                                              4.7e-14
## as.factor(AmountRange)[17200,35000]                                              < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                           1.2e-10
## as.numeric(loansDataCorrectionCn$Inquiries)                                      < 2e-16
## as.numeric(loansDataCorrectionCn$Open.C)                                         1.1e-15
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)         0.018
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)       6.3e-05
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]       9.8e-07
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)  1.6e-12
##                                                                                    
## (Intercept)                                                                     ***
## as.numeric(loansDataCorrectionCn$FICO)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                ***
## as.factor(AmountRange)[ 6075,10050)                                             ***
## as.factor(AmountRange)[10050,17200)                                             ***
## as.factor(AmountRange)[17200,35000]                                             ***
## as.numeric(loansDataCorrectionCn$Debt)                                          ***
## as.numeric(loansDataCorrectionCn$Inquiries)                                     ***
## as.numeric(loansDataCorrectionCn$Open.C)                                        ***
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)      *  
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)      ***
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]      ***
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 39.6 on 2485 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared: 0.73,    Adjusted R-squared: 0.728 
## F-statistic:  559 on 12 and 2485 DF,  p-value: <2e-16
# Confidence intervals (confint() default: 2.5 % and 97.5 % bounds) for each
# coefficient of the first model.
confint(lm)
##                                                                                      2.5 %
## (Intercept)                                                                     193.785205
## as.numeric(loansDataCorrectionCn$FICO)                                           -7.312717
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 54.635829
## as.factor(AmountRange)[ 6075,10050)                                              12.376010
## as.factor(AmountRange)[10050,17200)                                              28.306751
## as.factor(AmountRange)[17200,35000]                                              60.598061
## as.numeric(loansDataCorrectionCn$Debt)                                           -0.035081
## as.numeric(loansDataCorrectionCn$Inquiries)                                       5.317156
## as.numeric(loansDataCorrectionCn$Open.C)                                         -4.071126
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)       -1.351181
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)       -1.906100
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]       -2.213757
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   0.001996
##                                                                                     97.5 %
## (Intercept)                                                                     213.809960
## as.numeric(loansDataCorrectionCn$FICO)                                           -6.449763
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 62.944456
## as.factor(AmountRange)[ 6075,10050)                                              31.307316
## as.factor(AmountRange)[10050,17200)                                              48.046739
## as.factor(AmountRange)[17200,35000]                                              81.411324
## as.numeric(loansDataCorrectionCn$Debt)                                           -0.018767
## as.numeric(loansDataCorrectionCn$Inquiries)                                       7.878643
## as.numeric(loansDataCorrectionCn$Open.C)                                         -2.478909
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)       -0.127523
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)       -0.654124
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]       -0.949678
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   0.003518
# Analysis-of-variance table for the first model: one row per formula term,
# with terms added sequentially in the order shown.
anova(lm)
## Analysis of Variance Table
## 
## Response: as.numeric(loansDataCorrectionCn$Interest)
##                                                                                   Df
## as.numeric(loansDataCorrectionCn$FICO)                                             1
## as.factor(loansDataCorrectionCn$Loan.L)                                            1
## as.factor(AmountRange)                                                             3
## as.numeric(loansDataCorrectionCn$Debt)                                             1
## as.numeric(loansDataCorrectionCn$Inquiries)                                        1
## as.numeric(loansDataCorrectionCn$Open.C)                                           1
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                      3
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)    1
## Residuals                                                                       2485
##                                                                                  Sum Sq
## as.numeric(loansDataCorrectionCn$FICO)                                          7079425
## as.factor(loansDataCorrectionCn$Loan.L)                                         2551922
## as.factor(AmountRange)                                                           585275
## as.numeric(loansDataCorrectionCn$Debt)                                             4784
## as.numeric(loansDataCorrectionCn$Inquiries)                                      150763
## as.numeric(loansDataCorrectionCn$Open.C)                                          20181
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                     47238
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   79138
## Residuals                                                                       3894775
##                                                                                 Mean Sq
## as.numeric(loansDataCorrectionCn$FICO)                                          7079425
## as.factor(loansDataCorrectionCn$Loan.L)                                         2551922
## as.factor(AmountRange)                                                           195092
## as.numeric(loansDataCorrectionCn$Debt)                                             4784
## as.numeric(loansDataCorrectionCn$Inquiries)                                      150763
## as.numeric(loansDataCorrectionCn$Open.C)                                          20181
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                     15746
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   79138
## Residuals                                                                          1567
##                                                                                 F value
## as.numeric(loansDataCorrectionCn$FICO)                                          4516.92
## as.factor(loansDataCorrectionCn$Loan.L)                                         1628.21
## as.factor(AmountRange)                                                           124.48
## as.numeric(loansDataCorrectionCn$Debt)                                             3.05
## as.numeric(loansDataCorrectionCn$Inquiries)                                       96.19
## as.numeric(loansDataCorrectionCn$Open.C)                                          12.88
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                     10.05
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   50.49
## Residuals                                                                              
##                                                                                  Pr(>F)
## as.numeric(loansDataCorrectionCn$FICO)                                          < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)                                         < 2e-16
## as.factor(AmountRange)                                                          < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                          0.08075
## as.numeric(loansDataCorrectionCn$Inquiries)                                     < 2e-16
## as.numeric(loansDataCorrectionCn$Open.C)                                        0.00034
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                   1.4e-06
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C) 1.6e-12
## Residuals                                                                              
##                                                                                    
## as.numeric(loansDataCorrectionCn$FICO)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)                                         ***
## as.factor(AmountRange)                                                          ***
## as.numeric(loansDataCorrectionCn$Debt)                                          .  
## as.numeric(loansDataCorrectionCn$Inquiries)                                     ***
## as.numeric(loansDataCorrectionCn$Open.C)                                        ***
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                   ***
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C) ***
## Residuals                                                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The following graphs are presented in an order that reflects the key results of the exploratory analysis. They are the foremost visual representation of the goal of the assignment. A more detailed description of the analysis can be found in the write-up.

# Interest Rate against FICO range in a single panel, restricted to rows with a
# recorded Inquiries value. A logical `!is.na()` mask selects the same rows as
# `which(is.na(x) == F)` without depending on `F`, which is an ordinary variable
# that can be reassigned.
par(mfrow = c(1, 1))
inquiriesKnown <- !is.na(loansDataCorrectionCn$Inquiries)
plot(as.numeric(loansDataCorrectionCn$FICO[inquiriesKnown]), as.numeric(loansDataCorrectionCn$Interest[inquiriesKnown]), 
    cex = 0.6, xlab = "FICO Range", ylab = "Interest Rate", main = "Interest Rate vs. FICO Score Range")

plot of chunk unnamed-chunk-26

# Interest Rate against FICO range with every point coloured by the numeric
# code of its Loan Length; the legend pairs each colour with its loan length
# label.
loanLength <- loansDataCorrection$Loan.L
loanColour <- as.numeric(loanLength)
plot(as.numeric(loansDataCorrection$FICO), as.numeric(loansDataCorrection$Interest), 
    col = loanColour, pch = 19, cex = 0.5, xlab = "FICO range", ylab = "Interest Rate", 
    main = "Interest Rate Vs. FICO Range, Colored by Loan Length", cex.main = 0.75)
legend(27, 275, legend = unique(loanLength), col = unique(loanColour), pch = 19, 
    cex = 0.5)

plot of chunk unnamed-chunk-27

# Observed Interest vs. FICO for rows with a recorded Inquiries value, overlaid
# with the first model's fitted values (small blue points).
inquiriesKnown <- !is.na(loansDataCorrectionCn$Inquiries)
plot(as.numeric(loansDataCorrectionCn$FICO[inquiriesKnown]), as.numeric(loansDataCorrectionCn$Interest[inquiriesKnown]), 
    xlab = "FICO Range", ylab = "Interest Rate", main = "Interest vs. FICO with Linear Model Fitted Points")
# lm() drops every row that has an NA in ANY model variable, not only in
# Inquiries. Taking x from the model frame guarantees it lines up one-to-one
# with the fitted values; filtering on Inquiries alone would make points() fail
# with a length mismatch as soon as another predictor had a missing value.
ficoUsedInFit <- model.frame(lm)[["as.numeric(loansDataCorrectionCn$FICO)"]]
points(ficoUsedInFit, fitted(lm), pch = 20, cex = 0.4, col = "blue")

plot of chunk unnamed-chunk-28


# Residuals-vs-fitted diagnostic for the first linear model, with a thick red
# horizontal reference line at zero.
plot(x = fitted(lm), y = resid(lm), cex = 0.6, main = "Linear Model Residuals", 
    xlab = "Linear M. Fitted", ylab = "Linear M. Residuals")
abline(h = 0, lwd = 2, col = "red")

plot of chunk unnamed-chunk-28

Because of some non-linearity in the relationships between the interest rate and other variables, we add squared versions of those variables to the regression.

# Second model: same structure as the first, with FICO and Open.C squared.
# Inside a formula `^` is the term-crossing operator, so a bare `x^2` collapses
# to `x` and the fit comes out identical to the first model. Wrapping the power
# in I() makes it ordinary arithmetic, so the squared values really enter the
# regression.
# - The linear FICO term is still present through as.factor(AmountRange) * FICO.
# - Open.C now enters only as its square (main effect plus interaction with
#   Debt); add as.numeric(loansDataCorrectionCn$Open.C) as a separate term if
#   the linear effect should be kept as well.
# NOTE(review): re-run this chunk after the change -- results rendered from the
# un-wrapped formula do not describe this model.
lm2 <- lm(as.numeric(loansDataCorrectionCn$Interest) ~ I(as.numeric(loansDataCorrectionCn$FICO)^2) + 
    as.factor(loansDataCorrectionCn$Loan.L) + as.factor(AmountRange) + as.factor(AmountRange) * 
    as.numeric(loansDataCorrectionCn$FICO) + as.numeric(loansDataCorrectionCn$Debt) + 
    as.numeric(loansDataCorrectionCn$Inquiries) + I(as.numeric(loansDataCorrectionCn$Open.C)^2) * 
    as.numeric(loansDataCorrectionCn$Debt))

summary(lm2)
## 
## Call:
## lm(formula = as.numeric(loansDataCorrectionCn$Interest) ~ as.numeric(loansDataCorrectionCn$FICO)^2 + 
##     as.factor(loansDataCorrectionCn$Loan.L) + as.factor(AmountRange) + 
##     as.factor(AmountRange) * as.numeric(loansDataCorrectionCn$FICO) + 
##     as.numeric(loansDataCorrectionCn$Debt) + as.numeric(loansDataCorrectionCn$Inquiries) + 
##     as.numeric(loansDataCorrectionCn$Open.C)^2 * as.numeric(loansDataCorrectionCn$Debt))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -171.36  -28.39   -2.84   25.14  183.64 
## 
## Coefficients:
##                                                                                  Estimate
## (Intercept)                                                                      2.04e+02
## as.numeric(loansDataCorrectionCn$FICO)                                          -6.88e+00
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 5.88e+01
## as.factor(AmountRange)[ 6075,10050)                                              2.18e+01
## as.factor(AmountRange)[10050,17200)                                              3.82e+01
## as.factor(AmountRange)[17200,35000]                                              7.10e+01
## as.numeric(loansDataCorrectionCn$Debt)                                          -2.69e-02
## as.numeric(loansDataCorrectionCn$Inquiries)                                      6.60e+00
## as.numeric(loansDataCorrectionCn$Open.C)                                        -3.28e+00
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)      -7.39e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)      -1.28e+00
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]      -1.58e+00
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)  2.76e-03
##                                                                                 Std. Error
## (Intercept)                                                                       5.11e+00
## as.numeric(loansDataCorrectionCn$FICO)                                            2.20e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                  2.12e+00
## as.factor(AmountRange)[ 6075,10050)                                               4.83e+00
## as.factor(AmountRange)[10050,17200)                                               5.03e+00
## as.factor(AmountRange)[17200,35000]                                               5.31e+00
## as.numeric(loansDataCorrectionCn$Debt)                                            4.16e-03
## as.numeric(loansDataCorrectionCn$Inquiries)                                       6.53e-01
## as.numeric(loansDataCorrectionCn$Open.C)                                          4.06e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)        3.12e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)        3.19e-01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]        3.22e-01
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   3.88e-04
##                                                                                 t value
## (Intercept)                                                                       39.91
## as.numeric(loansDataCorrectionCn$FICO)                                           -31.27
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                  27.75
## as.factor(AmountRange)[ 6075,10050)                                                4.52
## as.factor(AmountRange)[10050,17200)                                                7.58
## as.factor(AmountRange)[17200,35000]                                               13.38
## as.numeric(loansDataCorrectionCn$Debt)                                            -6.47
## as.numeric(loansDataCorrectionCn$Inquiries)                                       10.10
## as.numeric(loansDataCorrectionCn$Open.C)                                          -8.07
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)        -2.37
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)        -4.01
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]        -4.91
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)    7.11
##                                                                                 Pr(>|t|)
## (Intercept)                                                                      < 2e-16
## as.numeric(loansDataCorrectionCn$FICO)                                           < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 < 2e-16
## as.factor(AmountRange)[ 6075,10050)                                              6.3e-06
## as.factor(AmountRange)[10050,17200)                                              4.7e-14
## as.factor(AmountRange)[17200,35000]                                              < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                           1.2e-10
## as.numeric(loansDataCorrectionCn$Inquiries)                                      < 2e-16
## as.numeric(loansDataCorrectionCn$Open.C)                                         1.1e-15
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)         0.018
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)       6.3e-05
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]       9.8e-07
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)  1.6e-12
##                                                                                    
## (Intercept)                                                                     ***
## as.numeric(loansDataCorrectionCn$FICO)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                ***
## as.factor(AmountRange)[ 6075,10050)                                             ***
## as.factor(AmountRange)[10050,17200)                                             ***
## as.factor(AmountRange)[17200,35000]                                             ***
## as.numeric(loansDataCorrectionCn$Debt)                                          ***
## as.numeric(loansDataCorrectionCn$Inquiries)                                     ***
## as.numeric(loansDataCorrectionCn$Open.C)                                        ***
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)      *  
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)      ***
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]      ***
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 39.6 on 2485 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared: 0.73,    Adjusted R-squared: 0.728 
## F-statistic:  559 on 12 and 2485 DF,  p-value: <2e-16
# Confidence intervals (confint() default: 2.5 % and 97.5 % bounds) for each
# coefficient of the second model.
confint(lm2)
##                                                                                      2.5 %
## (Intercept)                                                                     193.785205
## as.numeric(loansDataCorrectionCn$FICO)                                           -7.312717
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 54.635829
## as.factor(AmountRange)[ 6075,10050)                                              12.376010
## as.factor(AmountRange)[10050,17200)                                              28.306751
## as.factor(AmountRange)[17200,35000]                                              60.598061
## as.numeric(loansDataCorrectionCn$Debt)                                           -0.035081
## as.numeric(loansDataCorrectionCn$Inquiries)                                       5.317156
## as.numeric(loansDataCorrectionCn$Open.C)                                         -4.071126
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)       -1.351181
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)       -1.906100
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]       -2.213757
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   0.001996
##                                                                                     97.5 %
## (Intercept)                                                                     213.809960
## as.numeric(loansDataCorrectionCn$FICO)                                           -6.449763
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                 62.944456
## as.factor(AmountRange)[ 6075,10050)                                              31.307316
## as.factor(AmountRange)[10050,17200)                                              48.046739
## as.factor(AmountRange)[17200,35000]                                              81.411324
## as.numeric(loansDataCorrectionCn$Debt)                                           -0.018767
## as.numeric(loansDataCorrectionCn$Inquiries)                                       7.878643
## as.numeric(loansDataCorrectionCn$Open.C)                                         -2.478909
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[ 6075,10050)       -0.127523
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[10050,17200)       -0.654124
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)[17200,35000]       -0.949678
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   0.003518
# Analysis of variance table for lm2. For a single lm fit the terms are
# tested sequentially, in the order they appear in the model formula.
anova(lm2)
## Analysis of Variance Table
## 
## Response: as.numeric(loansDataCorrectionCn$Interest)
##                                                                                   Df
## as.numeric(loansDataCorrectionCn$FICO)                                             1
## as.factor(loansDataCorrectionCn$Loan.L)                                            1
## as.factor(AmountRange)                                                             3
## as.numeric(loansDataCorrectionCn$Debt)                                             1
## as.numeric(loansDataCorrectionCn$Inquiries)                                        1
## as.numeric(loansDataCorrectionCn$Open.C)                                           1
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                      3
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)    1
## Residuals                                                                       2485
##                                                                                  Sum Sq
## as.numeric(loansDataCorrectionCn$FICO)                                          7079425
## as.factor(loansDataCorrectionCn$Loan.L)                                         2551922
## as.factor(AmountRange)                                                           585275
## as.numeric(loansDataCorrectionCn$Debt)                                             4784
## as.numeric(loansDataCorrectionCn$Inquiries)                                      150763
## as.numeric(loansDataCorrectionCn$Open.C)                                          20181
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                     47238
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   79138
## Residuals                                                                       3894775
##                                                                                 Mean Sq
## as.numeric(loansDataCorrectionCn$FICO)                                          7079425
## as.factor(loansDataCorrectionCn$Loan.L)                                         2551922
## as.factor(AmountRange)                                                           195092
## as.numeric(loansDataCorrectionCn$Debt)                                             4784
## as.numeric(loansDataCorrectionCn$Inquiries)                                      150763
## as.numeric(loansDataCorrectionCn$Open.C)                                          20181
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                     15746
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   79138
## Residuals                                                                          1567
##                                                                                 F value
## as.numeric(loansDataCorrectionCn$FICO)                                          4516.92
## as.factor(loansDataCorrectionCn$Loan.L)                                         1628.21
## as.factor(AmountRange)                                                           124.48
## as.numeric(loansDataCorrectionCn$Debt)                                             3.05
## as.numeric(loansDataCorrectionCn$Inquiries)                                       96.19
## as.numeric(loansDataCorrectionCn$Open.C)                                          12.88
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                     10.05
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C)   50.49
## Residuals                                                                              
##                                                                                  Pr(>F)
## as.numeric(loansDataCorrectionCn$FICO)                                          < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)                                         < 2e-16
## as.factor(AmountRange)                                                          < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                          0.08075
## as.numeric(loansDataCorrectionCn$Inquiries)                                     < 2e-16
## as.numeric(loansDataCorrectionCn$Open.C)                                        0.00034
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                   1.4e-06
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C) 1.6e-12
## Residuals                                                                              
##                                                                                    
## as.numeric(loansDataCorrectionCn$FICO)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)                                         ***
## as.factor(AmountRange)                                                          ***
## as.numeric(loansDataCorrectionCn$Debt)                                          .  
## as.numeric(loansDataCorrectionCn$Inquiries)                                     ***
## as.numeric(loansDataCorrectionCn$Open.C)                                        ***
## as.numeric(loansDataCorrectionCn$FICO):as.factor(AmountRange)                   ***
## as.numeric(loansDataCorrectionCn$Debt):as.numeric(loansDataCorrectionCn$Open.C) ***
## Residuals                                                                          
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plots for the lm2 fit (four plots are rendered in this report).
plot(lm2)

plot of chunk unnamed-chunk-30 plot of chunk unnamed-chunk-30 plot of chunk unnamed-chunk-30 plot of chunk unnamed-chunk-30

We now add quadratic terms to the regression, i.e. so-called polynomial curve fitting. Although the name may suggest otherwise, this is still a linear regression, because the model remains linear in its coefficients.

# Model 3: regression of Interest with squared (quadratic) terms added.
#   - I(FICO^2): I() makes ^ mean arithmetic squaring instead of the formula
#     crossing operator.
#   - as.factor(AmountRange) * FICO expands to both main effects plus their
#     interaction, so the linear FICO term enters the model through it.
#   - I(Open.C^2) * Debt likewise adds the squared Open.C term, Debt and their
#     interaction; there is no linear Open.C term in this model.
# AmountRange is read from the workspace, not from loansDataCorrectionCn.
# Term order matters for the sequential tests reported by anova(lm3).
# NOTE(review): the fitted scale in the output (intercept ~246, residuals up
# to ~180) is far larger than a percentage interest rate. If Interest (or
# FICO/Debt) is stored as a factor, as.numeric() returns level codes, not the
# underlying values -- confirm against the upstream data preparation.
lm3 <- lm(as.numeric(loansDataCorrectionCn$Interest) ~ I(as.numeric(loansDataCorrectionCn$FICO)^2) + 
    as.factor(loansDataCorrectionCn$Loan.L) + as.factor(AmountRange) + as.factor(AmountRange) * 
    as.numeric(loansDataCorrectionCn$FICO) + as.numeric(loansDataCorrectionCn$Debt) + 
    as.numeric(loansDataCorrectionCn$Inquiries) + I(as.numeric(loansDataCorrectionCn$Open.C)^2) * 
    as.numeric(loansDataCorrectionCn$Debt))

# Coefficient estimates, standard errors, residual summary and R-squared.
summary(lm3)
## 
## Call:
## lm(formula = as.numeric(loansDataCorrectionCn$Interest) ~ I(as.numeric(loansDataCorrectionCn$FICO)^2) + 
##     as.factor(loansDataCorrectionCn$Loan.L) + as.factor(AmountRange) + 
##     as.factor(AmountRange) * as.numeric(loansDataCorrectionCn$FICO) + 
##     as.numeric(loansDataCorrectionCn$Debt) + as.numeric(loansDataCorrectionCn$Inquiries) + 
##     I(as.numeric(loansDataCorrectionCn$Open.C)^2) * as.numeric(loansDataCorrectionCn$Debt))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -157.14  -22.82   -2.17   19.23  183.77 
## 
## Coefficients:
##                                                                                       Estimate
## (Intercept)                                                                           2.46e+02
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           2.81e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      5.90e+01
## as.factor(AmountRange)[ 6075,10050)                                                   2.60e+01
## as.factor(AmountRange)[10050,17200)                                                   4.48e+01
## as.factor(AmountRange)[17200,35000]                                                   7.91e+01
## as.numeric(loansDataCorrectionCn$FICO)                                               -1.62e+01
## as.numeric(loansDataCorrectionCn$Debt)                                               -1.14e-02
## as.numeric(loansDataCorrectionCn$Inquiries)                                           5.91e+00
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        -1.22e-01
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)           -1.04e+00
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)           -1.65e+00
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)           -2.02e+00
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  1.11e-04
##                                                                                      Std. Error
## (Intercept)                                                                            4.59e+00
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                            1.24e-02
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                       1.93e+00
## as.factor(AmountRange)[ 6075,10050)                                                    4.41e+00
## as.factor(AmountRange)[10050,17200)                                                    4.60e+00
## as.factor(AmountRange)[17200,35000]                                                    4.85e+00
## as.numeric(loansDataCorrectionCn$FICO)                                                 4.54e-01
## as.numeric(loansDataCorrectionCn$Debt)                                                 2.33e-03
## as.numeric(loansDataCorrectionCn$Inquiries)                                            5.96e-01
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                          1.75e-02
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)             2.85e-01
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)             2.92e-01
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)             2.95e-01
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   1.51e-05
##                                                                                      t value
## (Intercept)                                                                            53.63
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                            22.77
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                       30.52
## as.factor(AmountRange)[ 6075,10050)                                                     5.90
## as.factor(AmountRange)[10050,17200)                                                     9.74
## as.factor(AmountRange)[17200,35000]                                                    16.31
## as.numeric(loansDataCorrectionCn$FICO)                                                -35.59
## as.numeric(loansDataCorrectionCn$Debt)                                                 -4.89
## as.numeric(loansDataCorrectionCn$Inquiries)                                             9.91
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                          -6.96
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)             -3.64
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)             -5.65
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)             -6.87
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)    7.36
##                                                                                      Pr(>|t|)
## (Intercept)                                                                           < 2e-16
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      < 2e-16
## as.factor(AmountRange)[ 6075,10050)                                                   4.0e-09
## as.factor(AmountRange)[10050,17200)                                                   < 2e-16
## as.factor(AmountRange)[17200,35000]                                                   < 2e-16
## as.numeric(loansDataCorrectionCn$FICO)                                                < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                                1.1e-06
## as.numeric(loansDataCorrectionCn$Inquiries)                                           < 2e-16
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                         4.3e-12
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)            0.00028
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)            1.8e-08
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)            8.2e-12
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  2.5e-13
##                                                                                         
## (Intercept)                                                                          ***
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                     ***
## as.factor(AmountRange)[ 6075,10050)                                                  ***
## as.factor(AmountRange)[10050,17200)                                                  ***
## as.factor(AmountRange)[17200,35000]                                                  ***
## as.numeric(loansDataCorrectionCn$FICO)                                               ***
## as.numeric(loansDataCorrectionCn$Debt)                                               ***
## as.numeric(loansDataCorrectionCn$Inquiries)                                          ***
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        ***
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)           ***
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)           ***
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)           ***
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 36.1 on 2484 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared: 0.775,   Adjusted R-squared: 0.774 
## F-statistic:  659 on 13 and 2484 DF,  p-value: <2e-16
# Confidence intervals for the lm3 coefficients; the output lists the 2.5 %
# and 97.5 % bounds for every term.
confint(lm3)
##                                                                                           2.5 %
## (Intercept)                                                                           2.369e+02
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           2.571e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      5.521e+01
## as.factor(AmountRange)[ 6075,10050)                                                   1.738e+01
## as.factor(AmountRange)[10050,17200)                                                   3.579e+01
## as.factor(AmountRange)[17200,35000]                                                   6.959e+01
## as.numeric(loansDataCorrectionCn$FICO)                                               -1.706e+01
## as.numeric(loansDataCorrectionCn$Debt)                                               -1.594e-02
## as.numeric(loansDataCorrectionCn$Inquiries)                                           4.738e+00
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        -1.561e-01
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)           -1.596e+00
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)           -2.220e+00
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)           -2.601e+00
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  8.181e-05
##                                                                                          97.5 %
## (Intercept)                                                                           2.549e+02
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           3.055e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      6.279e+01
## as.factor(AmountRange)[ 6075,10050)                                                   3.467e+01
## as.factor(AmountRange)[10050,17200)                                                   5.383e+01
## as.factor(AmountRange)[17200,35000]                                                   8.861e+01
## as.numeric(loansDataCorrectionCn$FICO)                                               -1.527e+01
## as.numeric(loansDataCorrectionCn$Debt)                                               -6.818e-03
## as.numeric(loansDataCorrectionCn$Inquiries)                                           7.075e+00
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        -8.751e-02
## as.factor(AmountRange)[ 6075,10050):as.numeric(loansDataCorrectionCn$FICO)           -4.787e-01
## as.factor(AmountRange)[10050,17200):as.numeric(loansDataCorrectionCn$FICO)           -1.076e+00
## as.factor(AmountRange)[17200,35000]:as.numeric(loansDataCorrectionCn$FICO)           -1.446e+00
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  1.412e-04
# Analysis of variance table for lm3. Terms are tested sequentially in
# formula order, so a term's p-value here can differ from its t-test in
# summary(lm3) (compare the Debt rows in the two outputs).
anova(lm3)
## Analysis of Variance Table
## 
## Response: as.numeric(loansDataCorrectionCn$Interest)
##                                                                                        Df
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                             1
## as.factor(loansDataCorrectionCn$Loan.L)                                                 1
## as.factor(AmountRange)                                                                  3
## as.numeric(loansDataCorrectionCn$FICO)                                                  1
## as.numeric(loansDataCorrectionCn$Debt)                                                  1
## as.numeric(loansDataCorrectionCn$Inquiries)                                             1
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           1
## as.factor(AmountRange):as.numeric(loansDataCorrectionCn$FICO)                           3
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)    1
## Residuals                                                                            2484
##                                                                                       Sum Sq
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          5798823
## as.factor(loansDataCorrectionCn$Loan.L)                                              2481695
## as.factor(AmountRange)                                                                492417
## as.numeric(loansDataCorrectionCn$FICO)                                               2130589
## as.numeric(loansDataCorrectionCn$Debt)                                                     8
## as.numeric(loansDataCorrectionCn$Inquiries)                                           125661
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                             32
## as.factor(AmountRange):as.numeric(loansDataCorrectionCn$FICO)                          73173
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   70692
## Residuals                                                                            3240411
##                                                                                      Mean Sq
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          5798823
## as.factor(loansDataCorrectionCn$Loan.L)                                              2481695
## as.factor(AmountRange)                                                                164139
## as.numeric(loansDataCorrectionCn$FICO)                                               2130589
## as.numeric(loansDataCorrectionCn$Debt)                                                     8
## as.numeric(loansDataCorrectionCn$Inquiries)                                           125661
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                             32
## as.factor(AmountRange):as.numeric(loansDataCorrectionCn$FICO)                          24391
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   70692
## Residuals                                                                               1305
##                                                                                      F value
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          4445.20
## as.factor(loansDataCorrectionCn$Loan.L)                                              1902.39
## as.factor(AmountRange)                                                                125.82
## as.numeric(loansDataCorrectionCn$FICO)                                               1633.24
## as.numeric(loansDataCorrectionCn$Debt)                                                  0.01
## as.numeric(loansDataCorrectionCn$Inquiries)                                            96.33
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           0.02
## as.factor(AmountRange):as.numeric(loansDataCorrectionCn$FICO)                          18.70
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   54.19
## Residuals                                                                                   
##                                                                                       Pr(>F)
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)                                              < 2e-16
## as.factor(AmountRange)                                                               < 2e-16
## as.numeric(loansDataCorrectionCn$FICO)                                               < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                                  0.94
## as.numeric(loansDataCorrectionCn$Inquiries)                                          < 2e-16
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           0.88
## as.factor(AmountRange):as.numeric(loansDataCorrectionCn$FICO)                        5.4e-12
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2) 2.5e-13
## Residuals                                                                                   
##                                                                                         
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)                                              ***
## as.factor(AmountRange)                                                               ***
## as.numeric(loansDataCorrectionCn$FICO)                                               ***
## as.numeric(loansDataCorrectionCn$Debt)                                                  
## as.numeric(loansDataCorrectionCn$Inquiries)                                          ***
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           
## as.factor(AmountRange):as.numeric(loansDataCorrectionCn$FICO)                        ***
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2) ***
## Residuals                                                                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Diagnostic plots for the lm3 fit (four plots are rendered in this report).
plot(lm3)

plot of chunk unnamed-chunk-32 plot of chunk unnamed-chunk-32 plot of chunk unnamed-chunk-32 plot of chunk unnamed-chunk-32

The difference in the following linear model is that the binned “Amount Range” factor has been replaced by the original Amount.R variable. This was done so that an overall view of the parameters across the entire range of amounts requested can be seen. We obtain the summary estimates and the confidence intervals in order to support claims about each variable's influence on the interest rate.

# Model 4: same structure as the quadratic model, but the binned AmountRange
# factor is replaced by Amount.R treated as a numeric predictor.
#   - Amount.R * FICO expands to both main effects plus their interaction.
#   - I(Open.C^2) * Debt adds the squared Open.C term, Debt and their
#     interaction; there is no linear Open.C term.
# The extra unary "+" after several line breaks does not add terms: the
# coefficient table in the output lists each predictor once.
# NOTE(review): Amount.R is taken from loansDataCorrection while every other
# variable comes from loansDataCorrectionCn -- confirm the two data frames
# have identical row order, or that this is not a typo.
# NOTE(review): the fitted scale in the output (intercept ~235, residuals up
# to ~190) is far larger than a percentage interest rate. If Interest (or
# FICO/Debt) is stored as a factor, as.numeric() returns level codes, not the
# underlying values -- confirm against the upstream data preparation.
lm4 <- lm(as.numeric(loansDataCorrectionCn$Interest) ~ I(as.numeric(loansDataCorrectionCn$FICO)^2) + 
    +as.factor(loansDataCorrectionCn$Loan.L) + as.numeric(loansDataCorrection$Amount.R) + 
    as.numeric(loansDataCorrection$Amount.R) * +as.numeric(loansDataCorrectionCn$FICO) + 
    as.numeric(loansDataCorrectionCn$Debt) + +as.numeric(loansDataCorrectionCn$Inquiries) + 
    I(as.numeric(loansDataCorrectionCn$Open.C)^2) * +as.numeric(loansDataCorrectionCn$Debt))

# Coefficient estimates, standard errors, residual summary and R-squared.
summary(lm4)
## 
## Call:
## lm(formula = as.numeric(loansDataCorrectionCn$Interest) ~ I(as.numeric(loansDataCorrectionCn$FICO)^2) + 
##     +as.factor(loansDataCorrectionCn$Loan.L) + as.numeric(loansDataCorrection$Amount.R) + 
##     as.numeric(loansDataCorrection$Amount.R) * +as.numeric(loansDataCorrectionCn$FICO) + 
##     as.numeric(loansDataCorrectionCn$Debt) + +as.numeric(loansDataCorrectionCn$Inquiries) + 
##     I(as.numeric(loansDataCorrectionCn$Open.C)^2) * +as.numeric(loansDataCorrectionCn$Debt))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -147.85  -22.34   -2.38   19.98  187.82 
## 
## Coefficients:
##                                                                                       Estimate
## (Intercept)                                                                           2.35e+02
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           2.82e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      5.64e+01
## as.numeric(loansDataCorrection$Amount.R)                                              4.02e-03
## as.numeric(loansDataCorrectionCn$FICO)                                               -1.62e+01
## as.numeric(loansDataCorrectionCn$Debt)                                               -1.14e-02
## as.numeric(loansDataCorrectionCn$Inquiries)                                           6.01e+00
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        -1.34e-01
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)      -9.43e-05
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  1.16e-04
##                                                                                      Std. Error
## (Intercept)                                                                            4.36e+00
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                            1.21e-02
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                       1.89e+00
## as.numeric(loansDataCorrection$Amount.R)                                               2.19e-04
## as.numeric(loansDataCorrectionCn$FICO)                                                 4.37e-01
## as.numeric(loansDataCorrectionCn$Debt)                                                 2.27e-03
## as.numeric(loansDataCorrectionCn$Inquiries)                                            5.83e-01
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                          1.71e-02
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)        1.27e-05
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   1.48e-05
##                                                                                      t value
## (Intercept)                                                                            53.93
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                            23.23
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                       29.84
## as.numeric(loansDataCorrection$Amount.R)                                               18.40
## as.numeric(loansDataCorrectionCn$FICO)                                                -37.06
## as.numeric(loansDataCorrectionCn$Debt)                                                 -5.02
## as.numeric(loansDataCorrectionCn$Inquiries)                                            10.30
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                          -7.81
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)        -7.40
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)    7.85
##                                                                                      Pr(>|t|)
## (Intercept)                                                                           < 2e-16
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      < 2e-16
## as.numeric(loansDataCorrection$Amount.R)                                              < 2e-16
## as.numeric(loansDataCorrectionCn$FICO)                                                < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                                5.5e-07
## as.numeric(loansDataCorrectionCn$Inquiries)                                           < 2e-16
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                         8.3e-15
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)       1.8e-13
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  6.0e-15
##                                                                                         
## (Intercept)                                                                          ***
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                     ***
## as.numeric(loansDataCorrection$Amount.R)                                             ***
## as.numeric(loansDataCorrectionCn$FICO)                                               ***
## as.numeric(loansDataCorrectionCn$Debt)                                               ***
## as.numeric(loansDataCorrectionCn$Inquiries)                                          ***
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        ***
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)      ***
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2) ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 35.4 on 2488 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared: 0.784,   Adjusted R-squared: 0.783 
## F-statistic: 1e+03 on 9 and 2488 DF,  p-value: <2e-16
# 95% confidence intervals (2.5 % and 97.5 % bounds) for every coefficient of lm4.
confint(lm4, level = 0.95)
##                                                                                           2.5 %
## (Intercept)                                                                           2.265e+02
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           2.578e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      5.268e+01
## as.numeric(loansDataCorrection$Amount.R)                                              3.596e-03
## as.numeric(loansDataCorrectionCn$FICO)                                               -1.707e+01
## as.numeric(loansDataCorrectionCn$Debt)                                               -1.588e-02
## as.numeric(loansDataCorrectionCn$Inquiries)                                           4.865e+00
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        -1.675e-01
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)      -1.193e-04
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  8.730e-05
##                                                                                          97.5 %
## (Intercept)                                                                           2.436e+02
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                           3.053e-01
## as.factor(loansDataCorrectionCn$Loan.L)60 months                                      6.009e+01
## as.numeric(loansDataCorrection$Amount.R)                                              4.453e-03
## as.numeric(loansDataCorrectionCn$FICO)                                               -1.535e+01
## as.numeric(loansDataCorrectionCn$Debt)                                               -6.960e-03
## as.numeric(loansDataCorrectionCn$Inquiries)                                           7.152e+00
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                        -1.003e-01
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)      -6.935e-05
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)  1.454e-04
# Analysis-of-variance table for lm4. For a single fitted model the terms are
# assessed sequentially, in the order they appear in the formula, so these
# p-values can differ from the per-coefficient t-tests printed by summary().
anova(object = lm4)
## Analysis of Variance Table
## 
## Response: as.numeric(loansDataCorrectionCn$Interest)
##                                                                                        Df
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                             1
## as.factor(loansDataCorrectionCn$Loan.L)                                                 1
## as.numeric(loansDataCorrection$Amount.R)                                                1
## as.numeric(loansDataCorrectionCn$FICO)                                                  1
## as.numeric(loansDataCorrectionCn$Debt)                                                  1
## as.numeric(loansDataCorrectionCn$Inquiries)                                             1
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           1
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)         1
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)    1
## Residuals                                                                            2488
##                                                                                       Sum Sq
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          5798823
## as.factor(loansDataCorrectionCn$Loan.L)                                              2481695
## as.numeric(loansDataCorrection$Amount.R)                                              609288
## as.numeric(loansDataCorrectionCn$FICO)                                               2133909
## as.numeric(loansDataCorrectionCn$Debt)                                                     3
## as.numeric(loansDataCorrectionCn$Inquiries)                                           129540
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           1407
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)        67848
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   77184
## Residuals                                                                            3113804
##                                                                                      Mean Sq
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          5798823
## as.factor(loansDataCorrectionCn$Loan.L)                                              2481695
## as.numeric(loansDataCorrection$Amount.R)                                              609288
## as.numeric(loansDataCorrectionCn$FICO)                                               2133909
## as.numeric(loansDataCorrectionCn$Debt)                                                     3
## as.numeric(loansDataCorrectionCn$Inquiries)                                           129540
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           1407
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)        67848
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   77184
## Residuals                                                                               1252
##                                                                                      F value
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          4633.39
## as.factor(loansDataCorrectionCn$Loan.L)                                              1982.93
## as.numeric(loansDataCorrection$Amount.R)                                              486.83
## as.numeric(loansDataCorrectionCn$FICO)                                               1705.04
## as.numeric(loansDataCorrectionCn$Debt)                                                  0.00
## as.numeric(loansDataCorrectionCn$Inquiries)                                           103.51
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           1.12
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)        54.21
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2)   61.67
## Residuals                                                                                   
##                                                                                       Pr(>F)
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          < 2e-16
## as.factor(loansDataCorrectionCn$Loan.L)                                              < 2e-16
## as.numeric(loansDataCorrection$Amount.R)                                             < 2e-16
## as.numeric(loansDataCorrectionCn$FICO)                                               < 2e-16
## as.numeric(loansDataCorrectionCn$Debt)                                                  0.96
## as.numeric(loansDataCorrectionCn$Inquiries)                                          < 2e-16
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           0.29
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)      2.4e-13
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2) 6.0e-15
## Residuals                                                                                   
##                                                                                         
## I(as.numeric(loansDataCorrectionCn$FICO)^2)                                          ***
## as.factor(loansDataCorrectionCn$Loan.L)                                              ***
## as.numeric(loansDataCorrection$Amount.R)                                             ***
## as.numeric(loansDataCorrectionCn$FICO)                                               ***
## as.numeric(loansDataCorrectionCn$Debt)                                                  
## as.numeric(loansDataCorrectionCn$Inquiries)                                          ***
## I(as.numeric(loansDataCorrectionCn$Open.C)^2)                                           
## as.numeric(loansDataCorrection$Amount.R):as.numeric(loansDataCorrectionCn$FICO)      ***
## as.numeric(loansDataCorrectionCn$Debt):I(as.numeric(loansDataCorrectionCn$Open.C)^2) ***
## Residuals                                                                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The following graphs will only be included in the final written analysis.

# Histogram of the loan interest rates, in percent.
# The rates are parsed from their text form before plotting: the raw data
# stores them as strings such as "8.90%", and calling as.numeric() directly on
# a factor returns its integer level codes rather than the rates themselves.
# The column is also referenced by its full name instead of relying on `$`
# partial matching.
# NOTE(review): assumes loansDataCorrection keeps the Interest.Rate column name
# from loansData -- confirm against the cleaning step.
hist(as.numeric(sub("%", "", as.character(loansDataCorrection$Interest.Rate), 
    fixed = TRUE)), breaks = 100, col = "blue", border = "green", 
    xlab = "Interest Rate (%)", main = "Interest Rate Histogram")

plot of chunk unnamed-chunk-34


# Residuals-versus-fitted scatter plot for the model stored in `lm`, with a
# red horizontal reference line at zero.
# The fitted()/residuals() accessors are used instead of `lm$fitted`, which
# only resolves through partial matching of `$` to the `fitted.values`
# component.
# NOTE(review): `lm` is assumed to be a fitted model object created earlier in
# the analysis (it masks stats::lm) -- confirm.
plot(fitted(lm), residuals(lm), xlab = "Linear M. Fitted", ylab = "Linear M. Residuals", 
    main = "Linear Model Residuals", cex = 0.6)
abline(h = 0, col = "red", lwd = 2)

plot of chunk unnamed-chunk-34


# Draw the four standard diagnostic panels for lm2.
# NOTE(review): assumes lm2 is an lm fit, so the default panel selection is
# c(1, 2, 3, 5) -- confirm.
plot(lm2, which = c(1, 2, 3, 5))

plot of chunk unnamed-chunk-34 plot of chunk unnamed-chunk-34 plot of chunk unnamed-chunk-34 plot of chunk unnamed-chunk-34

# Draw the four standard diagnostic panels for lm3.
# NOTE(review): assumes lm3 is an lm fit, so the default panel selection is
# c(1, 2, 3, 5) -- confirm.
plot(lm3, which = c(1, 2, 3, 5))

plot of chunk unnamed-chunk-34 plot of chunk unnamed-chunk-34 plot of chunk unnamed-chunk-34 plot of chunk unnamed-chunk-34