Data Analysis Project 1

Step 2 EDA

George Fisher george@georgefisher.com

Observations

  1. Multicollinearity between Amount.Requested and Amount.Funded.By.Investors
  2. Monthly.Income
  3. Open.CREDIT.Lines
  4. Revolving.CREDIT.Balance
  5. Inquiries.in.the.Last.6.Months
  6. FICO.numeric

Load the cleaned data file

# Setup: move to the project directory, pull in helper scripts, load the
# cleaned data set, and print/plot a first overview of it.

# The relative path passed to load() below is resolved against this directory.
setwd("~/R/Data Analysis/Data Analysis Project 1")
# Helper scripts kept outside the project; first.look() and
# lm_test_individual_variables() are presumably defined in these files
# -- TODO confirm.
source("~/Dropbox/R_functions/eda.R")
source("~/Dropbox/R_functions/lm_utilities.R")
# Expected to restore the loansData.clean data frame used below.
load("loansData.clean.rda")
# Reference list of column names (kept commented out, not evaluated):
# c('Interest.Rate', 'Amount.Requested','Amount.Requested.Cuts',
# 'Amount.Funded.By.Investors','Amount.Funded.By.Investors.Cuts',
# 'Loan.Length', 'Loan.Purpose',
# 'Debt.To.Income.Ratio','Debt.To.Income.Ratio.Cuts', 'State',
# 'Home.Ownership', 'Monthly.Income','Monthly.Income.Cuts', 'FICO.Range',
# 'FICO.numeric', 'Open.CREDIT.Lines',
# 'Revolving.CREDIT.Balance','Revolving.CREDIT.Balance.Cuts',
# 'Inquiries.in.the.Last.6.Months', 'Employment.Length')
# Overview of the data; pairs_columns names the columns handed to the helper
# (presumably the ones drawn in the pairs plot -- verify against eda.R).
first.look(loansData.clean, pairs_columns = c("Loan.Purpose", "State", "Home.Ownership", 
    "Monthly.Income", "Open.CREDIT.Lines", "Revolving.CREDIT.Balance", "Employment.Length", 
    "Inquiries.in.the.Last.6.Months", "Debt.To.Income.Ratio", "Amount.Requested", 
    "Amount.Funded.By.Investors", "Loan.Length", "FICO.numeric", "Interest.Rate"))
## ----- str: look at data types and values -------
## 'data.frame':    2500 obs. of  21 variables:
##  $ Amount.Requested               : int  20000 19200 35000 10000 12000 6000 10000 33500 14675 7000 ...
##  $ Amount.Funded.By.Investors     : num  20000 19200 35000 9975 12000 ...
##  $ Interest.Rate                  : num  0.089 0.1212 0.2198 0.0999 0.1171 ...
##  $ Loan.Length                    : Factor w/ 2 levels "36 months","60 months": 1 1 2 1 1 1 1 2 1 1 ...
##  $ Loan.Purpose                   : Factor w/ 14 levels "car","credit_card",..: 3 3 3 3 2 10 3 2 2 2 ...
##  $ Debt.To.Income.Ratio           : num  0.149 0.284 0.238 0.143 0.188 ...
##  $ State                          : Factor w/ 46 levels "AK","AL","AR",..: 37 39 5 16 28 7 19 18 5 5 ...
##  $ Home.Ownership                 : Factor w/ 5 levels "MORTGAGE","NONE",..: 1 1 1 1 5 4 5 1 5 5 ...
##  $ Monthly.Income                 : num  6542 4583 11500 3833 3195 ...
##  $ FICO.Range                     : Factor w/ 38 levels "640-644","645-649",..: 20 16 11 12 12 7 17 14 10 16 ...
##  $ Open.CREDIT.Lines              : int  14 12 14 10 11 17 10 12 9 8 ...
##  $ Revolving.CREDIT.Balance       : int  14272 11140 21977 9346 14469 10391 15957 27874 7246 7612 ...
##  $ Inquiries.in.the.Last.6.Months : int  2 1 1 0 0 2 0 0 1 0 ...
##  $ Employment.Length              : Factor w/ 12 levels "< 1 year","1 year",..: 1 4 4 7 11 5 3 3 10 5 ...
##  $ FICO.numeric                   : int  21 17 12 13 13 8 18 15 11 17 ...
##  $ FICO.numeric2                  : num  441 289 144 169 169 64 324 225 121 289 ...
##  $ Debt.To.Income.Ratio.Cuts      : Factor w/ 18 levels "[0,0.02]","(0.02,0.04]",..: 8 15 12 8 10 11 14 8 14 4 ...
##  $ Revolving.CREDIT.Balance.Cuts  : Factor w/ 14 levels "[0,2e+04]","(2e+04,4e+04]",..: 1 1 2 1 1 1 1 2 1 1 ...
##  $ Monthly.Income.Cuts            : Factor w/ 11 levels "[0,1e+04]","(1e+04,2e+04]",..: 1 1 2 1 1 1 1 2 1 1 ...
##  $ Amount.Funded.By.Investors.Cuts: Factor w/ 19 levels "[-2e+03,0]","(0,2e+03]",..: 11 11 19 6 7 4 6 18 9 5 ...
##  $ Amount.Requested.Cuts          : Factor w/ 18 levels "[0,2e+03]","(2e+03,4e+03]",..: 10 10 18 5 6 3 5 17 8 4 ...
## NULL
## 
## ----- sample size: 2500
## ----- sample size of complete cases: 2498
## ----- difference: 2 
## 
## ----- names: variable names
##  [1] "Amount.Requested"                "Amount.Funded.By.Investors"     
##  [3] "Interest.Rate"                   "Loan.Length"                    
##  [5] "Loan.Purpose"                    "Debt.To.Income.Ratio"           
##  [7] "State"                           "Home.Ownership"                 
##  [9] "Monthly.Income"                  "FICO.Range"                     
## [11] "Open.CREDIT.Lines"               "Revolving.CREDIT.Balance"       
## [13] "Inquiries.in.the.Last.6.Months"  "Employment.Length"              
## [15] "FICO.numeric"                    "FICO.numeric2"                  
## [17] "Debt.To.Income.Ratio.Cuts"       "Revolving.CREDIT.Balance.Cuts"  
## [19] "Monthly.Income.Cuts"             "Amount.Funded.By.Investors.Cuts"
## [21] "Amount.Requested.Cuts"          
## 
## ----- head: first six rows
##       Amount.Requested Amount.Funded.By.Investors Interest.Rate
## 81174            20000                      20000        0.0890
## 99592            19200                      19200        0.1212
## 80059            35000                      35000        0.2198
## 15825            10000                       9975        0.0999
## 33182            12000                      12000        0.1171
## 62403             6000                       6000        0.1531
##       Loan.Length       Loan.Purpose Debt.To.Income.Ratio State
## 81174   36 months debt_consolidation               0.1490    SC
## 99592   36 months debt_consolidation               0.2836    TX
## 80059   60 months debt_consolidation               0.2381    CA
## 15825   36 months debt_consolidation               0.1430    KS
## 33182   36 months        credit_card               0.1878    NJ
## 62403   36 months              other               0.2005    CT
##       Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines
## 81174       MORTGAGE           6542    735-739                14
## 99592       MORTGAGE           4583    715-719                12
## 80059       MORTGAGE          11500    690-694                14
## 15825       MORTGAGE           3833    695-699                10
## 33182           RENT           3195    695-699                11
## 62403            OWN           4892    670-674                17
##       Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
## 81174                    14272                              2
## 99592                    11140                              1
## 80059                    21977                              1
## 15825                     9346                              0
## 33182                    14469                              0
## 62403                    10391                              2
##       Employment.Length FICO.numeric FICO.numeric2
## 81174          < 1 year           21           441
## 99592           2 years           17           289
## 80059           2 years           12           144
## 15825           5 years           13           169
## 33182           9 years           13           169
## 62403           3 years            8            64
##       Debt.To.Income.Ratio.Cuts Revolving.CREDIT.Balance.Cuts
## 81174               (0.14,0.16]                     [0,2e+04]
## 99592                (0.28,0.3]                     [0,2e+04]
## 80059               (0.22,0.24]                 (2e+04,4e+04]
## 15825               (0.14,0.16]                     [0,2e+04]
## 33182                (0.18,0.2]                     [0,2e+04]
## 62403                (0.2,0.22]                     [0,2e+04]
##       Monthly.Income.Cuts Amount.Funded.By.Investors.Cuts
## 81174           [0,1e+04]                 (1.8e+04,2e+04]
## 99592           [0,1e+04]                 (1.8e+04,2e+04]
## 80059       (1e+04,2e+04]               (3.4e+04,3.6e+04]
## 15825           [0,1e+04]                   (8e+03,1e+04]
## 33182           [0,1e+04]                 (1e+04,1.2e+04]
## 62403           [0,1e+04]                   (4e+03,6e+03]
##       Amount.Requested.Cuts
## 81174       (1.8e+04,2e+04]
## 99592       (1.8e+04,2e+04]
## 80059     (3.4e+04,3.6e+04]
## 15825         (8e+03,1e+04]
## 33182       (1e+04,1.2e+04]
## 62403         (4e+03,6e+03]
## 
## ----- summary: statistics for each variable
##  Amount.Requested Amount.Funded.By.Investors Interest.Rate   
##  Min.   : 1000    Min.   :    0              Min.   :0.0542  
##  1st Qu.: 6000    1st Qu.: 6000              1st Qu.:0.1016  
##  Median :10000    Median :10000              Median :0.1311  
##  Mean   :12406    Mean   :12002              Mean   :0.1307  
##  3rd Qu.:17000    3rd Qu.:16000              3rd Qu.:0.1580  
##  Max.   :35000    Max.   :35000              Max.   :0.2489  
##                                                              
##     Loan.Length               Loan.Purpose  Debt.To.Income.Ratio
##  36 months:1952   debt_consolidation:1307   Min.   :0.0000      
##  60 months: 548   credit_card       : 444   1st Qu.:0.0975      
##                   other             : 201   Median :0.1532      
##                   home_improvement  : 152   Mean   :0.1538      
##                   major_purchase    : 101   3rd Qu.:0.2067      
##                   small_business    :  87   Max.   :0.3491      
##                   (Other)           : 208                       
##      State       Home.Ownership Monthly.Income     FICO.Range  
##  CA     : 433   MORTGAGE:1148   Min.   :   588   670-674: 171  
##  NY     : 255   NONE    :   1   1st Qu.:  3500   675-679: 166  
##  TX     : 174   OTHER   :   5   Median :  5000   680-684: 157  
##  FL     : 169   OWN     : 200   Mean   :  5689   695-699: 153  
##  IL     : 101   RENT    :1146   3rd Qu.:  6800   665-669: 145  
##  GA     :  98                   Max.   :102750   690-694: 140  
##  (Other):1270                   NA's   :1        (Other):1568  
##  Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
##  Min.   : 2.0      Min.   :     0           Min.   :0.000                 
##  1st Qu.: 7.0      1st Qu.:  5586           1st Qu.:0.000                 
##  Median : 9.0      Median : 10962           Median :0.000                 
##  Mean   :10.1      Mean   : 15245           Mean   :0.906                 
##  3rd Qu.:13.0      3rd Qu.: 18889           3rd Qu.:1.000                 
##  Max.   :38.0      Max.   :270800           Max.   :9.000                 
##  NA's   :2         NA's   :2                NA's   :2                     
##  Employment.Length  FICO.numeric  FICO.numeric2  Debt.To.Income.Ratio.Cuts
##  10+ years:653     Min.   : 2.0   Min.   :   4   (0.14,0.16]: 257         
##  < 1 year :250     1st Qu.:10.0   1st Qu.: 100   (0.16,0.18]: 245         
##  2 years  :244     Median :14.0   Median : 196   (0.12,0.14]: 222         
##  3 years  :235     Mean   :15.2   Mean   : 279   (0.1,0.12] : 219         
##  5 years  :202     3rd Qu.:19.0   3rd Qu.: 361   (0.2,0.22] : 217         
##  4 years  :192     Max.   :40.0   Max.   :1600   (0.18,0.2] : 208         
##  (Other)  :724                                   (Other)    :1132         
##  Revolving.CREDIT.Balance.Cuts    Monthly.Income.Cuts
##  [0,2e+04]    :1932            [0,1e+04]    :2297    
##  (2e+04,4e+04]: 435            (1e+04,2e+04]: 186    
##  (4e+04,6e+04]:  70            (2e+04,3e+04]:  13    
##  (6e+04,8e+04]:  28            (3e+04,4e+04]:   1    
##  (8e+04,1e+05]:  12            (6e+04,7e+04]:   1    
##  (Other)      :  21            (Other)      :   1    
##  NA's         :   2            NA's         :   1    
##   Amount.Funded.By.Investors.Cuts       Amount.Requested.Cuts
##  (4e+03,6e+03]    :366            (4e+03,6e+03]    :365      
##  (8e+03,1e+04]    :346            (8e+03,1e+04]    :353      
##  (6e+03,8e+03]    :280            (6e+03,8e+03]    :277      
##  (1e+04,1.2e+04]  :235            (1e+04,1.2e+04]  :234      
##  (1.4e+04,1.6e+04]:207            (1.4e+04,1.6e+04]:214      
##  (2e+03,4e+03]    :206            (2e+03,4e+03]    :199      
##  (Other)          :860            (Other)          :858

plot of chunk firstlook


Fit a single-predictor linear model of Interest.Rate on each variable

# Keep only rows with no missing values so that NAs cannot distort the
# individual fits run below.
loansData.complete <- loansData.clean[complete.cases(loansData.clean), ]

# Run the helper over the complete-case data with Interest.Rate as the
# response.
# NOTE(review): the data frame passed in still contains Interest.Rate itself,
# so the helper also fits the response against itself (R-squared of 1 in the
# output); consider dropping that column from the first argument -- confirm
# against lm_utilities.R before changing.
lm_test_individual_variables(
  loansData.complete,
  loansData.complete$Interest.Rate
)
## 
## 
## ----start-------  Amount.Requested  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10376 -0.03010  0.00025  0.02717  0.11908 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.09e-01   1.48e-03    73.3   <2e-16 ***
## predictor_variable 1.78e-06   1.01e-07    17.6   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0394 on 2496 degrees of freedom
## Multiple R-squared:  0.11,   Adjusted R-squared:  0.11 
## F-statistic:  309 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##      308.8471        0.0000        0.1098 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 47.04, df = 1, p-value = 6.967e-12
## 
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.6400 -0.7640  0.0065  0.0000  0.6890  3.0300

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Amount.Requested  --------------
## 
## 
## ----start-------  Amount.Funded.By.Investors  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.10529 -0.03023  0.00041  0.02734  0.11880 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.09e-01   1.45e-03    74.9   <2e-16 ***
## predictor_variable 1.82e-06   1.02e-07    17.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0393 on 2496 degrees of freedom
## Multiple R-squared:  0.113,  Adjusted R-squared:  0.113 
## F-statistic:  319 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##      319.4228        0.0000        0.1131 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 58.15, df = 1, p-value = 2.43e-14
## 
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.6800 -0.7680  0.0105  0.0000  0.6950  3.0300

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Amount.Funded.By.Investors  --------------
## 
## 
## ----start-------  Interest.Rate  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -3.07e-16 -7.00e-18 -6.00e-18 -5.00e-18  1.51e-14 
## 
## Coefficients:
##                    Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)        9.42e-16   1.99e-17 4.74e+01   <2e-16 ***
## predictor_variable 1.00e+00   1.45e-16 6.91e+15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.02e-16 on 2496 degrees of freedom
## Multiple R-squared:     1,   Adjusted R-squared:     1 
## F-statistic: 4.77e+31 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##     4.771e+31     0.000e+00     1.000e+00 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
##  99592 
## 0.5254 
## [1] "Cook's Distances greater than than 0.5 indicate possible outlying Y's or Leveraged X's"

plot of chunk indiv.model

## 
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    -1.0     0.0     0.0     0.9     0.0  2320.0

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Interest.Rate  --------------
## 
## 
## ----start-------  Loan.Length  --------------
## 
## ----end-------  Loan.Length  --------------
## 
## 
## ----start-------  Loan.Purpose  --------------
## 
## ----end-------  Loan.Purpose  --------------
## 
## 
## ----start-------  Debt.To.Income.Ratio  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08794 -0.03014 -0.00117  0.02605  0.12313 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.11595    0.00188   61.69   <2e-16 ***
## predictor_variable  0.09591    0.01098    8.73   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0412 on 2496 degrees of freedom
## Multiple R-squared:  0.0297, Adjusted R-squared:  0.0293 
## F-statistic: 76.3 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##      76.27780       0.00000       0.02927 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.1400 -0.7320 -0.0284  0.0001  0.6330  3.0000

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Debt.To.Income.Ratio  --------------
## 
## 
## ----start-------  State  --------------
## 
## ----end-------  State  --------------
## 
## 
## ----start-------  Home.Ownership  --------------
## 
## ----end-------  Home.Ownership  --------------
## 
## 
## ----start-------  Monthly.Income  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07709 -0.02951  0.00034  0.02742  0.11843 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.30e-01   1.46e-03   88.81   <2e-16 ***
## predictor_variable 1.36e-07   2.11e-07    0.65     0.52    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0418 on 2496 degrees of freedom
## Multiple R-squared:  0.000167,   Adjusted R-squared:  -0.000234 
## F-statistic: 0.417 on 1 and 2496 DF,  p-value: 0.519
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##     0.4167904     0.5186023    -0.0002336 
## [1] "F statistic p-value > 0.05 indicates none of the predictors are predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##   Estimate Std. Error    t value   Pr(>|t|) 
##  1.363e-07  2.112e-07  6.456e-01  5.186e-01 
## 
## ---------Cook's Distance------------------------------
## 54487 
##  0.55 
## [1] "Cook's Distances greater than than 0.5 indicate possible outlying Y's or Leveraged X's"

plot of chunk indiv.model

## 
## ---------Heteroskedasticity-----------------------
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 37.05, df = 1, p-value = 1.153e-09
## 
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.8600 -0.7060  0.0080 -0.0001  0.6560  2.8400

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Monthly.Income  --------------
## 
## 
## ----start-------  FICO.Range  --------------
## 
## ----end-------  FICO.Range  --------------
## 
## 
## ----start-------  Open.CREDIT.Lines  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08397 -0.03033 -0.00039  0.02664  0.11826 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.122272   0.002039   59.97  < 2e-16 ***
## predictor_variable 0.000837   0.000185    4.53  6.2e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0416 on 2496 degrees of freedom
## Multiple R-squared:  0.00816,    Adjusted R-squared:  0.00776 
## F-statistic: 20.5 on 1 and 2496 DF,  p-value: 6.17e-06
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##     2.052e+01     6.169e-06     7.758e-03 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 13.87, df = 1, p-value = 0.0001958
## 
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.0200 -0.7300 -0.0094  0.0001  0.6400  2.8500

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Open.CREDIT.Lines  --------------
## 
## 
## ----start-------  Revolving.CREDIT.Balance  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.0812 -0.0302  0.0002  0.0274  0.1193 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.29e-01   1.09e-03  118.39   <2e-16 ***
## predictor_variable 1.39e-07   4.56e-08    3.06   0.0022 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0417 on 2496 degrees of freedom
## Multiple R-squared:  0.00373,    Adjusted R-squared:  0.00334 
## F-statistic: 9.36 on 1 and 2496 DF,  p-value: 0.00225
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##      9.355737      0.002246      0.003335 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 10.46, df = 1, p-value = 0.001218
## 
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -1.9500 -0.7240  0.0049  0.0000  0.6570  2.8600

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Revolving.CREDIT.Balance  --------------
## 
## 
## ----start-------  Inquiries.in.the.Last.6.Months  --------------
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number  5.8242e-015
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number  1.6475e-015
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number  1.1496e-015
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number  0
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number  5.8242e-015
## Warning: There are other near singularities as well. 1

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08262 -0.02941 -0.00144  0.02670  0.11766 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.12564    0.00102  122.67   <2e-16 ***
## predictor_variable  0.00559    0.00067    8.34   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0412 on 2496 degrees of freedom
## Multiple R-squared:  0.0271, Adjusted R-squared:  0.0267 
## F-statistic: 69.5 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##     6.955e+01     1.110e-16     2.672e-02 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.0100 -0.7140 -0.0349  0.0000  0.6480  2.8600

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  Inquiries.in.the.Last.6.Months  --------------
## 
## 
## ----start-------  Employment.Length  --------------
## 
## ----end-------  Employment.Length  --------------
## 
## 
## ----start-------  FICO.numeric  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07990 -0.02136 -0.00456  0.01835  0.10194 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.95e-01   1.41e-03   138.5   <2e-16 ***
## predictor_variable -4.23e-03   8.42e-05   -50.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0295 on 2496 degrees of freedom
## Multiple R-squared:  0.503,  Adjusted R-squared:  0.503 
## F-statistic: 2.53e+03 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##     2526.0083        0.0000        0.5028 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## 
##  studentized Breusch-Pagan test
## 
## data:  model
## BP = 4.873, df = 1, p-value = 0.02729
## 
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.720  -0.725  -0.155   0.000   0.623   3.470

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  FICO.numeric  --------------
## 
## 
## ----start-------  FICO.numeric2  --------------

plot of chunk indiv.model plot of chunk indiv.model

## 
## 
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
## 
## 
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.08231 -0.02286 -0.00487  0.01999  0.09814 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         1.60e-01   9.30e-04   172.1   <2e-16 ***
## predictor_variable -1.05e-04   2.44e-06   -43.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0316 on 2496 degrees of freedom
## Multiple R-squared:  0.427,  Adjusted R-squared:  0.427 
## F-statistic: 1.86e+03 on 1 and 2496 DF,  p-value: <2e-16
## 
## 
## ----------F, F p, Adj R^2------------------------------
##   F_statistic F_statistic_p   adjusted_R2 
##     1859.0926        0.0000        0.4266 
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
## 
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## 
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
## 
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
## 
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
## 
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
## 
## --------Mean Zero?-------------------------------
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -2.6100 -0.7230 -0.1540  0.0002  0.6330  3.1100

plot of chunk indiv.model plot of chunk indiv.model

## 
## ----end-------  FICO.numeric2  --------------
## 
## 
## ----start-------  Debt.To.Income.Ratio.Cuts  --------------
## 
## ----end-------  Debt.To.Income.Ratio.Cuts  --------------
## 
## 
## ----start-------  Revolving.CREDIT.Balance.Cuts  --------------
## 
## ----end-------  Revolving.CREDIT.Balance.Cuts  --------------
## 
## 
## ----start-------  Monthly.Income.Cuts  --------------
## 
## ----end-------  Monthly.Income.Cuts  --------------
## 
## 
## ----start-------  Amount.Funded.By.Investors.Cuts  --------------
## 
## ----end-------  Amount.Funded.By.Investors.Cuts  --------------
## 
## 
## ----start-------  Amount.Requested.Cuts  --------------
## 
## ----end-------  Amount.Requested.Cuts  --------------

Info about the system running this code

# Platform details. str() writes its listing itself and returns NULL
# invisibly, so it is called directly: wrapping it in print() would add a
# stray "NULL" line after the listing.
str(.Platform)
## List of 8
##  $ OS.type   : chr "windows"
##  $ file.sep  : chr "/"
##  $ dynlib.ext: chr ".dll"
##  $ GUI       : chr "RTerm"
##  $ endian    : chr "little"
##  $ pkgType   : chr "win.binary"
##  $ path.sep  : chr ";"
##  $ r_arch    : chr "x64"
# R build: platform, version string and release date.
print(version)
##                _                           
## platform       x86_64-w64-mingw32          
## arch           x86_64                      
## os             mingw32                     
## system         x86_64, mingw32             
## status                                     
## major          3                           
## minor          0.2                         
## year           2013                        
## month          09                          
## day            25                          
## svn rev        63987                       
## language       R                           
## version.string R version 3.0.2 (2013-09-25)
## nickname       Frisbee Sailing
# Versions of attached and namespace-loaded packages; locale = FALSE keeps
# the locale settings out of the printout.
print(sessionInfo(), locale = FALSE)
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## 
## attached base packages:
## [1] splines   grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] HH_2.3-42           multcomp_1.3-0      survival_2.37-4    
##  [4] mvtnorm_0.9-9996    latticeExtra_0.6-26 RColorBrewer_1.0-5 
##  [7] lattice_0.20-24     randomizeBE_0.3-1   lmtest_0.9-32      
## [10] zoo_1.7-10          knitr_1.5          
## 
## loaded via a namespace (and not attached):
##  [1] colorspace_1.2-4 evaluate_0.5.1   formatR_0.10     leaps_2.9       
##  [5] MASS_7.3-29      reshape_0.8.4    sandwich_2.3-0   stringr_0.6.2   
##  [9] tools_3.0.2      vcd_1.3-1
# Timestamp of this run.
print(Sys.time())
## [1] "2013-11-07 10:58:16 EST"