# Session setup: switch to the project directory, pull in the shared helper
# scripts, and restore the saved objects from loansData.clean.rda.
# NOTE(review): every path here is hard-coded relative to one user's home
# directory, so the script only runs unchanged on a machine laid out the same way.
setwd("~/R/Data Analysis/Data Analysis Project 1")
# presumably defines first.look(), which is called below -- confirm in eda.R
source("~/Dropbox/R_functions/eda.R")
# presumably defines lm_test_individual_variables(), which is called below --
# confirm in lm_utilities.R
source("~/Dropbox/R_functions/lm_utilities.R")
# Relative path, so it is resolved against the working directory set above.
# The rest of the script expects this to provide the `loansData.clean` data frame.
load("loansData.clean.rda")
# c('Interest.Rate', 'Amount.Requested','Amount.Requested.Cuts',
# 'Amount.Funded.By.Investors','Amount.Funded.By.Investors.Cuts',
# 'Loan.Length', 'Loan.Purpose',
# 'Debt.To.Income.Ratio','Debt.To.Income.Ratio.Cuts', 'State',
# 'Home.Ownership', 'Monthly.Income','Monthly.Income.Cuts', 'FICO.Range',
# 'FICO.numeric', 'Open.CREDIT.Lines',
# 'Revolving.CREDIT.Balance','Revolving.CREDIT.Balance.Cuts',
# 'Inquiries.in.the.Last.6.Months', 'Employment.Length')
# Exploratory overview of the cleaned loans data. The recorded output that
# follows shows first.look() printing str(), sample sizes, names(), head() and
# summary(). `pairs_columns` is the subset of columns passed on to the helper;
# the order of the names is kept exactly as originally written.
first.look(
  loansData.clean,
  pairs_columns = c(
    "Loan.Purpose",
    "State",
    "Home.Ownership",
    "Monthly.Income",
    "Open.CREDIT.Lines",
    "Revolving.CREDIT.Balance",
    "Employment.Length",
    "Inquiries.in.the.Last.6.Months",
    "Debt.To.Income.Ratio",
    "Amount.Requested",
    "Amount.Funded.By.Investors",
    "Loan.Length",
    "FICO.numeric",
    "Interest.Rate"
  )
)
## ----- str: look at data types and values -------
## 'data.frame': 2500 obs. of 21 variables:
## $ Amount.Requested : int 20000 19200 35000 10000 12000 6000 10000 33500 14675 7000 ...
## $ Amount.Funded.By.Investors : num 20000 19200 35000 9975 12000 ...
## $ Interest.Rate : num 0.089 0.1212 0.2198 0.0999 0.1171 ...
## $ Loan.Length : Factor w/ 2 levels "36 months","60 months": 1 1 2 1 1 1 1 2 1 1 ...
## $ Loan.Purpose : Factor w/ 14 levels "car","credit_card",..: 3 3 3 3 2 10 3 2 2 2 ...
## $ Debt.To.Income.Ratio : num 0.149 0.284 0.238 0.143 0.188 ...
## $ State : Factor w/ 46 levels "AK","AL","AR",..: 37 39 5 16 28 7 19 18 5 5 ...
## $ Home.Ownership : Factor w/ 5 levels "MORTGAGE","NONE",..: 1 1 1 1 5 4 5 1 5 5 ...
## $ Monthly.Income : num 6542 4583 11500 3833 3195 ...
## $ FICO.Range : Factor w/ 38 levels "640-644","645-649",..: 20 16 11 12 12 7 17 14 10 16 ...
## $ Open.CREDIT.Lines : int 14 12 14 10 11 17 10 12 9 8 ...
## $ Revolving.CREDIT.Balance : int 14272 11140 21977 9346 14469 10391 15957 27874 7246 7612 ...
## $ Inquiries.in.the.Last.6.Months : int 2 1 1 0 0 2 0 0 1 0 ...
## $ Employment.Length : Factor w/ 12 levels "< 1 year","1 year",..: 1 4 4 7 11 5 3 3 10 5 ...
## $ FICO.numeric : int 21 17 12 13 13 8 18 15 11 17 ...
## $ FICO.numeric2 : num 441 289 144 169 169 64 324 225 121 289 ...
## $ Debt.To.Income.Ratio.Cuts : Factor w/ 18 levels "[0,0.02]","(0.02,0.04]",..: 8 15 12 8 10 11 14 8 14 4 ...
## $ Revolving.CREDIT.Balance.Cuts : Factor w/ 14 levels "[0,2e+04]","(2e+04,4e+04]",..: 1 1 2 1 1 1 1 2 1 1 ...
## $ Monthly.Income.Cuts : Factor w/ 11 levels "[0,1e+04]","(1e+04,2e+04]",..: 1 1 2 1 1 1 1 2 1 1 ...
## $ Amount.Funded.By.Investors.Cuts: Factor w/ 19 levels "[-2e+03,0]","(0,2e+03]",..: 11 11 19 6 7 4 6 18 9 5 ...
## $ Amount.Requested.Cuts : Factor w/ 18 levels "[0,2e+03]","(2e+03,4e+03]",..: 10 10 18 5 6 3 5 17 8 4 ...
## NULL
##
## ----- sample size: 2500
## ----- sample size of complete cases: 2498
## ----- difference: 2
##
## ----- names: variable names
## [1] "Amount.Requested" "Amount.Funded.By.Investors"
## [3] "Interest.Rate" "Loan.Length"
## [5] "Loan.Purpose" "Debt.To.Income.Ratio"
## [7] "State" "Home.Ownership"
## [9] "Monthly.Income" "FICO.Range"
## [11] "Open.CREDIT.Lines" "Revolving.CREDIT.Balance"
## [13] "Inquiries.in.the.Last.6.Months" "Employment.Length"
## [15] "FICO.numeric" "FICO.numeric2"
## [17] "Debt.To.Income.Ratio.Cuts" "Revolving.CREDIT.Balance.Cuts"
## [19] "Monthly.Income.Cuts" "Amount.Funded.By.Investors.Cuts"
## [21] "Amount.Requested.Cuts"
##
## ----- head: first six rows
## Amount.Requested Amount.Funded.By.Investors Interest.Rate
## 81174 20000 20000 0.0890
## 99592 19200 19200 0.1212
## 80059 35000 35000 0.2198
## 15825 10000 9975 0.0999
## 33182 12000 12000 0.1171
## 62403 6000 6000 0.1531
## Loan.Length Loan.Purpose Debt.To.Income.Ratio State
## 81174 36 months debt_consolidation 0.1490 SC
## 99592 36 months debt_consolidation 0.2836 TX
## 80059 60 months debt_consolidation 0.2381 CA
## 15825 36 months debt_consolidation 0.1430 KS
## 33182 36 months credit_card 0.1878 NJ
## 62403 36 months other 0.2005 CT
## Home.Ownership Monthly.Income FICO.Range Open.CREDIT.Lines
## 81174 MORTGAGE 6542 735-739 14
## 99592 MORTGAGE 4583 715-719 12
## 80059 MORTGAGE 11500 690-694 14
## 15825 MORTGAGE 3833 695-699 10
## 33182 RENT 3195 695-699 11
## 62403 OWN 4892 670-674 17
## Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
## 81174 14272 2
## 99592 11140 1
## 80059 21977 1
## 15825 9346 0
## 33182 14469 0
## 62403 10391 2
## Employment.Length FICO.numeric FICO.numeric2
## 81174 < 1 year 21 441
## 99592 2 years 17 289
## 80059 2 years 12 144
## 15825 5 years 13 169
## 33182 9 years 13 169
## 62403 3 years 8 64
## Debt.To.Income.Ratio.Cuts Revolving.CREDIT.Balance.Cuts
## 81174 (0.14,0.16] [0,2e+04]
## 99592 (0.28,0.3] [0,2e+04]
## 80059 (0.22,0.24] (2e+04,4e+04]
## 15825 (0.14,0.16] [0,2e+04]
## 33182 (0.18,0.2] [0,2e+04]
## 62403 (0.2,0.22] [0,2e+04]
## Monthly.Income.Cuts Amount.Funded.By.Investors.Cuts
## 81174 [0,1e+04] (1.8e+04,2e+04]
## 99592 [0,1e+04] (1.8e+04,2e+04]
## 80059 (1e+04,2e+04] (3.4e+04,3.6e+04]
## 15825 [0,1e+04] (8e+03,1e+04]
## 33182 [0,1e+04] (1e+04,1.2e+04]
## 62403 [0,1e+04] (4e+03,6e+03]
## Amount.Requested.Cuts
## 81174 (1.8e+04,2e+04]
## 99592 (1.8e+04,2e+04]
## 80059 (3.4e+04,3.6e+04]
## 15825 (8e+03,1e+04]
## 33182 (1e+04,1.2e+04]
## 62403 (4e+03,6e+03]
##
## ----- summary: statistics for each variable
## Amount.Requested Amount.Funded.By.Investors Interest.Rate
## Min. : 1000 Min. : 0 Min. :0.0542
## 1st Qu.: 6000 1st Qu.: 6000 1st Qu.:0.1016
## Median :10000 Median :10000 Median :0.1311
## Mean :12406 Mean :12002 Mean :0.1307
## 3rd Qu.:17000 3rd Qu.:16000 3rd Qu.:0.1580
## Max. :35000 Max. :35000 Max. :0.2489
##
## Loan.Length Loan.Purpose Debt.To.Income.Ratio
## 36 months:1952 debt_consolidation:1307 Min. :0.0000
## 60 months: 548 credit_card : 444 1st Qu.:0.0975
## other : 201 Median :0.1532
## home_improvement : 152 Mean :0.1538
## major_purchase : 101 3rd Qu.:0.2067
## small_business : 87 Max. :0.3491
## (Other) : 208
## State Home.Ownership Monthly.Income FICO.Range
## CA : 433 MORTGAGE:1148 Min. : 588 670-674: 171
## NY : 255 NONE : 1 1st Qu.: 3500 675-679: 166
## TX : 174 OTHER : 5 Median : 5000 680-684: 157
## FL : 169 OWN : 200 Mean : 5689 695-699: 153
## IL : 101 RENT :1146 3rd Qu.: 6800 665-669: 145
## GA : 98 Max. :102750 690-694: 140
## (Other):1270 NA's :1 (Other):1568
## Open.CREDIT.Lines Revolving.CREDIT.Balance Inquiries.in.the.Last.6.Months
## Min. : 2.0 Min. : 0 Min. :0.000
## 1st Qu.: 7.0 1st Qu.: 5586 1st Qu.:0.000
## Median : 9.0 Median : 10962 Median :0.000
## Mean :10.1 Mean : 15245 Mean :0.906
## 3rd Qu.:13.0 3rd Qu.: 18889 3rd Qu.:1.000
## Max. :38.0 Max. :270800 Max. :9.000
## NA's :2 NA's :2 NA's :2
## Employment.Length FICO.numeric FICO.numeric2 Debt.To.Income.Ratio.Cuts
## 10+ years:653 Min. : 2.0 Min. : 4 (0.14,0.16]: 257
## < 1 year :250 1st Qu.:10.0 1st Qu.: 100 (0.16,0.18]: 245
## 2 years :244 Median :14.0 Median : 196 (0.12,0.14]: 222
## 3 years :235 Mean :15.2 Mean : 279 (0.1,0.12] : 219
## 5 years :202 3rd Qu.:19.0 3rd Qu.: 361 (0.2,0.22] : 217
## 4 years :192 Max. :40.0 Max. :1600 (0.18,0.2] : 208
## (Other) :724 (Other) :1132
## Revolving.CREDIT.Balance.Cuts Monthly.Income.Cuts
## [0,2e+04] :1932 [0,1e+04] :2297
## (2e+04,4e+04]: 435 (1e+04,2e+04]: 186
## (4e+04,6e+04]: 70 (2e+04,3e+04]: 13
## (6e+04,8e+04]: 28 (3e+04,4e+04]: 1
## (8e+04,1e+05]: 12 (6e+04,7e+04]: 1
## (Other) : 21 (Other) : 1
## NA's : 2 NA's : 1
## Amount.Funded.By.Investors.Cuts Amount.Requested.Cuts
## (4e+03,6e+03] :366 (4e+03,6e+03] :365
## (8e+03,1e+04] :346 (8e+03,1e+04] :353
## (6e+03,8e+03] :280 (6e+03,8e+03] :277
## (1e+04,1.2e+04] :235 (1e+04,1.2e+04] :234
## (1.4e+04,1.6e+04]:207 (1.4e+04,1.6e+04]:214
## (2e+03,4e+03] :206 (2e+03,4e+03] :199
## (Other) :860 (Other) :858
# Keep only the rows that have no missing value in any column, so that every
# single-predictor model below is fitted on the same set of observations.
loansData.complete <- loansData.clean[complete.cases(loansData.clean), ]
# Fit Interest.Rate against each column of the data frame, one column at a time,
# and print the diagnostics for each fit.
# NOTE(review): the data frame passed in still contains Interest.Rate itself, so
# the run includes a degenerate fit of the response on itself (R-squared of 1,
# meaningless diagnostics). Ignore that section, or drop the column before the
# call and regenerate the output.
lm_test_individual_variables(loansData.complete, loansData.complete$Interest.Rate)
##
##
## ----start------- Amount.Requested --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.10376 -0.03010 0.00025 0.02717 0.11908
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.09e-01 1.48e-03 73.3 <2e-16 ***
## predictor_variable 1.78e-06 1.01e-07 17.6 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0394 on 2496 degrees of freedom
## Multiple R-squared: 0.11, Adjusted R-squared: 0.11
## F-statistic: 309 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 308.8471 0.0000 0.1098
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 47.04, df = 1, p-value = 6.967e-12
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.6400 -0.7640 0.0065 0.0000 0.6890 3.0300
##
## ----end------- Amount.Requested --------------
##
##
## ----start------- Amount.Funded.By.Investors --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.10529 -0.03023 0.00041 0.02734 0.11880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.09e-01 1.45e-03 74.9 <2e-16 ***
## predictor_variable 1.82e-06 1.02e-07 17.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0393 on 2496 degrees of freedom
## Multiple R-squared: 0.113, Adjusted R-squared: 0.113
## F-statistic: 319 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 319.4228 0.0000 0.1131
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 58.15, df = 1, p-value = 2.43e-14
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.6800 -0.7680 0.0105 0.0000 0.6950 3.0300
##
## ----end------- Amount.Funded.By.Investors --------------
##
##
## ----start------- Interest.Rate --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.07e-16 -7.00e-18 -6.00e-18 -5.00e-18 1.51e-14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.42e-16 1.99e-17 4.74e+01 <2e-16 ***
## predictor_variable 1.00e+00 1.45e-16 6.91e+15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.02e-16 on 2496 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 4.77e+31 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 4.771e+31 0.000e+00 1.000e+00
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## 99592
## 0.5254
## [1] "Cook's Distances greater than than 0.5 indicate possible outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.0 0.0 0.0 0.9 0.0 2320.0
##
## ----end------- Interest.Rate --------------
##
##
## ----start------- Loan.Length --------------
##
## ----end------- Loan.Length --------------
##
##
## ----start------- Loan.Purpose --------------
##
## ----end------- Loan.Purpose --------------
##
##
## ----start------- Debt.To.Income.Ratio --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08794 -0.03014 -0.00117 0.02605 0.12313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.11595 0.00188 61.69 <2e-16 ***
## predictor_variable 0.09591 0.01098 8.73 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0412 on 2496 degrees of freedom
## Multiple R-squared: 0.0297, Adjusted R-squared: 0.0293
## F-statistic: 76.3 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 76.27780 0.00000 0.02927
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.1400 -0.7320 -0.0284 0.0001 0.6330 3.0000
##
## ----end------- Debt.To.Income.Ratio --------------
##
##
## ----start------- State --------------
##
## ----end------- State --------------
##
##
## ----start------- Home.Ownership --------------
##
## ----end------- Home.Ownership --------------
##
##
## ----start------- Monthly.Income --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07709 -0.02951 0.00034 0.02742 0.11843
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.30e-01 1.46e-03 88.81 <2e-16 ***
## predictor_variable 1.36e-07 2.11e-07 0.65 0.52
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0418 on 2496 degrees of freedom
## Multiple R-squared: 0.000167, Adjusted R-squared: -0.000234
## F-statistic: 0.417 on 1 and 2496 DF, p-value: 0.519
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 0.4167904 0.5186023 -0.0002336
## [1] "F statistic p-value > 0.05 indicates none of the predictors are predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## Estimate Std. Error t value Pr(>|t|)
## 1.363e-07 2.112e-07 6.456e-01 5.186e-01
##
## ---------Cook's Distance------------------------------
## 54487
## 0.55
## [1] "Cook's Distances greater than than 0.5 indicate possible outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 37.05, df = 1, p-value = 1.153e-09
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.8600 -0.7060 0.0080 -0.0001 0.6560 2.8400
##
## ----end------- Monthly.Income --------------
##
##
## ----start------- FICO.Range --------------
##
## ----end------- FICO.Range --------------
##
##
## ----start------- Open.CREDIT.Lines --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08397 -0.03033 -0.00039 0.02664 0.11826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.122272 0.002039 59.97 < 2e-16 ***
## predictor_variable 0.000837 0.000185 4.53 6.2e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0416 on 2496 degrees of freedom
## Multiple R-squared: 0.00816, Adjusted R-squared: 0.00776
## F-statistic: 20.5 on 1 and 2496 DF, p-value: 6.17e-06
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 2.052e+01 6.169e-06 7.758e-03
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 13.87, df = 1, p-value = 0.0001958
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.0200 -0.7300 -0.0094 0.0001 0.6400 2.8500
##
## ----end------- Open.CREDIT.Lines --------------
##
##
## ----start------- Revolving.CREDIT.Balance --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.0812 -0.0302 0.0002 0.0274 0.1193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.29e-01 1.09e-03 118.39 <2e-16 ***
## predictor_variable 1.39e-07 4.56e-08 3.06 0.0022 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0417 on 2496 degrees of freedom
## Multiple R-squared: 0.00373, Adjusted R-squared: 0.00334
## F-statistic: 9.36 on 1 and 2496 DF, p-value: 0.00225
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 9.355737 0.002246 0.003335
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 10.46, df = 1, p-value = 0.001218
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -1.9500 -0.7240 0.0049 0.0000 0.6570 2.8600
##
## ----end------- Revolving.CREDIT.Balance --------------
##
##
## ----start------- Inquiries.in.the.Last.6.Months --------------
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number 5.8242e-015
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number 1.6475e-015
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number 1.1496e-015
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number 0
## Warning: There are other near singularities as well. 1
## Warning: pseudoinverse used at -0.045
## Warning: neighborhood radius 1.045
## Warning: reciprocal condition number 5.8242e-015
## Warning: There are other near singularities as well. 1
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08262 -0.02941 -0.00144 0.02670 0.11766
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.12564 0.00102 122.67 <2e-16 ***
## predictor_variable 0.00559 0.00067 8.34 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0412 on 2496 degrees of freedom
## Multiple R-squared: 0.0271, Adjusted R-squared: 0.0267
## F-statistic: 69.5 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 6.955e+01 1.110e-16 2.672e-02
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.0100 -0.7140 -0.0349 0.0000 0.6480 2.8600
##
## ----end------- Inquiries.in.the.Last.6.Months --------------
##
##
## ----start------- Employment.Length --------------
##
## ----end------- Employment.Length --------------
##
##
## ----start------- FICO.numeric --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07990 -0.02136 -0.00456 0.01835 0.10194
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.95e-01 1.41e-03 138.5 <2e-16 ***
## predictor_variable -4.23e-03 8.42e-05 -50.3 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0295 on 2496 degrees of freedom
## Multiple R-squared: 0.503, Adjusted R-squared: 0.503
## F-statistic: 2.53e+03 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 2526.0083 0.0000 0.5028
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 4.873, df = 1, p-value = 0.02729
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.720 -0.725 -0.155 0.000 0.623 3.470
##
## ----end------- FICO.numeric --------------
##
##
## ----start------- FICO.numeric2 --------------
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = response_variable ~ predictor_variable, data = data_frame)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.08231 -0.02286 -0.00487 0.01999 0.09814
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.60e-01 9.30e-04 172.1 <2e-16 ***
## predictor_variable -1.05e-04 2.44e-06 -43.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0316 on 2496 degrees of freedom
## Multiple R-squared: 0.427, Adjusted R-squared: 0.427
## F-statistic: 1.86e+03 on 1 and 2496 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 1859.0926 0.0000 0.4266
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
## [1] "Breusch-Pagan test for Heteroskedasticity indicates Constant Variance"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.6100 -0.7230 -0.1540 0.0002 0.6330 3.1100
##
## ----end------- FICO.numeric2 --------------
##
##
## ----start------- Debt.To.Income.Ratio.Cuts --------------
##
## ----end------- Debt.To.Income.Ratio.Cuts --------------
##
##
## ----start------- Revolving.CREDIT.Balance.Cuts --------------
##
## ----end------- Revolving.CREDIT.Balance.Cuts --------------
##
##
## ----start------- Monthly.Income.Cuts --------------
##
## ----end------- Monthly.Income.Cuts --------------
##
##
## ----start------- Amount.Funded.By.Investors.Cuts --------------
##
## ----end------- Amount.Funded.By.Investors.Cuts --------------
##
##
## ----start------- Amount.Requested.Cuts --------------
##
## ----end------- Amount.Requested.Cuts --------------
# Record the platform details for reproducibility. str() prints the structure
# itself and returns NULL invisibly, so it must not be wrapped in print():
# doing so only appends a stray "NULL" to the output.
str(.Platform)
## List of 8
## $ OS.type : chr "windows"
## $ file.sep : chr "/"
## $ dynlib.ext: chr ".dll"
## $ GUI : chr "RTerm"
## $ endian : chr "little"
## $ pkgType : chr "win.binary"
## $ path.sep : chr ";"
## $ r_arch : chr "x64"
# Record the R build (platform, version number, release date) for reproducibility.
print(version)
## _
## platform x86_64-w64-mingw32
## arch x86_64
## os mingw32
## system x86_64, mingw32
## status
## major 3
## minor 0.2
## year 2013
## month 09
## day 25
## svn rev 63987
## language R
## version.string R version 3.0.2 (2013-09-25)
## nickname Frisbee Sailing
# Record the attached and namespace-loaded package versions for reproducibility;
# locale = FALSE leaves the locale settings out of the printout.
print(sessionInfo(), locale = FALSE)
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
##
## attached base packages:
## [1] splines grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] HH_2.3-42 multcomp_1.3-0 survival_2.37-4
## [4] mvtnorm_0.9-9996 latticeExtra_0.6-26 RColorBrewer_1.0-5
## [7] lattice_0.20-24 randomizeBE_0.3-1 lmtest_0.9-32
## [10] zoo_1.7-10 knitr_1.5
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.2-4 evaluate_0.5.1 formatR_0.10 leaps_2.9
## [5] MASS_7.3-29 reshape_0.8.4 sandwich_2.3-0 stringr_0.6.2
## [9] tools_3.0.2 vcd_1.3-1
# Timestamp of this run.
print(Sys.time())
## [1] "2013-11-07 10:58:16 EST"