The simple model provides a very accurate depiction of the relationship between FICO and Interest.Rate, but the predictive power of the Elaborate model was 57% greater, based upon the Mean Squared Error it produced when run against the test half of the dataset. (Note: the run recorded below reports a test-set MSE that is 51.77% lower for the elaborate model.)
# Reproducible split: `train` holds a random half of the row indices of
# loansData.complete; the remaining rows are addressed later as [-train].
set.seed(1234)
n.loans = nrow(loansData.complete)
train = sample(x = n.loans, size = n.loans/2)
My thanks to Benjamin De Baets for his post at https://class.coursera.org/dataanalysis-002/forum/thread?thread_id=310
If you say lm(Y ~ ., data = df), you will get a model of Y against all the variables in the data.frame "df".
It is much handier to deal with a large number of regression variables this way, and it allows you to add and subtract variables programmatically rather than by hand, which quickly becomes unbearably tedious.
# Numeric predictors that go into the model as they are.
base.list = c(
    "Debt.To.Income.Ratio",
    "Open.CREDIT.Lines",
    "Revolving.CREDIT.Balance",
    "FICO.numeric",
    "FICO.numeric2",
    "Inquiries.in.the.Last.6.Months"
)
# Seed the modelling data frame with just those columns (all rows kept).
model.data.frame = loansData.complete[base.list]
My thanks to http://stats.stackexchange.com/questions/29477/how-to-write-a-linear-model-formula-with-100-variables-in-r
# ---- Expand the categorical predictors into indicator columns ----
# Each call below passes one column of loansData.complete to
# lm_create.indicators() (defined elsewhere) and appends the result to
# model.data.frame with cbind(). Judging by the echoed "indicators created"
# lines, the helper yields one D.<level> column per level of its input.
# Every level is kept: the lines that would drop the last appended column are
# left commented out. As a consequence the full model fitted afterwards reports
# NA coefficients ("not defined because of singularities") for some indicators,
# e.g. D.60months.
# Loan.Length
model.data.frame = cbind(model.data.frame, lm_create.indicators(loansData.complete$Loan.Length,
echo = TRUE, ret = TRUE))
##
## indicators created: D.36months, D.60months
# model.data.frame = model.data.frame[,-ncol(model.data.frame)]
# Loan.Purpose
model.data.frame = cbind(model.data.frame, lm_create.indicators(loansData.complete$Loan.Purpose,
echo = TRUE, ret = TRUE))
##
## indicators created: D.car, D.credit_card, D.debt_consolidation, D.educational, D.home_improvement, D.house, D.major_purchase, D.medical, D.moving, D.other, D.renewable_energy, D.small_business, D.vacation, D.wedding
# model.data.frame = model.data.frame[,-ncol(model.data.frame)]
# State
model.data.frame = cbind(model.data.frame, lm_create.indicators(loansData.complete$State,
echo = TRUE, ret = TRUE))
##
## indicators created: D.AK, D.AL, D.AR, D.AZ, D.CA, D.CO, D.CT, D.DC, D.DE, D.FL, D.GA, D.HI, D.IA, D.IL, D.IN, D.KS, D.KY, D.LA, D.MA, D.MD, D.MI, D.MN, D.MO, D.MS, D.MT, D.NC, D.NH, D.NJ, D.NM, D.NV, D.NY, D.OH, D.OK, D.OR, D.PA, D.RI, D.SC, D.SD, D.TX, D.UT, D.VA, D.VT, D.WA, D.WI, D.WV, D.WY
# model.data.frame = model.data.frame[,-ncol(model.data.frame)]
# Home.Ownership
model.data.frame = cbind(model.data.frame, lm_create.indicators(loansData.complete$Home.Ownership,
echo = TRUE, ret = TRUE))
##
## indicators created: D.MORTGAGE, D.NONE, D.OTHER, D.OWN, D.RENT
# model.data.frame = model.data.frame[,-ncol(model.data.frame)]
# FICO.Range NOTE: factor replaced with numeric FICO.numeric
# (the FICO.Range indicators are therefore not created; code kept for reference)
# model.data.frame = cbind(model.data.frame,
# lm_create.indicators(loansData.complete$FICO.Range, echo=TRUE, ret=TRUE))
# model.data.frame = model.data.frame[,-ncol(model.data.frame)]
# Employment.Length
model.data.frame = cbind(model.data.frame, lm_create.indicators(loansData.complete$Employment.Length,
echo = TRUE, ret = TRUE))
##
## indicators created: D.LT1year, D.1year, D.10PLUSyears, D.2years, D.3years, D.4years, D.5years, D.6years, D.7years, D.8years, D.9years, D.na
# model.data.frame = model.data.frame[,-ncol(model.data.frame)]
# Two derived numeric predictors, kept as top-level vectors and appended to the
# modelling data frame under their own names:
#   Loan.Amount          - amount requested plus amount funded by investors
#   Monthly.Income.Recip - reciprocal of the monthly income
Loan.Amount = with(loansData.complete, Amount.Requested + Amount.Funded.By.Investors)
Monthly.Income.Recip = 1/loansData.complete[["Monthly.Income"]]
model.data.frame = cbind(model.data.frame, Loan.Amount, Monthly.Income.Recip)
# keep track of the variables we start with
base.variable.list = names(model.data.frame)
# Fit the full ("base") model on the training rows only (subset = train).
# The response is taken directly from loansData.complete, so "." expands to
# every column of model.data.frame. The call text is stored in the model and
# echoed by summary(), so it is left exactly as written.
model.base = lm(loansData.complete$Interest.Rate ~ ., data = model.data.frame,
subset = train)
# lm_assumptions_summary(model.base)
# Coefficients shown as NA in the summary are the columns lm() could not
# estimate because of singularities.
summary(model.base)
##
## Call:
## lm(formula = loansData.complete$Interest.Rate ~ ., data = model.data.frame,
## subset = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06076 -0.01122 -0.00182 0.01073 0.09084
##
## Coefficients: (6 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.39e-01 2.01e-02 11.88 < 2e-16 ***
## Debt.To.Income.Ratio 9.60e-03 8.51e-03 1.13 0.260
## Open.CREDIT.Lines -3.18e-04 1.44e-04 -2.21 0.027 *
## Revolving.CREDIT.Balance -1.72e-08 3.73e-08 -0.46 0.644
## FICO.numeric -8.97e-03 3.64e-04 -24.68 < 2e-16 ***
## FICO.numeric2 1.27e-04 1.00e-05 12.72 < 2e-16 ***
## Inquiries.in.the.Last.6.Months 3.07e-03 4.72e-04 6.52 1.1e-10 ***
## D.36months -3.25e-02 1.53e-03 -21.22 < 2e-16 ***
## D.60months NA NA NA NA
## D.car -5.32e-04 5.80e-03 -0.09 0.927
## D.credit_card 3.72e-04 4.60e-03 0.08 0.936
## D.debt_consolidation 1.38e-03 4.47e-03 0.31 0.757
## D.educational 3.92e-03 8.10e-03 0.48 0.629
## D.home_improvement 1.68e-03 5.01e-03 0.34 0.737
## D.house -1.37e-03 8.46e-03 -0.16 0.871
## D.major_purchase 3.81e-03 5.13e-03 0.74 0.458
## D.medical -1.27e-03 7.07e-03 -0.18 0.858
## D.moving 1.45e-02 6.50e-03 2.22 0.026 *
## D.other 1.28e-02 4.75e-03 2.70 0.007 **
## D.renewable_energy -2.71e-03 1.44e-02 -0.19 0.851
## D.small_business 1.32e-02 5.24e-03 2.53 0.012 *
## D.vacation 1.59e-03 6.86e-03 0.23 0.817
## D.wedding NA NA NA NA
## D.AK 1.65e-03 2.04e-02 0.08 0.936
## D.AL -6.25e-03 1.94e-02 -0.32 0.748
## D.AR -9.83e-03 2.04e-02 -0.48 0.630
## D.AZ -1.83e-03 1.93e-02 -0.09 0.924
## D.CA -9.61e-03 1.89e-02 -0.51 0.612
## D.CO -1.09e-02 1.92e-02 -0.57 0.570
## D.CT -7.62e-03 1.93e-02 -0.40 0.693
## D.DC 1.25e-02 2.32e-02 0.54 0.592
## D.DE -1.10e-02 2.11e-02 -0.52 0.602
## D.FL -8.37e-03 1.90e-02 -0.44 0.660
## D.GA -5.65e-03 1.91e-02 -0.30 0.767
## D.HI -1.31e-03 2.02e-02 -0.06 0.948
## D.IA -3.33e-02 3.37e-02 -0.99 0.323
## D.IL -1.41e-02 1.91e-02 -0.74 0.461
## D.IN -4.61e-02 2.68e-02 -1.72 0.086 .
## D.KS -6.66e-04 2.08e-02 -0.03 0.974
## D.KY -1.07e-02 1.94e-02 -0.55 0.580
## D.LA -1.09e-02 2.01e-02 -0.54 0.588
## D.MA -9.09e-03 1.91e-02 -0.48 0.635
## D.MD -6.44e-03 1.92e-02 -0.33 0.738
## D.MI -1.23e-02 1.93e-02 -0.64 0.523
## D.MN -1.49e-02 1.93e-02 -0.77 0.441
## D.MO -1.09e-02 1.93e-02 -0.57 0.572
## D.MS 2.34e-02 3.33e-02 0.70 0.482
## D.MT -1.55e-02 2.11e-02 -0.73 0.463
## D.NC -8.39e-03 1.92e-02 -0.44 0.662
## D.NH -2.39e-02 2.04e-02 -1.17 0.242
## D.NJ -8.72e-03 1.91e-02 -0.46 0.648
## D.NM -8.44e-03 2.18e-02 -0.39 0.699
## D.NV -6.26e-03 1.97e-02 -0.32 0.750
## D.NY -9.59e-03 1.90e-02 -0.51 0.613
## D.OH -8.32e-03 1.92e-02 -0.43 0.665
## D.OK -9.80e-03 1.96e-02 -0.50 0.617
## D.OR 2.40e-03 1.95e-02 0.12 0.902
## D.PA -1.18e-02 1.91e-02 -0.62 0.538
## D.RI -9.68e-03 1.99e-02 -0.49 0.627
## D.SC -6.45e-03 1.95e-02 -0.33 0.741
## D.SD -2.14e-02 2.32e-02 -0.92 0.355
## D.TX -1.95e-03 1.90e-02 -0.10 0.918
## D.UT -2.14e-02 2.07e-02 -1.03 0.301
## D.VA -5.32e-03 1.91e-02 -0.28 0.781
## D.VT 4.05e-04 2.34e-02 0.02 0.986
## D.WA -6.96e-03 1.92e-02 -0.36 0.717
## D.WI -9.09e-03 1.97e-02 -0.46 0.644
## D.WV -6.97e-03 1.98e-02 -0.35 0.726
## D.WY NA NA NA NA
## D.MORTGAGE -9.06e-04 1.31e-03 -0.69 0.489
## D.NONE NA NA NA NA
## D.OTHER -5.92e-03 1.97e-02 -0.30 0.764
## D.OWN 6.06e-04 2.12e-03 0.29 0.775
## D.RENT NA NA NA NA
## D.LT1year 1.08e-03 3.53e-03 0.31 0.760
## D.1year 2.36e-04 3.67e-03 0.06 0.949
## D.10PLUSyears -2.29e-05 3.30e-03 -0.01 0.994
## D.2years 1.42e-03 3.53e-03 0.40 0.687
## D.3years 6.52e-04 3.53e-03 0.18 0.854
## D.4years 1.36e-03 3.68e-03 0.37 0.712
## D.5years 1.17e-03 3.59e-03 0.33 0.745
## D.6years 1.95e-03 3.76e-03 0.52 0.605
## D.7years 8.29e-05 3.96e-03 0.02 0.983
## D.8years 2.00e-03 4.00e-03 0.50 0.618
## D.9years -4.04e-03 4.57e-03 -0.89 0.376
## D.na NA NA NA NA
## Loan.Amount 8.80e-07 4.71e-08 18.68 < 2e-16 ***
## Monthly.Income.Recip 1.18e+01 5.19e+00 2.27 0.023 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0188 on 1167 degrees of freedom
## Multiple R-squared: 0.812, Adjusted R-squared: 0.799
## F-statistic: 62.3 on 81 and 1167 DF, p-value: <2e-16
# ---- Search for the p-value cut-off used to prune model.base ----
# For every significance level from 1.00 down to 0.01 (step 0.01):
#   1. ask lm_print.model.hi_p() (defined elsewhere) which predictors of
#      model.base to exclude at that level - presumably those whose t-test
#      p-value exceeds it; the result is used here as a list of names,
#   2. refit the model on the training rows without those predictors,
#   3. remember the refit if it is the best so far on any of four criteria:
#        r2*  - highest adjusted R^2
#        F*   - highest F statistic
#        RES* - lowest residual standard error
#        MSE* - lowest mean squared error on the rows outside `train`
# For each winner the significance level, the retained variable names and the
# other three metrics are stored too. Ties keep the first (largest) level.

# start with a list of non-NA variables in the model ex the Intercept
# (NA coefficients are the columns lm() could not estimate)
base.coefficients = coef(model.base)
starting.variable.list = names(base.coefficients)[!is.na(base.coefficients)]
starting.variable.list = starting.variable.list[starting.variable.list != "(Intercept)"]

# criteria to maximise: start below any attainable value
r2max = -Inf
r2sig.level = 1
r2names = ""
r2maxRES = 0
r2maxF = 0
r2maxMSE = 0
Fmax = -Inf
Fsig.level = 1
Fnames = ""
Fmaxr2 = 0
FmaxRES = 0
FmaxMSE = 0
# criteria to minimise: start above any attainable value
RESmin = Inf
RESsig.level = 1
RESnames = ""
RESminr2 = 0
RESminF = 0
RESminMSE = 0
MSEmin = Inf
MSEsig.level = 1
MSEnames = ""
MSEminr2 = 0
MSEminF = 0
MSEminRES = 0

# for every significance level of the t-stat p-value from 100% to 1%
for (sig.level in seq(1, 0.01, by = -0.01)) {
    # find the list to exclude
    exclude.variable.list = lm_print.model.hi_p(model.base, echo = FALSE, ret = TRUE,
        sig.level = sig.level)
    if (length(exclude.variable.list) == 0)
        next
    # create the list ex the exclude.variable.list
    shortened.variable.list = starting.variable.list[!starting.variable.list %in%
        exclude.variable.list]
    # nothing left to model at this level
    if (length(shortened.variable.list) == 0)
        next
    # create the data.frame to model; drop = FALSE keeps it a data.frame even
    # when a single predictor survives
    smaller.model.data.frame = model.data.frame[, shortened.variable.list, drop = FALSE]
    # model that smaller data.frame on the training list subset
    smaller.model.base = lm(loansData.complete$Interest.Rate ~ ., data = smaller.model.data.frame,
        subset = train)

    # compute every metric once per refit instead of once per comparison
    smaller.summary = summary(smaller.model.base)
    smaller.RES = smaller.summary$sigma
    smaller.r2 = smaller.summary$adj.r.squared
    smaller.F = smaller.summary$fstatistic[1]
    # mean squared prediction error on the rows that are not in `train`
    smaller.MSE = mean((loansData.complete$Interest.Rate - predict(smaller.model.base,
        smaller.model.data.frame))[-train]^2)

    # lowest residual standard error
    if (smaller.RES < RESmin) {
        RESmin = smaller.RES
        RESsig.level = sig.level
        RESnames = shortened.variable.list
        RESminr2 = smaller.r2
        RESminF = smaller.F
        RESminMSE = smaller.MSE
    }
    # highest adjusted R^2
    if (smaller.r2 > r2max) {
        r2max = smaller.r2
        r2sig.level = sig.level
        r2names = shortened.variable.list
        r2maxRES = smaller.RES
        r2maxF = smaller.F
        r2maxMSE = smaller.MSE
    }
    # highest F statistic
    if (smaller.F > Fmax) {
        Fmax = smaller.F
        Fsig.level = sig.level
        Fnames = shortened.variable.list
        Fmaxr2 = smaller.r2
        FmaxRES = smaller.RES
        FmaxMSE = smaller.MSE
    }
    # lowest held-out mean squared error
    if (smaller.MSE < MSEmin) {
        MSEmin = smaller.MSE
        MSEsig.level = sig.level
        MSEnames = shortened.variable.list
        MSEminr2 = smaller.r2
        MSEminF = smaller.F
        MSEminRES = smaller.RES
    }
}

# Collect the winners in one numeric vector. The names are assigned afterwards
# (rather than inside c()) because the F statistics carry a name of their own,
# which c(name = x) would paste onto the new name.
winning.levels = c(r2max, r2sig.level, r2maxRES, r2maxF, r2maxMSE, Fmax, Fsig.level,
    Fmaxr2, FmaxRES, FmaxMSE, RESmin, RESsig.level, RESminr2, RESminF, RESminMSE,
    MSEmin, MSEsig.level, MSEminr2, MSEminF, MSEminRES)
names(winning.levels) = c("r2max", "r2sig.level", "r2maxRES", "r2maxF", "r2maxMSE",
    "Fmax", "Fsig.level", "Fmaxr2", "FmaxRES", "FmaxMSE", "RESmin", "RESsig.level",
    "RESminr2", "RESminF", "RESminMSE", "MSEmin", "MSEsig.level", "MSEminr2",
    "MSEminF", "MSEminRES")
# one "name value" line per entry, values shown with 4 significant digits
for (i in seq_along(winning.levels)) cat(names(winning.levels)[i], format(winning.levels[i],
    digits = 4), "\n")
## r2max 0.8036
## r2sig.level 0.62
## r2maxRES 0.0186
## r2maxF 142.8
## r2maxMSE 0.0003851
## Fmax 798.2
## Fsig.level 0.01
## Fmaxr2 0.7931
## FmaxRES 0.01909
## FmaxMSE 0.0003629
## RESmin 0.0186
## RESsig.level 0.62
## RESminr2 0.8036
## RESminF 142.8
## RESminMSE 0.0003851
## MSEmin 0.0003619
## MSEsig.level 0.24
## MSEminr2 0.8002
## MSEminF 455.5
## MSEminRES 0.01876
# List the predictors of the full model that did not make it into the
# best-adjusted-R^2 variable set (r2names).
cat("\n-------- variables excluded (potential confounders)----------------\n")
##
## -------- variables excluded (potential confounders)----------------
setdiff(starting.variable.list, r2names)
## [1] "Revolving.CREDIT.Balance" "D.car"
## [3] "D.credit_card" "D.debt_consolidation"
## [5] "D.educational" "D.home_improvement"
## [7] "D.house" "D.medical"
## [9] "D.renewable_energy" "D.vacation"
## [11] "D.AK" "D.AL"
## [13] "D.AR" "D.AZ"
## [15] "D.CT" "D.FL"
## [17] "D.GA" "D.HI"
## [19] "D.KS" "D.MA"
## [21] "D.MD" "D.NC"
## [23] "D.NJ" "D.NM"
## [25] "D.NV" "D.OH"
## [27] "D.OR" "D.RI"
## [29] "D.SC" "D.TX"
## [31] "D.VA" "D.VT"
## [33] "D.WA" "D.WI"
## [35] "D.WV" "D.OTHER"
## [37] "D.OWN" "D.LT1year"
## [39] "D.1year" "D.10PLUSyears"
## [41] "D.2years" "D.3years"
## [43] "D.4years" "D.5years"
## [45] "D.7years"
# the model: refit on the training rows using only the variables that gave the
# highest adjusted R^2 in the cut-off search (r2names).
# drop = FALSE keeps the selection a data.frame even if r2names holds a single
# name; without it the selection collapses to a bare vector and lm() cannot use
# it as `data`.
reduced.model.data.frame = model.data.frame[, r2names, drop = FALSE]
reduced.model = lm(loansData.complete$Interest.Rate ~ ., data = reduced.model.data.frame,
subset = train)
# test the assumptions underlying the model
lm_assumptions_summary(reduced.model)
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = loansData.complete$Interest.Rate ~ ., data = reduced.model.data.frame,
## subset = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06211 -0.01144 -0.00163 0.01071 0.08928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.34e-01 4.12e-03 56.75 < 2e-16 ***
## Debt.To.Income.Ratio 9.78e-03 8.10e-03 1.21 0.2274
## Open.CREDIT.Lines -3.22e-04 1.39e-04 -2.32 0.0204 *
## FICO.numeric -9.00e-03 3.51e-04 -25.62 < 2e-16 ***
## FICO.numeric2 1.27e-04 9.62e-06 13.25 < 2e-16 ***
## Inquiries.in.the.Last.6.Months 3.09e-03 4.58e-04 6.75 2.3e-11 ***
## D.36months -3.26e-02 1.47e-03 -22.23 < 2e-16 ***
## D.major_purchase 3.56e-03 2.73e-03 1.30 0.1938
## D.moving 1.36e-02 4.74e-03 2.87 0.0042 **
## D.other 1.19e-02 1.93e-03 6.19 8.1e-10 ***
## D.small_business 1.31e-02 2.83e-03 4.64 3.9e-06 ***
## D.CA -3.55e-03 1.47e-03 -2.41 0.0161 *
## D.CO -4.65e-03 3.39e-03 -1.37 0.1707
## D.DC 1.89e-02 1.33e-02 1.42 0.1554
## D.DE -4.84e-03 9.37e-03 -0.52 0.6058
## D.IA -3.61e-02 1.87e-02 -1.93 0.0539 .
## D.IL -7.91e-03 2.75e-03 -2.87 0.0041 **
## D.IN -3.94e-02 1.87e-02 -2.11 0.0355 *
## D.KY -4.46e-03 4.62e-03 -0.97 0.3345
## D.LA -4.87e-03 6.66e-03 -0.73 0.4650
## D.MI -6.36e-03 3.99e-03 -1.59 0.1112
## D.MN -9.10e-03 4.16e-03 -2.19 0.0290 *
## D.MO -5.02e-03 4.14e-03 -1.21 0.2254
## D.MS 2.27e-02 1.88e-02 1.21 0.2274
## D.MT -9.70e-03 9.39e-03 -1.03 0.3020
## D.NH -1.83e-02 7.69e-03 -2.38 0.0173 *
## D.NY -3.49e-03 1.86e-03 -1.87 0.0611 .
## D.OK -3.91e-03 5.06e-03 -0.77 0.4394
## D.PA -5.74e-03 2.72e-03 -2.11 0.0352 *
## D.SD -1.57e-02 1.32e-02 -1.19 0.2353
## D.UT -1.54e-02 8.39e-03 -1.84 0.0660 .
## D.MORTGAGE -9.00e-04 1.17e-03 -0.77 0.4426
## D.6years 1.00e-03 2.18e-03 0.46 0.6451
## D.8years 8.93e-04 2.54e-03 0.35 0.7255
## D.9years -4.98e-03 3.33e-03 -1.50 0.1344
## Loan.Amount 8.88e-07 4.44e-08 20.01 < 2e-16 ***
## Monthly.Income.Recip 1.31e+01 4.89e+00 2.68 0.0076 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0186 on 1212 degrees of freedom
## Multiple R-squared: 0.809, Adjusted R-squared: 0.804
## F-statistic: 143 on 36 and 1212 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 142.8192 0.0000 0.8036
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## Estimate Std. Error t value Pr(>|t|)
## D.8years 0.0008935 0.002544 0.3512 0.72551
## D.6years 0.0010025 0.002176 0.4607 0.64509
## D.DE -0.0048361 0.009367 -0.5163 0.60576
## D.LA -0.0048660 0.006658 -0.7309 0.46500
## D.MORTGAGE -0.0009001 0.001172 -0.7681 0.44256
## D.OK -0.0039123 0.005058 -0.7735 0.43938
## D.KY -0.0044604 0.004620 -0.9654 0.33453
## D.MT -0.0096967 0.009391 -1.0325 0.30202
## D.SD -0.0157310 0.013249 -1.1873 0.23534
## Debt.To.Income.Ratio 0.0097812 0.008099 1.2077 0.22739
## D.MS 0.0226581 0.018760 1.2078 0.22737
## D.MO -0.0050229 0.004141 -1.2130 0.22536
## D.major_purchase 0.0035558 0.002735 1.3002 0.19378
## D.CO -0.0046504 0.003393 -1.3707 0.17071
## D.DC 0.0188610 0.013269 1.4215 0.15544
## D.9years -0.0049824 0.003326 -1.4979 0.13441
## D.MI -0.0063573 0.003988 -1.5942 0.11116
## D.UT -0.0154389 0.008389 -1.8403 0.06597
## D.NY -0.0034922 0.001863 -1.8742 0.06114
## D.IA -0.0361494 0.018736 -1.9295 0.05391
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 65.6, df = 36, p-value = 0.001849
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## FICO.numeric FICO.numeric2
## 20.51 20.44
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3.400 -0.631 -0.094 0.000 0.582 4.880 3
## Warning: not plotting observations with leverage one:
## 71, 644, 1028
## Warning: not plotting observations with leverage one:
## 71, 644, 1028
# Print the coefficient table of reduced.model (helper defined elsewhere).
# In the recorded output the rows are ordered by decreasing absolute Estimate,
# intercept included.
lm_print.ordered.coef(reduced.model, ret = FALSE, echo = TRUE)
## Estimate Std. Error t value Pr(>|t|)
## Monthly.Income.Recip 1.307e+01 4.886e+00 2.6760 7.552e-03
## (Intercept) 2.338e-01 4.120e-03 56.7476 0.000e+00
## D.IN -3.943e-02 1.873e-02 -2.1055 3.545e-02
## D.IA -3.615e-02 1.874e-02 -1.9295 5.391e-02
## D.36months -3.264e-02 1.468e-03 -22.2282 4.336e-92
## D.MS 2.266e-02 1.876e-02 1.2078 2.274e-01
## D.DC 1.886e-02 1.327e-02 1.4215 1.554e-01
## D.NH -1.833e-02 7.689e-03 -2.3842 1.727e-02
## D.SD -1.573e-02 1.325e-02 -1.1873 2.353e-01
## D.UT -1.544e-02 8.389e-03 -1.8403 6.597e-02
## D.moving 1.360e-02 4.737e-03 2.8719 4.152e-03
## D.small_business 1.311e-02 2.827e-03 4.6371 3.917e-06
## D.other 1.194e-02 1.927e-03 6.1932 8.060e-10
## Debt.To.Income.Ratio 9.781e-03 8.099e-03 1.2077 2.274e-01
## D.MT -9.697e-03 9.391e-03 -1.0325 3.020e-01
## D.MN -9.095e-03 4.160e-03 -2.1866 2.896e-02
## FICO.numeric -8.997e-03 3.512e-04 -25.6213 4.693e-116
## D.IL -7.907e-03 2.753e-03 -2.8725 4.143e-03
## D.MI -6.357e-03 3.988e-03 -1.5942 1.112e-01
## D.PA -5.744e-03 2.725e-03 -2.1080 3.523e-02
## D.MO -5.023e-03 4.141e-03 -1.2130 2.254e-01
## D.9years -4.982e-03 3.326e-03 -1.4979 1.344e-01
## D.LA -4.866e-03 6.658e-03 -0.7309 4.650e-01
## D.DE -4.836e-03 9.367e-03 -0.5163 6.058e-01
## D.CO -4.650e-03 3.393e-03 -1.3707 1.707e-01
## D.KY -4.460e-03 4.620e-03 -0.9654 3.345e-01
## D.OK -3.912e-03 5.058e-03 -0.7735 4.394e-01
## D.major_purchase 3.556e-03 2.735e-03 1.3002 1.938e-01
## D.CA -3.552e-03 1.474e-03 -2.4101 1.610e-02
## D.NY -3.492e-03 1.863e-03 -1.8742 6.114e-02
## Inquiries.in.the.Last.6.Months 3.089e-03 4.576e-04 6.7503 2.282e-11
## D.6years 1.002e-03 2.176e-03 0.4607 6.451e-01
## D.MORTGAGE -9.001e-04 1.172e-03 -0.7681 4.426e-01
## D.8years 8.935e-04 2.544e-03 0.3512 7.255e-01
## Open.CREDIT.Lines -3.216e-04 1.385e-04 -2.3213 2.043e-02
## FICO.numeric2 1.275e-04 9.620e-06 13.2522 1.560e-37
## Loan.Amount 8.879e-07 4.437e-08 20.0124 3.308e-77
# Print the fitted equation of reduced.model as one string, labelling the
# left-hand side "E(Interest.Rate)" (helper defined elsewhere).
# NOTE(review): in the recorded output the coefficients are shown with three
# decimals, so small ones appear as 0 (e.g. Loan.Amount), and the
# Open.CREDIT.Lines term is printed without a sign - confirm in the helper.
lm_print.model.function(reduced.model, EY = "E(Interest.Rate)")
## [1] "E(Interest.Rate) = 0.234 + 0.01*Debt.To.Income.Ratio 0*Open.CREDIT.Lines -0.009*FICO.numeric + 0*FICO.numeric2 + 0.003*Inquiries.in.the.Last.6.Months -0.033*D.36months + 0.004*D.major_purchase + 0.014*D.moving + 0.012*D.other + 0.013*D.small_business -0.004*D.CA -0.005*D.CO + 0.019*D.DC -0.005*D.DE -0.036*D.IA -0.008*D.IL -0.039*D.IN -0.004*D.KY -0.005*D.LA -0.006*D.MI -0.009*D.MN -0.005*D.MO + 0.023*D.MS -0.01*D.MT -0.018*D.NH -0.003*D.NY -0.004*D.OK -0.006*D.PA -0.016*D.SD -0.015*D.UT -0.001*D.MORTGAGE + 0.001*D.6years + 0.001*D.8years -0.005*D.9years + 0*Loan.Amount + 13.074*Monthly.Income.Recip"
# Simple benchmark model: interest rate as a quadratic function of the numeric
# FICO score (FICO.numeric and FICO.numeric2).
# NOTE(review): unlike model.base and reduced.model, this fit has no
# `subset = train`, so it is estimated on all rows of loansData.complete,
# including the rows later used as the test half. Its held-out MSE is therefore
# not a true out-of-sample figure - confirm whether this is intended.
fico.model = lm(Interest.Rate ~ FICO.numeric + FICO.numeric2, data = loansData.complete)
# test the assumptions underlying the model
lm_assumptions_summary(fico.model)
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = Interest.Rate ~ FICO.numeric + FICO.numeric2, data = loansData.complete)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07626 -0.02055 -0.00515 0.01699 0.10661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.26e-01 2.93e-03 76.9 <2e-16 ***
## FICO.numeric -8.35e-03 3.59e-04 -23.3 <2e-16 ***
## FICO.numeric2 1.14e-04 9.68e-06 11.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0287 on 2495 degrees of freedom
## Multiple R-squared: 0.529, Adjusted R-squared: 0.529
## F-statistic: 1.4e+03 on 2 and 2495 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 1402.6669 0.0000 0.5289
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 33.76, df = 2, p-value = 4.675e-08
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## FICO.numeric FICO.numeric2
## 19.13 19.13
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -2.660 -0.717 -0.180 0.000 0.593 3.730
# Print the coefficient table of fico.model (helper defined elsewhere); in the
# recorded output the rows are ordered by decreasing absolute Estimate.
lm_print.ordered.coef(fico.model, ret = FALSE, echo = TRUE)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2255477 2.933e-03 76.9 0.000e+00
## FICO.numeric -0.0083547 3.586e-04 -23.3 8.718e-109
## FICO.numeric2 0.0001143 9.685e-06 11.8 2.499e-31
# Print the fitted equation as one string, labelled "E(Interest.Rate)".
# Coefficients are shown with three decimals, so FICO.numeric2 appears as 0.
lm_print.model.function(fico.model, EY = "E(Interest.Rate)")
## [1] "E(Interest.Rate) = 0.226 -0.008*FICO.numeric + 0*FICO.numeric2"
# Side-by-side comparison of the simple and the reduced model (helper defined
# elsewhere): residual standard error, adjusted R^2, F statistic and its
# p-value, followed by a coefficient comparison.
# NOTE(review): the coefficient section is labelled FICO.numeric, yet the
# value reported for reduced.model (1.208, p = 0.2274) matches the
# Debt.To.Income.Ratio row of that model, not its FICO.numeric row. The helper
# seems to compare the first coefficient of each model by position - confirm.
lm_compare(fico.model, reduced.model)
## Residual Standard Error
## fico.model 0.02868
## reduced.model 0.0186
## Decreased: -0.01008
## reduced.model preferred
## Adjusted R^2
## fico.model 0.5289
## reduced.model 0.8036
## Increased: 0.2747
## reduced.model preferred
## F Statistic
## fico.model 1403
## reduced.model 142.8
## Decreased: -1260
## fico.model preferred
## F Statistic p-value
## fico.model 0
## reduced.model 0
## Unchanged: 0
##
## Coeffcient Statistics
## FICO.numeric abs(t stat)
## fico.model 23.3
## reduced.model 1.208
## Decreased: -22.09
## fico.model preferred
## FICO.numeric t stat p-value
## fico.model 8.718e-109
## reduced.model 0.2274
## Increased: 0.2274
# Mean squared prediction error of each model on the rows outside `train`.
actual.rate = loansData.complete$Interest.Rate
error.reduced = (actual.rate - predict(reduced.model, loansData.complete))[-train]
MSE.reduced = mean(error.reduced^2)
MSE.reduced
## [1] 0.0003851
error.fico = (actual.rate - predict(fico.model, loansData.complete))[-train]
MSE.fico = mean(error.fico^2)
MSE.fico
## [1] 0.0007985
# Report whether the reduced model wins and by what percentage its MSE is lower.
reduced.is.lower = MSE.reduced < MSE.fico
percent.lower = (MSE.fico - MSE.reduced)/MSE.fico * 100
cat("\n\nThe big reduced model produces a lower MSE than the simple quadratic FICO model ",
    reduced.is.lower, "\n", percent.lower, "% lower")
##
##
## The big reduced model produces a lower MSE than the simple quadratic FICO model TRUE
## 51.77 % lower
# thanks, as always, to Winston Chang, his book, page 94 and his website
# http://www.cookbook-r.com/Graphs/

# ---- Final figure: simple vs. elaborate model ----
# Builds two ggplot objects (simple.png, elaborate.png), prints each, and then
# draws both with multiplot() (defined elsewhere).

# points shown in the elaborate plot: every second row of the data set
plot.data.frame = loansData.complete[seq(1, nrow(loansData.complete), by = 2),
    ]

# Red curve of the elaborate plot: the fitted values of reduced.model, sorted
# from highest to lowest and spread evenly over x = 1..40.
# The grid length is taken from the fitted values themselves. The previous
# hard-coded step of 0.03125 produced exactly 1249 points and therefore only
# worked when the training half had exactly 1249 rows.
# NOTE(review): this curve is an illustration of the fitted values' range, not
# the model's prediction as a function of FICO score alone.
elaborate.fitted = fitted(reduced.model)
line.data.frame = data.frame(FICO.numeric = seq(1, 40, length.out = length(elaborate.fitted)))
line.data.frame$prediction = rev(sort(elaborate.fitted))

# x axis ticks shared by both plots: every 4th FICO level, labelled with the
# corresponding entry of FICO.levels (defined elsewhere)
fico.breaks = seq(2, 43, by = 4)
fico.labels = FICO.levels[fico.breaks]

# NOTE(review): FICO.numeric is used as a number elsewhere in this file but is
# given a discrete x scale here - confirm this renders as intended with the
# installed ggplot2 version.
elaborate.png = ggplot(data = plot.data.frame, aes(x = FICO.numeric, y = Interest.Rate)) +
    geom_point(shape = 1) +
    geom_line(data = line.data.frame, aes(x = FICO.numeric, y = prediction), color = "red") +
    scale_x_discrete(name = "FICO Score", breaks = fico.breaks, labels = fico.labels) +
    scale_y_continuous(name = "Interest Rate on a Loan (%)", labels = percent) +
    ggtitle("Many Factors Affect the Interest Rate you pay\nElaborate Model: 14 factors in a Multiple Linear Regression") +
    theme(plot.title = element_text(face = "bold"))

# simple plot: all rows, with a quadratic least-squares curve in FICO score
simple.png = ggplot(data = loansData.complete, aes(x = FICO.numeric, y = Interest.Rate)) +
    geom_point(shape = 1) +
    geom_smooth(method = lm, formula = y ~ poly(x, 2), se = FALSE, colour = "red") +
    scale_x_discrete(name = "FICO Score", breaks = fico.breaks, labels = fico.labels) +
    scale_y_continuous(name = "Interest Rate on a Loan (%)", labels = percent) +
    ggtitle("How Your FICO Score affects the Interest Rate you pay\nSimple Model: only FICO Score considered") +
    theme(plot.title = element_text(face = "bold"))

# show each plot on its own, then both together
simple.png
elaborate.png
multiplot(simple.png, elaborate.png)
# ---- Save the combined figure as PNG and as PDF ----
# Each export opens a file device, draws both plots with multiplot() and closes
# the device again. The target paths are fixed locations under the user's home
# directory.
# NOTE(review): the recorded output shows that multiplot() returns NULL, so the
# surrounding print() only echoes "NULL"; the drawing happens inside multiplot().
png("~/R/Data Analysis/Data Analysis Project 1/project_figures/finalfigure.png")
print(multiplot(simple.png, elaborate.png))
## NULL
# dev.off() closes the png device and reports the device that becomes current.
# NOTE(review): "pdf 2" here suggests another graphics device was already open
# when the png device was closed.
dev.off()
## pdf
## 2
pdf("~/R/Data Analysis/Data Analysis Project 1/project_figures/finalfigure.pdf")
print(multiplot(simple.png, elaborate.png))
## NULL
dev.off()
## pdf
## 2
# Alternative variable selection for comparison: let step() prune the full
# model automatically, with its progress trace switched off.
# (presumably stats::step with its defaults - AIC-based, backward when no
# scope is given; confirm no other `step` is on the search path)
model.base.stepped = step(model.base, trace = FALSE)
# the result of step
summary(model.base.stepped)
##
## Call:
## lm(formula = loansData.complete$Interest.Rate ~ Open.CREDIT.Lines +
## FICO.numeric + FICO.numeric2 + Inquiries.in.the.Last.6.Months +
## D.36months + D.moving + D.other + D.small_business + D.CA +
## D.DC + D.IA + D.IL + D.IN + D.MI + D.MN + D.NH + D.NY + D.PA +
## D.UT + D.9years + Loan.Amount + Monthly.Income.Recip, data = model.data.frame,
## subset = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06212 -0.01137 -0.00159 0.01068 0.08965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.34e-01 3.99e-03 58.71 < 2e-16 ***
## Open.CREDIT.Lines -2.61e-04 1.27e-04 -2.06 0.0399 *
## FICO.numeric -9.02e-03 3.49e-04 -25.82 < 2e-16 ***
## FICO.numeric2 1.28e-04 9.57e-06 13.33 < 2e-16 ***
## Inquiries.in.the.Last.6.Months 3.04e-03 4.54e-04 6.68 3.6e-11 ***
## D.36months -3.28e-02 1.45e-03 -22.58 < 2e-16 ***
## D.moving 1.39e-02 4.71e-03 2.96 0.0032 **
## D.other 1.17e-02 1.91e-03 6.16 9.9e-10 ***
## D.small_business 1.27e-02 2.81e-03 4.52 6.9e-06 ***
## D.CA -2.83e-03 1.43e-03 -1.98 0.0476 *
## D.DC 2.00e-02 1.32e-02 1.51 0.1306
## D.IA -3.60e-02 1.87e-02 -1.92 0.0548 .
## D.IL -7.12e-03 2.73e-03 -2.61 0.0092 **
## D.IN -3.90e-02 1.87e-02 -2.08 0.0373 *
## D.MI -5.67e-03 3.97e-03 -1.43 0.1534
## D.MN -8.44e-03 4.15e-03 -2.04 0.0419 *
## D.NH -1.82e-02 7.66e-03 -2.38 0.0174 *
## D.NY -2.71e-03 1.81e-03 -1.50 0.1346
## D.PA -4.77e-03 2.70e-03 -1.77 0.0774 .
## D.UT -1.55e-02 8.37e-03 -1.86 0.0636 .
## D.9years -5.08e-03 3.31e-03 -1.53 0.1251
## Loan.Amount 8.74e-07 4.33e-08 20.18 < 2e-16 ***
## Monthly.Income.Recip 1.40e+01 4.69e+00 2.98 0.0030 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0186 on 1226 degrees of freedom
## Multiple R-squared: 0.807, Adjusted R-squared: 0.804
## F-statistic: 233 on 22 and 1226 DF, p-value: <2e-16
# Show the coefficients of the stepped model whose p-value is above 0.05
# (helper defined elsewhere; echo = TRUE prints the table, ret = FALSE
# presumably suppresses the return value).
lm_print.model.hi_p(model.base.stepped, sig.level = 0.05, ret = FALSE, echo = TRUE)
## Estimate Std. Error t value Pr(>|t|)
## D.MI -0.005675 0.003972 -1.429 0.15335
## D.NY -0.002713 0.001812 -1.497 0.13456
## D.DC 0.019967 0.013199 1.513 0.13060
## D.9years -0.005080 0.003310 -1.535 0.12507
## D.PA -0.004766 0.002696 -1.768 0.07736
## D.UT -0.015543 0.008371 -1.857 0.06360
## D.IA -0.035963 0.018711 -1.922 0.05484
# my result: the model built from the variable set with the highest adjusted
# R^2 (reduced.model), shown again for comparison with the step() result
summary(reduced.model)
##
## Call:
## lm(formula = loansData.complete$Interest.Rate ~ ., data = reduced.model.data.frame,
## subset = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06211 -0.01144 -0.00163 0.01071 0.08928
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.34e-01 4.12e-03 56.75 < 2e-16 ***
## Debt.To.Income.Ratio 9.78e-03 8.10e-03 1.21 0.2274
## Open.CREDIT.Lines -3.22e-04 1.39e-04 -2.32 0.0204 *
## FICO.numeric -9.00e-03 3.51e-04 -25.62 < 2e-16 ***
## FICO.numeric2 1.27e-04 9.62e-06 13.25 < 2e-16 ***
## Inquiries.in.the.Last.6.Months 3.09e-03 4.58e-04 6.75 2.3e-11 ***
## D.36months -3.26e-02 1.47e-03 -22.23 < 2e-16 ***
## D.major_purchase 3.56e-03 2.73e-03 1.30 0.1938
## D.moving 1.36e-02 4.74e-03 2.87 0.0042 **
## D.other 1.19e-02 1.93e-03 6.19 8.1e-10 ***
## D.small_business 1.31e-02 2.83e-03 4.64 3.9e-06 ***
## D.CA -3.55e-03 1.47e-03 -2.41 0.0161 *
## D.CO -4.65e-03 3.39e-03 -1.37 0.1707
## D.DC 1.89e-02 1.33e-02 1.42 0.1554
## D.DE -4.84e-03 9.37e-03 -0.52 0.6058
## D.IA -3.61e-02 1.87e-02 -1.93 0.0539 .
## D.IL -7.91e-03 2.75e-03 -2.87 0.0041 **
## D.IN -3.94e-02 1.87e-02 -2.11 0.0355 *
## D.KY -4.46e-03 4.62e-03 -0.97 0.3345
## D.LA -4.87e-03 6.66e-03 -0.73 0.4650
## D.MI -6.36e-03 3.99e-03 -1.59 0.1112
## D.MN -9.10e-03 4.16e-03 -2.19 0.0290 *
## D.MO -5.02e-03 4.14e-03 -1.21 0.2254
## D.MS 2.27e-02 1.88e-02 1.21 0.2274
## D.MT -9.70e-03 9.39e-03 -1.03 0.3020
## D.NH -1.83e-02 7.69e-03 -2.38 0.0173 *
## D.NY -3.49e-03 1.86e-03 -1.87 0.0611 .
## D.OK -3.91e-03 5.06e-03 -0.77 0.4394
## D.PA -5.74e-03 2.72e-03 -2.11 0.0352 *
## D.SD -1.57e-02 1.32e-02 -1.19 0.2353
## D.UT -1.54e-02 8.39e-03 -1.84 0.0660 .
## D.MORTGAGE -9.00e-04 1.17e-03 -0.77 0.4426
## D.6years 1.00e-03 2.18e-03 0.46 0.6451
## D.8years 8.93e-04 2.54e-03 0.35 0.7255
## D.9years -4.98e-03 3.33e-03 -1.50 0.1344
## Loan.Amount 8.88e-07 4.44e-08 20.01 < 2e-16 ***
## Monthly.Income.Recip 1.31e+01 4.89e+00 2.68 0.0076 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0186 on 1212 degrees of freedom
## Multiple R-squared: 0.809, Adjusted R-squared: 0.804
## F-statistic: 143 on 36 and 1212 DF, p-value: <2e-16
# Show the coefficients of reduced.model whose p-value is above 0.05
# (helper defined elsewhere), for comparison with the stepped model.
lm_print.model.hi_p(reduced.model, sig.level = 0.05, ret = FALSE, echo = TRUE)
## Estimate Std. Error t value Pr(>|t|)
## D.8years 0.0008935 0.002544 0.3512 0.72551
## D.6years 0.0010025 0.002176 0.4607 0.64509
## D.DE -0.0048361 0.009367 -0.5163 0.60576
## D.LA -0.0048660 0.006658 -0.7309 0.46500
## D.MORTGAGE -0.0009001 0.001172 -0.7681 0.44256
## D.OK -0.0039123 0.005058 -0.7735 0.43938
## D.KY -0.0044604 0.004620 -0.9654 0.33453
## D.MT -0.0096967 0.009391 -1.0325 0.30202
## D.SD -0.0157310 0.013249 -1.1873 0.23534
## Debt.To.Income.Ratio 0.0097812 0.008099 1.2077 0.22739
## D.MS 0.0226581 0.018760 1.2078 0.22737
## D.MO -0.0050229 0.004141 -1.2130 0.22536
## D.major_purchase 0.0035558 0.002735 1.3002 0.19378
## D.CO -0.0046504 0.003393 -1.3707 0.17071
## D.DC 0.0188610 0.013269 1.4215 0.15544
## D.9years -0.0049824 0.003326 -1.4979 0.13441
## D.MI -0.0063573 0.003988 -1.5942 0.11116
## D.UT -0.0154389 0.008389 -1.8403 0.06597
## D.NY -0.0034922 0.001863 -1.8742 0.06114
## D.IA -0.0361494 0.018736 -1.9295 0.05391
# Side-by-side comparison of the stepped and the reduced model (helper defined
# elsewhere): residual standard error, adjusted R^2, F statistic and p-value.
# NOTE(review): the coefficient section is labelled Open.CREDIT.Lines, but the
# value reported for reduced.model (1.208, p = 0.2274) matches that model's
# Debt.To.Income.Ratio row. The helper seems to compare the first coefficient
# of each model by position rather than by name - confirm.
lm_compare(model.base.stepped, reduced.model)
## Residual Standard Error
## model.base.stepped 0.01859
## reduced.model 0.0186
## Increased: 6.904e-06
## model.base.stepped preferred
## Adjusted R^2
## model.base.stepped 0.8037
## reduced.model 0.8036
## Decreased: -0.0001458
## model.base.stepped preferred
## F Statistic
## model.base.stepped 233.3
## reduced.model 142.8
## Decreased: -90.46
## model.base.stepped preferred
## F Statistic p-value
## model.base.stepped 0
## reduced.model 0
## Unchanged: 0
##
## Coeffcient Statistics
## Open.CREDIT.Lines abs(t stat)
## model.base.stepped 2.057
## reduced.model 1.208
## Decreased: -0.849
## model.base.stepped preferred
## Open.CREDIT.Lines t stat p-value
## model.base.stepped 0.03993
## reduced.model 0.2274
## Increased: 0.1875
# Hold-out mean squared error of reduced.model: predict every row, keep only
# the rows whose index is not in `train`, then average the squared errors.
# local() keeps the intermediates out of the workspace; the outer parentheses
# make the assigned value print.
(MSE.reduced = local({
    fitted.rate = predict(reduced.model, loansData.complete)
    holdout.error = (loansData.complete$Interest.Rate - fitted.rate)[-train]
    mean(holdout.error^2)
}))
## [1] 0.0003851
# Hold-out mean squared error of the stepwise-selected model: predict every
# row, keep only the rows whose index is not in `train`, then average the
# squared errors. local() keeps the intermediates out of the workspace; the
# outer parentheses make the assigned value print.
(MSE.stepped = local({
    fitted.rate = predict(model.base.stepped, loansData.complete)
    holdout.error = (loansData.complete$Interest.Rate - fitted.rate)[-train]
    mean(holdout.error^2)
}))
## [1] 0.0003829
# Run the regression-assumption report on the stepwise-selected model. The
# output covers the model summary, the overall F statistic and adjusted R^2,
# coefficients with p > 0.05, Cook's distance, a Breusch-Pagan test for
# heteroskedasticity, autocorrelation, multicollinearity and a mean-zero check.
# NOTE(review): this run flags possible heteroskedasticity, and the
# multicollinearity step prints values for the two FICO terms but then reports
# an error -- both deserve a follow-up in the helper (defined outside this
# section).
lm_assumptions_summary(model.base.stepped)
##
##
## NOTE: in addition to this analysis, look at scatter.smooth plots of the residuals vs the main variables individually to see if quadratic transforms may be required
##
##
## Call:
## lm(formula = loansData.complete$Interest.Rate ~ Open.CREDIT.Lines +
## FICO.numeric + FICO.numeric2 + Inquiries.in.the.Last.6.Months +
## D.36months + D.moving + D.other + D.small_business + D.CA +
## D.DC + D.IA + D.IL + D.IN + D.MI + D.MN + D.NH + D.NY + D.PA +
## D.UT + D.9years + Loan.Amount + Monthly.Income.Recip, data = model.data.frame,
## subset = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.06212 -0.01137 -0.00159 0.01068 0.08965
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.34e-01 3.99e-03 58.71 < 2e-16 ***
## Open.CREDIT.Lines -2.61e-04 1.27e-04 -2.06 0.0399 *
## FICO.numeric -9.02e-03 3.49e-04 -25.82 < 2e-16 ***
## FICO.numeric2 1.28e-04 9.57e-06 13.33 < 2e-16 ***
## Inquiries.in.the.Last.6.Months 3.04e-03 4.54e-04 6.68 3.6e-11 ***
## D.36months -3.28e-02 1.45e-03 -22.58 < 2e-16 ***
## D.moving 1.39e-02 4.71e-03 2.96 0.0032 **
## D.other 1.17e-02 1.91e-03 6.16 9.9e-10 ***
## D.small_business 1.27e-02 2.81e-03 4.52 6.9e-06 ***
## D.CA -2.83e-03 1.43e-03 -1.98 0.0476 *
## D.DC 2.00e-02 1.32e-02 1.51 0.1306
## D.IA -3.60e-02 1.87e-02 -1.92 0.0548 .
## D.IL -7.12e-03 2.73e-03 -2.61 0.0092 **
## D.IN -3.90e-02 1.87e-02 -2.08 0.0373 *
## D.MI -5.67e-03 3.97e-03 -1.43 0.1534
## D.MN -8.44e-03 4.15e-03 -2.04 0.0419 *
## D.NH -1.82e-02 7.66e-03 -2.38 0.0174 *
## D.NY -2.71e-03 1.81e-03 -1.50 0.1346
## D.PA -4.77e-03 2.70e-03 -1.77 0.0774 .
## D.UT -1.55e-02 8.37e-03 -1.86 0.0636 .
## D.9years -5.08e-03 3.31e-03 -1.53 0.1251
## Loan.Amount 8.74e-07 4.33e-08 20.18 < 2e-16 ***
## Monthly.Income.Recip 1.40e+01 4.69e+00 2.98 0.0030 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0186 on 1226 degrees of freedom
## Multiple R-squared: 0.807, Adjusted R-squared: 0.804
## F-statistic: 233 on 22 and 1226 DF, p-value: <2e-16
##
##
## ----------F, F p, Adj R^2------------------------------
## F_statistic F_statistic_p adjusted_R2
## 233.2823 0.0000 0.8037
## [1] "F statistic p-value <= 0.05 indicates at least one predictor is predictive"
##
## ---------p-values > 0.05------------------------------
## [1] "Below are listed, in descending order, the individual p-values > 0.05"
## Estimate Std. Error t value Pr(>|t|)
## D.MI -0.005675 0.003972 -1.429 0.15335
## D.NY -0.002713 0.001812 -1.497 0.13456
## D.DC 0.019967 0.013199 1.513 0.13060
## D.9years -0.005080 0.003310 -1.535 0.12507
## D.PA -0.004766 0.002696 -1.768 0.07736
## D.UT -0.015543 0.008371 -1.857 0.06360
## D.IA -0.035963 0.018711 -1.922 0.05484
##
## ---------Cook's Distance------------------------------
## [1] "Cook's Distances less than 0.5 indicate no outlying Y's or Leveraged X's"
##
## ---------Heteroskedasticity-----------------------
##
## studentized Breusch-Pagan test
##
## data: model
## BP = 58.47, df = 22, p-value = 3.752e-05
##
## [1] "Breusch-Pagan test indicates possible Heteroskedasticity"
##
## --------Autocorrelation--------------------------
## [1] "Autocorrelation not indicated"
##
## -------Multicollinearity if GT 10---------------
## FICO.numeric FICO.numeric2
## 20.29 20.24
## [1] "Multicollinearity test generated an error"
##
## --------Mean Zero?-------------------------------
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3.400 -0.617 -0.098 0.000 0.579 4.900 2
## Warning: not plotting observations with leverage one:
## 71, 644
## Warning: not plotting observations with leverage one:
## 71, 644
# Record the platform details for reproducibility. str() writes the structure
# to the console itself and returns NULL, so it is called directly: wrapping it
# in print() only appended a stray "NULL" line to the output.
str(.Platform)
## List of 8
## $ OS.type : chr "windows"
## $ file.sep : chr "/"
## $ dynlib.ext: chr ".dll"
## $ GUI : chr "RTerm"
## $ endian : chr "little"
## $ pkgType : chr "win.binary"
## $ path.sep : chr ";"
## $ r_arch : chr "x64"
# Record the R build (platform, architecture, version string) that produced
# this report, for reproducibility.
print(version)
## _
## platform x86_64-w64-mingw32
## arch x86_64
## os mingw32
## system x86_64, mingw32
## status
## major 3
## minor 0.2
## year 2013
## month 09
## day 25
## svn rev 63987
## language R
## version.string R version 3.0.2 (2013-09-25)
## nickname Frisbee Sailing
# Record the attached and namespace-loaded packages with their versions;
# locale = FALSE leaves the locale settings out of the printout.
print(sessionInfo(), locale = FALSE)
## R version 3.0.2 (2013-09-25)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
##
## attached base packages:
## [1] splines grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] HH_2.3-42 multcomp_1.3-0 survival_2.37-4
## [4] mvtnorm_0.9-9996 latticeExtra_0.6-26 RColorBrewer_1.0-5
## [7] lattice_0.20-24 randomizeBE_0.3-1 lmtest_0.9-32
## [10] zoo_1.7-10 scales_0.2.3 ggplot2_0.9.3.1
## [13] knitr_1.5
##
## loaded via a namespace (and not attached):
## [1] colorspace_1.2-4 dichromat_2.0-0 digest_0.6.3 evaluate_0.5.1
## [5] formatR_0.10 gtable_0.1.2 labeling_0.2 leaps_2.9
## [9] MASS_7.3-29 munsell_0.4.2 plyr_1.8 proto_0.3-10
## [13] reshape_0.8.4 reshape2_1.2.2 sandwich_2.3-0 stringr_0.6.2
## [17] tools_3.0.2 vcd_1.3-1
# Timestamp of this run.
print(Sys.time())
## [1] "2013-11-14 16:17:30 EST"