Dusty Turner
April 25, 2017
While model building, always monitor your four assumptions
# Work from the lesson folder so the relative CSV path below resolves.
setwd("//usmaedu/apollo/math/Userdirs/Turner/MA206x/Lessons/Block 4/Lesson 35 Assessing Model Adequacy II (Transformations)")
lsn36 <- read.csv("cadet16.csv", header = TRUE)
# attach() puts the columns on the search path; the lm() calls further down
# refer to y, x1, ..., x5 by bare name and rely on it.
attach(lsn36)
# Preview the first rows of the data.
head(lsn36)
## X y x1 x2 x3 x4 x5
## 1 1 407.4573 108.1452 0.6826014 2.036272 2 -99.34688
## 2 2 403.3364 100.8490 0.7362477 1.956096 2 -101.17498
## 3 3 356.4369 116.8831 0.4324598 1.460949 1 -100.87674
## 4 4 343.8545 105.3687 0.4921432 1.361149 2 -100.02094
## 5 5 546.6986 104.5681 1.2583366 2.292721 1 -99.63886
## 6 6 481.4378 105.5678 1.0564692 2.102945 2 -98.38004
# Drop the first column (X in the preview above) before summarising.
lsn36 <- lsn36[, -1]
summary(lsn36)
## y x1 x2 x3
## Min. : 238.7 Min. : 80.3 Min. :0.02071 Min. :1.003
## 1st Qu.: 372.0 1st Qu.:100.0 1st Qu.:0.49252 1st Qu.:1.546
## Median : 440.3 Median :105.1 Median :0.85012 Median :2.040
## Mean : 480.7 Mean :105.2 Mean :1.00342 Mean :2.022
## 3rd Qu.: 556.4 3rd Qu.:110.1 3rd Qu.:1.29954 3rd Qu.:2.514
## Max. :1367.6 Max. :131.7 Max. :4.74919 Max. :2.999
## x4 x5
## Min. :0.000 Min. :-107.22
## 1st Qu.:1.000 1st Qu.:-101.33
## Median :2.000 Median : -99.96
## Mean :1.501 Mean : -99.97
## 3rd Qu.:2.000 3rd Qu.: -98.64
## Max. :2.000 Max. : -93.16
library(GGally)
# Pairs plot of the columns of lsn36.
ggpairs(lsn36)
# Single-predictor fit: y on x1 only.
mod.x1 <- lm(y ~ x1)
summary(mod.x1)
##
## Call:
## lm(formula = y ~ x1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -247.13 -108.49 -36.15 73.62 867.52
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 236.4076 73.1302 3.233 0.001266 **
## x1 2.3226 0.6938 3.347 0.000846 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 153.7 on 998 degrees of freedom
## Multiple R-squared: 0.0111, Adjusted R-squared: 0.01011
## F-statistic: 11.21 on 1 and 998 DF, p-value: 0.0008461
# Single-predictor fit: y on x2 only.
mod.x2 <- lm(y ~ x2)
summary(mod.x2)
##
## Call:
## lm(formula = y ~ x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -100.870 -24.056 -2.174 23.581 135.764
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 261.135 1.822 143.3 <2e-16 ***
## x2 218.782 1.497 146.2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32.65 on 998 degrees of freedom
## Multiple R-squared: 0.9554, Adjusted R-squared: 0.9553
## F-statistic: 2.137e+04 on 1 and 998 DF, p-value: < 2.2e-16
# Single-predictor fit: y on x3 only.
mod.x3 <- lm(y ~ x3)
summary(mod.x3)
##
## Call:
## lm(formula = y ~ x3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -221.89 -111.69 -39.17 70.25 909.00
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 409.935 17.840 22.98 < 2e-16 ***
## x3 34.985 8.492 4.12 4.11e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 153.3 on 998 degrees of freedom
## Multiple R-squared: 0.01672, Adjusted R-squared: 0.01574
## F-statistic: 16.97 on 1 and 998 DF, p-value: 4.11e-05
# Single-predictor fit: y on x4, with x4 converted to a factor so each
# level gets its own coefficient.
mod.x4 <- lm(y ~ as.factor(x4))
summary(mod.x4)
##
## Call:
## lm(formula = y ~ as.factor(x4))
##
## Residuals:
## Min 1Q Median 3Q Max
## -241.98 -108.46 -37.63 73.80 886.90
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 506.53 18.87 26.842 <2e-16 ***
## as.factor(x4)1 -30.62 20.53 -1.492 0.136
## as.factor(x4)2 -25.85 19.95 -1.296 0.195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 154.5 on 997 degrees of freedom
## Multiple R-squared: 0.002227, Adjusted R-squared: 0.0002253
## F-statistic: 1.113 on 2 and 997 DF, p-value: 0.3291
# Single-predictor fit: y on x5 only.
mod.x5 <- lm(y ~ x5)
summary(mod.x5)
##
## Call:
## lm(formula = y ~ x5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -251.17 -108.95 -39.40 74.35 873.77
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 123.220 241.899 0.509 0.611
## x5 -3.575 2.419 -1.478 0.140
##
## Residual standard error: 154.4 on 998 degrees of freedom
## Multiple R-squared: 0.002184, Adjusted R-squared: 0.001184
## F-statistic: 2.184 on 1 and 998 DF, p-value: 0.1397
# Full model: the five main effects plus every two-way interaction
# (the ^2 on the parenthesised sum expands to all pairwise terms).
full.mod <- lm(y ~ (x1 + x2 + x3 + as.factor(x4) + x5)^2)
summary(full.mod)
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5)^2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.3931 -3.6358 0.1204 3.7687 17.9787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 530.871870 141.463356 3.753 0.000185 ***
## x1 -2.794896 1.264551 -2.210 0.027323 *
## x2 8.056210 12.740270 0.632 0.527311
## x3 35.894342 16.356107 2.195 0.028430 *
## as.factor(x4)1 -31.533878 37.086882 -0.850 0.395383
## as.factor(x4)2 -42.345294 36.176262 -1.171 0.242074
## x5 4.762028 1.399833 3.402 0.000696 ***
## x1:x2 2.040457 0.037334 54.655 < 2e-16 ***
## x1:x3 -0.057793 0.045983 -1.257 0.209109
## x1:as.factor(x4)1 -0.049636 0.101554 -0.489 0.625117
## x1:as.factor(x4)2 -0.001523 0.098004 -0.016 0.987601
## x1:x5 -0.039672 0.012472 -3.181 0.001514 **
## x2:x3 -0.391679 0.469339 -0.835 0.404184
## x2:as.factor(x4)1 0.883615 1.075839 0.821 0.411660
## x2:as.factor(x4)2 0.731151 1.039301 0.704 0.481910
## x2:x5 0.023918 0.118399 0.202 0.839947
## x3:as.factor(x4)1 -1.036078 1.308250 -0.792 0.428577
## x3:as.factor(x4)2 -0.252031 1.262359 -0.200 0.841795
## x3:x5 -0.111999 0.159492 -0.702 0.482707
## as.factor(x4)1:x5 -0.376500 0.356740 -1.055 0.291507
## as.factor(x4)2:x5 -0.421979 0.346067 -1.219 0.223003
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.648 on 979 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 3.732e+04 on 20 and 979 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * as.factor(x4) + x1 * x5 + x2 * x3 + x2 * as.factor(x4) +
## x3 * as.factor(x4) + x3 * x5 + as.factor(x4) * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.4177 -3.6139 0.1201 3.7799 18.0037
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 533.329140 140.870440 3.786 0.000162 ***
## x1 -2.796134 1.263917 -2.212 0.027178 *
## x2 5.637983 4.359141 1.293 0.196188
## x3 35.907360 16.347974 2.196 0.028294 *
## as.factor(x4)1 -31.315712 37.053009 -0.845 0.398228
## as.factor(x4)2 -42.092802 36.136967 -1.165 0.244378
## x5 4.786621 1.393846 3.434 0.000619 ***
## x1:x2 2.040331 0.037310 54.686 < 2e-16 ***
## x1:x3 -0.057639 0.045954 -1.254 0.210040
## x1:as.factor(x4)1 -0.049800 0.101501 -0.491 0.623797
## x1:as.factor(x4)2 -0.001873 0.097941 -0.019 0.984745
## x1:x5 -0.039686 0.012465 -3.184 0.001500 **
## x2:x3 -0.374838 0.461649 -0.812 0.417015
## x2:as.factor(x4)1 0.885415 1.075275 0.823 0.410463
## x2:as.factor(x4)2 0.726116 1.038493 0.699 0.484592
## x3:as.factor(x4)1 -1.049851 1.305833 -0.804 0.421609
## x3:as.factor(x4)2 -0.265634 1.259945 -0.211 0.833064
## x3:x5 -0.111693 0.159407 -0.701 0.483669
## as.factor(x4)1:x5 -0.374712 0.356455 -1.051 0.293418
## as.factor(x4)2:x5 -0.420069 0.345769 -1.215 0.224703
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.645 on 980 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 3.933e+04 on 19 and 980 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * x5 + x2 * x3 + x2 * as.factor(x4) + x3 * as.factor(x4) +
## x3 * x5 + as.factor(x4) * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.3510 -3.7314 0.1233 3.7924 18.0890
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 544.54306 138.36926 3.935 8.89e-05 ***
## x1 -2.90414 1.23687 -2.348 0.019074 *
## x2 5.71187 4.35473 1.312 0.189946
## x3 36.11253 16.33634 2.211 0.027296 *
## as.factor(x4)1 -35.37173 35.93952 -0.984 0.325259
## as.factor(x4)2 -41.86535 34.89978 -1.200 0.230589
## x5 4.88350 1.38188 3.534 0.000428 ***
## x1:x2 2.04009 0.03728 54.717 < 2e-16 ***
## x1:x3 -0.05979 0.04586 -1.304 0.192569
## x1:x5 -0.04062 0.01233 -3.294 0.001023 **
## x2:x3 -0.39208 0.46097 -0.851 0.395219
## x2:as.factor(x4)1 0.90091 1.07317 0.839 0.401406
## x2:as.factor(x4)2 0.71194 1.03627 0.687 0.492231
## x3:as.factor(x4)1 -1.01841 1.30077 -0.783 0.433860
## x3:as.factor(x4)2 -0.25473 1.25446 -0.203 0.839134
## x3:x5 -0.11186 0.15931 -0.702 0.482731
## as.factor(x4)1:x5 -0.36215 0.35570 -1.018 0.308869
## as.factor(x4)2:x5 -0.41580 0.34533 -1.204 0.228860
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.641 on 982 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 4.401e+04 on 17 and 982 DF, p-value: < 2.2e-16
3. Remove x3*x4
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * x5 + x2 * x3 + x2 * as.factor(x4) + x3 * x5 +
## as.factor(x4) * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.2661 -3.7261 0.0607 3.8993 18.3106
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 547.13160 138.28658 3.957 8.15e-05 ***
## x1 -2.89628 1.23651 -2.342 0.01936 *
## x2 5.83878 4.35228 1.342 0.18005
## x3 34.34425 16.18568 2.122 0.03410 *
## as.factor(x4)1 -37.87554 35.81522 -1.058 0.29053
## as.factor(x4)2 -41.94412 34.77820 -1.206 0.22809
## x5 4.89823 1.38143 3.546 0.00041 ***
## x1:x2 2.03861 0.03725 54.722 < 2e-16 ***
## x1:x3 -0.05903 0.04581 -1.288 0.19791
## x1:x5 -0.04054 0.01233 -3.288 0.00104 **
## x2:x3 -0.37233 0.46053 -0.808 0.41900
## x2:as.factor(x4)1 0.91538 1.07273 0.853 0.39369
## x2:as.factor(x4)2 0.70708 1.03590 0.683 0.49503
## x3:x5 -0.12350 0.15882 -0.778 0.43697
## as.factor(x4)1:x5 -0.36641 0.35559 -1.030 0.30307
## as.factor(x4)2:x5 -0.41141 0.34521 -1.192 0.23365
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.64 on 984 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 4.99e+04 on 15 and 984 DF, p-value: < 2.2e-16
4. Remove x2*x3
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * x5 + x2 * as.factor(x4) + x3 * x5 + as.factor(x4) *
## x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.2985 -3.7530 0.0978 3.8673 18.4095
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 547.90882 138.25893 3.963 7.94e-05 ***
## x1 -2.92455 1.23580 -2.367 0.018148 *
## x2 4.56079 4.05437 1.125 0.260902
## x3 34.56488 16.18054 2.136 0.032909 *
## as.factor(x4)1 -36.32265 35.75739 -1.016 0.309970
## as.factor(x4)2 -40.48493 34.72523 -1.166 0.243951
## x5 4.89350 1.38118 3.543 0.000414 ***
## x1:x2 2.04314 0.03682 55.487 < 2e-16 ***
## x1:x3 -0.05900 0.04581 -1.288 0.198043
## x1:x5 -0.04077 0.01232 -3.309 0.000970 ***
## x2:as.factor(x4)1 0.97040 1.07038 0.907 0.364846
## x2:as.factor(x4)2 0.76183 1.03350 0.737 0.461216
## x3:x5 -0.11755 0.15862 -0.741 0.458811
## as.factor(x4)1:x5 -0.35048 0.35498 -0.987 0.323724
## as.factor(x4)2:x5 -0.39629 0.34465 -1.150 0.250479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.639 on 985 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 5.348e+04 on 14 and 985 DF, p-value: < 2.2e-16
5. Remove x2*x4
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * x5 + x3 * x5 + as.factor(x4) * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.3851 -3.7656 0.0504 3.8684 18.4020
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 544.50618 138.12098 3.942 8.64e-05 ***
## x1 -2.91892 1.23478 -2.364 0.018275 *
## x2 5.61552 3.86277 1.454 0.146332
## x3 33.77689 16.14753 2.092 0.036714 *
## as.factor(x4)1 -31.90684 35.36297 -0.902 0.367135
## as.factor(x4)2 -36.19666 34.35465 -1.054 0.292316
## x5 4.86807 1.37991 3.528 0.000438 ***
## x1:x2 2.04054 0.03669 55.617 < 2e-16 ***
## x1:x3 -0.05814 0.04574 -1.271 0.203966
## x1:x5 -0.04072 0.01231 -3.307 0.000976 ***
## x3:x5 -0.12446 0.15832 -0.786 0.431976
## as.factor(x4)1:x5 -0.31701 0.35250 -0.899 0.368709
## as.factor(x4)2:x5 -0.36205 0.34230 -1.058 0.290450
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.636 on 987 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 6.247e+04 on 12 and 987 DF, p-value: < 2.2e-16
6. Remove x3*x5
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * x5 + as.factor(x4) * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.3649 -3.7606 0.0664 3.8277 18.7300
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 518.64992 134.12189 3.867 0.000117 ***
## x1 -2.92859 1.23448 -2.372 0.017867 *
## x2 5.69140 3.86082 1.474 0.140762
## x3 45.89739 4.79853 9.565 < 2e-16 ***
## as.factor(x4)1 -29.65644 35.24010 -0.842 0.400241
## as.factor(x4)2 -34.63480 34.29053 -1.010 0.312723
## x5 4.60305 1.33784 3.441 0.000605 ***
## x1:x2 2.03972 0.03667 55.628 < 2e-16 ***
## x1:x3 -0.05501 0.04555 -1.208 0.227473
## x1:x5 -0.04075 0.01231 -3.311 0.000964 ***
## as.factor(x4)1:x5 -0.29449 0.35127 -0.838 0.402031
## as.factor(x4)2:x5 -0.34638 0.34165 -1.014 0.310906
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.634 on 988 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 6.818e+04 on 11 and 988 DF, p-value: < 2.2e-16
7. Remove x4*x5
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + as.factor(x4) + x5 + x1 * x2 +
## x1 * x3 + x1 * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.3505 -3.7964 0.0813 3.8120 18.8757
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 487.74797 130.49176 3.738 0.000196 ***
## x1 -2.92156 1.23323 -2.369 0.018026 *
## x2 5.64258 3.85771 1.463 0.143874
## x3 45.89092 4.79617 9.568 < 2e-16 ***
## as.factor(x4)1 -0.08795 0.75128 -0.117 0.906829
## as.factor(x4)2 0.12132 0.72904 0.166 0.867866
## x5 4.29407 1.30153 3.299 0.001004 **
## x1:x2 2.04035 0.03663 55.696 < 2e-16 ***
## x1:x3 -0.05500 0.04553 -1.208 0.227322
## x1:x5 -0.04067 0.01230 -3.308 0.000974 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.632 on 990 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 8.341e+04 on 9 and 990 DF, p-value: < 2.2e-16
8. Remove x4
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + x5 + x1 * x2 + x1 * x3 + x1 *
## x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.4713 -3.7879 0.0671 3.8169 18.9612
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 483.40821 130.13689 3.715 0.000215 ***
## x1 -2.88133 1.23004 -2.342 0.019354 *
## x2 5.56036 3.85042 1.444 0.149029
## x3 45.92148 4.79130 9.584 < 2e-16 ***
## x5 4.25025 1.29800 3.274 0.001095 **
## x1:x2 2.04113 0.03656 55.828 < 2e-16 ***
## x1:x3 -0.05528 0.04549 -1.215 0.224527
## x1:x5 -0.04027 0.01226 -3.284 0.001061 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.627 on 992 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 1.074e+05 on 7 and 992 DF, p-value: < 2.2e-16
9. Remove x1*x3
##
## Call:
## lm(formula = y ~ (x1 + x2 + x3 + x5 + x1 * x2 + x1 * x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.4956 -3.7021 0.0265 3.8025 18.7154
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 492.44574 129.95546 3.789 0.00016 ***
## x1 -2.97051 1.22815 -2.419 0.01576 *
## x2 5.48855 3.85089 1.425 0.15439
## x3 40.11084 0.31261 128.310 < 2e-16 ***
## x5 4.22101 1.29809 3.252 0.00119 **
## x1:x2 2.04184 0.03657 55.841 < 2e-16 ***
## x1:x5 -0.04002 0.01226 -3.263 0.00114 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.628 on 993 degrees of freedom
## Multiple R-squared: 0.9987, Adjusted R-squared: 0.9987
## F-statistic: 1.253e+05 on 6 and 993 DF, p-value: < 2.2e-16
library(car)
# NOTE(review): full.mod.final is not assigned in the code shown here;
# presumably it is the last model printed above (after step 9) — confirm.
residualPlots(full.mod.final, type = "rstandard", layout = c(1, 1), test = FALSE)
qqPlot(full.mod.final)
# Refit with a second-degree polynomial in x3 in place of the linear x3 term.
full.mod.final.1 <- lm(y ~ (x1 + x2 + poly(x3, 2) + x5 + x1 * x2 + x1 * x5))
summary(full.mod.final.1)
##
## Call:
## lm(formula = y ~ (x1 + x2 + poly(x3, 2) + x5 + x1 * x2 + x1 *
## x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3960 -3.2202 -0.0167 3.0723 14.6984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 434.19265 111.11736 3.908 9.96e-05 ***
## x1 -1.61351 1.05015 -1.536 0.1247
## x2 7.80546 3.28759 2.374 0.0178 *
## poly(x3, 2)1 723.63582 4.81312 150.347 < 2e-16 ***
## poly(x3, 2)2 92.94391 4.81725 19.294 < 2e-16 ***
## x5 2.85703 1.10972 2.575 0.0102 *
## x1:x2 2.01997 0.03122 64.709 < 2e-16 ***
## x1:x5 -0.02673 0.01049 -2.549 0.0109 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.802 on 992 degrees of freedom
## Multiple R-squared: 0.999, Adjusted R-squared: 0.999
## F-statistic: 1.476e+05 on 7 and 992 DF, p-value: < 2.2e-16
residualPlots(full.mod.final.1, type = "rstandard",layout = c(1,1) ,test = FALSE)
qqPlot(full.mod.final.1)
Not too bad…
# Simpler competitor: main effects of x1, x2 and x3 only, no interactions.
alternate.model <- lm(y ~ x1 + x2 + x3)
summary(alternate.model)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -68.980 -5.853 -0.107 5.979 73.821
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -143.07792 5.69878 -25.11 <2e-16 ***
## x1 3.06499 0.05173 59.25 <2e-16 ***
## x2 220.02667 0.52479 419.27 <2e-16 ***
## x3 39.88070 0.63476 62.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.44 on 996 degrees of freedom
## Multiple R-squared: 0.9945, Adjusted R-squared: 0.9945
## F-statistic: 6.036e+04 on 3 and 996 DF, p-value: < 2.2e-16
…how about our assumptions?
# Diagnostic plots for the main-effects model.
residualPlots(alternate.model, type = "rstandard", layout = c(1, 1), test = FALSE)
qqPlot(alternate.model)
# Same model with a second-degree polynomial in x3 instead of the linear term.
alternate.model.trans <- lm(y ~ x1 + x2 + poly(x3, 2))
summary(alternate.model.trans)
##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, 2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.282 -5.556 0.339 5.404 75.511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -63.13755 5.26038 -12.002 <2e-16 ***
## x1 3.07136 0.04955 61.990 <2e-16 ***
## x2 220.04566 0.50259 437.819 <2e-16 ***
## poly(x3, 2)1 719.78684 10.97089 65.609 <2e-16 ***
## poly(x3, 2)2 104.50250 10.95938 9.535 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.96 on 995 degrees of freedom
## Multiple R-squared: 0.995, Adjusted R-squared: 0.995
## F-statistic: 4.938e+04 on 4 and 995 DF, p-value: < 2.2e-16
And now look at our assumptions.
residualPlots(alternate.model.trans, type = "rstandard",layout = c(1,1) ,test = FALSE)
qqPlot(alternate.model.trans)
Let's say we needed to pick between these two models. Which is best?
# Re-print the first candidate's fit for side-by-side comparison.
summary(full.mod.final.1)
##
## Call:
## lm(formula = y ~ (x1 + x2 + poly(x3, 2) + x5 + x1 * x2 + x1 *
## x5))
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3960 -3.2202 -0.0167 3.0723 14.6984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 434.19265 111.11736 3.908 9.96e-05 ***
## x1 -1.61351 1.05015 -1.536 0.1247
## x2 7.80546 3.28759 2.374 0.0178 *
## poly(x3, 2)1 723.63582 4.81312 150.347 < 2e-16 ***
## poly(x3, 2)2 92.94391 4.81725 19.294 < 2e-16 ***
## x5 2.85703 1.10972 2.575 0.0102 *
## x1:x2 2.01997 0.03122 64.709 < 2e-16 ***
## x1:x5 -0.02673 0.01049 -2.549 0.0109 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.802 on 992 degrees of freedom
## Multiple R-squared: 0.999, Adjusted R-squared: 0.999
## F-statistic: 1.476e+05 on 7 and 992 DF, p-value: < 2.2e-16
# Re-print the second candidate's fit for side-by-side comparison.
summary(alternate.model.trans)
##
## Call:
## lm(formula = y ~ x1 + x2 + poly(x3, 2))
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.282 -5.556 0.339 5.404 75.511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -63.13755 5.26038 -12.002 <2e-16 ***
## x1 3.07136 0.04955 61.990 <2e-16 ***
## x2 220.04566 0.50259 437.819 <2e-16 ***
## poly(x3, 2)1 719.78684 10.97089 65.609 <2e-16 ***
## poly(x3, 2)2 104.50250 10.95938 9.535 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.96 on 995 degrees of freedom
## Multiple R-squared: 0.995, Adjusted R-squared: 0.995
## F-statistic: 4.938e+04 on 4 and 995 DF, p-value: < 2.2e-16
# Comparison table: one row per criterion, one column per candidate model.
# Column 1 is full.mod.final.1, column 2 is alternate.model.trans.
criteria <- rbind(
  "R^2" = c(
    summary(full.mod.final.1)$r.squared,
    summary(alternate.model.trans)$r.squared
  ),
  "Adj R^2" = c(
    summary(full.mod.final.1)$adj.r.squared,
    summary(alternate.model.trans)$adj.r.squared
  ),
  "AIC" = c(AIC(full.mod.final.1), AIC(alternate.model.trans)),
  "BIC" = c(BIC(full.mod.final.1), BIC(alternate.model.trans))
)
colnames(criteria) <- c("1st Model", "2nd Model")
print(criteria)
## 1st Model 2nd Model
## R^2 0.9990406 0.9949881
## Adj R^2 0.9990338 0.9949679
## AIC 5985.7889607 7633.0624780
## BIC 6029.9587582 7662.5090097