CHAPTER 7 OUTPUT

This shows the outputs from Chapter 7 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.

bridge <- read_excel("MARData.xlsx", sheet = "bridge")
attach(bridge)
head(bridge)
## # A tibble: 6 x 7
##    Case  Time DArea CCost  Dwgs Length Spans
##   <dbl> <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl>
## 1     1  78.8  3.6   82.4     6     90     1
## 2     2 310.   5.33 422.     12    126     2
## 3     3 184.   6.29 180.      9     78     1
## 4     4  69.6  2.2  100       5     60     1
## 5     5  68.8  1.44 103       5     60     1
## 6     6  95.7  5.4  134.      5     60     1
#Figure 7.1 on page 235
m1 <- lm(log(Time)~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans))
logDArea <- log(DArea)
logCCost <- log(CCost)
logDwgs <- log(Dwgs)
logLength <- log(Length)
logSpans <- log(Spans)
X <- cbind(logDArea,logCCost,logDwgs,logLength,logSpans)
#install.packages("leaps")
library(leaps)
b <- regsubsets(as.matrix(X),log(Time))
rs <- summary(b)
par(mfrow = c(1,2))
plot(1:5,rs$adjr2,xlab = "Subset Size",ylab = "Adjusted R-squared")
library(car)
##subsets(b,statistic = c("adjr2"))

#Table 7.1 on page 235
#Calculate adjusted R-squared
rs$adjr2
## [1] 0.7022401 0.7530191 0.7582178 0.7534273 0.7475037
om1 <- lm(log(Time)~log(Dwgs))
om2 <- lm(log(Time)~log(Dwgs)+log(Spans))
om3 <- lm(log(Time)~log(Dwgs)+log(Spans)+log(CCost))
om4 <- lm(log(Time)~log(Dwgs)+log(Spans)+log(CCost)+log(DArea))
om5 <- m1

#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) + 1
#Calculate AIC
extractAIC(om1,k = 2)
## [1]   2.00000 -94.89754
#Calculate AICc
extractAIC(om1,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   2.585366 -94.312171
#Calculate BIC
extractAIC(om1,k = log(n))
## [1]   2.00000 -91.28421
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1]    3.0000 -102.3703
#Calculate AICc
extractAIC(om2,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]    4.0000 -101.3703
#Calculate BIC
extractAIC(om2,k = log(n))
## [1]   3.00000 -96.95036
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1]    4.0000 -102.4121
#Calculate AICc
extractAIC(om3,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]    5.538462 -100.873608
#Calculate BIC
extractAIC(om3,k = log(n))
## [1]   4.00000 -95.18542
#Subset size = 4
npar <- length(om4$coefficients) +1
#Calculate AIC
extractAIC(om4,k = 2)
## [1]    5.0000 -100.6403
#Calculate AICc
extractAIC(om4,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   7.210526 -98.429816
#Calculate BIC
extractAIC(om4,k = log(n))
## [1]   5.00000 -91.60703
#Subset size = 5
npar <- length(om5$coefficients) +1
#Calculate AIC
extractAIC(om5,k = 2)
## [1]   6.00000 -98.71136
#Calculate AICc
extractAIC(om5,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   9.027027 -95.684330
#Calculate BIC
extractAIC(om5,k = log(n))
## [1]   6.00000 -87.87138
#Regression output on pages 235 and 236
summary(om2)
## 
## Call:
## lm(formula = log(Time) ~ log(Dwgs) + log(Spans))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.68649 -0.24728 -0.05988  0.26050  0.63759 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.66173    0.26871   9.905 1.49e-12 ***
## log(Dwgs)    1.04163    0.15420   6.755 3.26e-08 ***
## log(Spans)   0.28530    0.09095   3.137  0.00312 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3105 on 42 degrees of freedom
## Multiple R-squared:  0.7642, Adjusted R-squared:  0.753 
## F-statistic: 68.08 on 2 and 42 DF,  p-value: 6.632e-14
summary(om3)
## 
## Call:
## lm(formula = log(Time) ~ log(Dwgs) + log(Spans) + log(CCost))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.69415 -0.17456 -0.03566  0.22739  0.64945 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.3317     0.3577   6.519  7.9e-08 ***
## log(Dwgs)     0.8356     0.2135   3.914 0.000336 ***
## log(Spans)    0.1963     0.1107   1.773 0.083710 .  
## log(CCost)    0.1483     0.1075   1.380 0.175212    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3072 on 41 degrees of freedom
## Multiple R-squared:  0.7747, Adjusted R-squared:  0.7582 
## F-statistic: 46.99 on 3 and 41 DF,  p-value: 2.484e-13
#Output from R on page 237
backAIC <- step(m1,direction = "backward", data = bridge)
## Start:  AIC=-98.71
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Length) + 
##     log(Spans)
## 
##               Df Sum of Sq    RSS      AIC
## - log(Length)  1   0.00607 3.8497 -100.640
## - log(DArea)   1   0.01278 3.8564 -100.562
## <none>                     3.8436  -98.711
## - log(CCost)   1   0.18162 4.0252  -98.634
## - log(Spans)   1   0.26616 4.1098  -97.698
## - log(Dwgs)    1   1.45358 5.2972  -86.277
## 
## Step:  AIC=-100.64
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Spans)
## 
##              Df Sum of Sq    RSS      AIC
## - log(DArea)  1   0.01958 3.8693 -102.412
## <none>                    3.8497 -100.640
## - log(CCost)  1   0.18064 4.0303 -100.577
## - log(Spans)  1   0.31501 4.1647  -99.101
## - log(Dwgs)   1   1.44946 5.2991  -88.260
## 
## Step:  AIC=-102.41
## log(Time) ~ log(CCost) + log(Dwgs) + log(Spans)
## 
##              Df Sum of Sq    RSS      AIC
## <none>                    3.8693 -102.412
## - log(CCost)  1   0.17960 4.0488 -102.370
## - log(Spans)  1   0.29656 4.1658 -101.089
## - log(Dwgs)   1   1.44544 5.3147  -90.128
backBIC <- step(m1,direction = "backward", data = bridge, k = log(n))
## Start:  AIC=-87.87
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Length) + 
##     log(Spans)
## 
##               Df Sum of Sq    RSS     AIC
## - log(Length)  1   0.00607 3.8497 -91.607
## - log(DArea)   1   0.01278 3.8564 -91.529
## - log(CCost)   1   0.18162 4.0252 -89.600
## - log(Spans)   1   0.26616 4.1098 -88.665
## <none>                     3.8436 -87.871
## - log(Dwgs)    1   1.45358 5.2972 -77.244
## 
## Step:  AIC=-91.61
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Spans)
## 
##              Df Sum of Sq    RSS     AIC
## - log(DArea)  1   0.01958 3.8693 -95.185
## - log(CCost)  1   0.18064 4.0303 -93.350
## - log(Spans)  1   0.31501 4.1647 -91.874
## <none>                    3.8497 -91.607
## - log(Dwgs)   1   1.44946 5.2991 -81.034
## 
## Step:  AIC=-95.19
## log(Time) ~ log(CCost) + log(Dwgs) + log(Spans)
## 
##              Df Sum of Sq    RSS     AIC
## - log(CCost)  1   0.17960 4.0488 -96.950
## - log(Spans)  1   0.29656 4.1658 -95.669
## <none>                    3.8693 -95.185
## - log(Dwgs)   1   1.44544 5.3147 -84.708
## 
## Step:  AIC=-96.95
## log(Time) ~ log(Dwgs) + log(Spans)
## 
##              Df Sum of Sq    RSS     AIC
## <none>                    4.0488 -96.950
## - log(Spans)  1    0.9487 4.9975 -91.284
## - log(Dwgs)   1    4.3989 8.4478 -67.661
#Output from R on page 238
mint <- lm(log(Time)~1,data = bridge)
forwardAIC <- step(mint,scope = list(lower = ~1, 
upper = ~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans)),
direction = "forward", data = bridge)
## Start:  AIC=-41.35
## log(Time) ~ 1
## 
##               Df Sum of Sq     RSS     AIC
## + log(Dwgs)    1   12.1765  4.9975 -94.898
## + log(CCost)   1   11.6147  5.5593 -90.104
## + log(DArea)   1   10.2943  6.8797 -80.514
## + log(Length)  1   10.0120  7.1620 -78.704
## + log(Spans)   1    8.7262  8.4478 -71.274
## <none>                     17.1740 -41.347
## 
## Step:  AIC=-94.9
## log(Time) ~ log(Dwgs)
## 
##               Df Sum of Sq    RSS      AIC
## + log(Spans)   1   0.94866 4.0488 -102.370
## + log(CCost)   1   0.83170 4.1658 -101.089
## + log(Length)  1   0.66914 4.3284  -99.366
## + log(DArea)   1   0.47568 4.5218  -97.399
## <none>                     4.9975  -94.898
## 
## Step:  AIC=-102.37
## log(Time) ~ log(Dwgs) + log(Spans)
## 
##               Df Sum of Sq    RSS     AIC
## + log(CCost)   1  0.179598 3.8693 -102.41
## <none>                     4.0488 -102.37
## + log(DArea)   1  0.018535 4.0303 -100.58
## + log(Length)  1  0.016924 4.0319 -100.56
## 
## Step:  AIC=-102.41
## log(Time) ~ log(Dwgs) + log(Spans) + log(CCost)
## 
##               Df Sum of Sq    RSS     AIC
## <none>                     3.8693 -102.41
## + log(DArea)   1  0.019578 3.8497 -100.64
## + log(Length)  1  0.012868 3.8564 -100.56
forwardBIC <- step(mint,scope = list(lower = ~1, 
upper = ~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans)),
direction = "forward", data = bridge,k = log(n))
## Start:  AIC=-39.54
## log(Time) ~ 1
## 
##               Df Sum of Sq     RSS     AIC
## + log(Dwgs)    1   12.1765  4.9975 -91.284
## + log(CCost)   1   11.6147  5.5593 -86.490
## + log(DArea)   1   10.2943  6.8797 -76.901
## + log(Length)  1   10.0120  7.1620 -75.091
## + log(Spans)   1    8.7262  8.4478 -67.661
## <none>                     17.1740 -39.540
## 
## Step:  AIC=-91.28
## log(Time) ~ log(Dwgs)
## 
##               Df Sum of Sq    RSS     AIC
## + log(Spans)   1   0.94866 4.0488 -96.950
## + log(CCost)   1   0.83170 4.1658 -95.669
## + log(Length)  1   0.66914 4.3284 -93.946
## + log(DArea)   1   0.47568 4.5218 -91.979
## <none>                     4.9975 -91.284
## 
## Step:  AIC=-96.95
## log(Time) ~ log(Dwgs) + log(Spans)
## 
##               Df Sum of Sq    RSS     AIC
## <none>                     4.0488 -96.950
## + log(CCost)   1  0.179598 3.8693 -95.185
## + log(DArea)   1  0.018535 4.0303 -93.350
## + log(Length)  1  0.016924 4.0319 -93.332
detach(bridge)
prostateTraining <- read_excel("MARData.xlsx", sheet = "prostateTraining")
attach(prostateTraining)
head(prostateTraining)
## # A tibble: 6 x 10
##   original_case lcavol lweight   age  lbph   svi   lcp gleason pgg45   lpsa
##           <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl>
## 1             1 -0.580    2.77    50 -1.39     0 -1.39       6     0 -0.431
## 2             2 -0.994    3.32    58 -1.39     0 -1.39       6     0 -0.163
## 3             3 -0.511    2.69    74 -1.39     0 -1.39       7    20 -0.163
## 4             4 -1.20     3.28    58 -1.39     0 -1.39       6     0 -0.163
## 5             5  0.751    3.43    62 -1.39     0 -1.39       6     0  0.372
## 6             6 -1.05     3.23    50 -1.39     0 -1.39       6     0  0.765
#Figure 7.2 on page 240
pairs(lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45)

#Figure 7.3 on page 241
m1 <- lm(lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45)
StanRes1 <- rstandard(m1)
par(mfrow = c(3,3))
plot(lcavol,StanRes1, ylab = "Standardized Residuals")
plot(lweight,StanRes1, ylab = "Standardized Residuals")
plot(age,StanRes1, ylab = "Standardized Residuals")
plot(lbph,StanRes1, ylab = "Standardized Residuals")
plot(svi,StanRes1, ylab = "Standardized Residuals")
plot(lcp,StanRes1, ylab = "Standardized Residuals")
plot(gleason,StanRes1, ylab = "Standardized Residuals")
plot(pgg45,StanRes1, ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1, ylab = "Standardized Residuals",xlab = "Fitted values")

#Figure 7.4 on page 241
par(mfrow = c(1,1))
plot(m1$fitted.values,lpsa,xlab = "Fitted Values")
abline(lsfit(m1$fitted.values,lpsa))

#Figure 7.5 on page 242
par(mfrow = c(2,2))
plot(m1)
abline(v = 2*9/67,lty = 2)

#Regression output on pages 242 and 243
summary(m1)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight + age + lbph + svi + lcp + 
##     gleason + pgg45)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.64870 -0.34147 -0.05424  0.44941  1.48675 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.429170   1.553588   0.276  0.78334    
## lcavol       0.576543   0.107438   5.366 1.47e-06 ***
## lweight      0.614020   0.223216   2.751  0.00792 ** 
## age         -0.019001   0.013612  -1.396  0.16806    
## lbph         0.144848   0.070457   2.056  0.04431 *  
## svi          0.737209   0.298555   2.469  0.01651 *  
## lcp         -0.206324   0.110516  -1.867  0.06697 .  
## gleason     -0.029503   0.201136  -0.147  0.88389    
## pgg45        0.009465   0.005447   1.738  0.08755 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7123 on 58 degrees of freedom
## Multiple R-squared:  0.6944, Adjusted R-squared:  0.6522 
## F-statistic: 16.47 on 8 and 58 DF,  p-value: 2.042e-12
#Figure 7.6 page 243
library(alr3)
par(mfrow = c(3,3))
mmp(m1,lcavol,key = NULL)
mmp(m1,lweight,key = NULL)
mmp(m1,age,key = NULL)
mmp(m1,lbph,key = NULL)
mmp(m1,lcp,key = NULL)
mmp(m1,gleason,key = NULL)
mmp(m1,pgg45,key = NULL)
mmp(m1,m1$fitted.values,xlab = "Fitted Values",key = NULL)

#R output on page 244
library(car)
vif(m1)
##   lcavol  lweight      age     lbph      svi      lcp  gleason    pgg45 
## 2.318496 1.472295 1.356604 1.383429 2.045313 3.117451 2.644480 3.313288
#Figure 7.7 on page 244
library(car)
par(mfrow = c(2,4))

avPlot(m1,variable = lcavol,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = lweight,ask = FALSE,identify.points = TRUE, main = "")
avPlot(m1,variable = age,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = lbph,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = svi,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = lcp,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = gleason,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = pgg45,ask = FALSE,identify.points = FALSE, main = "")

#Figure 7.8 on page 245
X <- cbind(lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45)
library(leaps)
b <- regsubsets(as.matrix(X),lpsa)
rs <- summary(b)
par(mfrow = c(1,2))
library(car)
#subsets(b,statistic = c("adjr2"),min.size = 1,max.size = 4,cex.subsets = 0.7)
#subsets(b,statistic = c("adjr2"),min.size = 5,max.size = 8,cex.subsets = 0.7,legend = FALSE)

#Table 7.2 on page 245
#Calculate adjusted R-squared
rs$adjr2
## [1] 0.5304013 0.6027172 0.6201758 0.6371877 0.6396181 0.6510880 0.6579833
## [8] 0.6522155
om1 <- lm(lpsa~lcavol)
om2 <- lm(lpsa~lcavol+lweight)
om3 <- lm(lpsa~lcavol+lweight+svi)
om4 <- lm(lpsa~lcavol+lweight+svi+lbph)
om5 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45)
om6 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45+lcp)
om7 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45+lcp+age)
om8 <- m1
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) +1
#Calculate AIC
extractAIC(om1,k = 2)
## [1]   2.00000 -23.37361
#Calculate AICc
extractAIC(om1,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   2.380952 -22.992657
#Calculate BIC
extractAIC(om1,k = log(n))
## [1]   2.00000 -18.96422
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1]   3.0000 -33.6168
#Calculate AICc
extractAIC(om2,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   3.645161 -32.971635
#Calculate BIC
extractAIC(om2,k = log(n))
## [1]   3.00000 -27.00272
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1]   4.00000 -35.68291
#Calculate AICc
extractAIC(om3,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   4.983607 -34.699307
#Calculate BIC
extractAIC(om3,k = log(n))
## [1]   4.00000 -26.86414
#Subset size = 4
npar <- length(om4$coefficients) +1
#Calculate AIC
extractAIC(om4,k = 2)
## [1]   5.00000 -37.82507
#Calculate AICc
extractAIC(om4,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   6.40000 -36.42507
#Calculate BIC
extractAIC(om4,k = log(n))
## [1]   5.00000 -26.80161
#Subset size = 5
npar <- length(om5$coefficients) +1
#Calculate AIC
extractAIC(om5,k = 2)
## [1]   6.00000 -37.36485
#Calculate AICc
extractAIC(om5,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   7.898305 -35.466547
#Calculate BIC
extractAIC(om5,k = log(n))
## [1]   6.0000 -24.1367
#Subset size = 6
npar <- length(om6$coefficients) +1
#Calculate AIC
extractAIC(om6,k = 2)
## [1]   7.00000 -38.63939
#Calculate AICc
extractAIC(om6,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]   9.482759 -36.156635
#Calculate BIC
extractAIC(om6,k = log(n))
## [1]   7.00000 -23.20654
#Subset size = 7
npar <- length(om7$coefficients) +1
#Calculate AIC
extractAIC(om7,k = 2)
## [1]   8.00000 -39.10281
#Calculate AICc
extractAIC(om7,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]  11.15789 -35.94492
#Calculate BIC
extractAIC(om7,k = log(n))
## [1]   8.00000 -21.46527
#Subset size = 8
npar <- length(om8$coefficients) +1
#Calculate AIC
extractAIC(om8,k = 2)
## [1]   9.00000 -37.12766
#Calculate AICc
extractAIC(om8,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]  12.92857 -33.19909
#Calculate BIC
extractAIC(om8,k = log(n))
## [1]   9.00000 -17.28543
#Regression output on page 246
summary(om2)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.58852 -0.44174  0.01304  0.52613  1.93127 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.04944    0.72904  -1.439 0.154885    
## lcavol       0.62761    0.07906   7.938 4.14e-11 ***
## lweight      0.73838    0.20613   3.582 0.000658 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7613 on 64 degrees of freedom
## Multiple R-squared:  0.6148, Adjusted R-squared:  0.6027 
## F-statistic: 51.06 on 2 and 64 DF,  p-value: 5.54e-14
summary(om4)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8709 -0.3903 -0.0172  0.5676  1.4227 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.32592    0.77998  -0.418   0.6775    
## lcavol       0.50552    0.09256   5.461 8.85e-07 ***
## lweight      0.53883    0.22071   2.441   0.0175 *  
## svi          0.67185    0.27323   2.459   0.0167 *  
## lbph         0.14001    0.07041   1.988   0.0512 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7275 on 62 degrees of freedom
## Multiple R-squared:  0.6592, Adjusted R-squared:  0.6372 
## F-statistic: 29.98 on 4 and 62 DF,  p-value: 6.911e-14
summary(om7)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph + pgg45 + lcp + 
##     age)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.65425 -0.34471 -0.05615  0.44380  1.48952 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.259062   1.025170   0.253   0.8014    
## lcavol       0.573930   0.105069   5.462 9.88e-07 ***
## lweight      0.619209   0.218560   2.833   0.0063 ** 
## svi          0.741781   0.294451   2.519   0.0145 *  
## lbph         0.144426   0.069812   2.069   0.0430 *  
## pgg45        0.008945   0.004099   2.182   0.0331 *  
## lcp         -0.205417   0.109424  -1.877   0.0654 .  
## age         -0.019480   0.013105  -1.486   0.1425    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7064 on 59 degrees of freedom
## Multiple R-squared:  0.6943, Adjusted R-squared:  0.658 
## F-statistic: 19.14 on 7 and 59 DF,  p-value: 4.496e-13
detach(prostateTraining)
prostateTest <- read_excel("MARData.xlsx", sheet = "prostateTest")
attach(prostateTest)
head(prostateTest)
## # A tibble: 6 x 10
##   original_case lcavol lweight   age   lbph   svi    lcp gleason pgg45  lpsa
##           <dbl>  <dbl>   <dbl> <dbl>  <dbl> <dbl>  <dbl>   <dbl> <dbl> <dbl>
## 1             7  0.737    3.47    64  0.615     0 -1.39        6     0 0.765
## 2             9 -0.777    3.54    47 -1.39      0 -1.39        6     0 1.05 
## 3            10  0.223    3.24    63 -1.39      0 -1.39        6     0 1.05 
## 4            15  1.21     3.44    57 -1.39      0 -0.431       7     5 1.40 
## 5            22  2.06     3.50    60  1.47      0  1.35        7    20 1.66 
## 6            25  0.385    3.67    69  1.60      0 -1.39        6     0 1.73
#Regression output on page 247
om2 <- lm(lpsa~lcavol+lweight)
summary(om2)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3061 -0.4442 -0.0595  0.3221  1.5651 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.7354     0.9572   0.768    0.449    
## lcavol        0.7478     0.1294   5.778 3.81e-06 ***
## lweight       0.1968     0.2473   0.796    0.433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.721 on 27 degrees of freedom
## Multiple R-squared:  0.5542, Adjusted R-squared:  0.5212 
## F-statistic: 16.78 on 2 and 27 DF,  p-value: 1.833e-05
om4 <- lm(lpsa~lcavol+lweight+svi+lbph)
summary(om4)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.08087 -0.40229 -0.05645  0.49322  1.01647 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.52957    0.93066   0.569   0.5744    
## lcavol       0.59555    0.12655   4.706 7.98e-05 ***
## lweight      0.26215    0.24492   1.070   0.2947    
## svi          0.95051    0.32214   2.951   0.0068 ** 
## lbph        -0.05337    0.09237  -0.578   0.5686    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6445 on 25 degrees of freedom
## Multiple R-squared:  0.6703, Adjusted R-squared:  0.6175 
## F-statistic:  12.7 on 4 and 25 DF,  p-value: 8.894e-06
om7 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45+lcp+age)
summary(om7)
## 
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph + pgg45 + lcp + 
##     age)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.14292 -0.36227  0.04168  0.48056  0.99825 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.873329   1.490194   0.586  0.56381   
## lcavol       0.481237   0.165881   2.901  0.00828 **
## lweight      0.313601   0.257112   1.220  0.23549   
## svi          0.619278   0.423109   1.464  0.15744   
## lbph        -0.090696   0.121368  -0.747  0.46281   
## pgg45        0.001316   0.006370   0.207  0.83819   
## lcp          0.180850   0.166970   1.083  0.29048   
## age         -0.004958   0.022220  -0.223  0.82550   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6589 on 22 degrees of freedom
## Multiple R-squared:  0.6967, Adjusted R-squared:  0.6001 
## F-statistic: 7.218 on 7 and 22 DF,  p-value: 0.0001546
detach(prostateTest)
prostateTraining <- read_excel("MARData.xlsx", sheet = "prostateTraining")
attach(prostateTraining)
head(prostateTraining)
## # A tibble: 6 x 10
##   original_case lcavol lweight   age  lbph   svi   lcp gleason pgg45   lpsa
##           <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl>
## 1             1 -0.580    2.77    50 -1.39     0 -1.39       6     0 -0.431
## 2             2 -0.994    3.32    58 -1.39     0 -1.39       6     0 -0.163
## 3             3 -0.511    2.69    74 -1.39     0 -1.39       7    20 -0.163
## 4             4 -1.20     3.28    58 -1.39     0 -1.39       6     0 -0.163
## 5             5  0.751    3.43    62 -1.39     0 -1.39       6     0  0.372
## 6             6 -1.05     3.23    50 -1.39     0 -1.39       6     0  0.765
#Figure 7.9 on page 249
X <- cbind(lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45)
library(leaps)
b <- regsubsets(as.matrix(X),lpsa)
rs <- summary(b)
par(mfrow = c(1,2))
library(car)
#subsets(b,statistic = c("adjr2"),main = "With Case 45",min.size = 1,max.size = 5,cex.subsets = 0.7)

m2 <- update(m1, subset = (1:67)[-c(45)])
lcavol1 <- lcavol[-c(45)]
lweight1 <- lweight[-c(45)]
age1 <- age[-c(45)]
lbph1 <- lbph[-c(45)]
svi1 <- svi[-c(45)]
lcp1 <- lcp[-c(45)]
gleason1 <- gleason[-c(45)]
pgg451 <- pgg45[-c(45)]
X <- cbind(lcavol1,lweight1,age1,lbph1,svi1,lcp1,gleason1,pgg451)

library(leaps)
b <- regsubsets(as.matrix(X),lpsa[-c(45)])
rs <- summary(b)
library(car)
subsets(b,statistic = c("adjr2"),main = "Without Case 45",min.size = 1,max.size = 5,cex.subsets = 0.7,legend = FALSE)
##          Abbreviation
## lcavol1          lcv1
## lweight1          lw1
## age1                a
## lbph1             lb1
## svi1                s
## lcp1             lcp1
## gleason1            g
## pgg451              p
detach(prostateTraining)

prostateAlldata <- read_excel("MARData.xlsx", sheet = "prostateAlldata")
attach(prostateAlldata)
head(prostateAlldata)
## # A tibble: 6 x 11
##    case lcavol lweight   age  lbph   svi   lcp gleason pgg45   lpsa train
##   <dbl>  <dbl>   <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl> <dbl>  <dbl> <chr>
## 1     1 -0.580    2.77    50 -1.39     0 -1.39       6     0 -0.431 T    
## 2     2 -0.994    3.32    58 -1.39     0 -1.39       6     0 -0.163 T    
## 3     3 -0.511    2.69    74 -1.39     0 -1.39       7    20 -0.163 T    
## 4     4 -1.20     3.28    58 -1.39     0 -1.39       6     0 -0.163 T    
## 5     5  0.751    3.43    62 -1.39     0 -1.39       6     0  0.372 T    
## 6     6 -1.05     3.23    50 -1.39     0 -1.39       6     0  0.765 T
#Figure 7.10 on page 249
# par(mfrow = c(1,1))
# plot(lweight[train == FALSE],lpsa[train == FALSE],pch = 2,col = 1,xlab = "lweight",ylab = "lpsa",ylim = c(-1,6),xlim = c(2,6.5))
# abline(lsfit(lweight[train == FALSE],lpsa[train == FALSE]),lty = 1,col = 1)
# points(lweight[train == TRUE],lpsa[train == TRUE],pch = 3,col = 2)
# abline(lsfit(lweight[train == TRUE],lpsa[train == TRUE]),lty = 2,col = 2)
# legend(4.5,2,legend = c("Training","Test"),pch = 3:2,col = 2:1,title = "Data Set")
# 
# detach(prostateAlldata)
# prostateTest <- read_excel("MARData.xlsx", sheet = "prostateTest")
# attach(prostateTest)
# head(prostateTest)
# 
# #Figure 7.11 on page 250
# m1 <- lm(lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45)
# library(car)
# par(mfrow = c(1,1))
# avp(m1,variable = lweight,ask = FALSE,identify.points = TRUE, main = "")
# 
# detach(prostateTest)

#################EXERCISES
#Exercise 7.5.1

library(alr3)
data(mantel)
attach(mantel)
head(mantel)
##    Y  X1   X2   X3
## 1  5   1 1004  6.0
## 2  6 200  806  7.3
## 3  8 -50 1058 11.0
## 4  9 909  100 13.0
## 5 11 506  505 13.1
#Output from R: correlations on page 253
X <- cbind(X1,X2,X3)
cor(X)
##            X1         X2         X3
## X1  1.0000000 -0.9999887  0.6858141
## X2 -0.9999887  1.0000000 -0.6826107
## X3  0.6858141 -0.6826107  1.0000000
#Figure 7.13 on page 254
library(leaps)
b <- regsubsets(as.matrix(X),Y)
rs <- summary(b)
par(mfrow = c(1,1))
library(car)
subsets(b,statistic = c("adjr2"),legend = FALSE)

##    Abbreviation
## X1           X1
## X2           X2
## X3           X3
#Table 7.4 on page 254
rs$adjr2
## [1] 0.8764847 1.0000000 1.0000000
om1 <- lm(Y~X3)
om2 <- lm(Y~X1+X2)
om3 <- lm(Y~X1+X2+X3)
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) +1
#Calculate AIC
extractAIC(om1,k = 2)
## [1]  2.0000000 -0.3087485
#Calculate BIC
extractAIC(om1,k = log(n))
## [1]  2.000000 -1.089873
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1]    3.0000 -287.7494
#Calculate BIC
extractAIC(om2,k = log(n))
## [1]    3.0000 -288.9211
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1]    4.0000 -285.7684
#Calculate BIC
extractAIC(om3,k = log(n))
## [1]    4.0000 -287.3306
#Forward selection on pages 253 and 254
mint <- lm(Y~1,data = mantel)
forwardAIC <- step(mint,scope = list(lower = ~1, 
upper = ~X1+X2+X3),
direction = "forward", data = mantel)
## Start:  AIC=9.59
## Y ~ 1
## 
##        Df Sum of Sq     RSS     AIC
## + X3    1   20.6879  2.1121 -0.3087
## + X1    1    8.6112 14.1888  9.2151
## + X2    1    8.5064 14.2936  9.2519
## <none>              22.8000  9.5866
## 
## Step:  AIC=-0.31
## Y ~ X3
## 
##        Df Sum of Sq    RSS      AIC
## <none>              2.1121 -0.30875
## + X2    1  0.066328 2.0458  1.53172
## + X1    1  0.064522 2.0476  1.53613
forwardBIC <- step(mint,scope = list(lower = ~1, 
upper = ~X1+X2+X3),
direction = "forward", data = mantel,k = log(n))
## Start:  AIC=9.2
## Y ~ 1
## 
##        Df Sum of Sq     RSS     AIC
## + X3    1   20.6879  2.1121 -1.0899
## + X1    1    8.6112 14.1888  8.4339
## + X2    1    8.5064 14.2936  8.4707
## <none>              22.8000  9.1961
## 
## Step:  AIC=-1.09
## Y ~ X3
## 
##        Df Sum of Sq    RSS      AIC
## <none>              2.1121 -1.08987
## + X2    1  0.066328 2.0458  0.36003
## + X1    1  0.064522 2.0476  0.36444
#Regression output on page 255
summary(om1)
## 
## Call:
## lm(formula = Y ~ X3)
## 
## Residuals:
##        1        2        3        4        5 
##  0.03434  0.13124 -0.43912 -0.82850  1.10203 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   0.7975     1.3452   0.593   0.5950  
## X3            0.6947     0.1282   5.421   0.0123 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8391 on 3 degrees of freedom
## Multiple R-squared:  0.9074, Adjusted R-squared:  0.8765 
## F-statistic: 29.38 on 1 and 3 DF,  p-value: 0.01232
summary(om2)
## 
## Call:
## lm(formula = Y ~ X1 + X2)
## 
## Residuals:
##          1          2          3          4          5 
## -2.545e-13  2.854e-13  5.287e-14 -2.928e-14 -5.448e-14 
## 
## Coefficients:
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept) -1.000e+03  7.388e-11 -1.354e+13   <2e-16 ***
## X1           1.000e+00  7.312e-14  1.368e+13   <2e-16 ***
## X2           1.000e+00  7.339e-14  1.363e+13   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.764e-13 on 2 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 1.492e+26 on 2 and 2 DF,  p-value: < 2.2e-16
summary(om3)
## 
## Call:
## lm(formula = Y ~ X1 + X2 + X3)
## 
## Residuals:
##          1          2          3          4          5 
## -2.493e-13  2.924e-13  3.733e-14 -3.893e-14 -4.144e-14 
## 
## Coefficients:
##               Estimate Std. Error    t value Pr(>|t|)    
## (Intercept) -1.000e+03  2.728e-10 -3.665e+12 1.74e-13 ***
## X1           1.000e+00  2.728e-13  3.666e+12 1.74e-13 ***
## X2           1.000e+00  2.727e-13  3.667e+12 1.74e-13 ***
## X3           1.330e-14  2.156e-13  6.200e-02    0.961    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.902e-13 on 1 degrees of freedom
## Multiple R-squared:      1,  Adjusted R-squared:      1 
## F-statistic: 4.992e+25 on 3 and 1 DF,  p-value: 1.04e-13
detach(mantel)
#Exercise 7.5.2
Hald <- read_excel("MARData.xlsx", sheet = "Haldcement")
attach(Hald)
head(Hald)
## # A tibble: 6 x 5
##       Y    x1    x2    x3    x4
##   <dbl> <dbl> <dbl> <dbl> <dbl>
## 1  78.5     7    26     6    60
## 2  74.3     1    29    15    52
## 3 104.     11    56     8    20
## 4  87.6    11    31     8    47
## 5  95.9     7    52     6    33
## 6 109.     11    55     9    22
#Output from R: correlations on page 256
X <- cbind(x1,x2,x3,x4)
cor(X)
##            x1         x2         x3         x4
## x1  1.0000000  0.2285795 -0.8241338 -0.2454451
## x2  0.2285795  1.0000000 -0.1392424 -0.9729550
## x3 -0.8241338 -0.1392424  1.0000000  0.0295370
## x4 -0.2454451 -0.9729550  0.0295370  1.0000000
#Figure 7.14 on page 257
library(leaps)
b <- regsubsets(as.matrix(X),Y)
rs <- summary(b)
par(mfrow = c(1,1))
library(car)
subsets(b,statistic = c("adjr2"),legend = FALSE)

##    Abbreviation
## x1           x1
## x2           x2
## x3           x3
## x4           x4
#Table 7.6 on page 257
rs$adjr2
## [1] 0.6449549 0.9744140 0.9764473 0.9735634
om1 <- lm(Y~x4)
om2 <- lm(Y~x1+x2)
om3 <- lm(Y~x1+x2+x4)
om4 <- lm(Y~x1+x2+x3+x4)
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) +1
#Calculate AIC
extractAIC(om1,k = 2)
## [1]  2.00000 58.85164
#Calculate AICc
extractAIC(om1,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]  4.666667 61.518310
#Calculate BIC
extractAIC(om1,k = log(n))
## [1]  2.00000 59.98154
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1]  3.00000 25.41999
#Calculate AICc
extractAIC(om2,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1]  8.00000 30.41999
#Calculate BIC
extractAIC(om2,k = log(n))
## [1]  3.00000 27.11484
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1]  4.00000 24.97388
#Calculate AICc
extractAIC(om3,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 12.57143 33.54531
#Calculate BIC
extractAIC(om3,k = log(n))
## [1]  4.00000 27.23368
#Subset size = 4
npar <- length(om4$coefficients) +1
#Calculate AIC
extractAIC(om4,k = 2)
## [1]  5.00000 26.94429
#Calculate AICc
extractAIC(om4,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 19.00000 40.94429
#Calculate BIC
extractAIC(om4,k = log(n))
## [1]  5.00000 29.76903
#Backward elimination based on AIC on page 257
backAIC <- step(om4,direction = "backward", data = Hald)
## Start:  AIC=26.94
## Y ~ x1 + x2 + x3 + x4
## 
##        Df Sum of Sq    RSS    AIC
## - x3    1    0.1091 47.973 24.974
## - x4    1    0.2470 48.111 25.011
## - x2    1    2.9725 50.836 25.728
## <none>              47.864 26.944
## - x1    1   25.9509 73.815 30.576
## 
## Step:  AIC=24.97
## Y ~ x1 + x2 + x4
## 
##        Df Sum of Sq    RSS    AIC
## <none>               47.97 24.974
## - x4    1      9.93  57.90 25.420
## - x2    1     26.79  74.76 28.742
## - x1    1    820.91 868.88 60.629
#Backward elimination based on BIC on page 258
backBIC <- step(om4,direction = "backward", data = Hald, k = log(n))
## Start:  AIC=29.77
## Y ~ x1 + x2 + x3 + x4
## 
##        Df Sum of Sq    RSS    AIC
## - x3    1    0.1091 47.973 27.234
## - x4    1    0.2470 48.111 27.271
## - x2    1    2.9725 50.836 27.987
## <none>              47.864 29.769
## - x1    1   25.9509 73.815 32.836
## 
## Step:  AIC=27.23
## Y ~ x1 + x2 + x4
## 
##        Df Sum of Sq    RSS    AIC
## - x4    1      9.93  57.90 27.115
## <none>               47.97 27.234
## - x2    1     26.79  74.76 30.437
## - x1    1    820.91 868.88 62.324
## 
## Step:  AIC=27.11
## Y ~ x1 + x2
## 
##        Df Sum of Sq     RSS    AIC
## <none>                57.90 27.115
## - x1    1    848.43  906.34 60.308
## - x2    1   1207.78 1265.69 64.649
#Forward selection based on AIC on pages 258-259
mint <- lm(Y~1,data = Hald)
forwardAIC <- step(mint,scope = list(lower = ~1, 
upper = ~x1+x2+x3+x4),
direction = "forward", data = Hald)
## Start:  AIC=71.44
## Y ~ 1
## 
##        Df Sum of Sq     RSS    AIC
## + x4    1   1831.90  883.87 58.852
## + x2    1   1809.43  906.34 59.178
## + x1    1   1450.08 1265.69 63.519
## + x3    1    776.36 1939.40 69.067
## <none>              2715.76 71.444
## 
## Step:  AIC=58.85
## Y ~ x4
## 
##        Df Sum of Sq    RSS    AIC
## + x1    1    809.10  74.76 28.742
## + x3    1    708.13 175.74 39.853
## <none>              883.87 58.852
## + x2    1     14.99 868.88 60.629
## 
## Step:  AIC=28.74
## Y ~ x4 + x1
## 
##        Df Sum of Sq    RSS    AIC
## + x2    1    26.789 47.973 24.974
## + x3    1    23.926 50.836 25.728
## <none>              74.762 28.742
## 
## Step:  AIC=24.97
## Y ~ x4 + x1 + x2
## 
##        Df Sum of Sq    RSS    AIC
## <none>              47.973 24.974
## + x3    1   0.10909 47.864 26.944
#Forward selection based on BIC on page 259
forwardBIC <- step(mint,scope = list(lower = ~1, 
upper = ~x1+x2+x3+x4),
direction = "forward", data = Hald,k = log(n))
## Start:  AIC=72.01
## Y ~ 1
## 
##        Df Sum of Sq     RSS    AIC
## + x4    1   1831.90  883.87 59.982
## + x2    1   1809.43  906.34 60.308
## + x1    1   1450.08 1265.69 64.649
## + x3    1    776.36 1939.40 70.197
## <none>              2715.76 72.009
## 
## Step:  AIC=59.98
## Y ~ x4
## 
##        Df Sum of Sq    RSS    AIC
## + x1    1    809.10  74.76 30.437
## + x3    1    708.13 175.74 41.547
## <none>              883.87 59.982
## + x2    1     14.99 868.88 62.324
## 
## Step:  AIC=30.44
## Y ~ x4 + x1
## 
##        Df Sum of Sq    RSS    AIC
## + x2    1    26.789 47.973 27.234
## + x3    1    23.926 50.836 27.987
## <none>              74.762 30.437
## 
## Step:  AIC=27.23
## Y ~ x4 + x1 + x2
## 
##        Df Sum of Sq    RSS    AIC
## <none>              47.973 27.234
## + x3    1   0.10909 47.864 29.769
#Regression output from R on pages 259 and 260
summary(om1)
## 
## Call:
## lm(formula = Y ~ x4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.589  -8.228   1.495   4.726  17.524 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 117.5679     5.2622  22.342 1.62e-10 ***
## x4           -0.7382     0.1546  -4.775 0.000576 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.964 on 11 degrees of freedom
## Multiple R-squared:  0.6745, Adjusted R-squared:  0.645 
## F-statistic:  22.8 on 1 and 11 DF,  p-value: 0.0005762
summary(om2)
## 
## Call:
## lm(formula = Y ~ x1 + x2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.893 -1.574 -1.302  1.363  4.048 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 52.57735    2.28617   23.00 5.46e-10 ***
## x1           1.46831    0.12130   12.11 2.69e-07 ***
## x2           0.66225    0.04585   14.44 5.03e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.406 on 10 degrees of freedom
## Multiple R-squared:  0.9787, Adjusted R-squared:  0.9744 
## F-statistic: 229.5 on 2 and 10 DF,  p-value: 4.407e-09
library(car)
vif(om2)
##       x1       x2 
## 1.055129 1.055129
summary(om3)
## 
## Call:
## lm(formula = Y ~ x1 + x2 + x4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0919 -1.8016  0.2562  1.2818  3.8982 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  71.6483    14.1424   5.066 0.000675 ***
## x1            1.4519     0.1170  12.410 5.78e-07 ***
## x2            0.4161     0.1856   2.242 0.051687 .  
## x4           -0.2365     0.1733  -1.365 0.205395    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.309 on 9 degrees of freedom
## Multiple R-squared:  0.9823, Adjusted R-squared:  0.9764 
## F-statistic: 166.8 on 3 and 9 DF,  p-value: 3.323e-08
vif(om3)
##       x1       x2       x4 
##  1.06633 18.78031 18.94008
summary(om4)
## 
## Call:
## lm(formula = Y ~ x1 + x2 + x3 + x4)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1750 -1.6709  0.2508  1.3783  3.9254 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  62.4054    70.0710   0.891   0.3991  
## x1            1.5511     0.7448   2.083   0.0708 .
## x2            0.5102     0.7238   0.705   0.5009  
## x3            0.1019     0.7547   0.135   0.8959  
## x4           -0.1441     0.7091  -0.203   0.8441  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.446 on 8 degrees of freedom
## Multiple R-squared:  0.9824, Adjusted R-squared:  0.9736 
## F-statistic: 111.5 on 4 and 8 DF,  p-value: 4.756e-07
vif(om4)
##        x1        x2        x3        x4 
##  38.49621 254.42317  46.86839 282.51286
detach(Hald)