This shows the outputs from Chapter 7 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.
bridge <- read_excel("MARData.xlsx", sheet = "bridge")
attach(bridge)
head(bridge)
## # A tibble: 6 x 7
## Case Time DArea CCost Dwgs Length Spans
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 78.8 3.6 82.4 6 90 1
## 2 2 310. 5.33 422. 12 126 2
## 3 3 184. 6.29 180. 9 78 1
## 4 4 69.6 2.2 100 5 60 1
## 5 5 68.8 1.44 103 5 60 1
## 6 6 95.7 5.4 134. 5 60 1
#Figure 7.1 on page 235
m1 <- lm(log(Time)~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans))
logDArea <- log(DArea)
logCCost <- log(CCost)
logDwgs <- log(Dwgs)
logLength <- log(Length)
logSpans <- log(Spans)
X <- cbind(logDArea,logCCost,logDwgs,logLength,logSpans)
#install.packages("leaps")
library(leaps)
b <- regsubsets(as.matrix(X),log(Time))
rs <- summary(b)
par(mfrow = c(1,2))
plot(1:5,rs$adjr2,xlab = "Subset Size",ylab = "Adjusted R-squared")
library(car)
##subsets(b,statistic = c("adjr2"))
#Table 7.1 on page 235
#Calculate adjusted R-squared
rs$adjr2
## [1] 0.7022401 0.7530191 0.7582178 0.7534273 0.7475037
om1 <- lm(log(Time)~log(Dwgs))
om2 <- lm(log(Time)~log(Dwgs)+log(Spans))
om3 <- lm(log(Time)~log(Dwgs)+log(Spans)+log(CCost))
om4 <- lm(log(Time)~log(Dwgs)+log(Spans)+log(CCost)+log(DArea))
om5 <- m1
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) + 1
#Calculate AIC
extractAIC(om1,k = 2)
## [1] 2.00000 -94.89754
#Calculate AICc
extractAIC(om1,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 2.585366 -94.312171
#Calculate BIC
extractAIC(om1,k = log(n))
## [1] 2.00000 -91.28421
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1] 3.0000 -102.3703
#Calculate AICc
extractAIC(om2,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 4.0000 -101.3703
#Calculate BIC
extractAIC(om2,k = log(n))
## [1] 3.00000 -96.95036
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1] 4.0000 -102.4121
#Calculate AICc
extractAIC(om3,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 5.538462 -100.873608
#Calculate BIC
extractAIC(om3,k = log(n))
## [1] 4.00000 -95.18542
#Subset size = 4
npar <- length(om4$coefficients) +1
#Calculate AIC
extractAIC(om4,k = 2)
## [1] 5.0000 -100.6403
#Calculate AICc
extractAIC(om4,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 7.210526 -98.429816
#Calculate BIC
extractAIC(om4,k = log(n))
## [1] 5.00000 -91.60703
#Subset size = 5
npar <- length(om5$coefficients) +1
#Calculate AIC
extractAIC(om5,k = 2)
## [1] 6.00000 -98.71136
#Calculate AICc
extractAIC(om5,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 9.027027 -95.684330
#Calculate BIC
extractAIC(om5,k = log(n))
## [1] 6.00000 -87.87138
#Regression output on pages 235 and 236
summary(om2)
##
## Call:
## lm(formula = log(Time) ~ log(Dwgs) + log(Spans))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68649 -0.24728 -0.05988 0.26050 0.63759
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.66173 0.26871 9.905 1.49e-12 ***
## log(Dwgs) 1.04163 0.15420 6.755 3.26e-08 ***
## log(Spans) 0.28530 0.09095 3.137 0.00312 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3105 on 42 degrees of freedom
## Multiple R-squared: 0.7642, Adjusted R-squared: 0.753
## F-statistic: 68.08 on 2 and 42 DF, p-value: 6.632e-14
summary(om3)
##
## Call:
## lm(formula = log(Time) ~ log(Dwgs) + log(Spans) + log(CCost))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69415 -0.17456 -0.03566 0.22739 0.64945
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.3317 0.3577 6.519 7.9e-08 ***
## log(Dwgs) 0.8356 0.2135 3.914 0.000336 ***
## log(Spans) 0.1963 0.1107 1.773 0.083710 .
## log(CCost) 0.1483 0.1075 1.380 0.175212
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3072 on 41 degrees of freedom
## Multiple R-squared: 0.7747, Adjusted R-squared: 0.7582
## F-statistic: 46.99 on 3 and 41 DF, p-value: 2.484e-13
#Output from R on page 237
backAIC <- step(m1,direction = "backward", data = bridge)
## Start: AIC=-98.71
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Length) +
## log(Spans)
##
## Df Sum of Sq RSS AIC
## - log(Length) 1 0.00607 3.8497 -100.640
## - log(DArea) 1 0.01278 3.8564 -100.562
## <none> 3.8436 -98.711
## - log(CCost) 1 0.18162 4.0252 -98.634
## - log(Spans) 1 0.26616 4.1098 -97.698
## - log(Dwgs) 1 1.45358 5.2972 -86.277
##
## Step: AIC=-100.64
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## - log(DArea) 1 0.01958 3.8693 -102.412
## <none> 3.8497 -100.640
## - log(CCost) 1 0.18064 4.0303 -100.577
## - log(Spans) 1 0.31501 4.1647 -99.101
## - log(Dwgs) 1 1.44946 5.2991 -88.260
##
## Step: AIC=-102.41
## log(Time) ~ log(CCost) + log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## <none> 3.8693 -102.412
## - log(CCost) 1 0.17960 4.0488 -102.370
## - log(Spans) 1 0.29656 4.1658 -101.089
## - log(Dwgs) 1 1.44544 5.3147 -90.128
backBIC <- step(m1,direction = "backward", data = bridge, k = log(n))
## Start: AIC=-87.87
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Length) +
## log(Spans)
##
## Df Sum of Sq RSS AIC
## - log(Length) 1 0.00607 3.8497 -91.607
## - log(DArea) 1 0.01278 3.8564 -91.529
## - log(CCost) 1 0.18162 4.0252 -89.600
## - log(Spans) 1 0.26616 4.1098 -88.665
## <none> 3.8436 -87.871
## - log(Dwgs) 1 1.45358 5.2972 -77.244
##
## Step: AIC=-91.61
## log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## - log(DArea) 1 0.01958 3.8693 -95.185
## - log(CCost) 1 0.18064 4.0303 -93.350
## - log(Spans) 1 0.31501 4.1647 -91.874
## <none> 3.8497 -91.607
## - log(Dwgs) 1 1.44946 5.2991 -81.034
##
## Step: AIC=-95.19
## log(Time) ~ log(CCost) + log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## - log(CCost) 1 0.17960 4.0488 -96.950
## - log(Spans) 1 0.29656 4.1658 -95.669
## <none> 3.8693 -95.185
## - log(Dwgs) 1 1.44544 5.3147 -84.708
##
## Step: AIC=-96.95
## log(Time) ~ log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## <none> 4.0488 -96.950
## - log(Spans) 1 0.9487 4.9975 -91.284
## - log(Dwgs) 1 4.3989 8.4478 -67.661
#Output from R on page 238
mint <- lm(log(Time)~1,data = bridge)
forwardAIC <- step(mint,scope = list(lower = ~1,
upper = ~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans)),
direction = "forward", data = bridge)
## Start: AIC=-41.35
## log(Time) ~ 1
##
## Df Sum of Sq RSS AIC
## + log(Dwgs) 1 12.1765 4.9975 -94.898
## + log(CCost) 1 11.6147 5.5593 -90.104
## + log(DArea) 1 10.2943 6.8797 -80.514
## + log(Length) 1 10.0120 7.1620 -78.704
## + log(Spans) 1 8.7262 8.4478 -71.274
## <none> 17.1740 -41.347
##
## Step: AIC=-94.9
## log(Time) ~ log(Dwgs)
##
## Df Sum of Sq RSS AIC
## + log(Spans) 1 0.94866 4.0488 -102.370
## + log(CCost) 1 0.83170 4.1658 -101.089
## + log(Length) 1 0.66914 4.3284 -99.366
## + log(DArea) 1 0.47568 4.5218 -97.399
## <none> 4.9975 -94.898
##
## Step: AIC=-102.37
## log(Time) ~ log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## + log(CCost) 1 0.179598 3.8693 -102.41
## <none> 4.0488 -102.37
## + log(DArea) 1 0.018535 4.0303 -100.58
## + log(Length) 1 0.016924 4.0319 -100.56
##
## Step: AIC=-102.41
## log(Time) ~ log(Dwgs) + log(Spans) + log(CCost)
##
## Df Sum of Sq RSS AIC
## <none> 3.8693 -102.41
## + log(DArea) 1 0.019578 3.8497 -100.64
## + log(Length) 1 0.012868 3.8564 -100.56
forwardBIC <- step(mint,scope = list(lower = ~1,
upper = ~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans)),
direction = "forward", data = bridge,k = log(n))
## Start: AIC=-39.54
## log(Time) ~ 1
##
## Df Sum of Sq RSS AIC
## + log(Dwgs) 1 12.1765 4.9975 -91.284
## + log(CCost) 1 11.6147 5.5593 -86.490
## + log(DArea) 1 10.2943 6.8797 -76.901
## + log(Length) 1 10.0120 7.1620 -75.091
## + log(Spans) 1 8.7262 8.4478 -67.661
## <none> 17.1740 -39.540
##
## Step: AIC=-91.28
## log(Time) ~ log(Dwgs)
##
## Df Sum of Sq RSS AIC
## + log(Spans) 1 0.94866 4.0488 -96.950
## + log(CCost) 1 0.83170 4.1658 -95.669
## + log(Length) 1 0.66914 4.3284 -93.946
## + log(DArea) 1 0.47568 4.5218 -91.979
## <none> 4.9975 -91.284
##
## Step: AIC=-96.95
## log(Time) ~ log(Dwgs) + log(Spans)
##
## Df Sum of Sq RSS AIC
## <none> 4.0488 -96.950
## + log(CCost) 1 0.179598 3.8693 -95.185
## + log(DArea) 1 0.018535 4.0303 -93.350
## + log(Length) 1 0.016924 4.0319 -93.332
detach(bridge)
prostateTraining <- read_excel("MARData.xlsx", sheet = "prostateTraining")
attach(prostateTraining)
head(prostateTraining)
## # A tibble: 6 x 10
## original_case lcavol lweight age lbph svi lcp gleason pgg45 lpsa
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 -0.580 2.77 50 -1.39 0 -1.39 6 0 -0.431
## 2 2 -0.994 3.32 58 -1.39 0 -1.39 6 0 -0.163
## 3 3 -0.511 2.69 74 -1.39 0 -1.39 7 20 -0.163
## 4 4 -1.20 3.28 58 -1.39 0 -1.39 6 0 -0.163
## 5 5 0.751 3.43 62 -1.39 0 -1.39 6 0 0.372
## 6 6 -1.05 3.23 50 -1.39 0 -1.39 6 0 0.765
#Figure 7.2 on page 240
pairs(lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45)
#Figure 7.3 on page 241
m1 <- lm(lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45)
StanRes1 <- rstandard(m1)
par(mfrow = c(3,3))
plot(lcavol,StanRes1, ylab = "Standardized Residuals")
plot(lweight,StanRes1, ylab = "Standardized Residuals")
plot(age,StanRes1, ylab = "Standardized Residuals")
plot(lbph,StanRes1, ylab = "Standardized Residuals")
plot(svi,StanRes1, ylab = "Standardized Residuals")
plot(lcp,StanRes1, ylab = "Standardized Residuals")
plot(gleason,StanRes1, ylab = "Standardized Residuals")
plot(pgg45,StanRes1, ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1, ylab = "Standardized Residuals",xlab = "Fitted values")
#Figure 7.4 on page 241
par(mfrow = c(1,1))
plot(m1$fitted.values,lpsa,xlab = "Fitted Values")
abline(lsfit(m1$fitted.values,lpsa))
#Figure 7.5 on page 242
par(mfrow = c(2,2))
plot(m1)
abline(v = 2*9/67,lty = 2)
#Regression output on pages 242 and 243
summary(m1)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight + age + lbph + svi + lcp +
## gleason + pgg45)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.64870 -0.34147 -0.05424 0.44941 1.48675
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.429170 1.553588 0.276 0.78334
## lcavol 0.576543 0.107438 5.366 1.47e-06 ***
## lweight 0.614020 0.223216 2.751 0.00792 **
## age -0.019001 0.013612 -1.396 0.16806
## lbph 0.144848 0.070457 2.056 0.04431 *
## svi 0.737209 0.298555 2.469 0.01651 *
## lcp -0.206324 0.110516 -1.867 0.06697 .
## gleason -0.029503 0.201136 -0.147 0.88389
## pgg45 0.009465 0.005447 1.738 0.08755 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7123 on 58 degrees of freedom
## Multiple R-squared: 0.6944, Adjusted R-squared: 0.6522
## F-statistic: 16.47 on 8 and 58 DF, p-value: 2.042e-12
#Figure 7.6 page 243
library(alr3)
par(mfrow = c(3,3))
mmp(m1,lcavol,key = NULL)
mmp(m1,lweight,key = NULL)
mmp(m1,age,key = NULL)
mmp(m1,lbph,key = NULL)
mmp(m1,lcp,key = NULL)
mmp(m1,gleason,key = NULL)
mmp(m1,pgg45,key = NULL)
mmp(m1,m1$fitted.values,xlab = "Fitted Values",key = NULL)
#R output on page 244
library(car)
vif(m1)
## lcavol lweight age lbph svi lcp gleason pgg45
## 2.318496 1.472295 1.356604 1.383429 2.045313 3.117451 2.644480 3.313288
#Figure 7.7 on page 244
library(car)
par(mfrow = c(2,4))
avPlot(m1,variable = lcavol,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = lweight,ask = FALSE,identify.points = TRUE, main = "")
avPlot(m1,variable = age,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = lbph,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = svi,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = lcp,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = gleason,ask = FALSE,identify.points = FALSE, main = "")
avPlot(m1,variable = pgg45,ask = FALSE,identify.points = FALSE, main = "")
#Figure 7.8 on page 245
X <- cbind(lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45)
library(leaps)
b <- regsubsets(as.matrix(X),lpsa)
rs <- summary(b)
par(mfrow = c(1,2))
library(car)
#subsets(b,statistic = c("adjr2"),min.size = 1,max.size = 4,cex.subsets = 0.7)
#subsets(b,statistic = c("adjr2"),min.size = 5,max.size = 8,cex.subsets = 0.7,legend = FALSE)
#Table 7.2 on page 245
#Calculate adjusted R-squared
rs$adjr2
## [1] 0.5304013 0.6027172 0.6201758 0.6371877 0.6396181 0.6510880 0.6579833
## [8] 0.6522155
om1 <- lm(lpsa~lcavol)
om2 <- lm(lpsa~lcavol+lweight)
om3 <- lm(lpsa~lcavol+lweight+svi)
om4 <- lm(lpsa~lcavol+lweight+svi+lbph)
om5 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45)
om6 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45+lcp)
om7 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45+lcp+age)
om8 <- m1
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) +1
#Calculate AIC
extractAIC(om1,k = 2)
## [1] 2.00000 -23.37361
#Calculate AICc
extractAIC(om1,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 2.380952 -22.992657
#Calculate BIC
extractAIC(om1,k = log(n))
## [1] 2.00000 -18.96422
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1] 3.0000 -33.6168
#Calculate AICc
extractAIC(om2,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 3.645161 -32.971635
#Calculate BIC
extractAIC(om2,k = log(n))
## [1] 3.00000 -27.00272
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1] 4.00000 -35.68291
#Calculate AICc
extractAIC(om3,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 4.983607 -34.699307
#Calculate BIC
extractAIC(om3,k = log(n))
## [1] 4.00000 -26.86414
#Subset size = 4
npar <- length(om4$coefficients) +1
#Calculate AIC
extractAIC(om4,k = 2)
## [1] 5.00000 -37.82507
#Calculate AICc
extractAIC(om4,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 6.40000 -36.42507
#Calculate BIC
extractAIC(om4,k = log(n))
## [1] 5.00000 -26.80161
#Subset size = 5
npar <- length(om5$coefficients) +1
#Calculate AIC
extractAIC(om5,k = 2)
## [1] 6.00000 -37.36485
#Calculate AICc
extractAIC(om5,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 7.898305 -35.466547
#Calculate BIC
extractAIC(om5,k = log(n))
## [1] 6.0000 -24.1367
#Subset size = 6
npar <- length(om6$coefficients) +1
#Calculate AIC
extractAIC(om6,k = 2)
## [1] 7.00000 -38.63939
#Calculate AICc
extractAIC(om6,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 9.482759 -36.156635
#Calculate BIC
extractAIC(om6,k = log(n))
## [1] 7.00000 -23.20654
#Subset size = 7
npar <- length(om7$coefficients) +1
#Calculate AIC
extractAIC(om7,k = 2)
## [1] 8.00000 -39.10281
#Calculate AICc
extractAIC(om7,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 11.15789 -35.94492
#Calculate BIC
extractAIC(om7,k = log(n))
## [1] 8.00000 -21.46527
#Subset size = 8
npar <- length(om8$coefficients) +1
#Calculate AIC
extractAIC(om8,k = 2)
## [1] 9.00000 -37.12766
#Calculate AICc
extractAIC(om8,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 12.92857 -33.19909
#Calculate BIC
extractAIC(om8,k = log(n))
## [1] 9.00000 -17.28543
#Regression output on page 246
summary(om2)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.58852 -0.44174 0.01304 0.52613 1.93127
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.04944 0.72904 -1.439 0.154885
## lcavol 0.62761 0.07906 7.938 4.14e-11 ***
## lweight 0.73838 0.20613 3.582 0.000658 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7613 on 64 degrees of freedom
## Multiple R-squared: 0.6148, Adjusted R-squared: 0.6027
## F-statistic: 51.06 on 2 and 64 DF, p-value: 5.54e-14
summary(om4)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8709 -0.3903 -0.0172 0.5676 1.4227
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.32592 0.77998 -0.418 0.6775
## lcavol 0.50552 0.09256 5.461 8.85e-07 ***
## lweight 0.53883 0.22071 2.441 0.0175 *
## svi 0.67185 0.27323 2.459 0.0167 *
## lbph 0.14001 0.07041 1.988 0.0512 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7275 on 62 degrees of freedom
## Multiple R-squared: 0.6592, Adjusted R-squared: 0.6372
## F-statistic: 29.98 on 4 and 62 DF, p-value: 6.911e-14
summary(om7)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph + pgg45 + lcp +
## age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.65425 -0.34471 -0.05615 0.44380 1.48952
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.259062 1.025170 0.253 0.8014
## lcavol 0.573930 0.105069 5.462 9.88e-07 ***
## lweight 0.619209 0.218560 2.833 0.0063 **
## svi 0.741781 0.294451 2.519 0.0145 *
## lbph 0.144426 0.069812 2.069 0.0430 *
## pgg45 0.008945 0.004099 2.182 0.0331 *
## lcp -0.205417 0.109424 -1.877 0.0654 .
## age -0.019480 0.013105 -1.486 0.1425
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7064 on 59 degrees of freedom
## Multiple R-squared: 0.6943, Adjusted R-squared: 0.658
## F-statistic: 19.14 on 7 and 59 DF, p-value: 4.496e-13
detach(prostateTraining)
prostateTest <- read_excel("MARData.xlsx", sheet = "prostateTest")
attach(prostateTest)
head(prostateTest)
## # A tibble: 6 x 10
## original_case lcavol lweight age lbph svi lcp gleason pgg45 lpsa
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 7 0.737 3.47 64 0.615 0 -1.39 6 0 0.765
## 2 9 -0.777 3.54 47 -1.39 0 -1.39 6 0 1.05
## 3 10 0.223 3.24 63 -1.39 0 -1.39 6 0 1.05
## 4 15 1.21 3.44 57 -1.39 0 -0.431 7 5 1.40
## 5 22 2.06 3.50 60 1.47 0 1.35 7 20 1.66
## 6 25 0.385 3.67 69 1.60 0 -1.39 6 0 1.73
#Regression output on page 247
om2 <- lm(lpsa~lcavol+lweight)
summary(om2)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.3061 -0.4442 -0.0595 0.3221 1.5651
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7354 0.9572 0.768 0.449
## lcavol 0.7478 0.1294 5.778 3.81e-06 ***
## lweight 0.1968 0.2473 0.796 0.433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.721 on 27 degrees of freedom
## Multiple R-squared: 0.5542, Adjusted R-squared: 0.5212
## F-statistic: 16.78 on 2 and 27 DF, p-value: 1.833e-05
om4 <- lm(lpsa~lcavol+lweight+svi+lbph)
summary(om4)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.08087 -0.40229 -0.05645 0.49322 1.01647
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.52957 0.93066 0.569 0.5744
## lcavol 0.59555 0.12655 4.706 7.98e-05 ***
## lweight 0.26215 0.24492 1.070 0.2947
## svi 0.95051 0.32214 2.951 0.0068 **
## lbph -0.05337 0.09237 -0.578 0.5686
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6445 on 25 degrees of freedom
## Multiple R-squared: 0.6703, Adjusted R-squared: 0.6175
## F-statistic: 12.7 on 4 and 25 DF, p-value: 8.894e-06
om7 <- lm(lpsa~lcavol+lweight+svi+lbph+pgg45+lcp+age)
summary(om7)
##
## Call:
## lm(formula = lpsa ~ lcavol + lweight + svi + lbph + pgg45 + lcp +
## age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.14292 -0.36227 0.04168 0.48056 0.99825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.873329 1.490194 0.586 0.56381
## lcavol 0.481237 0.165881 2.901 0.00828 **
## lweight 0.313601 0.257112 1.220 0.23549
## svi 0.619278 0.423109 1.464 0.15744
## lbph -0.090696 0.121368 -0.747 0.46281
## pgg45 0.001316 0.006370 0.207 0.83819
## lcp 0.180850 0.166970 1.083 0.29048
## age -0.004958 0.022220 -0.223 0.82550
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6589 on 22 degrees of freedom
## Multiple R-squared: 0.6967, Adjusted R-squared: 0.6001
## F-statistic: 7.218 on 7 and 22 DF, p-value: 0.0001546
detach(prostateTest)
prostateTraining <- read_excel("MARData.xlsx", sheet = "prostateTraining")
attach(prostateTraining)
head(prostateTraining)
## # A tibble: 6 x 10
## original_case lcavol lweight age lbph svi lcp gleason pgg45 lpsa
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 -0.580 2.77 50 -1.39 0 -1.39 6 0 -0.431
## 2 2 -0.994 3.32 58 -1.39 0 -1.39 6 0 -0.163
## 3 3 -0.511 2.69 74 -1.39 0 -1.39 7 20 -0.163
## 4 4 -1.20 3.28 58 -1.39 0 -1.39 6 0 -0.163
## 5 5 0.751 3.43 62 -1.39 0 -1.39 6 0 0.372
## 6 6 -1.05 3.23 50 -1.39 0 -1.39 6 0 0.765
#Figure 7.9 on page 249
X <- cbind(lcavol,lweight,age,lbph,svi,lcp,gleason,pgg45)
library(leaps)
b <- regsubsets(as.matrix(X),lpsa)
rs <- summary(b)
par(mfrow = c(1,2))
library(car)
#subsets(b,statistic = c("adjr2"),main = "With Case 45",min.size = 1,max.size = 5,cex.subsets = 0.7)
m2 <- update(m1, subset = (1:67)[-c(45)])
lcavol1 <- lcavol[-c(45)]
lweight1 <- lweight[-c(45)]
age1 <- age[-c(45)]
lbph1 <- lbph[-c(45)]
svi1 <- svi[-c(45)]
lcp1 <- lcp[-c(45)]
gleason1 <- gleason[-c(45)]
pgg451 <- pgg45[-c(45)]
X <- cbind(lcavol1,lweight1,age1,lbph1,svi1,lcp1,gleason1,pgg451)
library(leaps)
b <- regsubsets(as.matrix(X),lpsa[-c(45)])
rs <- summary(b)
library(car)
subsets(b,statistic = c("adjr2"),main = "Without Case 45",min.size = 1,max.size = 5,cex.subsets = 0.7,legend = FALSE)
## Abbreviation
## lcavol1 lcv1
## lweight1 lw1
## age1 a
## lbph1 lb1
## svi1 s
## lcp1 lcp1
## gleason1 g
## pgg451 p
detach(prostateTraining)
prostateAlldata <- read_excel("MARData.xlsx", sheet = "prostateAlldata")
attach(prostateAlldata)
head(prostateAlldata)
## # A tibble: 6 x 11
## case lcavol lweight age lbph svi lcp gleason pgg45 lpsa train
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1 -0.580 2.77 50 -1.39 0 -1.39 6 0 -0.431 T
## 2 2 -0.994 3.32 58 -1.39 0 -1.39 6 0 -0.163 T
## 3 3 -0.511 2.69 74 -1.39 0 -1.39 7 20 -0.163 T
## 4 4 -1.20 3.28 58 -1.39 0 -1.39 6 0 -0.163 T
## 5 5 0.751 3.43 62 -1.39 0 -1.39 6 0 0.372 T
## 6 6 -1.05 3.23 50 -1.39 0 -1.39 6 0 0.765 T
#Figure 7.10 on page 249
# par(mfrow = c(1,1))
# plot(lweight[train == FALSE],lpsa[train == FALSE],pch = 2,col = 1,xlab = "lweight",ylab = "lpsa",ylim = c(-1,6),xlim = c(2,6.5))
# abline(lsfit(lweight[train == FALSE],lpsa[train == FALSE]),lty = 1,col = 1)
# points(lweight[train == TRUE],lpsa[train == TRUE],pch = 3,col = 2)
# abline(lsfit(lweight[train == TRUE],lpsa[train == TRUE]),lty = 2,col = 2)
# legend(4.5,2,legend = c("Training","Test"),pch = 3:2,col = 2:1,title = "Data Set")
#
# detach(prostateAlldata)
# prostateTest <- read_excel("MARData.xlsx", sheet = "prostateTest")
# attach(prostateTest)
# head(prostateTest)
#
# #Figure 7.11 on page 250
# m1 <- lm(lpsa~lcavol+lweight+age+lbph+svi+lcp+gleason+pgg45)
# library(car)
# par(mfrow = c(1,1))
# avp(m1,variable = lweight,ask = FALSE,identify.points = TRUE, main = "")
#
# detach(prostateTest)
#################EXERCISES
#Exercise 7.5.1
library(alr3)
data(mantel)
attach(mantel)
head(mantel)
## Y X1 X2 X3
## 1 5 1 1004 6.0
## 2 6 200 806 7.3
## 3 8 -50 1058 11.0
## 4 9 909 100 13.0
## 5 11 506 505 13.1
#Output from R: correlations on page 253
X <- cbind(X1,X2,X3)
cor(X)
## X1 X2 X3
## X1 1.0000000 -0.9999887 0.6858141
## X2 -0.9999887 1.0000000 -0.6826107
## X3 0.6858141 -0.6826107 1.0000000
#Figure 7.13 on page 254
library(leaps)
b <- regsubsets(as.matrix(X),Y)
rs <- summary(b)
par(mfrow = c(1,1))
library(car)
subsets(b,statistic = c("adjr2"),legend = FALSE)
## Abbreviation
## X1 X1
## X2 X2
## X3 X3
#Table 7.4 on page 254
rs$adjr2
## [1] 0.8764847 1.0000000 1.0000000
om1 <- lm(Y~X3)
om2 <- lm(Y~X1+X2)
om3 <- lm(Y~X1+X2+X3)
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) +1
#Calculate AIC
extractAIC(om1,k = 2)
## [1] 2.0000000 -0.3087485
#Calculate BIC
extractAIC(om1,k = log(n))
## [1] 2.000000 -1.089873
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1] 3.0000 -287.7494
#Calculate BIC
extractAIC(om2,k = log(n))
## [1] 3.0000 -288.9211
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1] 4.0000 -285.7684
#Calculate BIC
extractAIC(om3,k = log(n))
## [1] 4.0000 -287.3306
#Forward selection on pages 253 and 254
mint <- lm(Y~1,data = mantel)
forwardAIC <- step(mint,scope = list(lower = ~1,
upper = ~X1+X2+X3),
direction = "forward", data = mantel)
## Start: AIC=9.59
## Y ~ 1
##
## Df Sum of Sq RSS AIC
## + X3 1 20.6879 2.1121 -0.3087
## + X1 1 8.6112 14.1888 9.2151
## + X2 1 8.5064 14.2936 9.2519
## <none> 22.8000 9.5866
##
## Step: AIC=-0.31
## Y ~ X3
##
## Df Sum of Sq RSS AIC
## <none> 2.1121 -0.30875
## + X2 1 0.066328 2.0458 1.53172
## + X1 1 0.064522 2.0476 1.53613
forwardBIC <- step(mint,scope = list(lower = ~1,
upper = ~X1+X2+X3),
direction = "forward", data = mantel,k = log(n))
## Start: AIC=9.2
## Y ~ 1
##
## Df Sum of Sq RSS AIC
## + X3 1 20.6879 2.1121 -1.0899
## + X1 1 8.6112 14.1888 8.4339
## + X2 1 8.5064 14.2936 8.4707
## <none> 22.8000 9.1961
##
## Step: AIC=-1.09
## Y ~ X3
##
## Df Sum of Sq RSS AIC
## <none> 2.1121 -1.08987
## + X2 1 0.066328 2.0458 0.36003
## + X1 1 0.064522 2.0476 0.36444
#Regression output on page 255
summary(om1)
##
## Call:
## lm(formula = Y ~ X3)
##
## Residuals:
## 1 2 3 4 5
## 0.03434 0.13124 -0.43912 -0.82850 1.10203
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.7975 1.3452 0.593 0.5950
## X3 0.6947 0.1282 5.421 0.0123 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8391 on 3 degrees of freedom
## Multiple R-squared: 0.9074, Adjusted R-squared: 0.8765
## F-statistic: 29.38 on 1 and 3 DF, p-value: 0.01232
summary(om2)
##
## Call:
## lm(formula = Y ~ X1 + X2)
##
## Residuals:
## 1 2 3 4 5
## -2.545e-13 2.854e-13 5.287e-14 -2.928e-14 -5.448e-14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.000e+03 7.388e-11 -1.354e+13 <2e-16 ***
## X1 1.000e+00 7.312e-14 1.368e+13 <2e-16 ***
## X2 1.000e+00 7.339e-14 1.363e+13 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.764e-13 on 2 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 1.492e+26 on 2 and 2 DF, p-value: < 2.2e-16
summary(om3)
##
## Call:
## lm(formula = Y ~ X1 + X2 + X3)
##
## Residuals:
## 1 2 3 4 5
## -2.493e-13 2.924e-13 3.733e-14 -3.893e-14 -4.144e-14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.000e+03 2.728e-10 -3.665e+12 1.74e-13 ***
## X1 1.000e+00 2.728e-13 3.666e+12 1.74e-13 ***
## X2 1.000e+00 2.727e-13 3.667e+12 1.74e-13 ***
## X3 1.330e-14 2.156e-13 6.200e-02 0.961
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.902e-13 on 1 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 1
## F-statistic: 4.992e+25 on 3 and 1 DF, p-value: 1.04e-13
detach(mantel)
#Exercise 7.5.2
Hald <- read_excel("MARData.xlsx", sheet = "Haldcement")
attach(Hald)
head(Hald)
## # A tibble: 6 x 5
## Y x1 x2 x3 x4
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 78.5 7 26 6 60
## 2 74.3 1 29 15 52
## 3 104. 11 56 8 20
## 4 87.6 11 31 8 47
## 5 95.9 7 52 6 33
## 6 109. 11 55 9 22
#Output from R: correlations on page 256
X <- cbind(x1,x2,x3,x4)
cor(X)
## x1 x2 x3 x4
## x1 1.0000000 0.2285795 -0.8241338 -0.2454451
## x2 0.2285795 1.0000000 -0.1392424 -0.9729550
## x3 -0.8241338 -0.1392424 1.0000000 0.0295370
## x4 -0.2454451 -0.9729550 0.0295370 1.0000000
#Figure 7.14 on page 257
library(leaps)
b <- regsubsets(as.matrix(X),Y)
rs <- summary(b)
par(mfrow = c(1,1))
library(car)
subsets(b,statistic = c("adjr2"),legend = FALSE)
## Abbreviation
## x1 x1
## x2 x2
## x3 x3
## x4 x4
#Table 7.6 on page 257
rs$adjr2
## [1] 0.6449549 0.9744140 0.9764473 0.9735634
om1 <- lm(Y~x4)
om2 <- lm(Y~x1+x2)
om3 <- lm(Y~x1+x2+x4)
om4 <- lm(Y~x1+x2+x3+x4)
#Subset size = 1
n <- length(om1$residuals)
npar <- length(om1$coefficients) +1
#Calculate AIC
extractAIC(om1,k = 2)
## [1] 2.00000 58.85164
#Calculate AICc
extractAIC(om1,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 4.666667 61.518310
#Calculate BIC
extractAIC(om1,k = log(n))
## [1] 2.00000 59.98154
#Subset size = 2
npar <- length(om2$coefficients) +1
#Calculate AIC
extractAIC(om2,k = 2)
## [1] 3.00000 25.41999
#Calculate AICc
extractAIC(om2,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 8.00000 30.41999
#Calculate BIC
extractAIC(om2,k = log(n))
## [1] 3.00000 27.11484
#Subset size = 3
npar <- length(om3$coefficients) +1
#Calculate AIC
extractAIC(om3,k = 2)
## [1] 4.00000 24.97388
#Calculate AICc
extractAIC(om3,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 12.57143 33.54531
#Calculate BIC
extractAIC(om3,k = log(n))
## [1] 4.00000 27.23368
#Subset size = 4
npar <- length(om4$coefficients) +1
#Calculate AIC
extractAIC(om4,k = 2)
## [1] 5.00000 26.94429
#Calculate AICc
extractAIC(om4,k = 2)+2*npar*(npar+1)/(n-npar-1)
## [1] 19.00000 40.94429
#Calculate BIC
extractAIC(om4,k = log(n))
## [1] 5.00000 29.76903
#Backward elimination based on AIC on page 257
backAIC <- step(om4,direction = "backward", data = Hald)
## Start: AIC=26.94
## Y ~ x1 + x2 + x3 + x4
##
## Df Sum of Sq RSS AIC
## - x3 1 0.1091 47.973 24.974
## - x4 1 0.2470 48.111 25.011
## - x2 1 2.9725 50.836 25.728
## <none> 47.864 26.944
## - x1 1 25.9509 73.815 30.576
##
## Step: AIC=24.97
## Y ~ x1 + x2 + x4
##
## Df Sum of Sq RSS AIC
## <none> 47.97 24.974
## - x4 1 9.93 57.90 25.420
## - x2 1 26.79 74.76 28.742
## - x1 1 820.91 868.88 60.629
#Backward elimination based on BIC on page 258
backBIC <- step(om4,direction = "backward", data = Hald, k = log(n))
## Start: AIC=29.77
## Y ~ x1 + x2 + x3 + x4
##
## Df Sum of Sq RSS AIC
## - x3 1 0.1091 47.973 27.234
## - x4 1 0.2470 48.111 27.271
## - x2 1 2.9725 50.836 27.987
## <none> 47.864 29.769
## - x1 1 25.9509 73.815 32.836
##
## Step: AIC=27.23
## Y ~ x1 + x2 + x4
##
## Df Sum of Sq RSS AIC
## - x4 1 9.93 57.90 27.115
## <none> 47.97 27.234
## - x2 1 26.79 74.76 30.437
## - x1 1 820.91 868.88 62.324
##
## Step: AIC=27.11
## Y ~ x1 + x2
##
## Df Sum of Sq RSS AIC
## <none> 57.90 27.115
## - x1 1 848.43 906.34 60.308
## - x2 1 1207.78 1265.69 64.649
#Forward selection based on AIC on pages 258-259
mint <- lm(Y~1,data = Hald)
forwardAIC <- step(mint,scope = list(lower = ~1,
upper = ~x1+x2+x3+x4),
direction = "forward", data = Hald)
## Start: AIC=71.44
## Y ~ 1
##
## Df Sum of Sq RSS AIC
## + x4 1 1831.90 883.87 58.852
## + x2 1 1809.43 906.34 59.178
## + x1 1 1450.08 1265.69 63.519
## + x3 1 776.36 1939.40 69.067
## <none> 2715.76 71.444
##
## Step: AIC=58.85
## Y ~ x4
##
## Df Sum of Sq RSS AIC
## + x1 1 809.10 74.76 28.742
## + x3 1 708.13 175.74 39.853
## <none> 883.87 58.852
## + x2 1 14.99 868.88 60.629
##
## Step: AIC=28.74
## Y ~ x4 + x1
##
## Df Sum of Sq RSS AIC
## + x2 1 26.789 47.973 24.974
## + x3 1 23.926 50.836 25.728
## <none> 74.762 28.742
##
## Step: AIC=24.97
## Y ~ x4 + x1 + x2
##
## Df Sum of Sq RSS AIC
## <none> 47.973 24.974
## + x3 1 0.10909 47.864 26.944
#Forward selection based on BIC on page 259
forwardBIC <- step(mint,scope = list(lower = ~1,
upper = ~x1+x2+x3+x4),
direction = "forward", data = Hald,k = log(n))
## Start: AIC=72.01
## Y ~ 1
##
## Df Sum of Sq RSS AIC
## + x4 1 1831.90 883.87 59.982
## + x2 1 1809.43 906.34 60.308
## + x1 1 1450.08 1265.69 64.649
## + x3 1 776.36 1939.40 70.197
## <none> 2715.76 72.009
##
## Step: AIC=59.98
## Y ~ x4
##
## Df Sum of Sq RSS AIC
## + x1 1 809.10 74.76 30.437
## + x3 1 708.13 175.74 41.547
## <none> 883.87 59.982
## + x2 1 14.99 868.88 62.324
##
## Step: AIC=30.44
## Y ~ x4 + x1
##
## Df Sum of Sq RSS AIC
## + x2 1 26.789 47.973 27.234
## + x3 1 23.926 50.836 27.987
## <none> 74.762 30.437
##
## Step: AIC=27.23
## Y ~ x4 + x1 + x2
##
## Df Sum of Sq RSS AIC
## <none> 47.973 27.234
## + x3 1 0.10909 47.864 29.769
#Regression output from R on pages 259 and 260
summary(om1)
##
## Call:
## lm(formula = Y ~ x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.589 -8.228 1.495 4.726 17.524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 117.5679 5.2622 22.342 1.62e-10 ***
## x4 -0.7382 0.1546 -4.775 0.000576 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.964 on 11 degrees of freedom
## Multiple R-squared: 0.6745, Adjusted R-squared: 0.645
## F-statistic: 22.8 on 1 and 11 DF, p-value: 0.0005762
summary(om2)
##
## Call:
## lm(formula = Y ~ x1 + x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.893 -1.574 -1.302 1.363 4.048
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.57735 2.28617 23.00 5.46e-10 ***
## x1 1.46831 0.12130 12.11 2.69e-07 ***
## x2 0.66225 0.04585 14.44 5.03e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.406 on 10 degrees of freedom
## Multiple R-squared: 0.9787, Adjusted R-squared: 0.9744
## F-statistic: 229.5 on 2 and 10 DF, p-value: 4.407e-09
library(car)
vif(om2)
## x1 x2
## 1.055129 1.055129
summary(om3)
##
## Call:
## lm(formula = Y ~ x1 + x2 + x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0919 -1.8016 0.2562 1.2818 3.8982
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.6483 14.1424 5.066 0.000675 ***
## x1 1.4519 0.1170 12.410 5.78e-07 ***
## x2 0.4161 0.1856 2.242 0.051687 .
## x4 -0.2365 0.1733 -1.365 0.205395
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.309 on 9 degrees of freedom
## Multiple R-squared: 0.9823, Adjusted R-squared: 0.9764
## F-statistic: 166.8 on 3 and 9 DF, p-value: 3.323e-08
vif(om3)
## x1 x2 x4
## 1.06633 18.78031 18.94008
summary(om4)
##
## Call:
## lm(formula = Y ~ x1 + x2 + x3 + x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.1750 -1.6709 0.2508 1.3783 3.9254
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.4054 70.0710 0.891 0.3991
## x1 1.5511 0.7448 2.083 0.0708 .
## x2 0.5102 0.7238 0.705 0.5009
## x3 0.1019 0.7547 0.135 0.8959
## x4 -0.1441 0.7091 -0.203 0.8441
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.446 on 8 degrees of freedom
## Multiple R-squared: 0.9824, Adjusted R-squared: 0.9736
## F-statistic: 111.5 on 4 and 8 DF, p-value: 4.756e-07
vif(om4)
## x1 x2 x3 x4
## 38.49621 254.42317 46.86839 282.51286
detach(Hald)