This shows the outputs from Chapter 6 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.
nyc <- read_excel("MARData.xlsx", sheet = "nyc")
attach(nyc)
head(nyc, 10)
## # A tibble: 10 x 7
## Case Restaurant Price Food Decor Service East
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Daniella Ristorante 43 22 18 20 0
## 2 2 Tello's Ristorante 32 20 19 19 0
## 3 3 Biricchino 34 21 13 18 0
## 4 4 Bottino 41 20 20 17 0
## 5 5 Da Umberto 54 24 19 21 0
## 6 6 Le Madri 52 22 22 21 0
## 7 7 Le Zie 34 22 16 21 0
## 8 8 Pasticcio 34 20 18 21 1
## 9 9 Belluno 39 22 19 22 1
## 10 10 Cinque Terre 44 21 17 19 1
#Figure 6.1 on page 157
pairs(~Food+Decor+Service,data = nyc,gap = 0.4,cex.labels = 1.5)
#Figure 6.2 on page 158
m1 <- lm(Price~Food+Decor+Service+East)
summary(m1)
##
## Call:
## lm(formula = Price ~ Food + Decor + Service + East)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.0465 -3.8837 0.0373 3.3942 17.7491
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -24.023800 4.708359 -5.102 9.24e-07 ***
## Food 1.538120 0.368951 4.169 4.96e-05 ***
## Decor 1.910087 0.217005 8.802 1.87e-15 ***
## Service -0.002727 0.396232 -0.007 0.9945
## East 2.068050 0.946739 2.184 0.0304 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.738 on 163 degrees of freedom
## Multiple R-squared: 0.6279, Adjusted R-squared: 0.6187
## F-statistic: 68.76 on 4 and 163 DF, p-value: < 2.2e-16
StanRes1 <- rstandard(m1)
par(mfrow = c(2,2))
plot(Food,StanRes1, ylab = "Standardized Residuals")
plot(Decor,StanRes1, ylab = "Standardized Residuals")
plot(Service,StanRes1, ylab = "Standardized Residuals")
plot(East,StanRes1, ylab = "Standardized Residuals")
#Figure 6.3 on page 158
par(mfrow = c(1,1))
plot(m1$fitted.values,Price,xlab = "Fitted Values", ylab = "Price")
abline(lsfit(m1$fitted.values,Price))
detach(nyc)
#Figure 6.4 on page 160
library(alr3)
data(caution)
attach(caution)
head(caution, 10)
## x1 x2 y
## 1 0.8880370 0.4537070 0.1560420
## 2 -0.0975793 0.1571440 0.0343178
## 3 -0.2105760 0.9681980 0.0304575
## 4 -0.6783160 -0.6756590 0.8727100
## 5 0.7905930 -0.4961290 0.3411820
## 6 0.5247750 -0.6484660 0.6188230
## 7 -0.4929670 -0.8610200 0.5097080
## 8 0.2334210 -0.7425650 0.1831930
## 9 0.6767580 0.0494377 0.1733210
## 10 -0.4276640 -0.1798130 0.1283770
with(caution, pairs(y~x1+x2))
#Figure 6.5 on page 160
m1 <- with(caution, lm(y~x1+x2))
StanRes1 <- rstandard(m1)
par(mfrow = c(2,2))
plot(x1,StanRes1, ylab = "Standardized Residuals")
plot(x2,StanRes1, ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1, ylab = "Standardized Residuals",xlab = "Fitted Values")
#Figure 6.6 on page 161
par(mfrow = c(1,1))
plot(m1$fitted.values, caution$y, xlab = "Fitted Values")
abline(lsfit(m1$fitted.values, caution$y))
detach(caution)
nonlinearx <- read_excel("MARData.xlsx", sheet = "nonlinearx")
attach(nonlinearx)
head(nonlinearx)
## # A tibble: 6 x 3
## y x1 x2
## <dbl> <dbl> <dbl>
## 1 -2.87 -3 -0.232
## 2 -2.91 -2.99 -0.123
## 3 -2.84 -2.98 -0.174
## 4 -2.98 -2.97 -0.133
## 5 -2.94 -2.96 -0.173
## 6 -2.85 -2.95 -0.180
#Figure 6.7 on page 162
par(mfrow = c(2,2))
with(nonlinearx, plot(x1,y))
with(nonlinearx, plot(x2,y))
with(nonlinearx, plot(x1,x2))
#Figure 6.8 on page 163
m1 <- with(nonlinearx, lm(y~x1+x2))
summary(m1)
##
## Call:
## lm(formula = y ~ x1 + x2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.78955 -1.00642 0.04537 0.99198 1.90114
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.54913 0.04287 36.132 <2e-16 ***
## x1 0.99280 0.04389 22.618 <2e-16 ***
## x2 0.00388 0.10570 0.037 0.971
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.051 on 598 degrees of freedom
## Multiple R-squared: 0.7302, Adjusted R-squared: 0.7293
## F-statistic: 809.2 on 2 and 598 DF, p-value: < 2.2e-16
StanRes1 <- rstandard(m1)
par(mfrow = c(2,2))
plot(x1,StanRes1, ylab = "Standardized Residuals")
plot(x2,StanRes1, ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1,xlab = "Fitted Values",ylab = "Standardized Residuals")
detach(nonlinearx)
nyc <- read_excel("MARData.xlsx", sheet = "nyc")
attach(nyc)
head(nyc)
## # A tibble: 6 x 7
## Case Restaurant Price Food Decor Service East
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Daniella Ristorante 43 22 18 20 0
## 2 2 Tello's Ristorante 32 20 19 19 0
## 3 3 Biricchino 34 21 13 18 0
## 4 4 Bottino 41 20 20 17 0
## 5 5 Da Umberto 54 24 19 21 0
## 6 6 Le Madri 52 22 22 21 0
#Figure 6.9 on page 165
par(mfrow = c(2,2))
plot(Food,Price)
abline(lsfit(Food,Price))
plot(Decor,Price)
abline(lsfit(Decor,Price))
plot(Service,Price)
abline(lsfit(Service,Price))
plot(East,Price)
abline(lsfit(East,Price))
#Figure 6.10 on page 166
#install.packages("car")
#You will be asked to
#--- Please select a CRAN mirror for use in this session ---
library(car)
m1 <- with(nyc, lm(Price~Food+Decor+Service+East))
par(mfrow = c(2,2))
with(nyc, avPlots(m1,variable = Food,ask = FALSE,identify.points = TRUE))
# Click on the points you wish to identify. When you wish
# to stop click the right mouse button and select "Stop"
with(nyc, avPlots(m1,variable = Decor,ask = FALSE,identify.points = FALSE))
with(nyc,avPlots(m1,variable = Service,ask = FALSE,identify.points = FALSE))
with(nyc, avPlots(m1,variable = East,ask = FALSE,identify.points = FALSE))
detach(nyc)
defects <- read_excel("MARData.xlsx", sheet = "defects")
attach(defects)
head(defects, 10)
## # A tibble: 10 x 5
## Case Temperature Density Rate Defective
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.97 32.1 178. 0.2
## 2 2 2.85 21.1 254. 47.9
## 3 3 2.95 20.6 273. 50.9
## 4 4 2.84 22.5 273. 49.7
## 5 5 1.84 27.4 211. 11
## 6 6 2.05 25.4 236. 15.6
## 7 7 1.5 27.9 219. 5.5
## 8 8 2.48 23.3 239. 37.4
## 9 9 2.23 24.0 252. 27.8
## 10 10 3.02 19.4 282. 58.7
#Figure 6.11 on page 169
pairs(Defective ~ Temperature+Density+Rate)
#Figure 6.12 on page 170
m1 <- lm(Defective ~ Temperature+Density+Rate)
par(mfrow = c(2,2))
StanRes1 <- rstandard(m1)
plot(Temperature,StanRes1,ylab = "Standardized Residuals")
plot(Density,StanRes1,ylab = "Standardized Residuals")
plot(Rate,StanRes1,ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1,ylab = "Standardized Residuals",xlab = "Fitted Values")
#Figure 6.13 on page 170
par(mfrow = c(1,1))
fit1 <- m1$fitted.values
m2 <- lm(Defective~fit1 + I(fit1^2))
plot(fit1,Defective,xlab = "Fitted Values")
fitnew <- seq(-15,60,len = 76)
lines(fitnew,predict(m2,newdata = data.frame(fit1 = fitnew)))
abline(lsfit(m1$fitted.values,Defective),lty = 2)
#Figure 6.14 on page 171
library(alr3)
inverseResponsePlot(m1,key = TRUE)
## lambda RSS
## 1 0.4359947 541.9376
## 2 -1.0000000 6465.6551
## 3 0.0000000 1572.7444
## 4 1.0000000 1156.2704
#Figure 6.15 on page 173
library(MASS)
boxcox(m1,lambda = seq(0.3,0.65,length = 20))
#Figure 6.16 on page 173
par(mfrow = c(2,2))
plot(Temperature,sqrt(Defective),ylab = expression(sqrt(Defective)))
plot(Density,sqrt(Defective),ylab = expression(sqrt(Defective)))
plot(Rate,sqrt(Defective),ylab = expression(sqrt(Defective)))
#Figure 6.17 on page 174
mt <- lm(sqrt(Defective) ~ Temperature+Density+Rate)
par(mfrow = c(2,2))
StanRest <- rstandard(mt)
plot(Temperature,StanRest,ylab = "Standardized Residuals")
plot(Density,StanRest,ylab = "Standardized Residuals")
plot(Rate,StanRest,ylab = "Standardized Residuals")
plot(mt$fitted.values,StanRest,ylab = "Standardized Residuals",xlab = "Fitted Values")
#Figure 6.18 on page 174
par(mfrow = c(1,1))
plot(mt$fitted.values,sqrt(Defective),xlab = "Fitted Values",ylab = expression(sqrt(Defective)))
abline(lsfit(mt$fitted.values,sqrt(Defective)))
#Figure 6.19 on page 175
par(mfrow = c(2,2))
plot(mt)
#Regression output on page 175
summary(mt)
##
## Call:
## lm(formula = sqrt(Defective) ~ Temperature + Density + Rate)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.10147 -0.28502 -0.07716 0.34139 1.13951
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.59297 5.26401 1.062 0.2978
## Temperature 1.56516 0.66226 2.363 0.0259 *
## Density -0.29166 0.11954 -2.440 0.0218 *
## Rate 0.01290 0.01043 1.237 0.2273
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5677 on 26 degrees of freedom
## Multiple R-squared: 0.943, Adjusted R-squared: 0.9365
## F-statistic: 143.5 on 3 and 26 DF, p-value: 2.713e-16
#Figure 6.20 on page 176
library(car)
par(mfrow = c(2,2))
avPlots(mt,variable = Temperature,ask = FALSE,identify.points = FALSE)
avPlots(mt,variable = Density,ask = FALSE,identify.points = FALSE)
avPlots(mt,variable = Rate,ask = FALSE,identify.points = FALSE)
detach(defects)
magazines <- read_excel("MARData.xlsx", sheet = "magazines")
attach(magazines)
head(magazines)
## # A tibble: 6 x 5
## Magazine AdRevenue AdPages SubRevenue NewsRevenue
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Weekly World News 2280 300 854 16568
## 2 National Examiner 3382 380 968 27215
## 3 J-14 4218 250 2206 12453
## 4 Soap Opera Weekly 4622 439 5555 24282
## 5 Easyriders 5121 524. 4155 9929
## 6 Mary Engelbreit's Home Companion 5259 189 9048 4363
#R output on page 177
library(alr3)
summary(powerTransform(AdRevenue~AdPages+SubRevenue+NewsRevenue))
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 0.213 0.21 0.1074 0.3186
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 14.88858 1 0.00011405
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 244.9774 1 < 2.22e-16
#Figure 6.21 on page 178
pairs(AdRevenue~AdPages+SubRevenue+NewsRevenue)
#Figure 6.22 on page 179
tAdPages<- log(AdPages)
tSubRevenue <- log(SubRevenue)
tNewsRevenue <- log(NewsRevenue)
m1 <- lm(AdRevenue~log(AdPages)+log(SubRevenue)+log(NewsRevenue))
library(alr3)
par(mfrow = c(1,1))
inverseResponsePlot(m1,key = TRUE)
## lambda RSS
## 1 0.2308236 245941272773
## 2 -1.0000000 892293703420
## 3 0.0000000 279043725925
## 4 1.0000000 521866609845
#R output on page 179
library(alr3)
summary(tranxy <- powerTransform(AdRevenue ~ AdPages+SubRevenue+NewsRevenue))
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 0.213 0.21 0.1074 0.3186
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 14.88858 1 0.00011405
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 244.9774 1 < 2.22e-16
#Figure 6.23 on page 180
pairs(log(AdRevenue)~log(AdPages)+log(SubRevenue)+log(NewsRevenue))
#Figure 6.24 on page 181
m2 <- lm(log(AdRevenue)~log(AdPages)+log(SubRevenue)+log(NewsRevenue))
par(mfrow = c(2,2))
StanRes2 <- rstandard(m2)
plot(log(AdPages),StanRes2,ylab = "Standardized Residuals")
plot(log(SubRevenue),StanRes2,ylab = "Standardized Residuals")
plot(log(NewsRevenue),StanRes2,ylab = "Standardized Residuals")
plot(m2$fitted.values,StanRes2,ylab = "Standardized Residuals",xlab = "Fitted Values")
#Figure 6.25 on page 181
par(mfrow = c(1,1))
plot(m2$fitted.values,log(AdRevenue),xlab = "Fitted Values")
abline(lsfit(m2$fitted.values,log(AdRevenue)))
#Figure 6.26 on page 182
par(mfrow = c(2,2))
plot(m2)
abline(v = 2*4/204,lty = 2)
#Figure 6.27 on page 183
library(car)
par(mfrow = c(2,2))
avPlots(m2,variable = log(AdPages),ask = FALSE,identify.points = FALSE)
avPlots(m2,variable = log(SubRevenue),ask = FALSE,identify.points = FALSE)
avPlots(m2,variable = log(NewsRevenue),ask = FALSE,identify.points = FALSE)
#Regression output on page 183
summary(m2)
##
## Call:
## lm(formula = log(AdRevenue) ~ log(AdPages) + log(SubRevenue) +
## log(NewsRevenue))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.29980 -0.25827 0.04816 0.33238 0.85795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.02894 0.41407 -4.900 1.98e-06 ***
## log(AdPages) 1.02918 0.05564 18.497 < 2e-16 ***
## log(SubRevenue) 0.55849 0.03159 17.677 < 2e-16 ***
## log(NewsRevenue) 0.04109 0.02414 1.702 0.0903 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4483 on 200 degrees of freedom
## Multiple R-squared: 0.8326, Adjusted R-squared: 0.8301
## F-statistic: 331.6 on 3 and 200 DF, p-value: < 2.2e-16
detach(magazines)
circulation <- read_excel("MARData.xlsx", sheet = "circulation")
attach(circulation)
head(circulation)
## # A tibble: 6 x 4
## Newspaper Sunday Weekday Competitor
## <chr> <dbl> <dbl> <dbl>
## 1 Akron (OH) Beacon Journal 185915 134401 0
## 2 Albuquerque (NM) Journal 154413 109693 0
## 3 Allentown (PA) Morning Call 165607 111594 0
## 4 Atlanta (GA) Journal-Constitution 622065 371853 0
## 5 Austin (TX) American-Statesman 233767 183312 0
## 6 Baltimore (MD) Sun 465807 301186 0
#Figure 6.28 on page 185
par(mfrow = c(1,1))
plot(log(Weekday),log(Sunday),xlab = "log(Weekday Circulation)",ylab = "log(Sunday Circulation)",
pch = Competitor+1,col = Competitor+1)
legend(11.6, 14.1,legend = c("0","1"),pch = 1:2,col = 1:2,title = "Tabloid dummy variable")
#Figure 6.29 on page 186
m1 <- lm(log(Sunday) ~ log(Weekday) + Competitor)
par(mfrow = c(2,2))
StanRes1 <- rstandard(m1)
plot(log(Weekday),StanRes1,ylab = "Standardized Residuals",xlab = "log(Sunday Circulation)")
plot(Competitor,StanRes1,ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1,ylab = "Standardized Residuals",xlab = "Fitted Values")
#Figure 6.30 on page 186
par(mfrow = c(1,1))
plot(m1$fitted.values,log(Sunday),xlab = "Fitted Values",ylab = "log(Sunday Circulation)")
abline(lsfit(m1$fitted.values,log(Sunday)))
#Figure 6.31 on page 187
par(mfrow = c(2,2))
plot(m1)
abline(v = 2*3/89,lty = 2)
#Regression output on page 188
summary(m1)
##
## Call:
## lm(formula = log(Sunday) ~ log(Weekday) + Competitor)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27550 -0.10175 -0.00198 0.08758 0.34881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.44730 0.35138 -1.273 0.206
## log(Weekday) 1.06133 0.02848 37.270 < 2e-16 ***
## Competitor -0.53137 0.06800 -7.814 1.26e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1392 on 86 degrees of freedom
## Multiple R-squared: 0.9427, Adjusted R-squared: 0.9413
## F-statistic: 706.8 on 2 and 86 DF, p-value: < 2.2e-16
#R output on page 188
predict(m1,newdata = data.frame(
Weekday = c(210000),Competitor = c(1)),interval = "prediction",level = 0.95)
## fit lwr upr
## 1 12.02778 11.72066 12.33489
predict(m1,newdata = data.frame(
Weekday = c(210000),Competitor = c(0)),interval = "prediction",level = 0.95)
## fit lwr upr
## 1 12.55915 12.28077 12.83753
#Figure 6.32 on page 189
library(car)
par(mfrow = c(1,2))
avPlots(m1,variable = log(Weekday),ask = FALSE,identify.points = FALSE)
avPlots(m1,variable = Competitor,ask = FALSE,identify.points = FALSE)
detach(circulation)
profsalary <- read_excel("MARData.xlsx", sheet = "profsalary")
attach(profsalary)
head(profsalary)
## # A tibble: 6 x 3
## Case Salary Experience
## <dbl> <dbl> <dbl>
## 1 1 71 26
## 2 2 69 19
## 3 3 73 22
## 4 4 69 17
## 5 5 65 13
## 6 6 75 25
#Figure 6.33 on page 190
library(alr3)
m1 <- lm(Salary~Experience)
par(mfrow = c(1,1))
mmp(m1,Experience,xlab = "Years of Experience",key = NULL)
#Figure 6.34 on page 191
m2 <- lm(Salary~Experience + I(Experience^2))
mmp(m2,Experience,xlab = "Years of Experience",key = NULL)
detach(profsalary)
defects <- read_excel("MARData.xlsx", sheet = "defects")
attach(defects)
head(defects)
## # A tibble: 6 x 5
## Case Temperature Density Rate Defective
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.97 32.1 178. 0.2
## 2 2 2.85 21.1 254. 47.9
## 3 3 2.95 20.6 273. 50.9
## 4 4 2.84 22.5 273. 49.7
## 5 5 1.84 27.4 211. 11
## 6 6 2.05 25.4 236. 15.6
#Figure 6.35 on page 193
m1 <- lm(Defective ~ Temperature+Density+Rate)
loessfit1 <- loess(Defective ~ Temperature,degree = 1,span = 2/3)
loessfit2 <- loess(m1$fitted.values ~ Temperature,degree = 1,span = 2/3)
xx <- seq(min(Temperature),max(Temperature),length = 100)
par(mfrow = c(1,2))
plot(Temperature,Defective,xlab = "Temperature, x1", ylab = "Defective, Y")
lines(xx,predict(loessfit1,data.frame(Temperature = xx)))
plot(Temperature,m1$fitted.values,ylab = expression(hat(Y)),xlab = "Temperature, x1")
lines(xx,predict(loessfit2,data.frame(Temperature = xx)))
#Figure 6.36 on page 193
library(alr3)
par(mfrow = c(1,1))
mmp(m1,Temperature)
#Notice that the figure produced has a legend box in the upper left hand corner, that is
#not in the version of Figure 6.36 in the published book. The reason for this is that
#the figure in the book was produced using an earlier version of the alr3 function 'mmp'.
#Figure 6.37 on page 194
par(mfrow = c(2,2))
mmp(m1,Temperature)
mmp(m1,Density,key = "topright")
mmp(m1,Rate)
mmp(m1,m1$fitted.values,xlab = "Fitted Values")
#Figure 6.38 on page 195
m2 <- lm(sqrt(Defective) ~ Temperature+Density+Rate)
par(mfrow = c(2,2))
mmp(m2,Temperature)
mmp(m2,Density,key = "topright")
mmp(m2,Rate)
mmp(m2,m2$fitted.values,xlab = "Fitted Values")
detach(defects)
bridge <- read_excel("MARData.xlsx", sheet = "bridge")
attach(bridge)
head(bridge)
## # A tibble: 6 x 7
## Case Time DArea CCost Dwgs Length Spans
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 78.8 3.6 82.4 6 90 1
## 2 2 310. 5.33 422. 12 126 2
## 3 3 184. 6.29 180. 9 78 1
## 4 4 69.6 2.2 100 5 60 1
## 5 5 68.8 1.44 103 5 60 1
## 6 6 95.7 5.4 134. 5 60 1
#R output on page 196
library(alr3)
summary(tranxy <- powerTransform(Time ~ DArea+CCost+Dwgs+Length+Spans))
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 0.0691 0 -0.3238 0.4619
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 0.117711 1 0.73153
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 21.73224 1 3.1348e-06
#Figure 6.39 page 197
pairs(Time~DArea+CCost+Dwgs+Length+Spans,data = bridge,cex.labels = 1.4)
#Figure 6.40 page 198
pairs(log(Time)~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans),data = bridge)
#Figure 6.41 page 199
m1 <- lm(log(Time)~log(DArea)+log(CCost)+log(Dwgs)+log(Length)+log(Spans))
StanRes1 <- rstandard(m1)
par(mfrow = c(2,3))
plot(log(DArea),StanRes1, ylab = "Standardized Residuals")
plot(log(CCost),StanRes1, ylab = "Standardized Residuals")
plot(log(Dwgs),StanRes1, ylab = "Standardized Residuals")
plot(log(Length),StanRes1, ylab = "Standardized Residuals")
plot(log(Spans),StanRes1, ylab = "Standardized Residuals")
plot(m1$fitted.values,StanRes1, ylab = "Standardized Residuals",xlab = "Fitted values")
#Figure 6.42 page 199
par(mfrow = c(1,1))
plot(m1$fitted.values,log(Time),xlab = "Fitted Values")
abline(lsfit(m1$fitted.values,log(Time)))
#Figure 6.43 page 200
par(mfrow = c(2,2))
plot(m1)
abline(v = 2*6/45,lty = 2)
#Regression output on page 200
summary(m1)
##
## Call:
## lm(formula = log(Time) ~ log(DArea) + log(CCost) + log(Dwgs) +
## log(Length) + log(Spans))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.68394 -0.17167 -0.02604 0.23157 0.67307
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.28590 0.61926 3.691 0.000681 ***
## log(DArea) -0.04564 0.12675 -0.360 0.720705
## log(CCost) 0.19609 0.14445 1.358 0.182426
## log(Dwgs) 0.85879 0.22362 3.840 0.000440 ***
## log(Length) -0.03844 0.15487 -0.248 0.805296
## log(Spans) 0.23119 0.14068 1.643 0.108349
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3139 on 39 degrees of freedom
## Multiple R-squared: 0.7762, Adjusted R-squared: 0.7475
## F-statistic: 27.05 on 5 and 39 DF, p-value: 1.043e-11
#Figure 6.44 page 201
library(alr3)
mmps(m1,layout = c(2,3))
#R output on page 202
logDArea <- log(DArea)
logCCost <- log(CCost)
logDwgs <- log(Dwgs)
logLength <- log(Length)
logSpans <- log(Spans)
X <- cbind(logDArea,logCCost,logDwgs,logLength,logSpans)
c <- cor(X)
round(c,3)
## logDArea logCCost logDwgs logLength logSpans
## logDArea 1.000 0.909 0.801 0.884 0.782
## logCCost 0.909 1.000 0.831 0.890 0.775
## logDwgs 0.801 0.831 1.000 0.752 0.630
## logLength 0.884 0.890 0.752 1.000 0.858
## logSpans 0.782 0.775 0.630 0.858 1.000
#Figure 6.45 on page 202
library(car)
par(mfrow = c(2,3))
avPlots(m1,variable = log(DArea),ask = FALSE,identify.points = FALSE)
avPlots(m1,variable = log(CCost),ask = FALSE,identify.points = FALSE)
avPlots(m1,variable = log(Dwgs),ask = FALSE,identify.points = FALSE)
avPlots(m1,variable = log(Length),ask = FALSE,identify.points = FALSE)
avPlots(m1,variable = log(Spans),ask = FALSE,identify.points = FALSE)
#R output on page 203
library(car)
vif(m1)
## log(DArea) log(CCost) log(Dwgs) log(Length) log(Spans)
## 7.164619 8.483522 3.408900 8.014174 3.878397
detach(bridge)
Bordeaux <- read_excel("MARData.xlsx", sheet = "Bordeaux")
attach(Bordeaux)
head(Bordeaux)
## # A tibble: 6 x 9
## Wine Price ParkerPoints CoatesPoints P95andAbove FirstGrowth CultWine Pomerol
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Lafi~ 2850 100 19.5 1 1 0 0
## 2 Lato~ 2850 98 18.5 1 1 0 0
## 3 Marg~ 2900 100 19.5 1 1 0 0
## 4 Mout~ 2500 97 17 1 1 0 0
## 5 Haut~ 2500 98 18.5 1 1 0 0
## 6 Chev~ 3650 100 19.5 1 1 0 0
## # ... with 1 more variable: VintageSuperstar <dbl>
#Figure 6.46 on page 205
m1 <- lm(log(Price)~log(ParkerPoints)+log(CoatesPoints)+P95andAbove+FirstGrowth+CultWine+Pomerol+VintageSuperstar)
StanRes1 <- rstandard(m1)
par(mfrow = c(3,3))
plot(log(ParkerPoints),StanRes1, ylab = "Standardized Residuals")
plot(log(CoatesPoints),StanRes1, ylab = "Standardized Residuals")
boxplot(StanRes1~P95andAbove,ylab = "Standardized Residuals",xlab = "P95andAbove")
boxplot(StanRes1~FirstGrowth,ylab = "Standardized Residuals",xlab = "FirstGrowth")
boxplot(StanRes1~CultWine, ylab = "Standardized Residuals",xlab = "CultWine")
boxplot(StanRes1~Pomerol, ylab = "Standardized Residuals",xlab = "Pomerol")
boxplot(StanRes1~VintageSuperstar, ylab = "Standardized Residuals",xlab = "VintageSuperstar")
plot(m1$fitted.values,StanRes1, ylab = "Standardized Residuals",xlab = "Fitted values")
#Figure 6.47 on page 205
par(mfrow = c(1,1))
plot(m1$fitted.values,log(Price),xlab = "Fitted Values")
abline(lsfit(m1$fitted.values,log(Price)))
#Figure 6.48 on page 206
par(mfrow = c(2,2))
plot(m1)
abline(v = 2*8/72,lty = 2)
#Regression output on page 206
summary(m1)
##
## Call:
## lm(formula = log(Price) ~ log(ParkerPoints) + log(CoatesPoints) +
## P95andAbove + FirstGrowth + CultWine + Pomerol + VintageSuperstar)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7379 -0.2008 -0.0111 0.1989 0.6784
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -51.14156 8.98557 -5.692 3.39e-07 ***
## log(ParkerPoints) 11.58862 2.06763 5.605 4.74e-07 ***
## log(CoatesPoints) 1.62053 0.61154 2.650 0.01013 *
## P95andAbove 0.10055 0.13697 0.734 0.46556
## FirstGrowth 0.86970 0.12524 6.944 2.33e-09 ***
## CultWine 1.35317 0.14569 9.288 1.78e-13 ***
## Pomerol 0.53644 0.09366 5.727 2.95e-07 ***
## VintageSuperstar 0.61590 0.22067 2.791 0.00692 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2883 on 64 degrees of freedom
## Multiple R-squared: 0.9278, Adjusted R-squared: 0.9199
## F-statistic: 117.5 on 7 and 64 DF, p-value: < 2.2e-16
#R output on page 206
library(car)
vif(m1)
## log(ParkerPoints) log(CoatesPoints) P95andAbove FirstGrowth
## 5.825135 1.410011 4.012792 1.625091
## CultWine Pomerol VintageSuperstar
## 1.188243 1.124300 1.139201
#Figure 6.49 on page 207
library(alr3)
par(mfrow = c(2,2))
mmp(m1,log(ParkerPoints))
mmp(m1,log(CoatesPoints))
mmp(m1,m1$fitted.values,xlab = "Fitted Values")
#Figure 6.50 on page 208
library(car)
par(mfrow = c(2,4))
avPlots(m1,variable = log(ParkerPoints),ask = FALSE,identify.points = TRUE, main = "")
avPlots(m1,variable = log(CoatesPoints),ask = FALSE,identify.points = TRUE, main = "")
avPlots(m1,variable = P95andAbove,ask = FALSE,identify.points = FALSE, main = "")
avPlots(m1,variable = FirstGrowth,ask = FALSE,identify.points = FALSE, main = "")
avPlots(m1,variable = CultWine,ask = FALSE,identify.points = FALSE, main = "")
avPlots(m1,variable = Pomerol,ask = FALSE,identify.points = FALSE, main = "")
avPlots(m1,variable = VintageSuperstar,ask = FALSE,identify.points = FALSE, main = "")
#Regression output on pages 208-209
m2 <- lm(log(Price)~log(ParkerPoints)+log(CoatesPoints)+FirstGrowth+CultWine+Pomerol+VintageSuperstar)
summary(m2)
##
## Call:
## lm(formula = log(Price) ~ log(ParkerPoints) + log(CoatesPoints) +
## FirstGrowth + CultWine + Pomerol + VintageSuperstar)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70185 -0.19752 -0.03061 0.19347 0.70118
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -56.47547 5.26798 -10.721 5.20e-16 ***
## log(ParkerPoints) 12.78432 1.26915 10.073 6.66e-15 ***
## log(CoatesPoints) 1.60447 0.60898 2.635 0.01052 *
## FirstGrowth 0.86149 0.12430 6.931 2.30e-09 ***
## CultWine 1.33601 0.14330 9.323 1.34e-13 ***
## Pomerol 0.53619 0.09333 5.745 2.64e-07 ***
## VintageSuperstar 0.59470 0.21800 2.728 0.00819 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2873 on 65 degrees of freedom
## Multiple R-squared: 0.9272, Adjusted R-squared: 0.9205
## F-statistic: 138 on 6 and 65 DF, p-value: < 2.2e-16
anova(m2,m1)
## Analysis of Variance Table
##
## Model 1: log(Price) ~ log(ParkerPoints) + log(CoatesPoints) + FirstGrowth +
## CultWine + Pomerol + VintageSuperstar
## Model 2: log(Price) ~ log(ParkerPoints) + log(CoatesPoints) + P95andAbove +
## FirstGrowth + CultWine + Pomerol + VintageSuperstar
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 65 5.3643
## 2 64 5.3195 1 0.044794 0.5389 0.4656
detach(Bordeaux)
storks <- read_excel("MARData.xlsx", sheet = "storks")
attach(storks)
head(storks)
## # A tibble: 6 x 4
## County Women Storks Babies
## <dbl> <dbl> <dbl> <dbl>
## 1 1 1 2 10
## 2 2 1 2 15
## 3 3 1 2 20
## 4 4 1 3 10
## 5 5 1 3 15
## 6 6 1 3 20
#Figure 6.51 on page 211
par(mfrow = c(1,1))
plot(Storks,Babies,xlab = "Number of Storks",ylab = "Number of Babies")
abline(lsfit(Storks,Babies))
#Regression output on page 212
m1 <- lm(Babies ~ Storks,data = storks)
summary(m1)
##
## Call:
## lm(formula = Babies ~ Storks, data = storks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.280 -3.872 0.061 3.720 11.402
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.3293 2.3225 1.864 0.068 .
## Storks 3.6585 0.3475 10.528 1.71e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.451 on 52 degrees of freedom
## Multiple R-squared: 0.6807, Adjusted R-squared: 0.6745
## F-statistic: 110.8 on 1 and 52 DF, p-value: 1.707e-14
#Figure 6.52 on page 212
par(mfrow = c(2,2))
plot(Storks,Babies,xlab = "Number of Storks",ylab = "Number of Babies")
abline(lsfit(Storks,Babies))
plot(Women,Babies,xlab = "Number of Women",ylab = "Number of Babies")
abline(lsfit(Women,Babies))
plot(Storks,Women,xlab = "Number of Storks",ylab = "Number of Women")
abline(lsfit(Storks,Women))
#Regression output on page 213
m2 <- lm(Babies ~ Women + Storks,data = storks)
summary(m2)
##
## Call:
## lm(formula = Babies ~ Women + Storks, data = storks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5 -5 0 5 5
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.000e+01 2.021e+00 4.948 8.56e-06 ***
## Women 5.000e+00 8.272e-01 6.045 1.74e-07 ***
## Storks -2.239e-15 6.619e-01 0.000 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.201 on 51 degrees of freedom
## Multiple R-squared: 0.814, Adjusted R-squared: 0.8067
## F-statistic: 111.6 on 2 and 51 DF, p-value: < 2.2e-16
detach(storks)
#################EXERCISES
#Exercise 6.7.3
cars04 <- read_excel("MARData.xlsx", sheet = "cars04")
attach(cars04)
head(cars04)
## # A tibble: 6 x 13
## `Vehicle Name` Hybrid SuggestedRetail~ DealerCost EngineSize Cylinders
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chevrolet Aveo 4dr 0 11690 10965 1.6 4
## 2 Chevrolet Aveo LS 4dr~ 0 12585 11802 1.6 4
## 3 Chevrolet Cavalier 2dr 0 14610 13697 2.2 4
## 4 Chevrolet Cavalier 4dr 0 14810 13884 2.2 4
## 5 Chevrolet Cavalier LS~ 0 16385 15357 2.2 4
## 6 Dodge Neon SE 4dr 0 13670 12849 2 4
## # ... with 7 more variables: Horsepower <dbl>, CityMPG <dbl>, HighwayMPG <dbl>,
## # Weight <dbl>, WheelBase <dbl>, Length <dbl>, Width <dbl>
#Figure 6.53 on page 217
pairs(~EngineSize+Cylinders+Horsepower+HighwayMPG+Weight+WheelBase+Hybrid,
data = cars04,gap = 0.4,cex.labels = 0.85)
#Output from R for model (6.36) on pages 217 and 218
m1 <- lm(SuggestedRetailPrice~EngineSize+Cylinders+Horsepower+HighwayMPG+Weight+WheelBase+Hybrid)
summary(m1)
##
## Call:
## lm(formula = SuggestedRetailPrice ~ EngineSize + Cylinders +
## Horsepower + HighwayMPG + Weight + WheelBase + Hybrid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17436 -4134 173 3561 46392
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -68965.793 16180.381 -4.262 2.97e-05 ***
## EngineSize -6957.457 1600.137 -4.348 2.08e-05 ***
## Cylinders 3564.755 969.633 3.676 0.000296 ***
## Horsepower 179.702 16.411 10.950 < 2e-16 ***
## HighwayMPG 637.939 202.724 3.147 0.001873 **
## Weight 11.911 2.658 4.481 1.18e-05 ***
## WheelBase 47.607 178.070 0.267 0.789444
## Hybrid 431.759 6092.087 0.071 0.943562
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7533 on 226 degrees of freedom
## Multiple R-squared: 0.7819, Adjusted R-squared: 0.7751
## F-statistic: 115.7 on 7 and 226 DF, p-value: < 2.2e-16
library(alr3)
summary(tranxy <-
powerTransform(EngineSize ~ Cylinders+Horsepower+HighwayMPG+Weight+WheelBase))
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 0.5916 0.5 0.3334 0.8497
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 18.9585 1 1.3359e-05
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 9.786179 1 0.0017583
#Figure 6.54 on page 218
par(mfrow = c(2,2))
plot(m1)
#Output from R for model (6.37) on pages 219 and 220
tSuggestedRetailPrice <- log(SuggestedRetailPrice)
tEngineSize <- EngineSize^0.25
tCylinders <- log(Cylinders)
tHorsepower <- log(Horsepower)
tHighwayMPG <- 1/HighwayMPG
tWheelBase <- log(WheelBase)
m2 <- lm(tSuggestedRetailPrice~tEngineSize+tCylinders+tHorsepower+tHighwayMPG+Weight+tWheelBase+Hybrid)
summary(m2)
##
## Call:
## lm(formula = tSuggestedRetailPrice ~ tEngineSize + tCylinders +
## tHorsepower + tHighwayMPG + Weight + tWheelBase + Hybrid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42288 -0.10983 -0.00203 0.10279 0.70068
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.703e+00 2.010e+00 2.838 0.00496 **
## tEngineSize -1.575e+00 3.332e-01 -4.727 4.01e-06 ***
## tCylinders 2.335e-01 1.204e-01 1.940 0.05359 .
## tHorsepower 8.992e-01 8.876e-02 10.130 < 2e-16 ***
## tHighwayMPG 8.029e-01 4.758e+00 0.169 0.86614
## Weight 5.043e-04 6.367e-05 7.920 1.07e-13 ***
## tWheelBase -6.385e-02 4.715e-01 -0.135 0.89240
## Hybrid 6.422e-01 1.150e-01 5.582 6.78e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1789 on 226 degrees of freedom
## Multiple R-squared: 0.8621, Adjusted R-squared: 0.8578
## F-statistic: 201.8 on 7 and 226 DF, p-value: < 2.2e-16
#Note that the output that appears in the book is incorrect as it does not coincide what is
#produced by the previous commands.
#Figure 6.55 on page 219
pairs(~tEngineSize+tCylinders+tHorsepower+tHighwayMPG+Weight+tWheelBase+Hybrid,
data = cars04,gap = 0.4,cex.labels = 0.82)
#Figure 6.56 on page 220
par(mfrow = c(2,2))
plot(m2)
#Output from R for model (6.37) on page 220
library(car)
round(vif(m2),2)
## tEngineSize tCylinders tHorsepower tHighwayMPG Weight tWheelBase
## 8.67 7.17 5.96 4.59 8.20 4.78
## Hybrid
## 1.22
#Regression output on pages 220 and 221
m3 <- lm(tSuggestedRetailPrice~tEngineSize+tCylinders+tHorsepower+Weight+Hybrid)
summary(m3)
##
## Call:
## lm(formula = tSuggestedRetailPrice ~ tEngineSize + tCylinders +
## tHorsepower + Weight + Hybrid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42224 -0.11001 -0.00099 0.10191 0.70205
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.422e+00 3.291e-01 16.474 < 2e-16 ***
## tEngineSize -1.591e+00 3.157e-01 -5.041 9.45e-07 ***
## tCylinders 2.375e-01 1.186e-01 2.003 0.0463 *
## tHorsepower 9.049e-01 8.305e-02 10.896 < 2e-16 ***
## Weight 5.029e-04 5.203e-05 9.666 < 2e-16 ***
## Hybrid 6.340e-01 1.080e-01 5.870 1.53e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1781 on 228 degrees of freedom
## Multiple R-squared: 0.862, Adjusted R-squared: 0.859
## F-statistic: 284.9 on 5 and 228 DF, p-value: < 2.2e-16
#Figure 6.57 on page 221
library(alr3)
par(mfrow = c(3,3))
mmp(m2,tEngineSize,key = NULL)
mmp(m2,tCylinders,key = NULL)
mmp(m2,tHorsepower,key = NULL)
mmp(m2,tHighwayMPG,key = NULL)
mmp(m2,Weight,key = NULL)
mmp(m2,tWheelBase,key = NULL)
mmp(m2,m2$fitted.values,xlab = "Fitted Values",key = NULL)
detach(cars04)
#Exercise 6.7.4
krafft <- read_excel("MARData.xlsx", sheet = "krafft")
attach(krafft)
head(krafft)
## # A tibble: 6 x 6
## RA VTINV DIPINV HEAT KPOINT GROUP
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6.38 0.0051 0.0382 -296. 7 1
## 2 6.89 0.0047 0.0346 -303. 16 1
## 3 7.77 0.0041 0.0358 -314. 11 1
## 4 7.88 0.0044 0.0316 -310. 20.8 1
## 5 7.88 0.0041 0.029 -317. 21 1
## 6 8.38 0.0038 0.0268 -336. 31.5 1
#Figure 6.58 on page 222
pairs(KPOINT~RA+VTINV+DIPINV+HEAT,data = krafft,gap = 0.4)
#Output from R for model (6.38) on page 223 and 224
m1 <- lm(KPOINT~RA+VTINV+DIPINV+HEAT)
summary(m1)
##
## Call:
## lm(formula = KPOINT ~ RA + VTINV + DIPINV + HEAT)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.1451 -2.7920 -0.4552 1.9715 7.5934
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.031e+01 3.368e+01 2.088 0.046369 *
## RA 1.047e+01 2.418e+00 4.331 0.000184 ***
## VTINV 9.038e+03 4.409e+03 2.050 0.050217 .
## DIPINV -1.826e+03 3.765e+02 -4.850 4.56e-05 ***
## HEAT 3.550e-01 2.176e-02 16.312 1.66e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.919 on 27 degrees of freedom
## Multiple R-squared: 0.9446, Adjusted R-squared: 0.9363
## F-statistic: 115 on 4 and 27 DF, p-value: < 2.2e-16
library(car)
vif(m1)
## RA VTINV DIPINV HEAT
## 25.792770 22.834190 13.621363 2.389645
#Figure 6.59 on page 223
par(mfrow = c(2,2))
plot(m1)
par(mfrow = c(2,2))
plot(m1)
#Figure 6.60 on page 224
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
plot(RA,StanRes1, ylab = "Standardized Residuals")
plot(VTINV,StanRes1, ylab = "Standardized Residuals")
plot(DIPINV,StanRes1, ylab = "Standardized Residuals")
plot(HEAT,StanRes1, ylab = "Standardized Residuals")
detach(krafft)