CHAPTER 5 OUTPUT

This shows the outputs from Chapter 5 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.

profsalary <- read_excel("MARData.xlsx", sheet  =  "profsalary")
attach(profsalary)
head(profsalary, 10)
## # A tibble: 10 x 3
##     Case Salary Experience
##    <dbl>  <dbl>      <dbl>
##  1     1     71         26
##  2     2     69         19
##  3     3     73         22
##  4     4     69         17
##  5     5     65         13
##  6     6     75         25
##  7     7     66         35
##  8     8     66         16
##  9     9     67         16
## 10    10     69         16
#Figure 5.1 on page 126
plot(Experience,Salary,xlab = "Years of Experience")

#Figure 5.2 on page 127
m1 <- lm(Salary~Experience)
leverage1 <- hatvalues(m1)
StanRes1 <- rstandard(m1)
ExperienceNew <- seq(0,37,len = 37)
plot(Experience,StanRes1,xlab = "Years of Experience", ylab = "Standardized Residuals")

#Figure 5.3 on page 127
m2 <- lm(Salary~Experience + I(Experience^2))
plot(Experience,Salary,xlab = "Years of Experience")
ExperienceNew <- seq(0,37,len = 37)
lines(ExperienceNew,predict(m2,newdata = data.frame(Experience = ExperienceNew)))

#Figure 5.4 on page 128
StanRes2 <- rstandard(m2)
plot(Experience,StanRes2,xlab = "Years of Experience", ylab = "Standardized Residuals")

#Figure 5.5 on page 128
leverage2 <- hatvalues(m2)
plot(Experience,leverage2,xlab = "Years of Experience",ylab = "Leverage")
abline(h = 6/max(Case),lty = 2)

#Figure 5.6 on page 129
par(mfrow = c(2,2))
plot(m2)

#Regression output on pages 129 & 130
summary(m2)
## 
## Call:
## lm(formula = Salary ~ Experience + I(Experience^2))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.5786 -2.3573  0.0957  2.0171  5.5176 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     34.720498   0.828724   41.90   <2e-16 ***
## Experience       2.872275   0.095697   30.01   <2e-16 ***
## I(Experience^2) -0.053316   0.002477  -21.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.817 on 140 degrees of freedom
## Multiple R-squared:  0.9247, Adjusted R-squared:  0.9236 
## F-statistic: 859.3 on 2 and 140 DF,  p-value: < 2.2e-16
predict(m2,newdata = data.frame(Experience = c(10)),interval = "prediction",level = 0.95)
##        fit      lwr      upr
## 1 58.11164 52.50481 63.71847
detach(profsalary)
nyc <- read_excel("MARData.xlsx", sheet  =  "nyc")
attach(nyc)
head(nyc, 10)
## # A tibble: 10 x 7
##     Case Restaurant          Price  Food Decor Service  East
##    <dbl> <chr>               <dbl> <dbl> <dbl>   <dbl> <dbl>
##  1     1 Daniella Ristorante    43    22    18      20     0
##  2     2 Tello's Ristorante     32    20    19      19     0
##  3     3 Biricchino             34    21    13      18     0
##  4     4 Bottino                41    20    20      17     0
##  5     5 Da Umberto             54    24    19      21     0
##  6     6 Le Madri               52    22    22      21     0
##  7     7 Le Zie                 34    22    16      21     0
##  8     8 Pasticcio              34    20    18      21     1
##  9     9 Belluno                39    22    19      22     1
## 10    10 Cinque Terre           44    21    17      19     1
#Regression output on pages 138 & 139
m1 <- lm(Price~Food+Decor+Service+East)
summary(m1)
## 
## Call:
## lm(formula = Price ~ Food + Decor + Service + East)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.0465  -3.8837   0.0373   3.3942  17.7491 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -24.023800   4.708359  -5.102 9.24e-07 ***
## Food          1.538120   0.368951   4.169 4.96e-05 ***
## Decor         1.910087   0.217005   8.802 1.87e-15 ***
## Service      -0.002727   0.396232  -0.007   0.9945    
## East          2.068050   0.946739   2.184   0.0304 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.738 on 163 degrees of freedom
## Multiple R-squared:  0.6279, Adjusted R-squared:  0.6187 
## F-statistic: 68.76 on 4 and 163 DF,  p-value: < 2.2e-16
#Regression output on page 139
m2 <- lm(Price~Food+Decor+East)
summary(m2)
## 
## Call:
## lm(formula = Price ~ Food + Decor + East)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.0451  -3.8809   0.0389   3.3918  17.7557 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -24.0269     4.6727  -5.142 7.67e-07 ***
## Food          1.5363     0.2632   5.838 2.76e-08 ***
## Decor         1.9094     0.1900  10.049  < 2e-16 ***
## East          2.0670     0.9318   2.218   0.0279 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.72 on 164 degrees of freedom
## Multiple R-squared:  0.6279, Adjusted R-squared:  0.6211 
## F-statistic: 92.24 on 3 and 164 DF,  p-value: < 2.2e-16
#An alternative way to obtain m2 is to use the update command
m2 <- update(m1,~.-Service)
summary(m2)
## 
## Call:
## lm(formula = Price ~ Food + Decor + East)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.0451  -3.8809   0.0389   3.3918  17.7557 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -24.0269     4.6727  -5.142 7.67e-07 ***
## Food          1.5363     0.2632   5.838 2.76e-08 ***
## Decor         1.9094     0.1900  10.049  < 2e-16 ***
## East          2.0670     0.9318   2.218   0.0279 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.72 on 164 degrees of freedom
## Multiple R-squared:  0.6279, Adjusted R-squared:  0.6211 
## F-statistic: 92.24 on 3 and 164 DF,  p-value: < 2.2e-16
detach(nyc)
travel <- read_excel("MARData.xlsx", sheet  =  "travel")
attach(travel)
head(travel, 10)
## # A tibble: 10 x 4
##    Amount   Age Segment     C
##     <dbl> <dbl> <chr>   <dbl>
##  1    997    44 A           0
##  2    997    43 A           0
##  3    951    41 A           0
##  4    649    59 A           0
##  5   1265    25 A           0
##  6   1059    38 A           0
##  7    837    46 A           0
##  8    924    42 A           0
##  9    852    48 A           0
## 10    963    39 A           0
#Regression output on page 141
mfull <- lm(Amount~Age+C+C:Age)
summary(mfull)
## 
## Call:
## lm(formula = Amount ~ Age + C + C:Age)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -143.298  -30.541   -0.034   31.108  130.743 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1814.5445     8.6011   211.0   <2e-16 ***
## Age           -20.3175     0.1878  -108.2   <2e-16 ***
## C           -1821.2337    12.5736  -144.8   <2e-16 ***
## Age:C          40.4461     0.2724   148.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.63 on 921 degrees of freedom
## Multiple R-squared:  0.9601, Adjusted R-squared:  0.9599 
## F-statistic:  7379 on 3 and 921 DF,  p-value: < 2.2e-16
#Figure 5.7 on page 142
par(mfrow = c(1,1))
plot(Age[C == 0],Amount[C == 0],pch = c("A"),col = c("black"),ylab = "Amount Spent",xlab = "Age")
points(Age[C == 1],Amount[C == 1],pch = c("C"),col = c("red"))

#Regression output on page 143
mreduced <- lm(Amount~Age)
summary(mreduced)
## 
## Call:
## lm(formula = Amount ~ Age)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -545.06 -199.03    6.34  198.74  497.39 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 957.9103    31.3056  30.599   <2e-16 ***
## Age          -1.1140     0.6784  -1.642    0.101    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 237.7 on 923 degrees of freedom
## Multiple R-squared:  0.002913,   Adjusted R-squared:  0.001833 
## F-statistic: 2.697 on 1 and 923 DF,  p-value: 0.1009
#Regression output on page 144
anova(mreduced,mfull)
## Analysis of Variance Table
## 
## Model 1: Amount ~ Age
## Model 2: Amount ~ Age + C + C:Age
##   Res.Df      RSS Df Sum of Sq     F    Pr(>F)    
## 1    923 52158945                                 
## 2    921  2089377  2  50069568 11035 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
detach(travel)
nyc <- read_excel("MARData.xlsx", sheet  =  "nyc")
attach(nyc)
head(nyc, 10)
## # A tibble: 10 x 7
##     Case Restaurant          Price  Food Decor Service  East
##    <dbl> <chr>               <dbl> <dbl> <dbl>   <dbl> <dbl>
##  1     1 Daniella Ristorante    43    22    18      20     0
##  2     2 Tello's Ristorante     32    20    19      19     0
##  3     3 Biricchino             34    21    13      18     0
##  4     4 Bottino                41    20    20      17     0
##  5     5 Da Umberto             54    24    19      21     0
##  6     6 Le Madri               52    22    22      21     0
##  7     7 Le Zie                 34    22    16      21     0
##  8     8 Pasticcio              34    20    18      21     1
##  9     9 Belluno                39    22    19      22     1
## 10    10 Cinque Terre           44    21    17      19     1
#Regression output on page 145
mfull <- lm(Price~Food+Decor+Service+East+Food:East+Decor:East+Service:East)
summary(mfull)
## 
## Call:
## lm(formula = Price ~ Food + Decor + Service + East + Food:East + 
##     Decor:East + Service:East)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.5099  -3.7996  -0.1413   3.6522  17.1656 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -26.9949     8.4672  -3.188  0.00172 ** 
## Food           1.0068     0.5704   1.765  0.07946 .  
## Decor          1.8881     0.2984   6.327  2.4e-09 ***
## Service        0.7438     0.6443   1.155  0.25001    
## East           6.1253    10.2499   0.598  0.55095    
## Food:East      1.2077     0.7743   1.560  0.12079    
## Decor:East    -0.2500     0.4570  -0.547  0.58510    
## Service:East  -1.2719     0.8171  -1.557  0.12151    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.713 on 160 degrees of freedom
## Multiple R-squared:  0.6379, Adjusted R-squared:  0.622 
## F-statistic: 40.27 on 7 and 160 DF,  p-value: < 2.2e-16
#Regression output on page 146
mreduced <- lm(Price~Food+Decor+East)
summary(mreduced)
## 
## Call:
## lm(formula = Price ~ Food + Decor + East)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.0451  -3.8809   0.0389   3.3918  17.7557 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -24.0269     4.6727  -5.142 7.67e-07 ***
## Food          1.5363     0.2632   5.838 2.76e-08 ***
## Decor         1.9094     0.1900  10.049  < 2e-16 ***
## East          2.0670     0.9318   2.218   0.0279 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.72 on 164 degrees of freedom
## Multiple R-squared:  0.6279, Adjusted R-squared:  0.6211 
## F-statistic: 92.24 on 3 and 164 DF,  p-value: < 2.2e-16
#Regression output on page 146
anova(mreduced,mfull)
## Analysis of Variance Table
## 
## Model 1: Price ~ Food + Decor + East
## Model 2: Price ~ Food + Decor + Service + East + Food:East + Decor:East + 
##     Service:East
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1    164 5366.5                           
## 2    160 5222.2  4    144.36 1.1057 0.3558
detach(nyc)
#################EXERCISES

#Ex 5.4.3

latour <- read_excel("MARData.xlsx", sheet  =  "Latour")
attach(latour)
head(latour, 10)
## # A tibble: 10 x 4
##    Vintage Quality EndofHarvest  Rain
##      <dbl>   <dbl>        <dbl> <dbl>
##  1    1961       5           28     0
##  2    1962       4           50     0
##  3    1963       1           53     1
##  4    1964       3           38     0
##  5    1965       1           46     1
##  6    1966       4           40     0
##  7    1967       3           35     1
##  8    1968       2           38     1
##  9    1969       2           45     1
## 10    1970       4           47     0
#Regression output on page 148
mfull <- lm(Quality ~ EndofHarvest + Rain + Rain:EndofHarvest)
summary(mfull)
## 
## Call:
## lm(formula = Quality ~ EndofHarvest + Rain + Rain:EndofHarvest)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6833 -0.5703  0.1265  0.4385  1.6354 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        5.16122    0.68917   7.489 3.95e-09 ***
## EndofHarvest      -0.03145    0.01760  -1.787   0.0816 .  
## Rain               1.78670    1.31740   1.356   0.1826    
## EndofHarvest:Rain -0.08314    0.03160  -2.631   0.0120 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7578 on 40 degrees of freedom
## Multiple R-squared:  0.6848, Adjusted R-squared:  0.6612 
## F-statistic: 28.97 on 3 and 40 DF,  p-value: 4.017e-10
#Figure 5.8 on page 149
y  =  Rain
par(mfrow = c(1,1))
plot(EndofHarvest,Quality,pch = y+1,col = y+1,xlab = "End of Harvest (in days since August 31)")
abline(lsfit(EndofHarvest[y == 0],Quality[y == 0]),lty = 1,col = 1)
abline(lsfit(EndofHarvest[y == 1],Quality[y == 1]),lty = 2,col = 2)
legend(23, 2.5,legend = c("No","Yes"),pch = 1:2,col = 1:2,title = "Rain at Harvest?")

#Regression output on page 149
mreduced <- lm(Quality ~ EndofHarvest + Rain)
summary(mreduced)
## 
## Call:
## lm(formula = Quality ~ EndofHarvest + Rain)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4563 -0.7366  0.1430  0.6413  1.7652 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   6.14633    0.61896   9.930  1.8e-12 ***
## EndofHarvest -0.05723    0.01564  -3.660 0.000713 ***
## Rain         -1.62219    0.25478  -6.367  1.3e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8107 on 41 degrees of freedom
## Multiple R-squared:  0.6303, Adjusted R-squared:  0.6123 
## F-statistic: 34.95 on 2 and 41 DF,  p-value: 1.383e-09
anova(mreduced,mfull)
## Analysis of Variance Table
## 
## Model 1: Quality ~ EndofHarvest + Rain
## Model 2: Quality ~ EndofHarvest + Rain + Rain:EndofHarvest
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1     41 26.945                              
## 2     40 22.971  1    3.9749 6.9218 0.01203 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
detach(latour)