A Modern Approach to Regression with R (Simon J. Sheather)

CHAPTER 1 GRAPHS

This shows the outputs from Chapter 1 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.

kicker <- read_excel("MARData.xlsx", sheet = "FieldGoals2003to2006")
attach(kicker)

head(kicker, 10)

## # A tibble: 10 x 10
##    Name           Yeart Teamt  FGAt   FGt `Team(t-1)` FGAtM1 FGtM1 FGAtM2 FGtM2
##    <chr>          <dbl> <chr> <dbl> <dbl> <chr>        <dbl> <dbl>  <dbl> <dbl>
##  1 Adam Vinatieri  2003 NE       34  73.5 NE              30  90       NA  NA  
##  2 Adam Vinatieri  2004 NE       33  93.9 NE              34  73.5     30  90  
##  3 Adam Vinatieri  2005 NE       25  80   NE              33  93.9     34  73.5
##  4 Adam Vinatieri  2006 IND      19  89.4 NE              25  80       33  93.9
##  5 David Akers     2003 PHI      29  82.7 PHI             34  88.2     NA  NA  
##  6 David Akers     2004 PHI      32  84.3 PHI             29  82.7     34  88.2
##  7 David Akers     2005 PHI      22  72.7 PHI             32  84.3     29  82.7
##  8 David Akers     2006 PHI      12  83.3 PHI             22  72.7     32  84.3
##  9 Jason Elam      2003 DEN      31  87   DEN             36  72.2     NA  NA  
## 10 Jason Elam      2004 DEN      34  85.2 DEN             31  87       36  72.2

#Figure 1.1 on page 2
cor(kicker$FGtM1, kicker$FGt)

## [1] -0.1391935

plot(kicker$FGtM1, kicker$FGt,
main = "Unadjusted Correlation = -0.139",
xlab = "Field Goal Percentage in Year t-1", ylab = "Field Goal Percentage in Year t")

#p-values on page 3
fit.1 <- lm(FGt ~ FGtM1 + Name + FGtM1:Name, data = kicker)
anova(fit.1)

## Analysis of Variance Table
## 
## Response: FGt
##            Df  Sum Sq Mean Sq F value   Pr(>F)   
## FGtM1       1   87.20  87.199  1.9008 0.176047   
## Name       18 2252.47 125.137  2.7279 0.004565 **
## FGtM1:Name 18  417.75  23.209  0.5059 0.938592   
## Residuals  38 1743.20  45.874                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

fit.1

## 
## Call:
## lm(formula = FGt ~ FGtM1 + Name + FGtM1:Name, data = kicker)
## 
## Coefficients:
##                    (Intercept)                           FGtM1  
##                      158.37358                        -0.87935  
##                NameDavid Akers                  NameJason Elam  
##                      -60.27937                       -37.84384  
##               NameJason Hanson                   NameJay Feely  
##                      -65.37924                       -60.38853  
##                  NameJeff Reed                NameJeff Wilkins  
##                      -28.28698                       -17.74434  
##                NameJohn Carney                   NameJohn Hall  
##                      -22.35998                       -81.49901  
##                 NameKris Brown                 NameMatt Stover  
##                      -24.19535                      -115.88691  
##            NameMike Vanderjagt                NameNeil Rackers  
##                       -1.15485                       -10.06826  
##                NameOlindo Mare                 NamePhil Dawson  
##                       74.81204                       -71.79379  
##               NameRian Lindell               NameRyan Longwell  
##                      -72.42881                       -63.94690  
##       NameSebastian Janikowski               NameShayne Graham  
##                      -45.09448                       -61.94539  
##          FGtM1:NameDavid Akers            FGtM1:NameJason Elam  
##                        0.66778                         0.41499  
##         FGtM1:NameJason Hanson             FGtM1:NameJay Feely  
##                        0.79387                         0.61078  
##            FGtM1:NameJeff Reed          FGtM1:NameJeff Wilkins  
##                        0.23334                         0.23672  
##          FGtM1:NameJohn Carney             FGtM1:NameJohn Hall  
##                        0.18513                         0.90633  
##           FGtM1:NameKris Brown           FGtM1:NameMatt Stover  
##                        0.09746                         1.43900  
##      FGtM1:NameMike Vanderjagt          FGtM1:NameNeil Rackers  
##                        0.07937                         0.03265  
##          FGtM1:NameOlindo Mare           FGtM1:NamePhil Dawson  
##                       -1.15930                         0.88930  
##         FGtM1:NameRian Lindell         FGtM1:NameRyan Longwell  
##                        0.82606                         0.73920  
## FGtM1:NameSebastian Janikowski         FGtM1:NameShayne Graham  
##                        0.49264                         0.76279

#slope and intercepts of lines in Figure 1.2 on page 3
fit.2 <- lm(FGt ~ Name + FGtM1, data = kicker)
anova(fit.2)

## Analysis of Variance Table
## 
## Response: FGt
##           Df  Sum Sq Mean Sq F value  Pr(>F)    
## Name      18 1569.68   87.20  2.2599 0.01051 *  
## FGtM1      1  769.99  769.99 19.9538 3.9e-05 ***
## Residuals 56 2160.96   38.59                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

fit.2

## 
## Call:
## lm(formula = FGt ~ Name + FGtM1, data = kicker)
## 
## Coefficients:
##              (Intercept)           NameDavid Akers            NameJason Elam  
##                 126.6872                   -4.6463                   -3.0167  
##         NameJason Hanson             NameJay Feely             NameJeff Reed  
##                   2.1172                  -10.3737                   -8.2955  
##         NameJeff Wilkins           NameJohn Carney             NameJohn Hall  
##                   2.3102                   -5.9774                   -8.4865  
##           NameKris Brown           NameMatt Stover       NameMike Vanderjagt  
##                 -13.3598                    8.7363                    4.8955  
##         NameNeil Rackers           NameOlindo Mare           NamePhil Dawson  
##                  -6.6200                  -13.0365                    3.5524  
##         NameRian Lindell         NameRyan Longwell  NameSebastian Janikowski  
##                  -4.8674                   -2.2315                   -3.9763  
##        NameShayne Graham                     FGtM1  
##                   2.1350                   -0.5037

#Figure 1.2 on page 3
with(kicker, plot(FGtM1, FGt,
main = "Slope of each line = -0.504",
xlab = "Field Goal Percentage in Year t-1",
ylab = "Field Goal Percentage in Year t"))

## Plot one line for each player, same slope of fit.2$coef[20]= -0.504
## Let the intercept vary for each player

tt <- seq(60, 100,length = 1001)
slope.piece <- fit.2$coef[20]*tt
lines(tt, fit.2$coef[1] + slope.piece,lty = 2)

for (i in 2:19) {
  lines(tt,fit.2$coef[1] + fit.2$coef[i] + slope.piece,lty = 2)
  }

detach(kicker)

###########################################################
circulation <- read_excel("MARData.xlsx", sheet = "circulation")
attach(circulation)
head(circulation, 10)

## # A tibble: 10 x 4
##    Newspaper                         Sunday Weekday Competitor
##    <chr>                              <dbl>   <dbl>      <dbl>
##  1 Akron (OH) Beacon Journal         185915  134401          0
##  2 Albuquerque (NM) Journal          154413  109693          0
##  3 Allentown (PA) Morning Call       165607  111594          0
##  4 Atlanta (GA) Journal-Constitution 622065  371853          0
##  5 Austin (TX) American-Statesman    233767  183312          0
##  6 Baltimore (MD) Sun                465807  301186          0
##  7 Bergen County (NJ) Record         227806  179270          0
##  8 Birmingham (AL) News              186747  148938          0
##  9 Boston (MA) Herald                151589  241457          1
## 10 Boston (MA) Globe                 706153  450538          0

#Figure 1.3 on page 5
plot(Weekday, Sunday, xlab = "Weekday Circulation", ylab = "Sunday Circulation", 
     pch = Competitor + 1, col = Competitor + 1)
legend(110000, 1600000,legend = c("0","1"),
pch = 1:2,col = 1:2, title = "Tabloid dummy variable")

#Figure 1.4 on page 5
plot(log(Weekday),log(Sunday), xlab = "log(Weekday Circulation)", ylab = "log(Sunday Circulation)",
pch = Competitor + 1,
col = Competitor + 1)
legend(11.6, 14.1,legend = c("0","1"), pch = 1:2, col = 1:2,
title = "Tabloid dummy variable")

detach(circulation)

nyc <- read_excel("MARData.xlsx", sheet = "nyc")
attach(nyc)
head(nyc)

## # A tibble: 6 x 7
##    Case Restaurant          Price  Food Decor Service  East
##   <dbl> <chr>               <dbl> <dbl> <dbl>   <dbl> <dbl>
## 1     1 Daniella Ristorante    43    22    18      20     0
## 2     2 Tello's Ristorante     32    20    19      19     0
## 3     3 Biricchino             34    21    13      18     0
## 4     4 Bottino                41    20    20      17     0
## 5     5 Da Umberto             54    24    19      21     0
## 6     6 Le Madri               52    22    22      21     0

#Figure 1.5 on page 7
pairs(Price ~ Food + Decor + Service, data = nyc, gap = 0.4,
cex.labels = 1.5)

#Figure 1.6 on page 10
boxplot(Price ~ East, ylab = "Price",
xlab = "East (1 = East of Fifth Avenue)")

detach(nyc)

Bordeaux <- read_excel("MARData.xlsx", sheet = "Bordeaux")
attach(Bordeaux)
head(Bordeaux)

## # A tibble: 6 x 9
##   Wine  Price ParkerPoints CoatesPoints P95andAbove FirstGrowth CultWine Pomerol
##   <chr> <dbl>        <dbl>        <dbl>       <dbl>       <dbl>    <dbl>   <dbl>
## 1 Lafi~  2850          100         19.5           1           1        0       0
## 2 Lato~  2850           98         18.5           1           1        0       0
## 3 Marg~  2900          100         19.5           1           1        0       0
## 4 Mout~  2500           97         17             1           1        0       0
## 5 Haut~  2500           98         18.5           1           1        0       0
## 6 Chev~  3650          100         19.5           1           1        0       0
## # ... with 1 more variable: VintageSuperstar <dbl>

#Figure 1.7 on page 10
pairs(Price ~ ParkerPoints + CoatesPoints, data = Bordeaux, gap = 0.4, cex.labels = 1.5)

#Figure 1.8 on page 11
par(mfrow = c(2,3))
boxplot(Price ~ P95andAbove, ylab = "Price", xlab = "P95andAbove")
boxplot(Price ~ FirstGrowth, ylab = "Price", xlab = "First Growth")
boxplot(Price ~ CultWine,ylab = "Price", xlab = "Cult Wine")
boxplot(Price ~ Pomerol,ylab = "Price", xlab = "Pomerol")
boxplot(Price ~ VintageSuperstar,ylab = "Price", xlab = "Vintage Superstar")

#Figure 1.9 on page 12
par(mfrow = c(1,1))

pairs(log(Price) ~ log(ParkerPoints) + log(CoatesPoints), data = Bordeaux, gap = 0.4, cex.labels = 1.5)

#Figure 1.10 on page 13
par(mfrow = c(2,3))
boxplot(log(Price) ~ P95andAbove, ylab = "log(Price)",
xlab = "P95andAbove")
boxplot(log(Price) ~ FirstGrowth, ylab = "log(Price)",
xlab = "First Growth")
boxplot(log(Price)~CultWine, ylab = "log(Price)",
xlab = "Cult Wine")
boxplot(log(Price)~ Pomerol, ylab = "log(Price)",
xlab = "Pomerol")
boxplot(log(Price) ~ VintageSuperstar, ylab = "log(Price)",
xlab = "Vintage Superstar")

detach(Bordeaux)

A Modern Approach to Regression with R (Simon J. Sheather)

A Student

2022-03-30

CHAPTER 1 GRAPHS