This shows the outputs from Chapter 1 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.
kicker <- read_excel("MARData.xlsx", sheet = "FieldGoals2003to2006")
attach(kicker)
head(kicker, 10)
## # A tibble: 10 x 10
## Name Yeart Teamt FGAt FGt `Team(t-1)` FGAtM1 FGtM1 FGAtM2 FGtM2
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adam Vinatieri 2003 NE 34 73.5 NE 30 90 NA NA
## 2 Adam Vinatieri 2004 NE 33 93.9 NE 34 73.5 30 90
## 3 Adam Vinatieri 2005 NE 25 80 NE 33 93.9 34 73.5
## 4 Adam Vinatieri 2006 IND 19 89.4 NE 25 80 33 93.9
## 5 David Akers 2003 PHI 29 82.7 PHI 34 88.2 NA NA
## 6 David Akers 2004 PHI 32 84.3 PHI 29 82.7 34 88.2
## 7 David Akers 2005 PHI 22 72.7 PHI 32 84.3 29 82.7
## 8 David Akers 2006 PHI 12 83.3 PHI 22 72.7 32 84.3
## 9 Jason Elam 2003 DEN 31 87 DEN 36 72.2 NA NA
## 10 Jason Elam 2004 DEN 34 85.2 DEN 31 87 36 72.2
#Figure 1.1 on page 2
cor(kicker$FGtM1, kicker$FGt)
## [1] -0.1391935
plot(kicker$FGtM1, kicker$FGt,
main = "Unadjusted Correlation = -0.139",
xlab = "Field Goal Percentage in Year t-1", ylab = "Field Goal Percentage in Year t")
#p-values on page 3
fit.1 <- lm(FGt ~ FGtM1 + Name + FGtM1:Name, data = kicker)
anova(fit.1)
## Analysis of Variance Table
##
## Response: FGt
## Df Sum Sq Mean Sq F value Pr(>F)
## FGtM1 1 87.20 87.199 1.9008 0.176047
## Name 18 2252.47 125.137 2.7279 0.004565 **
## FGtM1:Name 18 417.75 23.209 0.5059 0.938592
## Residuals 38 1743.20 45.874
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.1
##
## Call:
## lm(formula = FGt ~ FGtM1 + Name + FGtM1:Name, data = kicker)
##
## Coefficients:
## (Intercept) FGtM1
## 158.37358 -0.87935
## NameDavid Akers NameJason Elam
## -60.27937 -37.84384
## NameJason Hanson NameJay Feely
## -65.37924 -60.38853
## NameJeff Reed NameJeff Wilkins
## -28.28698 -17.74434
## NameJohn Carney NameJohn Hall
## -22.35998 -81.49901
## NameKris Brown NameMatt Stover
## -24.19535 -115.88691
## NameMike Vanderjagt NameNeil Rackers
## -1.15485 -10.06826
## NameOlindo Mare NamePhil Dawson
## 74.81204 -71.79379
## NameRian Lindell NameRyan Longwell
## -72.42881 -63.94690
## NameSebastian Janikowski NameShayne Graham
## -45.09448 -61.94539
## FGtM1:NameDavid Akers FGtM1:NameJason Elam
## 0.66778 0.41499
## FGtM1:NameJason Hanson FGtM1:NameJay Feely
## 0.79387 0.61078
## FGtM1:NameJeff Reed FGtM1:NameJeff Wilkins
## 0.23334 0.23672
## FGtM1:NameJohn Carney FGtM1:NameJohn Hall
## 0.18513 0.90633
## FGtM1:NameKris Brown FGtM1:NameMatt Stover
## 0.09746 1.43900
## FGtM1:NameMike Vanderjagt FGtM1:NameNeil Rackers
## 0.07937 0.03265
## FGtM1:NameOlindo Mare FGtM1:NamePhil Dawson
## -1.15930 0.88930
## FGtM1:NameRian Lindell FGtM1:NameRyan Longwell
## 0.82606 0.73920
## FGtM1:NameSebastian Janikowski FGtM1:NameShayne Graham
## 0.49264 0.76279
#slope and intercepts of lines in Figure 1.2 on page 3
fit.2 <- lm(FGt ~ Name + FGtM1, data = kicker)
anova(fit.2)
## Analysis of Variance Table
##
## Response: FGt
## Df Sum Sq Mean Sq F value Pr(>F)
## Name 18 1569.68 87.20 2.2599 0.01051 *
## FGtM1 1 769.99 769.99 19.9538 3.9e-05 ***
## Residuals 56 2160.96 38.59
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fit.2
##
## Call:
## lm(formula = FGt ~ Name + FGtM1, data = kicker)
##
## Coefficients:
## (Intercept) NameDavid Akers NameJason Elam
## 126.6872 -4.6463 -3.0167
## NameJason Hanson NameJay Feely NameJeff Reed
## 2.1172 -10.3737 -8.2955
## NameJeff Wilkins NameJohn Carney NameJohn Hall
## 2.3102 -5.9774 -8.4865
## NameKris Brown NameMatt Stover NameMike Vanderjagt
## -13.3598 8.7363 4.8955
## NameNeil Rackers NameOlindo Mare NamePhil Dawson
## -6.6200 -13.0365 3.5524
## NameRian Lindell NameRyan Longwell NameSebastian Janikowski
## -4.8674 -2.2315 -3.9763
## NameShayne Graham FGtM1
## 2.1350 -0.5037
#Figure 1.2 on page 3
with(kicker, plot(FGtM1, FGt,
main = "Slope of each line = -0.504",
xlab = "Field Goal Percentage in Year t-1",
ylab = "Field Goal Percentage in Year t"))
## Plot one line for each player, same slope of fit.2$coef[20]= -0.504
## Let the intercept vary for each player
tt <- seq(60, 100,length = 1001)
slope.piece <- fit.2$coef[20]*tt
lines(tt, fit.2$coef[1] + slope.piece,lty = 2)
for (i in 2:19) {
lines(tt,fit.2$coef[1] + fit.2$coef[i] + slope.piece,lty = 2)
}
detach(kicker)
###########################################################
circulation <- read_excel("MARData.xlsx", sheet = "circulation")
attach(circulation)
head(circulation, 10)
## # A tibble: 10 x 4
## Newspaper Sunday Weekday Competitor
## <chr> <dbl> <dbl> <dbl>
## 1 Akron (OH) Beacon Journal 185915 134401 0
## 2 Albuquerque (NM) Journal 154413 109693 0
## 3 Allentown (PA) Morning Call 165607 111594 0
## 4 Atlanta (GA) Journal-Constitution 622065 371853 0
## 5 Austin (TX) American-Statesman 233767 183312 0
## 6 Baltimore (MD) Sun 465807 301186 0
## 7 Bergen County (NJ) Record 227806 179270 0
## 8 Birmingham (AL) News 186747 148938 0
## 9 Boston (MA) Herald 151589 241457 1
## 10 Boston (MA) Globe 706153 450538 0
#Figure 1.3 on page 5
plot(Weekday, Sunday, xlab = "Weekday Circulation", ylab = "Sunday Circulation",
pch = Competitor + 1, col = Competitor + 1)
legend(110000, 1600000,legend = c("0","1"),
pch = 1:2,col = 1:2, title = "Tabloid dummy variable")
#Figure 1.4 on page 5
plot(log(Weekday),log(Sunday), xlab = "log(Weekday Circulation)", ylab = "log(Sunday Circulation)",
pch = Competitor + 1,
col = Competitor + 1)
legend(11.6, 14.1,legend = c("0","1"), pch = 1:2, col = 1:2,
title = "Tabloid dummy variable")
detach(circulation)
nyc <- read_excel("MARData.xlsx", sheet = "nyc")
attach(nyc)
head(nyc)
## # A tibble: 6 x 7
## Case Restaurant Price Food Decor Service East
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Daniella Ristorante 43 22 18 20 0
## 2 2 Tello's Ristorante 32 20 19 19 0
## 3 3 Biricchino 34 21 13 18 0
## 4 4 Bottino 41 20 20 17 0
## 5 5 Da Umberto 54 24 19 21 0
## 6 6 Le Madri 52 22 22 21 0
#Figure 1.5 on page 7
pairs(Price ~ Food + Decor + Service, data = nyc, gap = 0.4,
cex.labels = 1.5)
#Figure 1.6 on page 10
boxplot(Price ~ East, ylab = "Price",
xlab = "East (1 = East of Fifth Avenue)")
detach(nyc)
Bordeaux <- read_excel("MARData.xlsx", sheet = "Bordeaux")
attach(Bordeaux)
head(Bordeaux)
## # A tibble: 6 x 9
## Wine Price ParkerPoints CoatesPoints P95andAbove FirstGrowth CultWine Pomerol
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Lafi~ 2850 100 19.5 1 1 0 0
## 2 Lato~ 2850 98 18.5 1 1 0 0
## 3 Marg~ 2900 100 19.5 1 1 0 0
## 4 Mout~ 2500 97 17 1 1 0 0
## 5 Haut~ 2500 98 18.5 1 1 0 0
## 6 Chev~ 3650 100 19.5 1 1 0 0
## # ... with 1 more variable: VintageSuperstar <dbl>
#Figure 1.7 on page 10
pairs(Price ~ ParkerPoints + CoatesPoints, data = Bordeaux, gap = 0.4, cex.labels = 1.5)
#Figure 1.8 on page 11
par(mfrow = c(2,3))
boxplot(Price ~ P95andAbove, ylab = "Price", xlab = "P95andAbove")
boxplot(Price ~ FirstGrowth, ylab = "Price", xlab = "First Growth")
boxplot(Price ~ CultWine,ylab = "Price", xlab = "Cult Wine")
boxplot(Price ~ Pomerol,ylab = "Price", xlab = "Pomerol")
boxplot(Price ~ VintageSuperstar,ylab = "Price", xlab = "Vintage Superstar")
#Figure 1.9 on page 12
par(mfrow = c(1,1))
pairs(log(Price) ~ log(ParkerPoints) + log(CoatesPoints), data = Bordeaux, gap = 0.4, cex.labels = 1.5)
#Figure 1.10 on page 13
par(mfrow = c(2,3))
boxplot(log(Price) ~ P95andAbove, ylab = "log(Price)",
xlab = "P95andAbove")
boxplot(log(Price) ~ FirstGrowth, ylab = "log(Price)",
xlab = "First Growth")
boxplot(log(Price)~CultWine, ylab = "log(Price)",
xlab = "Cult Wine")
boxplot(log(Price)~ Pomerol, ylab = "log(Price)",
xlab = "Pomerol")
boxplot(log(Price) ~ VintageSuperstar, ylab = "log(Price)",
xlab = "Vintage Superstar")
detach(Bordeaux)