CHAPTER 2 OUTPUT

This shows the outputs from Chapter 2 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.

Some important formulas

  1. Variance of the slope of the regression line \[ Var{(\hat{\beta_0}|X)} = \sigma^{2}(\frac{1}{n} + \frac{\bar{x}^2}{SXX})\]

  2. Variance of the intercept of the regression line \[ Var{(\hat{\beta_1}|X)} = \frac{\sigma^{2}}{SXX}\]

  3. Confidence interval for the population regression line \[ Var{(\hat{y}|X=x^*}) = \sigma^2\lgroup\frac{1}{n}+\frac{(x^*-\bar{x})^2}{SXX}\rgroup \]

  4. Prediction intervals for the actual Value of Y \[ Var{(Y^*-{\hat{y}}^*)} = \sigma^2\lgroup1+\frac{1}{n}+\frac{(x^*-\bar{x})^2}{SXX}\rgroup \]

  5. Unbiased estimate of \(\sigma^2\) \[ {S^2} = \frac{RSS}{n-2} = \frac{1}{n-2}\sum_{i=1}^{n}{\hat{e_i}^2} \]

ANOVA Table for Simple Linear Regression

Anova Table

production <- read_excel("MARData.xlsx", sheet = "production")
attach(production)
str(production)
## tibble [20 x 3] (S3: tbl_df/tbl/data.frame)
##  $ Case   : num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
##  $ RunTime: num [1:20] 195 215 243 162 185 231 234 166 253 196 ...
##  $ RunSize: num [1:20] 175 189 344 88 114 338 271 173 284 277 ...
head(production)
## # A tibble: 6 x 3
##    Case RunTime RunSize
##   <dbl>   <dbl>   <dbl>
## 1     1     195     175
## 2     2     215     189
## 3     3     243     344
## 4     4     162      88
## 5     5     185     114
## 6     6     231     338
#Figure 2.1 on page 16
par(mfrow = c(1,1))
plot(production$RunSize, production$RunTime, xlab = "Run Size", ylab = "Run Time")

#R output on page 19
m1 <- lm(RunTime ~ RunSize)
summary(m1)
## 
## Call:
## lm(formula = RunTime ~ RunSize)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -28.597 -11.079   3.329   8.302  29.627 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 149.74770    8.32815   17.98 6.00e-13 ***
## RunSize       0.25924    0.03714    6.98 1.61e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.25 on 18 degrees of freedom
## Multiple R-squared:  0.7302, Adjusted R-squared:  0.7152 
## F-statistic: 48.72 on 1 and 18 DF,  p-value: 1.615e-06
#Figure 2.3 on page 20
plot(production$RunSize, production$RunTime, xlab = "Run Size", ylab = "Run Time")
abline(lsfit(production$RunSize, production$RunTime))

#t-value on page 23
tval <- qt(1 - 0.05/2,18)
tval
## [1] 2.100922
#95% confidence intervals on page 24
round(confint(m1,level = 0.95), 3)
##               2.5 %  97.5 %
## (Intercept) 132.251 167.244
## RunSize       0.181   0.337
#R output on page 27
predict(m1,newdata = data.frame(RunSize = c(50,100,150,200,250,300,350)),interval = "confidence",level = 0.95)
##        fit      lwr      upr
## 1 162.7099 148.6204 176.7994
## 2 175.6720 164.6568 186.6872
## 3 188.6342 179.9969 197.2714
## 4 201.5963 193.9600 209.2326
## 5 214.5585 206.0455 223.0714
## 6 227.5206 216.7006 238.3407
## 7 240.4828 226.6220 254.3435
predict(m1,newdata = data.frame(RunSize = c(50,100,150,200,250,300,350)),interval = "prediction",level = 0.95)
##        fit      lwr      upr
## 1 162.7099 125.7720 199.6478
## 2 175.6720 139.7940 211.5500
## 3 188.6342 153.4135 223.8548
## 4 201.5963 166.6076 236.5850
## 5 214.5585 179.3681 249.7489
## 6 227.5206 191.7021 263.3392
## 7 240.4828 203.6315 277.3340
#R output on page 30
anova(m1)
## Analysis of Variance Table
## 
## Response: RunTime
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## RunSize    1 12868.4 12868.4  48.717 1.615e-06 ***
## Residuals 18  4754.6   264.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
detach(production)
changeover_times <- read_excel("MARData.xlsx", sheet = "changeover_times")
attach(changeover_times)
str(changeover_times)
## tibble [120 x 3] (S3: tbl_df/tbl/data.frame)
##  $ Method    : chr [1:120] "Existing" "Existing" "Existing" "Existing" ...
##  $ Changeover: num [1:120] 19 24 39 12 29 19 23 22 12 29 ...
##  $ New       : num [1:120] 0 0 0 0 0 0 0 0 0 0 ...
head(changeover_times)
## # A tibble: 6 x 3
##   Method   Changeover   New
##   <chr>         <dbl> <dbl>
## 1 Existing         19     0
## 2 Existing         24     0
## 3 Existing         39     0
## 4 Existing         12     0
## 5 Existing         29     0
## 6 Existing         19     0
#R output on page 31
m1 <- lm(Changeover ~ New)
summary(m1)
## 
## Call:
## lm(formula = Changeover ~ New)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.861  -5.861  -1.861   4.312  25.312 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  17.8611     0.8905  20.058   <2e-16 ***
## New          -3.1736     1.4080  -2.254    0.026 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.556 on 118 degrees of freedom
## Multiple R-squared:  0.04128,    Adjusted R-squared:  0.03315 
## F-statistic: 5.081 on 1 and 118 DF,  p-value: 0.02604
#Figure 2.5 on page 32
par(mfrow = c(2,2))
plot(New,Changeover, xlab = "Dummy variable, New", ylab = "Change Over Time")
abline(lsfit(New,Changeover))
boxplot(Changeover ~ New, xlab = "Dummy variable, New", ylab = "Change Over Time")
boxplot(Changeover ~ Method, ylab = "Change Over Time", xlab = "Method")

#t-value on page 33
tval <- qt(1 - 0.05/2, 118)
tval
## [1] 1.980272
detach(changeover_times)

#################EXERCISES

#Exercise 2.8.1
playbill <- read_excel("MARData.xlsx", sheet = "playbill")
attach(playbill)
str(playbill)
## tibble [18 x 3] (S3: tbl_df/tbl/data.frame)
##  $ Production : chr [1:18] "42nd Street" "Avenue Q" "Beauty and Beast" "Bombay Dreams" ...
##  $ CurrentWeek: num [1:18] 684966 502367 594474 529298 570254 ...
##  $ LastWeek   : num [1:18] 695437 498969 598576 528994 562964 ...
head(playbill)
## # A tibble: 6 x 3
##   Production       CurrentWeek LastWeek
##   <chr>                  <dbl>    <dbl>
## 1 42nd Street           684966   695437
## 2 Avenue Q              502367   498969
## 3 Beauty and Beast      594474   598576
## 4 Bombay Dreams         529298   528994
## 5 Chicago               570254   562964
## 6 Dracula               319959   282778
#Figure 2.6 on page 38
par(mfrow = c(1,1))
plot(LastWeek, CurrentWeek, xlab = "Gross box office results previous week",
ylab = "Gross box office results current week")

detach(playbill)
#Exercise 2.8.3
invoice <- read_excel("MARData.xlsx", sheet = "invoices")
attach(invoice)
str(invoice)
## tibble [30 x 3] (S3: tbl_df/tbl/data.frame)
##  $ Day     : num [1:30] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Invoices: num [1:30] 149 60 188 23 201 58 77 222 181 30 ...
##  $ Time    : num [1:30] 2.1 1.8 2.3 0.8 2.7 1 1.7 3.1 2.8 1 ...
head(invoice)
## # A tibble: 6 x 3
##     Day Invoices  Time
##   <dbl>    <dbl> <dbl>
## 1     1      149   2.1
## 2     2       60   1.8
## 3     3      188   2.3
## 4     4       23   0.8
## 5     5      201   2.7
## 6     6       58   1
#Figure 2.7 on page 40
par(mfrow = c(1,1))
plot(Invoices, Time, xlab = "Number of Invoices", ylab = "Processing Time")
abline(lsfit(Invoices, Time))

#Output from R on page 40
m1 <- lm(Time ~ Invoices)
summary(m1)
## 
## Call:
## lm(formula = Time ~ Invoices)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.59516 -0.27851  0.03485  0.19346  0.53083 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.6417099  0.1222707   5.248 1.41e-05 ***
## Invoices    0.0112916  0.0008184  13.797 5.17e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3298 on 28 degrees of freedom
## Multiple R-squared:  0.8718, Adjusted R-squared:  0.8672 
## F-statistic: 190.4 on 1 and 28 DF,  p-value: 5.175e-14
mean(Time)
## [1] 2.11
median(Time)
## [1] 2
mean(Invoices)
## [1] 130.0333
median(Invoices)
## [1] 127.5
detach(invoice)
#Exercise 2.8.5
problem5 <- read_excel("MARData.xlsx", sheet = "Ch2Problem5")
attach(problem5)
str(problem5)
## tibble [100 x 3] (S3: tbl_df/tbl/data.frame)
##  $ y : num [1:100] -0.6412 -1.1823 -0.5518 -0.0702 0.2046 ...
##  $ x1: num [1:100] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
##  $ x2: num [1:100] 1.2 3.6 4.2 8.8 1.7 5.5 5.7 5.9 8.1 0.8 ...
head(problem5)
## # A tibble: 6 x 3
##         y    x1    x2
##     <dbl> <dbl> <dbl>
## 1 -0.641    0.1   1.2
## 2 -1.18     0.2   3.6
## 3 -0.552    0.3   4.2
## 4 -0.0702   0.4   8.8
## 5  0.205    0.5   1.7
## 6  1.51     0.6   5.5
#Figure 2.8 on page 42
par(mfrow = c(1,2))
plot(x1, y, main = "Model 1")
abline(lsfit(x1,y))
plot(x2, y, main = "Model 2")
abline(lsfit(x2,y))

detach(problem5)