This shows the outputs from Chapter 2 using R. The textbook is “A Modern Approach to Regression with R” by Simon J. Sheather (2008). The R code provided with the book has been updated.
Variance of the slope of the regression line \[ Var{(\hat{\beta_0}|X)} = \sigma^{2}(\frac{1}{n} + \frac{\bar{x}^2}{SXX})\]
Variance of the intercept of the regression line \[ Var{(\hat{\beta_1}|X)} = \frac{\sigma^{2}}{SXX}\]
Confidence interval for the population regression line \[ Var{(\hat{y}|X=x^*}) = \sigma^2\lgroup\frac{1}{n}+\frac{(x^*-\bar{x})^2}{SXX}\rgroup \]
Prediction intervals for the actual Value of Y \[ Var{(Y^*-{\hat{y}}^*)} = \sigma^2\lgroup1+\frac{1}{n}+\frac{(x^*-\bar{x})^2}{SXX}\rgroup \]
Unbiased estimate of \(\sigma^2\) \[ {S^2} = \frac{RSS}{n-2} = \frac{1}{n-2}\sum_{i=1}^{n}{\hat{e_i}^2} \]
Anova Table
production <- read_excel("MARData.xlsx", sheet = "production")
attach(production)
str(production)
## tibble [20 x 3] (S3: tbl_df/tbl/data.frame)
## $ Case : num [1:20] 1 2 3 4 5 6 7 8 9 10 ...
## $ RunTime: num [1:20] 195 215 243 162 185 231 234 166 253 196 ...
## $ RunSize: num [1:20] 175 189 344 88 114 338 271 173 284 277 ...
head(production)
## # A tibble: 6 x 3
## Case RunTime RunSize
## <dbl> <dbl> <dbl>
## 1 1 195 175
## 2 2 215 189
## 3 3 243 344
## 4 4 162 88
## 5 5 185 114
## 6 6 231 338
#Figure 2.1 on page 16
par(mfrow = c(1,1))
plot(production$RunSize, production$RunTime, xlab = "Run Size", ylab = "Run Time")
#R output on page 19
m1 <- lm(RunTime ~ RunSize)
summary(m1)
##
## Call:
## lm(formula = RunTime ~ RunSize)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.597 -11.079 3.329 8.302 29.627
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 149.74770 8.32815 17.98 6.00e-13 ***
## RunSize 0.25924 0.03714 6.98 1.61e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.25 on 18 degrees of freedom
## Multiple R-squared: 0.7302, Adjusted R-squared: 0.7152
## F-statistic: 48.72 on 1 and 18 DF, p-value: 1.615e-06
#Figure 2.3 on page 20
plot(production$RunSize, production$RunTime, xlab = "Run Size", ylab = "Run Time")
abline(lsfit(production$RunSize, production$RunTime))
#t-value on page 23
tval <- qt(1 - 0.05/2,18)
tval
## [1] 2.100922
#95% confidence intervals on page 24
round(confint(m1,level = 0.95), 3)
## 2.5 % 97.5 %
## (Intercept) 132.251 167.244
## RunSize 0.181 0.337
#R output on page 27
predict(m1,newdata = data.frame(RunSize = c(50,100,150,200,250,300,350)),interval = "confidence",level = 0.95)
## fit lwr upr
## 1 162.7099 148.6204 176.7994
## 2 175.6720 164.6568 186.6872
## 3 188.6342 179.9969 197.2714
## 4 201.5963 193.9600 209.2326
## 5 214.5585 206.0455 223.0714
## 6 227.5206 216.7006 238.3407
## 7 240.4828 226.6220 254.3435
predict(m1,newdata = data.frame(RunSize = c(50,100,150,200,250,300,350)),interval = "prediction",level = 0.95)
## fit lwr upr
## 1 162.7099 125.7720 199.6478
## 2 175.6720 139.7940 211.5500
## 3 188.6342 153.4135 223.8548
## 4 201.5963 166.6076 236.5850
## 5 214.5585 179.3681 249.7489
## 6 227.5206 191.7021 263.3392
## 7 240.4828 203.6315 277.3340
#R output on page 30
anova(m1)
## Analysis of Variance Table
##
## Response: RunTime
## Df Sum Sq Mean Sq F value Pr(>F)
## RunSize 1 12868.4 12868.4 48.717 1.615e-06 ***
## Residuals 18 4754.6 264.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
detach(production)
changeover_times <- read_excel("MARData.xlsx", sheet = "changeover_times")
attach(changeover_times)
str(changeover_times)
## tibble [120 x 3] (S3: tbl_df/tbl/data.frame)
## $ Method : chr [1:120] "Existing" "Existing" "Existing" "Existing" ...
## $ Changeover: num [1:120] 19 24 39 12 29 19 23 22 12 29 ...
## $ New : num [1:120] 0 0 0 0 0 0 0 0 0 0 ...
head(changeover_times)
## # A tibble: 6 x 3
## Method Changeover New
## <chr> <dbl> <dbl>
## 1 Existing 19 0
## 2 Existing 24 0
## 3 Existing 39 0
## 4 Existing 12 0
## 5 Existing 29 0
## 6 Existing 19 0
#R output on page 31
m1 <- lm(Changeover ~ New)
summary(m1)
##
## Call:
## lm(formula = Changeover ~ New)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.861 -5.861 -1.861 4.312 25.312
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.8611 0.8905 20.058 <2e-16 ***
## New -3.1736 1.4080 -2.254 0.026 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.556 on 118 degrees of freedom
## Multiple R-squared: 0.04128, Adjusted R-squared: 0.03315
## F-statistic: 5.081 on 1 and 118 DF, p-value: 0.02604
#Figure 2.5 on page 32
par(mfrow = c(2,2))
plot(New,Changeover, xlab = "Dummy variable, New", ylab = "Change Over Time")
abline(lsfit(New,Changeover))
boxplot(Changeover ~ New, xlab = "Dummy variable, New", ylab = "Change Over Time")
boxplot(Changeover ~ Method, ylab = "Change Over Time", xlab = "Method")
#t-value on page 33
tval <- qt(1 - 0.05/2, 118)
tval
## [1] 1.980272
detach(changeover_times)
#################EXERCISES
#Exercise 2.8.1
playbill <- read_excel("MARData.xlsx", sheet = "playbill")
attach(playbill)
str(playbill)
## tibble [18 x 3] (S3: tbl_df/tbl/data.frame)
## $ Production : chr [1:18] "42nd Street" "Avenue Q" "Beauty and Beast" "Bombay Dreams" ...
## $ CurrentWeek: num [1:18] 684966 502367 594474 529298 570254 ...
## $ LastWeek : num [1:18] 695437 498969 598576 528994 562964 ...
head(playbill)
## # A tibble: 6 x 3
## Production CurrentWeek LastWeek
## <chr> <dbl> <dbl>
## 1 42nd Street 684966 695437
## 2 Avenue Q 502367 498969
## 3 Beauty and Beast 594474 598576
## 4 Bombay Dreams 529298 528994
## 5 Chicago 570254 562964
## 6 Dracula 319959 282778
#Figure 2.6 on page 38
par(mfrow = c(1,1))
plot(LastWeek, CurrentWeek, xlab = "Gross box office results previous week",
ylab = "Gross box office results current week")
detach(playbill)
#Exercise 2.8.3
invoice <- read_excel("MARData.xlsx", sheet = "invoices")
attach(invoice)
str(invoice)
## tibble [30 x 3] (S3: tbl_df/tbl/data.frame)
## $ Day : num [1:30] 1 2 3 4 5 6 7 8 9 10 ...
## $ Invoices: num [1:30] 149 60 188 23 201 58 77 222 181 30 ...
## $ Time : num [1:30] 2.1 1.8 2.3 0.8 2.7 1 1.7 3.1 2.8 1 ...
head(invoice)
## # A tibble: 6 x 3
## Day Invoices Time
## <dbl> <dbl> <dbl>
## 1 1 149 2.1
## 2 2 60 1.8
## 3 3 188 2.3
## 4 4 23 0.8
## 5 5 201 2.7
## 6 6 58 1
#Figure 2.7 on page 40
par(mfrow = c(1,1))
plot(Invoices, Time, xlab = "Number of Invoices", ylab = "Processing Time")
abline(lsfit(Invoices, Time))
#Output from R on page 40
m1 <- lm(Time ~ Invoices)
summary(m1)
##
## Call:
## lm(formula = Time ~ Invoices)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59516 -0.27851 0.03485 0.19346 0.53083
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6417099 0.1222707 5.248 1.41e-05 ***
## Invoices 0.0112916 0.0008184 13.797 5.17e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3298 on 28 degrees of freedom
## Multiple R-squared: 0.8718, Adjusted R-squared: 0.8672
## F-statistic: 190.4 on 1 and 28 DF, p-value: 5.175e-14
mean(Time)
## [1] 2.11
median(Time)
## [1] 2
mean(Invoices)
## [1] 130.0333
median(Invoices)
## [1] 127.5
detach(invoice)
#Exercise 2.8.5
problem5 <- read_excel("MARData.xlsx", sheet = "Ch2Problem5")
attach(problem5)
str(problem5)
## tibble [100 x 3] (S3: tbl_df/tbl/data.frame)
## $ y : num [1:100] -0.6412 -1.1823 -0.5518 -0.0702 0.2046 ...
## $ x1: num [1:100] 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1 ...
## $ x2: num [1:100] 1.2 3.6 4.2 8.8 1.7 5.5 5.7 5.9 8.1 0.8 ...
head(problem5)
## # A tibble: 6 x 3
## y x1 x2
## <dbl> <dbl> <dbl>
## 1 -0.641 0.1 1.2
## 2 -1.18 0.2 3.6
## 3 -0.552 0.3 4.2
## 4 -0.0702 0.4 8.8
## 5 0.205 0.5 1.7
## 6 1.51 0.6 5.5
#Figure 2.8 on page 42
par(mfrow = c(1,2))
plot(x1, y, main = "Model 1")
abline(lsfit(x1,y))
plot(x2, y, main = "Model 2")
abline(lsfit(x2,y))
detach(problem5)