env <- new.env()
load("../data.rda", envir = env)
df <- get("CH01TA01", envir = env)
names(df) <- c('x', 'y')
fit <- lm(y ~ x, df)
As the authors explain, we're looking for B = t(1 - 0.10/4; 23) for the 90% joint confidence interval. But notice that the confint funtion uses a 95% confidence interval t(1 - 0.05/2; 23) that equals B. Therefore, all we require in this instance is to use confint(fit) to get the correct result.
The reason this works is because the Bonferroni correction is just a modification to the t-statistic. The confint function uses the level parameter to produce the alpha = 0.05 in the above case. Therefore, what we require is a level to turn 1 - 0.05/2 into 1 - 0.1/4, or a level such that:
1 - a/2 = 1 - (1 - level)/2 = 1 - alpha/4, alpha = 0.1, a = (1 - level)
The 4, as we shall see in multiple regression, is really just 2g, where g is the number of confidence coefficients in the family, in this case 2. It turns out that a level = 1 - alpha/g = 1 - 0.1/2 will give us the desired result:
1 - a/2 = 1 - (1 - level)/2 = 1 - (1 - (1 - alpha/g))/2 = 1 - alpha/2g
In this case, 1 - alpha/g = 0.95, just is the default level of confint.
confint(fit, level = (1 - 0.1/2))
## 2.5 % 97.5 %
## (Intercept) 8.214 116.518
## x 2.852 4.288
ci.wh <- function(model, newdata, alpha = 0.1)
{
df <- nrow(model.frame(model)) - length(coef(model)) # 23
W <- sqrt( 2 * qf(1 - alpha, 2, df) ) # 2.2580
ci <- predict(model, newdata, se.fit = TRUE)
x <- cbind(
'x' = newdata,
's' = ci$se.fit,
'fit' = ci$fit,
'lwr' = ci$fit - W * ci$se.fit,
'upr' = ci$fit + W * ci$se.fit)
return(x)
}
new <- data.frame(x = c(30, 65, 100))
ci.wh(fit, new)
## x s fit lwr upr
## 1 30 16.970 169.5 131.2 207.8
## 2 65 9.918 294.4 272.0 316.8
## 3 100 14.272 419.4 387.2 451.6
As in the above Bonferroni correction, we require level = (1 - alpha/g). For clarity, part of the return object includes se.fit (set to true), which are the standard errors and residual.scale, which is equal to the positive squart root of the MSE. This will be used in calculations below.
predict(fit, new, int = "c", level = (1 - 0.1/nrow(new)), se.fit = TRUE)
## $fit
## fit lwr upr
## 1 169.5 131.1 207.9
## 2 294.4 272.0 316.9
## 3 419.4 387.1 451.7
##
## $se.fit
## 1 2 3
## 16.970 9.918 14.272
##
## $df
## [1] 23
##
## $residual.scale
## [1] 48.82
ci.sim <- function(model, newdata, type = c("B", "S"), alpha = 0.05)
{
g <- nrow(newdata)
CI <- predict(model, newdata, se.fit = TRUE)
M <- ifelse(match.arg(type) == "B",
qt(1 - alpha / (2*g), model$df), # B = (4.9a)
sqrt( g * qf( 1 - alpha, g, model$df))) # S = (4.8a)
spred <- sqrt( CI$residual.scale^2 + (CI$se.fit)^2 ) # (2.38)
x <- data.frame(
"x" = newdata,
"spred" = spred,
"fit" = CI$fit,
"lower" = CI$fit - M * spred,
"upper" = CI$fit + M * spred)
return(x)
}
new <- data.frame(x = c(80, 100))
ci.sim(fit, new, type = "S")
## x spred fit lower upper
## 1 80 49.91 348.0 217.4 478.6
## 2 100 50.87 419.4 286.3 552.5
ci.sim(fit, new, type = "B")
## x spred fit lower upper
## 1 80 49.91 348.0 228.3 467.7
## 2 100 50.87 419.4 297.4 541.4
df <- get("CH04TA02", envir = env)
names(df) <- c('x', 'y')
Included in the summary and anova output are the values calculated on pages 163 and 164.
fit <- lm(y ~ 0 + x, df) # alternate way to exclude intercept: lm(y ~ x - 1)
tab <- transform(df,
x = round(x, 0),
y = round(y, 0),
xy = round(x * y, 0),
xsq = round(x * x, 0),
fit = round(fitted(fit), 2),
e = round(resid(fit), 2))
rbind(tab, Total = colSums(tab))
## x y xy xsq fit e
## 1 20 114 2280 400 93.71 20.29
## 2 196 921 180516 38416 918.31 2.69
## 3 115 560 64400 13225 538.81 21.19
## 4 50 245 12250 2500 234.26 10.74
## 5 122 575 70150 14884 571.60 3.40
## 6 100 475 47500 10000 468.53 6.47
## 7 33 138 4554 1089 154.61 -16.61
## 8 154 727 111958 23716 721.53 5.47
## 9 80 375 30000 6400 374.82 0.18
## 10 147 670 98490 21609 688.74 -18.74
## 11 182 828 150696 33124 852.72 -24.72
## 12 160 762 121920 25600 749.64 12.36
## Total 1359 6390 894714 190963 6367.28 22.72
plot(df$x, fitted(fit), pch = 19, xlab = "Work United Performed", ylab = "Variable Labor Costs")
abline(fit)
# Some of the results from pp 163-4 are included or derivable from these
summary(fit)
##
## Call:
## lm(formula = y ~ 0 + x, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.72 -4.02 4.43 11.14 21.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 4.6853 0.0342 137 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.9 on 11 degrees of freedom
## Multiple R-squared: 0.999, Adjusted R-squared: 0.999
## F-statistic: 1.88e+04 on 1 and 11 DF, p-value: <2e-16
anova(fit)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 4191980 4191980 18762 <2e-16 ***
## Residuals 11 2458 223
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(fit)
## 2.5 % 97.5 %
## x 4.61 4.761