env <- new.env()
load("../data.rda", envir = env)
df <- get("CH01TA01", envir = env)
names(df) <- c('x', 'y')
fit <- lm(y ~ x, df)
As the authors explain, we're looking for B = t(1 - 0.10/4; 23)
for the 90% joint confidence interval. But notice that the confint
funtion uses a 95% confidence interval t(1 - 0.05/2; 23)
that equals B
. Therefore, all we require in this instance is to use confint(fit)
to get the correct result.
The reason this works is because the Bonferroni correction is just a modification to the t-statistic. The confint
function uses the level parameter to produce the alpha = 0.05
in the above case. Therefore, what we require is a level to turn 1 - 0.05/2
into 1 - 0.1/4
, or a level such that:
1 - a/2 = 1 - (1 - level)/2 = 1 - alpha/4, alpha = 0.1, a = (1 - level)
The 4, as we shall see in multiple regression, is really just 2g
, where g
is the number of confidence coefficients in the family, in this case 2. It turns out that a level = 1 - alpha/g = 1 - 0.1/2
will give us the desired result:
1 - a/2 = 1 - (1 - level)/2 = 1 - (1 - (1 - alpha/g))/2 = 1 - alpha/2g
In this case, 1 - alpha/g = 0.95
, just is the default level of confint.
confint(fit, level = (1 - 0.1/2))
## 2.5 % 97.5 %
## (Intercept) 8.214 116.518
## x 2.852 4.288
ci.wh <- function(model, newdata, alpha = 0.1)
{
df <- nrow(model.frame(model)) - length(coef(model)) # 23
W <- sqrt( 2 * qf(1 - alpha, 2, df) ) # 2.2580
ci <- predict(model, newdata, se.fit = TRUE)
x <- cbind(
'x' = newdata,
's' = ci$se.fit,
'fit' = ci$fit,
'lwr' = ci$fit - W * ci$se.fit,
'upr' = ci$fit + W * ci$se.fit)
return(x)
}
new <- data.frame(x = c(30, 65, 100))
ci.wh(fit, new)
## x s fit lwr upr
## 1 30 16.970 169.5 131.2 207.8
## 2 65 9.918 294.4 272.0 316.8
## 3 100 14.272 419.4 387.2 451.6
As in the above Bonferroni correction, we require level = (1 - alpha/g)
. For clarity, part of the return object includes se.fit
(set to true), which are the standard errors and residual.scale
, which is equal to the positive squart root of the MSE. This will be used in calculations below.
predict(fit, new, int = "c", level = (1 - 0.1/nrow(new)), se.fit = TRUE)
## $fit
## fit lwr upr
## 1 169.5 131.1 207.9
## 2 294.4 272.0 316.9
## 3 419.4 387.1 451.7
##
## $se.fit
## 1 2 3
## 16.970 9.918 14.272
##
## $df
## [1] 23
##
## $residual.scale
## [1] 48.82
ci.sim <- function(model, newdata, type = c("B", "S"), alpha = 0.05)
{
g <- nrow(newdata)
CI <- predict(model, newdata, se.fit = TRUE)
M <- ifelse(match.arg(type) == "B",
qt(1 - alpha / (2*g), model$df), # B = (4.9a)
sqrt( g * qf( 1 - alpha, g, model$df))) # S = (4.8a)
spred <- sqrt( CI$residual.scale^2 + (CI$se.fit)^2 ) # (2.38)
x <- data.frame(
"x" = newdata,
"spred" = spred,
"fit" = CI$fit,
"lower" = CI$fit - M * spred,
"upper" = CI$fit + M * spred)
return(x)
}
new <- data.frame(x = c(80, 100))
ci.sim(fit, new, type = "S")
## x spred fit lower upper
## 1 80 49.91 348.0 217.4 478.6
## 2 100 50.87 419.4 286.3 552.5
ci.sim(fit, new, type = "B")
## x spred fit lower upper
## 1 80 49.91 348.0 228.3 467.7
## 2 100 50.87 419.4 297.4 541.4
df <- get("CH04TA02", envir = env)
names(df) <- c('x', 'y')
Included in the summary and anova output are the values calculated on pages 163 and 164.
fit <- lm(y ~ 0 + x, df) # alternate way to exclude intercept: lm(y ~ x - 1)
tab <- transform(df,
x = round(x, 0),
y = round(y, 0),
xy = round(x * y, 0),
xsq = round(x * x, 0),
fit = round(fitted(fit), 2),
e = round(resid(fit), 2))
rbind(tab, Total = colSums(tab))
## x y xy xsq fit e
## 1 20 114 2280 400 93.71 20.29
## 2 196 921 180516 38416 918.31 2.69
## 3 115 560 64400 13225 538.81 21.19
## 4 50 245 12250 2500 234.26 10.74
## 5 122 575 70150 14884 571.60 3.40
## 6 100 475 47500 10000 468.53 6.47
## 7 33 138 4554 1089 154.61 -16.61
## 8 154 727 111958 23716 721.53 5.47
## 9 80 375 30000 6400 374.82 0.18
## 10 147 670 98490 21609 688.74 -18.74
## 11 182 828 150696 33124 852.72 -24.72
## 12 160 762 121920 25600 749.64 12.36
## Total 1359 6390 894714 190963 6367.28 22.72
plot(df$x, fitted(fit), pch = 19, xlab = "Work United Performed", ylab = "Variable Labor Costs")
abline(fit)
# Some of the results from pp 163-4 are included or derivable from these
summary(fit)
##
## Call:
## lm(formula = y ~ 0 + x, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.72 -4.02 4.43 11.14 21.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## x 4.6853 0.0342 137 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.9 on 11 degrees of freedom
## Multiple R-squared: 0.999, Adjusted R-squared: 0.999
## F-statistic: 1.88e+04 on 1 and 11 DF, p-value: <2e-16
anova(fit)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x 1 4191980 4191980 18762 <2e-16 ***
## Residuals 11 2458 223
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
confint(fit)
## 2.5 % 97.5 %
## x 4.61 4.761