rm(list=ls(all=TRUE))
fiber <- read.table('~/Dropbox/Statistics_study/STOR_664/Homework/HW2/HW2_C_18_data.dat')
names(fiber) <- c('obs', 'y','x2','x3','x4')
fiber = log(fiber)
attach(fiber)
# log model, assume multiply relasionship.
fiber.lm <- lm(y ~ x2+x3+x4)
summary(fiber.lm)
##
## Call:
## lm(formula = y ~ x2 + x3 + x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.10878 -0.04042 -0.01865 0.07459 0.11288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.94934 2.20907 0.882 0.3906
## x2 0.83492 0.15755 5.299 7.19e-05 ***
## x3 0.09126 0.39551 0.231 0.8204
## x4 -0.38146 0.14939 -2.553 0.0213 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06937 on 16 degrees of freedom
## Multiple R-squared: 0.7184, Adjusted R-squared: 0.6656
## F-statistic: 13.61 on 3 and 16 DF, p-value: 0.0001143
the coefficient of x3 is not significant from the lm result, so we igore x3 and only consider x1, x2 in our model.
fiber.lm <- lm(y ~ x2+x4)
summary(fiber.lm)
##
## Call:
## lm(formula = y ~ x2 + x4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11149 -0.03979 -0.01930 0.07103 0.12112
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.4244 0.7779 3.117 0.00628 **
## x2 0.8355 0.1531 5.458 4.25e-05 ***
## x4 -0.4040 0.1098 -3.681 0.00185 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06741 on 17 degrees of freedom
## Multiple R-squared: 0.7175, Adjusted R-squared: 0.6842
## F-statistic: 21.58 on 2 and 17 DF, p-value: 2.159e-05
ci = data.frame(1:4)
ci[[1]] <- predict(fiber.lm, data.frame(x2 = log(75), x4 = log(45)), interval = "confidence", se.fit=T)
exp(ci[[1]]$fit)
## fit lwr upr
## 1 89.46327 86.00056 93.06539
ci[[2]] <- predict(fiber.lm, data.frame(x2 = log(80), x4 = log(45)), interval = "confidence", se.fit=T)
exp(ci[[2]]$fit)
## fit lwr upr
## 1 94.41991 90.61474 98.38488
ci[[3]] <- predict(fiber.lm, data.frame(x2 = log(80), x4 = log(42)), interval = "confidence", se.fit=T)
exp(ci[[3]]$fit)
## fit lwr upr
## 1 97.08884 93.73159 100.5663
ci[[4]] <- predict(fiber.lm, data.frame(x2 = log(65), x4 = log(40)), interval = "confidence", se.fit=T)
exp(ci[[4]]$fit)
## fit lwr upr
## 1 83.25013 78.22336 88.59993
p=3 # p is 3 in the model y~x2+x4!
n=length(y)
alpha=0.05
k=4 #number of new observations
level=1-alpha/(2*k)
t = qt(level, n-p)
t
## [1] 2.792542
simult_ci = data.frame(1:k,1:k,1:k)*0
names(simult_ci) = c('fit', 'lower','upper')
for (i in 1:k) {
simult_ci$fit[i] = ci[[i]]$fit[1]
simult_ci$lower[i] = ci[[i]]$fit[1] - t * ci[[i]]$se.fit
simult_ci$upper[i] = ci[[i]]$fit[1] + t * ci[[i]]$se.fit
}
simult_ci
## fit lower upper
## 1 4.493828 4.441580 4.546076
## 2 4.547752 4.493306 4.602198
## 3 4.575626 4.529048 4.622205
## 4 4.421850 4.339414 4.504285
simult_ci <- exp(simult_ci)
simult_ci
## fit lower upper
## 1 89.46327 84.90900 94.26181
## 2 94.41991 89.41654 99.70325
## 3 97.08884 92.67026 101.71811
## 4 83.25013 76.66263 90.40369
q=p # q <= p, is the rank of matrix C
G = sqrt(q * qf(1-alpha, q, n-p))
G
## [1] 3.096826
simult_ci_scheffe = data.frame(1:k,1:k,1:k)*0
names(simult_ci_scheffe) = c('fit', 'lower','upper')
for (i in 1:k) {
simult_ci_scheffe$fit[i] = ci[[i]]$fit[1]
simult_ci_scheffe$lower[i] = ci[[i]]$fit[1] - G * ci[[i]]$se.fit
simult_ci_scheffe$upper[i] = ci[[i]]$fit[1] + G * ci[[i]]$se.fit
}
simult_ci_scheffe <- exp(simult_ci_scheffe)
simult_ci_scheffe
## fit lower upper
## 1 89.46327 84.42698 94.79998
## 2 94.41991 88.88764 100.29651
## 3 97.08884 92.20111 102.23568
## 4 83.25013 75.97709 91.21939
In this case, G > t, which means the Scheffe predication intervals are wider than the Bonferroni predication intervals, so we choose the narrower one - the Bonferroni method.