Textbook: Linear Models with R, 2nd edition, by Julian J. Faraway

Hypothesis tests to compare models

1. Test of all the predictors

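Model: \[y=X\beta + \varepsilon\] where \(\beta = (\beta_0, \beta_1,...,\beta_{p-1})'\). Null hypothesis: \[H_0: \beta_1 = ... = \beta _{p-1}=0\] Test statistic, with \(RSS_\omega\) from the null model (\(q\) parameters) and \(RSS_\Omega\) from the full model (\(p\) parameters): \[F = \frac{(RSS_\omega - RSS_\Omega)/(p-q)}{RSS_\Omega/(n-p)} \sim F_{p-q,\,n-p}\]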

# Load the gala data set bundled with the faraway package.
data(gala, package = "faraway")

# Full model: Species regressed on all five predictors.
lmod <- lm(
  Species ~ Area + Elevation + Nearest + Scruz + Adjacent,
  data = gala
)

anova(): computes the ANOVA table (given two nested models, it reports the F-test comparing them)

# Intercept-only model, i.e. the model under H0 that no predictor matters.
nullmod <- lm(formula = Species ~ 1, data = gala)

# F-test of the intercept-only model against the full model.
anova(nullmod, lmod)
## Analysis of Variance Table
## 
## Model 1: Species ~ 1
## Model 2: Species ~ Area + Elevation + Nearest + Scruz + Adjacent
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     29 381081                                  
## 2     24  89231  5    291850 15.699 6.838e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Rebuild the F-test reported by anova(nullmod, lmod) by hand.
(rss0 <- deviance(nullmod)) # RSS of the null model (RSS_omega)
## [1] 381081.4
(rss <- deviance(lmod)) # RSS of the full model (RSS_Omega)
## [1] 89231.37
(df0 <- df.residual(nullmod)) # n - q
## [1] 29
(df <- df.residual(lmod)) # n - p
## [1] 24
(fstat <- ((rss0 - rss) / (df0 - df)) / (rss / df)) # F-statistic
## [1] 15.69941
# Ask for the upper tail directly: 1 - pf(...) loses precision through
# cancellation when the p-value is very small.
pf(fstat, df0 - df, df, lower.tail = FALSE) # p-value
## [1] 6.837893e-07

2. Testing one predictor

\[H_0:\beta_{Area}=0\]

# Reduced model: the full model with Area left out.
lmods <- lm(
  Species ~ Elevation + Nearest + Scruz + Adjacent,
  data = gala
)

# F-test of the reduced model against the full model.
anova(lmods, lmod)
## Analysis of Variance Table
## 
## Model 1: Species ~ Elevation + Nearest + Scruz + Adjacent
## Model 2: Species ~ Area + Elevation + Nearest + Scruz + Adjacent
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1     25 93469                           
## 2     24 89231  1    4237.7 1.1398 0.2963

Compare with the t-statistic: the squared t-value for Area equals the F-statistic above, and the two p-values agree.

# Coefficient table of the full model, built once and printed.
(coef_tab <- coef(summary(lmod)))
##                 Estimate  Std. Error      t value     Pr(>|t|)
## (Intercept)  7.068220709 19.15419782  0.369016796 7.153508e-01
## Area        -0.023938338  0.02242235 -1.067610554 2.963180e-01
## Elevation    0.319464761  0.05366280  5.953187968 3.823409e-06
## Nearest      0.009143961  1.05413595  0.008674366 9.931506e-01
## Scruz       -0.240524230  0.21540225 -1.116628222 2.752082e-01
## Adjacent    -0.074804832  0.01770019 -4.226216850 2.970655e-04
# t-statistic for Area, looked up by row and column name.
(t_area <- coef_tab["Area", "t value"])
## [1] -1.067611
# Squared t-statistic, to set against the F-statistic of the Area test.
t_area^2
## [1] 1.139792

3. Testing a pair of predictors

\[H_0: \beta_{Area}=\beta_{Adjacent}=0\]

# Reduced model: the full model with both Area and Adjacent left out.
lmods <- lm(formula = Species ~ Elevation + Nearest + Scruz, data = gala)

# Joint F-test of the two dropped predictors against the full model.
anova(lmods, lmod)
## Analysis of Variance Table
## 
## Model 1: Species ~ Elevation + Nearest + Scruz
## Model 2: Species ~ Area + Elevation + Nearest + Scruz + Adjacent
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)   
## 1     26 158292                               
## 2     24  89231  2     69060 9.2874 0.00103 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4. Testing a subspace

\[H_0: \beta_{Elevation}=0.5\]

t-statistic:

\[ t= \frac{\hat{\beta}_{Elevation}-0.5 }{se(\hat{\beta}_{Elevation})} \]

# t-test of H0: beta_Elevation = 0.5 in the full model.
# Estimate and standard error are looked up by name rather than by position.
(b.elevation <- coef(summary(lmod))["Elevation", "Estimate"])
## [1] 0.3194648
(se.elevation <- coef(summary(lmod))["Elevation", "Std. Error"])
## [1] 0.0536628
(tstat <- (b.elevation - 0.5) / se.elevation)
## [1] -3.364253
# Two-sided p-value. abs() keeps it valid whichever sign tstat has
# (2 * pt(tstat, ...) exceeds 1 when tstat > 0), and the degrees of freedom
# are read from the fitted model instead of being hard-coded as 24.
2 * pt(abs(tstat), df.residual(lmod), lower.tail = FALSE)
## [1] 0.002573836

Confidence intervals for \(\beta\)

# 95% confidence intervals for the coefficients of the full model.
# The residual degrees of freedom (n - p) are read from the fit instead of
# being typed in as 30 - 6, so the code stays right if the model changes.
df_resid <- df.residual(lmod)
(t_crit <- qt(0.975, df_resid))
## [1] 2.063899
b.adjacent <- coef(summary(lmod))["Adjacent", "Estimate"]
se.adjacent <- coef(summary(lmod))["Adjacent", "Std. Error"]
# Interval for Adjacent by hand: estimate -/+ critical value * std. error.
b.adjacent + c(-1, 1) * se.adjacent * t_crit
## [1] -0.11133622 -0.03827344
# The same intervals for every coefficient; the Adjacent row matches.
confint(lmod)
##                   2.5 %      97.5 %
## (Intercept) -32.4641006 46.60054205
## Area         -0.0702158  0.02233912
## Elevation     0.2087102  0.43021935
## Nearest      -2.1664857  2.18477363
## Scruz        -0.6850926  0.20404416
## Adjacent     -0.1113362 -0.03827344