# Chpt 6 - Further Inference on the Multiple Regression Model
# Based on the text 'Principles of Econometrics 3e' - Hill, Griffiths, Lim
# Single null hypothesis: one restriction on one or more parameters
# Joint null hypothesis: two or more restrictions on two or more
# parameters
# 1. Use a t-test for a one tail single null hypothesis.
# 2. Use either t-test or F-test for a two tail single null hypothesis.
# 3. Use an F-test for a joint null hypothesis
# The F Test
# Demonstrate an F test on nested models.
# note: this example imposes a single restriction (J = 1), so it is a single
# rather than a joint null hypothesis and we could equally use the t test:
# the square of a t random variable with m degrees of freedom is an F random
# variable with 1 degree of freedom in the numerator and m degrees of freedom
# in the denominator.
# F = [(SSEr - SSEu)/J] / [SSEu/(N-Ku)].
# SSEr - restricted model sum of squared errors. SSEu - unrestricted model
# sum of squared errors. J - number of restrictions, N - number of
# observations, Ku - number of parameters in the unrestricted model (incl
# intercept term)
# Example data: read 'andy.csv' from the local disk into a data frame
andy <- read.csv(file = "C:/statdata/andy.csv")
# preview the first few rows
head(x = andy)
## sales price advert
## 1 73.2 5.69 1.3
## 2 71.8 6.49 2.9
## 3 62.4 5.63 0.8
## 4 67.4 6.22 0.7
## 5 89.3 5.02 1.5
## 6 70.3 6.41 1.3
# compact overview of the column types and dimensions
str(object = andy)
## 'data.frame': 75 obs. of 3 variables:
## $ sales : num 73.2 71.8 62.4 67.4 89.3 70.3 73.2 86.1 81 76.4 ...
## $ price : num 5.69 6.49 5.63 6.22 5.02 6.41 5.85 5.41 6.24 6.2 ...
## $ advert: num 1.3 2.9 0.8 0.7 1.5 1.3 1.8 2.4 0.7 3 ...
# scatter plots of sales against each of the two explanatory variables
plot(x = andy$price, y = andy$sales)
plot(x = andy$advert, y = andy$sales)
# unrestricted model: regress sales on both price and advertising
unrest_mrm <- lm(formula = sales ~ price + advert, data = andy)
summary(object = unrest_mrm)
##
## Call:
## lm(formula = sales ~ price + advert, data = andy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.482 -3.143 -0.346 2.875 11.305
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 118.914 6.352 18.72 < 2e-16 ***
## price -7.908 1.096 -7.22 4.4e-10 ***
## advert 1.863 0.683 2.73 0.008 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.89 on 72 degrees of freedom
## Multiple R-squared: 0.448, Adjusted R-squared: 0.433
## F-statistic: 29.2 on 2 and 72 DF, p-value: 5.04e-10
# restricted model: price is dropped, so sales depends on advertising only
rest_mrm <- lm(formula = sales ~ advert, data = andy)
summary(object = rest_mrm)
##
## Call:
## lm(formula = sales ~ advert, data = andy)
##
## Residuals:
## Min 1Q Median 3Q Max
## -13.166 -4.195 -0.578 4.995 14.248
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.18 1.80 41.23 <2e-16 ***
## advert 1.73 0.89 1.95 0.055 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.37 on 73 degrees of freedom
## Multiple R-squared: 0.0493, Adjusted R-squared: 0.0363
## F-statistic: 3.79 on 1 and 73 DF, p-value: 0.0555
# F test comparing a restricted linear model against the unrestricted model
# it is nested in.
#
# Computes F = [(SSE_R - SSE_U) / J] / [SSE_U / df_U], where J is the
# difference in residual degrees of freedom between the two fits and df_U is
# the residual degrees of freedom of the unrestricted fit.
#
# Arguments:
#   rest_mrm   - fitted restricted model (e.g. the result of lm())
#   unrest_mrm - fitted unrestricted model; presumably fitted to the same
#                observations as rest_mrm (not checked here)
# Returns:
#   a one-row data.frame with columns F, df.num, df.den and p.value
# Errors:
#   stops when rest_mrm does not have more residual degrees of freedom than
#   unrest_mrm (e.g. the models were passed in the wrong order)
f.test.mrm <- function(rest_mrm, unrest_mrm) {
  sse_r <- sum(resid(rest_mrm)^2) # sum of squared errors - restricted model
  sse_u <- sum(resid(unrest_mrm)^2) # sum of squared errors - unrestricted model

  # df.residual() replaces `$df`, which only resolved to `df.residual`
  # through partial matching of `$`
  df.den <- df.residual(unrest_mrm) # denominator degrees of freedom
  df.num <- df.residual(rest_mrm) - df.den # numerator degrees of freedom (J)

  # a non-positive J would otherwise yield a silent NaN/Inf result
  if (!isTRUE(df.num > 0)) {
    stop(
      "rest_mrm must have more residual degrees of freedom than unrest_mrm",
      call. = FALSE
    )
  }

  # local name avoids shadowing `F`, the built-in alias for FALSE
  f_stat <- ((sse_r - sse_u) / df.num) / (sse_u / df.den)

  # the upper tail is computed directly: 1 - pf(...) rounds very small
  # p-values to exactly 0
  p.value <- pf(f_stat, df.num, df.den, lower.tail = FALSE)

  data.frame(F = f_stat, df.num = df.num, df.den = df.den, p.value = p.value)
}
# run the hand-written F test on the two fitted models and show the result
print(f.test.mrm(rest_mrm = rest_mrm, unrest_mrm = unrest_mrm))
## F df.num df.den p.value
## 1 52.06 1 72 4.424e-10
# alternatively, let the built-in anova() compare the two nested fits
print(anova(object = rest_mrm, unrest_mrm))
## Analysis of Variance Table
##
## Model 1: sales ~ advert
## Model 2: sales ~ price + advert
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 73 2962
## 2 72 1719 1 1243 52.1 4.4e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# the same comparison in one expression, fitting both models inline
print(
  anova(
    lm(formula = sales ~ advert, data = andy),
    lm(formula = sales ~ advert + price, data = andy)
  )
)
## Analysis of Variance Table
##
## Model 1: sales ~ advert
## Model 2: sales ~ advert + price
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 73 2962
## 2 72 1719 1 1243 52.1 4.4e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# also note that the square of the t statistic for price from the
# unrestricted model equals the F statistic. So we could have tested the
# hypothesis with either method in this specific case
# square the price t statistic read straight from the unrestricted model's
# coefficient table, so the value follows the fit instead of relying on a
# hand-copied, rounded number
coef(summary(unrest_mrm))["price", "t value"]^2
## [1] 52.06