# Chpt 6 - Further Inference on the Multiple Regression Model

# Based on the text 'Principles of Econometrics 3e' - Hill, Griffiths, Lim

# Single null hypothesis: one restriction on one or more parameters

# Joint null hypothesis: two or more restrictions on two or more
# parameters

# 1. Use a t-test for a one tail single null hypothesis.

# 2. Use either t-test or F-test for a two tail single null hypothesis.

# 3. Use an F-test for a joint null hypothesis

# The F Test

# Demonstrate an F test. The example below tests a single restriction (the
# coefficient on price is zero), so J = 1.

# note: in this example we could use the t test since the square of the t
# random variable with m degrees of freedom is an F random variable with 1
# degree of freedom in the numerator and m degrees of freedom in the
# denominator.

# F = [(SSEr - SSEu)/J] / [SSEu/(N-Ku)]

# SSEr - restricted model sum of squared errors. SSEu - unrestricted model
# sum of squared errors. J - number of restrictions, N - number of
# observations, Ku - number of parameters in the unrestricted model (incl
# intercept term)

# Example data: 'andy.csv' - read into a data frame from a csv file on the
# local disk
andy <- read.csv(file = "C:/statdata/andy.csv")
# show the first few rows
head(x = andy)
##   sales price advert
## 1  73.2  5.69    1.3
## 2  71.8  6.49    2.9
## 3  62.4  5.63    0.8
## 4  67.4  6.22    0.7
## 5  89.3  5.02    1.5
## 6  70.3  6.41    1.3
# show the structure: dimensions and column types
str(object = andy)
## 'data.frame':    75 obs. of  3 variables:
##  $ sales : num  73.2 71.8 62.4 67.4 89.3 70.3 73.2 86.1 81 76.4 ...
##  $ price : num  5.69 6.49 5.63 6.22 5.02 6.41 5.85 5.41 6.24 6.2 ...
##  $ advert: num  1.3 2.9 0.8 0.7 1.5 1.3 1.8 2.4 0.7 3 ...

# basic scatter plots of each explanatory variable against sales

# price (x axis) against sales (y axis)
plot(andy$price, andy$sales)

# figure placeholder left over from the rendered report; kept as a comment
# because as bare text it is not valid R and stops the script from parsing:
# plot of chunk unnamed-chunk-1

# advertising (x axis) against sales (y axis)
plot(andy$advert, andy$sales)

# figure placeholder left over from the rendered report (see note above the
# first placeholder in this section):
# plot of chunk unnamed-chunk-1


# Unrestricted model - sales regressed on both price and advertising
unrest_mrm <- lm(data = andy, formula = sales ~ price + advert)
# coefficient table, residual summary and overall fit
summary(object = unrest_mrm)
## 
## Call:
## lm(formula = sales ~ price + advert, data = andy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.482  -3.143  -0.346   2.875  11.305 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  118.914      6.352   18.72  < 2e-16 ***
## price         -7.908      1.096   -7.22  4.4e-10 ***
## advert         1.863      0.683    2.73    0.008 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 4.89 on 72 degrees of freedom
## Multiple R-squared: 0.448,   Adjusted R-squared: 0.433 
## F-statistic: 29.2 on 2 and 72 DF,  p-value: 5.04e-10

# Restricted model - price is dropped, so sales is regressed on advertising
# only
rest_mrm <- lm(data = andy, formula = sales ~ advert)
# coefficient table, residual summary and overall fit
summary(object = rest_mrm)
## 
## Call:
## lm(formula = sales ~ advert, data = andy)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -13.166  -4.195  -0.578   4.995  14.248 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    74.18       1.80   41.23   <2e-16 ***
## advert          1.73       0.89    1.95    0.055 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 6.37 on 73 degrees of freedom
## Multiple R-squared: 0.0493,  Adjusted R-squared: 0.0363 
## F-statistic: 3.79 on 1 and 73 DF,  p-value: 0.0555

# A function to compare a restricted model against an unrestricted model
# with an F test:
#
#   F = [(SSEr - SSEu) / J] / [SSEu / (N - Ku)]
#
# Arguments:
#   rest_mrm   - fitted restricted model
#   unrest_mrm - fitted unrestricted model
#
# Returns a one-row data.frame with the columns
#   F       - the F statistic
#   df.num  - numerator degrees of freedom (J)
#   df.den  - denominator degrees of freedom (N - Ku)
#   p.value - upper-tail probability of F (compare to required alpha: 0.05,
#             0.01 etc.)
#
# Stops with an error when the restricted model does not have more residual
# degrees of freedom than the unrestricted one (for example when the two
# arguments are swapped), because the statistic is not defined in that case.
f.test.mrm <- function(rest_mrm, unrest_mrm) {
    sse_r <- sum(resid(rest_mrm)^2)  # sum of squared errors - restricted model
    sse_u <- sum(resid(unrest_mrm)^2)  # sum of squared errors - unrestricted model

    # df.residual() is used instead of `model$df`: the latter only works
    # through partial matching of `$` against the `df.residual` component and
    # returns NULL as soon as the object has a second component starting
    # with "df"
    df_r <- df.residual(rest_mrm)
    df_u <- df.residual(unrest_mrm)
    df_num <- df_r - df_u  # degrees of freedom numerator (J)
    df_den <- df_u  # degrees of freedom denominator (N-Ku)

    if (df_num <= 0) {
        stop("the restricted model must have more residual degrees of ",
             "freedom than the unrestricted model", call. = FALSE)
    }

    f_stat <- ((sse_r - sse_u) / df_num) / (sse_u / df_den)  # the F statistic

    # ask for the upper tail directly: `1 - pf(...)` rounds to exactly 0 once
    # the p value drops below machine precision
    p_value <- pf(f_stat, df_num, df_den, lower.tail = FALSE)

    data.frame(F = f_stat, df.num = df_num, df.den = df_den, p.value = p_value)
}
# run the hand-written F test on the two fitted models
print(f.test.mrm(rest_mrm = rest_mrm, unrest_mrm = unrest_mrm))
##       F df.num df.den   p.value
## 1 52.06      1     72 4.424e-10

# the built-in anova function gives the same comparison
print(anova(object = rest_mrm, unrest_mrm))
## Analysis of Variance Table
## 
## Model 1: sales ~ advert
## Model 2: sales ~ price + advert
##   Res.Df  RSS Df Sum of Sq    F  Pr(>F)    
## 1     73 2962                              
## 2     72 1719  1      1243 52.1 4.4e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# the same comparison with both models fitted inside the anova call
print(anova(
    lm(sales ~ advert, data = andy),
    lm(sales ~ advert + price, data = andy)
))
## Analysis of Variance Table
## 
## Model 1: sales ~ advert
## Model 2: sales ~ advert + price
##   Res.Df  RSS Df Sum of Sq    F  Pr(>F)    
## 1     73 2962                              
## 2     72 1719  1      1243 52.1 4.4e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# also note that the square of the t statistic for price (the variable
# dropped in the restricted model) from the unrestricted model equals the F
# statistic. So we could have tested the hypothesis with either method in
# this specific case.
# The t value is read from the fitted model's coefficient table instead of
# being typed in by hand, so it cannot drift from the estimates.
coef(summary(unrest_mrm))["price", "t value"]^2
## [1] 52.06