### Sample Code for ANCOVA
#### Female Labor Supply Data
labor <- read.table("http://www.unc.edu/~zhangk/STOR665Spring15/labor.dat", header=T)
earning <- labor[,3]
edu <- labor[,5]
eduf <- rep("A", length(edu))
eduf[edu<16] <- "B"
eduf[edu<13] <- "C"
eduf[edu<12] <- "D"
eduf <- factor(eduf)
age <- labor[,2]
prestige <- labor[,4]
options(contrasts=c("contr.treatment", "contr.treatment"))
#rearrange the order, result will change, because design matrix is not orthgonal
summary(aov(earning~age+prestige+eduf)) ### different results
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 2 2.5 0.173 0.677
## prestige 1 2393 2392.8 166.841 < 2e-16 ***
## eduf 3 476 158.7 11.063 4.46e-07 ***
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~eduf+age+prestige))
## Df Sum Sq Mean Sq F value Pr(>F)
## eduf 3 1885 628.3 43.81 < 2e-16 ***
## age 1 32 32.3 2.25 0.134
## prestige 1 954 954.1 66.53 2.02e-15 ***
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~age+eduf+prestige))
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 2 2.5 0.173 0.677
## eduf 3 1915 638.2 44.501 < 2e-16 ***
## prestige 1 954 954.1 66.526 2.02e-15 ***
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(earning~prestige+eduf+age))
##
## Call:
## lm(formula = earning ~ prestige + eduf + age)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.492 -1.758 -0.190 1.575 57.086
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.74806 1.19583 5.643 2.58e-08 ***
## prestige 0.11856 0.01454 8.156 2.02e-15 ***
## edufB -2.33528 0.53727 -4.347 1.62e-05 ***
## edufC -2.91639 0.58089 -5.021 6.80e-07 ***
## edufD -3.65075 0.65815 -5.547 4.36e-08 ***
## age 0.02125 0.02091 1.016 0.31
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.787 on 601 degrees of freedom
## Multiple R-squared: 0.2499, Adjusted R-squared: 0.2436
## F-statistic: 40.04 on 5 and 601 DF, p-value: < 2.2e-16
#gives the var-covar matrix, can see they are not orthgonal, so we subtract the group mean
round(vcov(lm(earning~eduf+prestige+age)),4)
## (Intercept) edufB edufC edufD prestige age
## (Intercept) 1.4300 -0.3021 -0.4310 -0.3120 -0.0109 -0.0161
## edufB -0.3021 0.2887 0.2342 0.2405 0.0017 0.0000
## edufC -0.4310 0.2342 0.3374 0.2702 0.0035 0.0009
## edufD -0.3120 0.2405 0.2702 0.4332 0.0045 -0.0035
## prestige -0.0109 0.0017 0.0035 0.0045 0.0002 0.0000
## age -0.0161 0.0000 0.0009 -0.0035 0.0000 0.0004
### only use eduf and prestige
summary(aov(earning~eduf+prestige)) ### different results
## Df Sum Sq Mean Sq F value Pr(>F)
## eduf 3 1885 628.3 43.81 < 2e-16 ***
## prestige 1 972 971.6 67.74 1.16e-15 ***
## Residuals 602 8634 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~prestige+eduf))
## Df Sum Sq Mean Sq F value Pr(>F)
## prestige 1 2395 2394.8 166.98 < 2e-16 ***
## eduf 3 462 153.9 10.73 7.08e-07 ***
## Residuals 602 8634 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### center the covariate
pres.mean <- mean(prestige[eduf=="A"])*(eduf=="A")+mean(prestige[eduf=="B"])*(eduf=="B")+mean(prestige[eduf=="C"])*(eduf=="C")+mean(prestige[eduf=="D"])*(eduf=="D")
pres.dm <- prestige-pres.mean
### run the ANCOVA again
summary(aov(earning~eduf+pres.dm)) ### same results
## Df Sum Sq Mean Sq F value Pr(>F)
## eduf 3 1885 628.3 43.81 < 2e-16 ***
## pres.dm 1 972 971.6 67.74 1.16e-15 ***
## Residuals 602 8634 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~pres.dm+eduf))
## Df Sum Sq Mean Sq F value Pr(>F)
## pres.dm 1 972 971.6 67.74 1.16e-15 ***
## eduf 3 1885 628.3 43.81 < 2e-16 ***
## Residuals 602 8634 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### add in "age"
age.mean <- mean(age[eduf=="A"])*(eduf=="A")+mean(age[eduf=="B"])*(eduf=="B")+mean(age[eduf=="C"])*(eduf=="C")+mean(age[eduf=="D"])*(eduf=="D")
age.dm <- age-age.mean
### run the ANCOVA again
summary(aov(earning~eduf+pres.dm+age.dm)) ### same results
## Df Sum Sq Mean Sq F value Pr(>F)
## eduf 3 1885 628.3 43.808 < 2e-16 ***
## pres.dm 1 972 971.6 67.744 1.16e-15 ***
## age.dm 1 15 14.8 1.032 0.31
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~pres.dm+age.dm+eduf))
## Df Sum Sq Mean Sq F value Pr(>F)
## pres.dm 1 972 971.6 67.744 1.16e-15 ***
## age.dm 1 15 14.8 1.032 0.31
## eduf 3 1885 628.3 43.808 < 2e-16 ***
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#orthgonal now
round(vcov(lm(earning~eduf+pres.dm+age.dm)),4)
## (Intercept) edufB edufC edufD pres.dm age.dm
## (Intercept) 0.2049 -0.2049 -0.2049 -0.2049 0e+00 0e+00
## edufB -0.2049 0.2742 0.2049 0.2049 0e+00 0e+00
## edufC -0.2049 0.2049 0.2766 0.2049 0e+00 0e+00
## edufD -0.2049 0.2049 0.2049 0.3152 0e+00 0e+00
## pres.dm 0.0000 0.0000 0.0000 0.0000 2e-04 0e+00
## age.dm 0.0000 0.0000 0.0000 0.0000 0e+00 4e-04
#Fan handout example
summary(aov(earning~age+prestige+eduf)) #Table 4.3
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 2 2.5 0.173 0.677
## prestige 1 2393 2392.8 166.841 < 2e-16 ***
## eduf 3 476 158.7 11.063 4.46e-07 ***
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~eduf+age+prestige)) #Table 4.4
## Df Sum Sq Mean Sq F value Pr(>F)
## eduf 3 1885 628.3 43.81 < 2e-16 ***
## age 1 32 32.3 2.25 0.134
## prestige 1 954 954.1 66.53 2.02e-15 ***
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~prestige+eduf+age)) #Table 4.5
## Df Sum Sq Mean Sq F value Pr(>F)
## prestige 1 2395 2394.8 166.986 < 2e-16 ***
## eduf 3 462 153.9 10.728 7.08e-07 ***
## age 1 15 14.8 1.032 0.31
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~eduf+prestige+age)) #the same with centered data
## Df Sum Sq Mean Sq F value Pr(>F)
## eduf 3 1885 628.3 43.808 < 2e-16 ***
## prestige 1 972 971.6 67.744 1.16e-15 ***
## age 1 15 14.8 1.032 0.31
## Residuals 601 8619 14.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1