### Sample Code for ANCOVA

#### Female Labor Supply Data
labor <- read.table("http://www.unc.edu/~zhangk/STOR665Spring15/labor.dat", header=T)
earning <- labor[,3]
edu <- labor[,5]
eduf <- rep("A", length(edu))
eduf[edu<16] <- "B"
eduf[edu<13] <- "C"
eduf[edu<12] <- "D"
eduf <- factor(eduf)
age <- labor[,2]
prestige <- labor[,4]


options(contrasts=c("contr.treatment", "contr.treatment"))

#rearrange the order, result will change, because design matrix is not orthgonal
summary(aov(earning~age+prestige+eduf)) ### different results
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## age           1      2     2.5   0.173    0.677    
## prestige      1   2393  2392.8 166.841  < 2e-16 ***
## eduf          3    476   158.7  11.063 4.46e-07 ***
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~eduf+age+prestige))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## eduf          3   1885   628.3   43.81  < 2e-16 ***
## age           1     32    32.3    2.25    0.134    
## prestige      1    954   954.1   66.53 2.02e-15 ***
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~age+eduf+prestige))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## age           1      2     2.5   0.173    0.677    
## eduf          3   1915   638.2  44.501  < 2e-16 ***
## prestige      1    954   954.1  66.526 2.02e-15 ***
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(lm(earning~prestige+eduf+age))
## 
## Call:
## lm(formula = earning ~ prestige + eduf + age)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.492 -1.758 -0.190  1.575 57.086 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.74806    1.19583   5.643 2.58e-08 ***
## prestige     0.11856    0.01454   8.156 2.02e-15 ***
## edufB       -2.33528    0.53727  -4.347 1.62e-05 ***
## edufC       -2.91639    0.58089  -5.021 6.80e-07 ***
## edufD       -3.65075    0.65815  -5.547 4.36e-08 ***
## age          0.02125    0.02091   1.016     0.31    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.787 on 601 degrees of freedom
## Multiple R-squared:  0.2499, Adjusted R-squared:  0.2436 
## F-statistic: 40.04 on 5 and 601 DF,  p-value: < 2.2e-16
#gives the var-covar matrix, can see they are not orthgonal, so we subtract the group mean
round(vcov(lm(earning~eduf+prestige+age)),4)
##             (Intercept)   edufB   edufC   edufD prestige     age
## (Intercept)      1.4300 -0.3021 -0.4310 -0.3120  -0.0109 -0.0161
## edufB           -0.3021  0.2887  0.2342  0.2405   0.0017  0.0000
## edufC           -0.4310  0.2342  0.3374  0.2702   0.0035  0.0009
## edufD           -0.3120  0.2405  0.2702  0.4332   0.0045 -0.0035
## prestige        -0.0109  0.0017  0.0035  0.0045   0.0002  0.0000
## age             -0.0161  0.0000  0.0009 -0.0035   0.0000  0.0004
### only use eduf and prestige
summary(aov(earning~eduf+prestige)) ### different results
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## eduf          3   1885   628.3   43.81  < 2e-16 ***
## prestige      1    972   971.6   67.74 1.16e-15 ***
## Residuals   602   8634    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~prestige+eduf))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## prestige      1   2395  2394.8  166.98  < 2e-16 ***
## eduf          3    462   153.9   10.73 7.08e-07 ***
## Residuals   602   8634    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### center the covariate
pres.mean <- mean(prestige[eduf=="A"])*(eduf=="A")+mean(prestige[eduf=="B"])*(eduf=="B")+mean(prestige[eduf=="C"])*(eduf=="C")+mean(prestige[eduf=="D"])*(eduf=="D")
pres.dm <- prestige-pres.mean

### run the ANCOVA again
summary(aov(earning~eduf+pres.dm)) ### same results
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## eduf          3   1885   628.3   43.81  < 2e-16 ***
## pres.dm       1    972   971.6   67.74 1.16e-15 ***
## Residuals   602   8634    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~pres.dm+eduf))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## pres.dm       1    972   971.6   67.74 1.16e-15 ***
## eduf          3   1885   628.3   43.81  < 2e-16 ***
## Residuals   602   8634    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### add in "age"
age.mean <- mean(age[eduf=="A"])*(eduf=="A")+mean(age[eduf=="B"])*(eduf=="B")+mean(age[eduf=="C"])*(eduf=="C")+mean(age[eduf=="D"])*(eduf=="D")
age.dm <- age-age.mean

### run the ANCOVA again
summary(aov(earning~eduf+pres.dm+age.dm)) ### same results
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## eduf          3   1885   628.3  43.808  < 2e-16 ***
## pres.dm       1    972   971.6  67.744 1.16e-15 ***
## age.dm        1     15    14.8   1.032     0.31    
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~pres.dm+age.dm+eduf))
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## pres.dm       1    972   971.6  67.744 1.16e-15 ***
## age.dm        1     15    14.8   1.032     0.31    
## eduf          3   1885   628.3  43.808  < 2e-16 ***
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#orthgonal now
round(vcov(lm(earning~eduf+pres.dm+age.dm)),4)
##             (Intercept)   edufB   edufC   edufD pres.dm age.dm
## (Intercept)      0.2049 -0.2049 -0.2049 -0.2049   0e+00  0e+00
## edufB           -0.2049  0.2742  0.2049  0.2049   0e+00  0e+00
## edufC           -0.2049  0.2049  0.2766  0.2049   0e+00  0e+00
## edufD           -0.2049  0.2049  0.2049  0.3152   0e+00  0e+00
## pres.dm          0.0000  0.0000  0.0000  0.0000   2e-04  0e+00
## age.dm           0.0000  0.0000  0.0000  0.0000   0e+00  4e-04
#Fan handout example
summary(aov(earning~age+prestige+eduf)) #Table 4.3
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## age           1      2     2.5   0.173    0.677    
## prestige      1   2393  2392.8 166.841  < 2e-16 ***
## eduf          3    476   158.7  11.063 4.46e-07 ***
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~eduf+age+prestige)) #Table 4.4
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## eduf          3   1885   628.3   43.81  < 2e-16 ***
## age           1     32    32.3    2.25    0.134    
## prestige      1    954   954.1   66.53 2.02e-15 ***
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~prestige+eduf+age)) #Table 4.5
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## prestige      1   2395  2394.8 166.986  < 2e-16 ***
## eduf          3    462   153.9  10.728 7.08e-07 ***
## age           1     15    14.8   1.032     0.31    
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(aov(earning~eduf+prestige+age)) #the same with centered data
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## eduf          3   1885   628.3  43.808  < 2e-16 ***
## prestige      1    972   971.6  67.744 1.16e-15 ***
## age           1     15    14.8   1.032     0.31    
## Residuals   601   8619    14.3                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1