#Name: Sweta Kumari #ANALYSIS OF COVARIANCE

#ANCOVA calculations

photoperiod<-read.table("C:\\Users\\Sweta\\Documents\\Data\\Photoperiod.txt",header=T)
attach(photoperiod)
names(photoperiod) 
## [1] "Genotype"    "Growth"      "Photoperiod"

#Let say Photoperiod=x and Growth=y #Find the sum of x and sum of x square

sum(Photoperiod);sum(Photoperiod^2)
## [1] 360
## [1] 6240

#Find the sum of y and sum of y square

sum(Growth);sum(Growth^2) 
## [1] 57
## [1] 175

#Find the sum of x*y

sum(Growth*Photoperiod)
## [1] 932

#Find the sum and sum of squares #Calculate the sum of growth according to Genotype

tapply(Growth,Genotype,sum)
##  A  B  C  D  E  F 
## 12 18  6  6  8  7

#Calculate the sum of growth square according to Genotype

tapply(Growth^2,Genotype,sum) 
##  A  B  C  D  E  F 
## 38 86 10 10 16 15

#Calculate the sum of Photoperiod according to Genotype

tapply(Photoperiod,Genotype,sum)
##  A  B  C  D  E  F 
## 60 60 60 60 60 60

#Calculate the sum of Photoperiod square according to Genotype

tapply(Photoperiod^2,Genotype,sum) 
##    A    B    C    D    E    F 
## 1040 1040 1040 1040 1040 1040

#Calculate the sum of growth*Photoperiod according to Genotype

tapply(Photoperiod*Growth,Genotype,sum)
##   A   B   C   D   E   F 
## 196 296  96 100 120 124

#Analysis of Covariance in R #Use lm directive and capture the output in model object

options(contrasts=c("contr.treatment","contr.poly")) 
model<-lm(Growth~Genotype*Photoperiod) 

#Summarize the model

summary(model)
## 
## Call:
## lm(formula = Growth ~ Genotype * Photoperiod)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5429 -0.1786  0.0000  0.1143  0.6286 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            1.28571    0.48865   2.631  0.02193 * 
## GenotypeB              0.42857    0.69105   0.620  0.54674   
## GenotypeC             -0.42857    0.69105  -0.620  0.54674   
## GenotypeD             -0.85714    0.69105  -1.240  0.23855   
## GenotypeE              0.71429    0.69105   1.034  0.32169   
## GenotypeF             -1.57143    0.69105  -2.274  0.04213 * 
## Photoperiod            0.11429    0.03030   3.771  0.00267 **
## GenotypeB:Photoperiod  0.07143    0.04286   1.667  0.12145   
## GenotypeC:Photoperiod -0.07143    0.04286  -1.667  0.12145   
## GenotypeD:Photoperiod -0.04286    0.04286  -1.000  0.33705   
## GenotypeE:Photoperiod -0.11429    0.04286  -2.667  0.02054 * 
## GenotypeF:Photoperiod  0.02143    0.04286   0.500  0.62612   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3586 on 12 degrees of freedom
## Multiple R-squared:  0.9611, Adjusted R-squared:  0.9254 
## F-statistic: 26.93 on 11 and 12 DF,  p-value: 9.445e-07

#Use aov directive to get anova table for model

model<-aov(Growth~Genotype*Photoperiod) 

#Summarize the model

summary(model) 
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## Genotype              5 27.875   5.575  43.361 2.85e-07 ***
## Photoperiod           1  7.058   7.058  54.898 8.17e-06 ***
## Genotype:Photoperiod  5  3.149   0.630   4.898   0.0113 *  
## Residuals            12  1.543   0.129                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#use summary.aov to get ANOVA table

summary.aov(model)
##                      Df Sum Sq Mean Sq F value   Pr(>F)    
## Genotype              5 27.875   5.575  43.361 2.85e-07 ***
## Photoperiod           1  7.058   7.058  54.898 8.17e-06 ***
## Genotype:Photoperiod  5  3.149   0.630   4.898   0.0113 *  
## Residuals            12  1.543   0.129                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Use summary.lm to get the parameters and standard error.

summary.lm(model) 
## 
## Call:
## aov(formula = Growth ~ Genotype * Photoperiod)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.5429 -0.1786  0.0000  0.1143  0.6286 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            1.28571    0.48865   2.631  0.02193 * 
## GenotypeB              0.42857    0.69105   0.620  0.54674   
## GenotypeC             -0.42857    0.69105  -0.620  0.54674   
## GenotypeD             -0.85714    0.69105  -1.240  0.23855   
## GenotypeE              0.71429    0.69105   1.034  0.32169   
## GenotypeF             -1.57143    0.69105  -2.274  0.04213 * 
## Photoperiod            0.11429    0.03030   3.771  0.00267 **
## GenotypeB:Photoperiod  0.07143    0.04286   1.667  0.12145   
## GenotypeC:Photoperiod -0.07143    0.04286  -1.667  0.12145   
## GenotypeD:Photoperiod -0.04286    0.04286  -1.000  0.33705   
## GenotypeE:Photoperiod -0.11429    0.04286  -2.667  0.02054 * 
## GenotypeF:Photoperiod  0.02143    0.04286   0.500  0.62612   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3586 on 12 degrees of freedom
## Multiple R-squared:  0.9611, Adjusted R-squared:  0.9254 
## F-statistic: 26.93 on 11 and 12 DF,  p-value: 9.445e-07

#Ancova with different values of the covariates

ipomopsis<-read.csv("C:\\Users\\Sweta\\Documents\\Data\\ipomopsis.csv",header=T)
attach(ipomopsis)
names(ipomopsis)
## [1] "Root"    "Fruit"   "Grazing"

#Find sum of root and root^2

sum(Root);sum(Root^2)
## [1] 287.246
## [1] 2148.172

#Find sum of fruit and fruit^2

sum(Fruit);sum(Fruit^2) 
## [1] 2376.42
## [1] 164928.1

#Find sum of root*fruit

sum(Root*Fruit)
## [1] 18263.16

#Let’s consider only grazed plant data

sum(Root[Grazing=="Grazed"]);sum(Root[Grazing=="Grazed"]^2) 
## [1] 166.188
## [1] 1400.834

#Now consider ungrazed plant data

sum(Root[Grazing=="Ungrazed"]);sum(Root[Grazing=="Ungrazed"]^2)
## [1] 121.058
## [1] 747.3387

#Let’s check data on fruits for grazed plant data

sum(Fruit[Grazing=="Grazed"]);sum(Fruit[Grazing=="Grazed"]^2)
## [1] 1358.81
## [1] 104156

#Similarly fruit data for ungrazed plant

sum(Fruit[Grazing=="Ungrazed"]);sum(Fruit[Grazing=="Ungrazed"]^2) 
## [1] 1017.61
## [1] 60772.11

#To get data for sum of products of root and fruit for grazed plant.

sum(Root[Grazing=="Grazed"]*Fruit[Grazing=="Grazed"]) 
## [1] 11753.64

#Similarly for ungrazed plant

sum(Root[Grazing=="Ungrazed"]*Fruit[Grazing=="Ungrazed"])
## [1] 6509.522

#Ancova in R using lm #Find the mean of fruit for grazed and ungrazed plant

tapply(Fruit,Grazing, mean)
##   Grazed Ungrazed 
##  67.9405  50.8805

#Let’s plot the graph

plot(Root,Fruit)

#Let’s do blank plot by type=n; #select grazed and ungrazed points for fruits and root;# Use plotting character = 16 for plotting the points

plot(Root,Fruit,type="n") 
points(Root[Grazing=="Ungrazed"],Fruit[Grazing=="Ungrazed"])
points(Root[Grazing=="Grazed"],Fruit[Grazing=="Grazed"],pch=16)

#To get different slopes and inercepts of both variables, use * asterisk mark .

ancova<-lm(Fruit~Grazing*Root) 

#Summarize the result

summary(ancova)
## 
## Call:
## lm(formula = Fruit ~ Grazing * Root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.3177  -2.8320   0.1247   3.8511  17.1313 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -125.173     12.811  -9.771 1.15e-11 ***
## GrazingUngrazed        30.806     16.842   1.829   0.0757 .  
## Root                   23.240      1.531  15.182  < 2e-16 ***
## GrazingUngrazed:Root    0.756      2.354   0.321   0.7500    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.831 on 36 degrees of freedom
## Multiple R-squared:  0.9293, Adjusted R-squared:  0.9234 
## F-statistic: 157.6 on 3 and 36 DF,  p-value: < 2.2e-16

#ANOVA table for maximal model

anova(ancova) 
## Analysis of Variance Table
## 
## Response: Fruit
##              Df  Sum Sq Mean Sq  F value    Pr(>F)    
## Grazing       1  2910.4  2910.4  62.3795 2.262e-09 ***
## Root          1 19148.9 19148.9 410.4201 < 2.2e-16 ***
## Grazing:Root  1     4.8     4.8   0.1031      0.75    
## Residuals    36  1679.6    46.7                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#update the ancova model by removing the interaction between grazing and roots

ancova2<-update(ancova, ~ . - Grazing:Root)

#Compare ancova and ancova2 model

anova(ancova,ancova2) 
## Analysis of Variance Table
## 
## Model 1: Fruit ~ Grazing * Root
## Model 2: Fruit ~ Grazing + Root
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     36 1679.7                           
## 2     37 1684.5 -1   -4.8122 0.1031   0.75

#Update ancova2 model by removing Grazing from the model

ancova3<-update(ancova2, ~ . - Grazing) 

#Compare ancova2 and ancova3 model

anova(ancova2,ancova3)
## Analysis of Variance Table
## 
## Model 1: Fruit ~ Grazing + Root
## Model 2: Fruit ~ Root
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     37 1684.5                                  
## 2     38 6948.8 -1   -5264.4 115.63 6.107e-13 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Summary of ancova2 model

summary(ancova2) 
## 
## Call:
## lm(formula = Fruit ~ Grazing + Root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.1920  -2.8224   0.3223   3.9144  17.3290 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -127.829      9.664  -13.23 1.35e-15 ***
## GrazingUngrazed   36.103      3.357   10.75 6.11e-13 ***
## Root              23.560      1.149   20.51  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.747 on 37 degrees of freedom
## Multiple R-squared:  0.9291, Adjusted R-squared:  0.9252 
## F-statistic: 242.3 on 2 and 37 DF,  p-value: < 2.2e-16

#ANOVA table for ancova2 model

anova(ancova2)
## Analysis of Variance Table
## 
## Response: Fruit
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## Grazing    1  2910.4  2910.4  63.929 1.397e-09 ***
## Root       1 19148.9 19148.9 420.616 < 2.2e-16 ***
## Residuals 37  1684.5    45.5                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Do model simplification using step directive called as automatic model-simplification directive

step(ancova)
## Start:  AIC=157.5
## Fruit ~ Grazing * Root
## 
##                Df Sum of Sq    RSS    AIC
## - Grazing:Root  1    4.8122 1684.5 155.61
## <none>                      1679.7 157.50
## 
## Step:  AIC=155.61
## Fruit ~ Grazing + Root
## 
##           Df Sum of Sq     RSS    AIC
## <none>                  1684.5 155.61
## - Grazing  1    5264.4  6948.8 210.30
## - Root     1   19148.9 20833.4 254.22
## 
## Call:
## lm(formula = Fruit ~ Grazing + Root)
## 
## Coefficients:
##     (Intercept)  GrazingUngrazed             Root  
##         -127.83            36.10            23.56

#Summary of ancova2

summary(ancova2)
## 
## Call:
## lm(formula = Fruit ~ Grazing + Root)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17.1920  -2.8224   0.3223   3.9144  17.3290 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -127.829      9.664  -13.23 1.35e-15 ***
## GrazingUngrazed   36.103      3.357   10.75 6.11e-13 ***
## Root              23.560      1.149   20.51  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.747 on 37 degrees of freedom
## Multiple R-squared:  0.9291, Adjusted R-squared:  0.9252 
## F-statistic: 242.3 on 2 and 37 DF,  p-value: < 2.2e-16

#Use abline directive to get the line; calling plot directive again

plot(Root,Fruit)
points(Root[Grazing=="Ungrazed"],Fruit[Grazing=="Ungrazed"])
points(Root[Grazing=="Grazed"],Fruit[Grazing=="Grazed"],pch=16)
abline(-127.829 , 23.56)
abline(-127.829+36.1032 , 23.56)

#A more complex Ancova: 2 factors and 1 continuous covariate

gain<-read.table("C:\\Users\\Sweta\\Documents\\Data\\Gain.txt",header=T)
attach(gain)
## The following object is masked from photoperiod:
## 
##     Genotype
names(gain) 
## [1] "Weight"   "Sex"      "Age"      "Genotype" "Score"

#We begin by considering all interaction

m1<-lm(Weight~Sex*Age*Genotype)

#summarize m1 model

summary(m1)
## 
## Call:
## lm(formula = Weight ~ Sex * Age * Genotype)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40218 -0.12043 -0.01065  0.12592  0.44687 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 7.80053    0.24941  31.276  < 2e-16 ***
## Sexmale                    -0.51966    0.35272  -1.473  0.14936    
## Age                         0.34950    0.07520   4.648 4.39e-05 ***
## GenotypeCloneB              1.19870    0.35272   3.398  0.00167 ** 
## GenotypeCloneC             -0.41751    0.35272  -1.184  0.24429    
## GenotypeCloneD              0.95600    0.35272   2.710  0.01023 *  
## GenotypeCloneE             -0.81604    0.35272  -2.314  0.02651 *  
## GenotypeCloneF              1.66851    0.35272   4.730 3.41e-05 ***
## Sexmale:Age                -0.11283    0.10635  -1.061  0.29579    
## Sexmale:GenotypeCloneB     -0.31716    0.49882  -0.636  0.52891    
## Sexmale:GenotypeCloneC     -1.06234    0.49882  -2.130  0.04010 *  
## Sexmale:GenotypeCloneD     -0.73547    0.49882  -1.474  0.14906    
## Sexmale:GenotypeCloneE     -0.28533    0.49882  -0.572  0.57087    
## Sexmale:GenotypeCloneF     -0.19839    0.49882  -0.398  0.69319    
## Age:GenotypeCloneB         -0.10146    0.10635  -0.954  0.34643    
## Age:GenotypeCloneC         -0.20825    0.10635  -1.958  0.05799 .  
## Age:GenotypeCloneD         -0.01757    0.10635  -0.165  0.86970    
## Age:GenotypeCloneE         -0.03825    0.10635  -0.360  0.72123    
## Age:GenotypeCloneF         -0.05512    0.10635  -0.518  0.60743    
## Sexmale:Age:GenotypeCloneB  0.15469    0.15040   1.029  0.31055    
## Sexmale:Age:GenotypeCloneC  0.35322    0.15040   2.349  0.02446 *  
## Sexmale:Age:GenotypeCloneD  0.19227    0.15040   1.278  0.20929    
## Sexmale:Age:GenotypeCloneE  0.13203    0.15040   0.878  0.38585    
## Sexmale:Age:GenotypeCloneF  0.08709    0.15040   0.579  0.56616    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2378 on 36 degrees of freedom
## Multiple R-squared:  0.9742, Adjusted R-squared:  0.9577 
## F-statistic: 59.06 on 23 and 36 DF,  p-value: < 2.2e-16

#Model simplication

step(m1)
## Start:  AIC=-155.01
## Weight ~ Sex * Age * Genotype
## 
##                    Df Sum of Sq    RSS     AIC
## - Sex:Age:Genotype  5   0.34912 2.3849 -155.51
## <none>                          2.0358 -155.01
## 
## Step:  AIC=-155.51
## Weight ~ Sex + Age + Genotype + Sex:Age + Sex:Genotype + Age:Genotype
## 
##                Df Sum of Sq    RSS     AIC
## - Sex:Genotype  5  0.146901 2.5318 -161.92
## - Age:Genotype  5  0.168136 2.5531 -161.42
## - Sex:Age       1  0.048937 2.4339 -156.29
## <none>                      2.3849 -155.51
## 
## Step:  AIC=-161.92
## Weight ~ Sex + Age + Genotype + Sex:Age + Age:Genotype
## 
##                Df Sum of Sq    RSS     AIC
## - Age:Genotype  5  0.168136 2.7000 -168.07
## - Sex:Age       1  0.048937 2.5808 -162.78
## <none>                      2.5318 -161.92
## 
## Step:  AIC=-168.07
## Weight ~ Sex + Age + Genotype + Sex:Age
## 
##            Df Sum of Sq    RSS      AIC
## - Sex:Age   1     0.049  2.749 -168.989
## <none>                   2.700 -168.066
## - Genotype  5    54.958 57.658    5.612
## 
## Step:  AIC=-168.99
## Weight ~ Sex + Age + Genotype
## 
##            Df Sum of Sq    RSS      AIC
## <none>                   2.749 -168.989
## - Sex       1    10.374 13.122  -77.201
## - Age       1    10.770 13.519  -75.415
## - Genotype  5    54.958 57.707    3.662
## 
## Call:
## lm(formula = Weight ~ Sex + Age + Genotype)
## 
## Coefficients:
##    (Intercept)         Sexmale             Age  GenotypeCloneB  GenotypeCloneC  
##         7.9370         -0.8316          0.2996          0.9678         -1.0436  
## GenotypeCloneD  GenotypeCloneE  GenotypeCloneF  
##         0.8240         -0.8754          1.5346

#Consider only variables (Don’t consider interaction between the variables)

m2<-aov(Weight~Sex+Age+Genotype)

#SUmmary of model m2

summary(m2)
##             Df Sum Sq Mean Sq F value Pr(>F)    
## Sex          1  10.37  10.374   196.2 <2e-16 ***
## Age          1  10.77  10.770   203.7 <2e-16 ***
## Genotype     5  54.96  10.992   207.9 <2e-16 ***
## Residuals   52   2.75   0.053                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#To get standard error and other parameters

summary.lm(m2)
## 
## Call:
## aov(formula = Weight ~ Sex + Age + Genotype)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40005 -0.15120 -0.01668  0.16953  0.49227 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     7.93701    0.10066  78.851  < 2e-16 ***
## Sexmale        -0.83161    0.05937 -14.008  < 2e-16 ***
## Age             0.29958    0.02099  14.273  < 2e-16 ***
## GenotypeCloneB  0.96778    0.10282   9.412 8.07e-13 ***
## GenotypeCloneC -1.04361    0.10282 -10.149 6.21e-14 ***
## GenotypeCloneD  0.82396    0.10282   8.013 1.21e-10 ***
## GenotypeCloneE -0.87540    0.10282  -8.514 1.98e-11 ***
## GenotypeCloneF  1.53460    0.10282  14.925  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2299 on 52 degrees of freedom
## Multiple R-squared:  0.9651, Adjusted R-squared:  0.9604 
## F-statistic: 205.7 on 7 and 52 DF,  p-value: < 2.2e-16

#Factor level reduction

newgen<-factor(1+(Genotype=="CloneB")+(Genotype=="CloneD")+ 2*(Genotype=="CloneC") + 2*(Genotype=="CloneE") + 3*(Genotype=="CloneF")) 
newgen
##  [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4 1 1 1 1 1 2 2 2
## [39] 2 2 3 3 3 3 3 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4
## Levels: 1 2 3 4

#Use Sex, Age, newgen and define new model m3

m3<-lm(Weight~Sex+Age+newgen)

#Compare anova table for m2 and m3

anova(m2,m3) 
## Analysis of Variance Table
## 
## Model 1: Weight ~ Sex + Age + Genotype
## Model 2: Weight ~ Sex + Age + newgen
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     52 2.7489                           
## 2     54 2.9938 -2  -0.24489 2.3163 0.1087