#Name: Sweta Kumari #ANALYSIS OF COVARIANCE
#ANCOVA calculations
photoperiod<-read.table("C:\\Users\\Sweta\\Documents\\Data\\Photoperiod.txt",header=T)
attach(photoperiod)
names(photoperiod)
## [1] "Genotype" "Growth" "Photoperiod"
#Let say Photoperiod=x and Growth=y #Find the sum of x and sum of x square
sum(Photoperiod);sum(Photoperiod^2)
## [1] 360
## [1] 6240
#Find the sum of y and sum of y square
sum(Growth);sum(Growth^2)
## [1] 57
## [1] 175
#Find the sum of x*y
sum(Growth*Photoperiod)
## [1] 932
#Find the sum and sum of squares #Calculate the sum of growth according to Genotype
tapply(Growth,Genotype,sum)
## A B C D E F
## 12 18 6 6 8 7
#Calculate the sum of growth square according to Genotype
tapply(Growth^2,Genotype,sum)
## A B C D E F
## 38 86 10 10 16 15
#Calculate the sum of Photoperiod according to Genotype
tapply(Photoperiod,Genotype,sum)
## A B C D E F
## 60 60 60 60 60 60
#Calculate the sum of Photoperiod square according to Genotype
tapply(Photoperiod^2,Genotype,sum)
## A B C D E F
## 1040 1040 1040 1040 1040 1040
#Calculate the sum of growth*Photoperiod according to Genotype
tapply(Photoperiod*Growth,Genotype,sum)
## A B C D E F
## 196 296 96 100 120 124
#Analysis of Covariance in R #Use lm directive and capture the output in model object
options(contrasts=c("contr.treatment","contr.poly"))
model<-lm(Growth~Genotype*Photoperiod)
#Summarize the model
summary(model)
##
## Call:
## lm(formula = Growth ~ Genotype * Photoperiod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5429 -0.1786 0.0000 0.1143 0.6286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.28571 0.48865 2.631 0.02193 *
## GenotypeB 0.42857 0.69105 0.620 0.54674
## GenotypeC -0.42857 0.69105 -0.620 0.54674
## GenotypeD -0.85714 0.69105 -1.240 0.23855
## GenotypeE 0.71429 0.69105 1.034 0.32169
## GenotypeF -1.57143 0.69105 -2.274 0.04213 *
## Photoperiod 0.11429 0.03030 3.771 0.00267 **
## GenotypeB:Photoperiod 0.07143 0.04286 1.667 0.12145
## GenotypeC:Photoperiod -0.07143 0.04286 -1.667 0.12145
## GenotypeD:Photoperiod -0.04286 0.04286 -1.000 0.33705
## GenotypeE:Photoperiod -0.11429 0.04286 -2.667 0.02054 *
## GenotypeF:Photoperiod 0.02143 0.04286 0.500 0.62612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3586 on 12 degrees of freedom
## Multiple R-squared: 0.9611, Adjusted R-squared: 0.9254
## F-statistic: 26.93 on 11 and 12 DF, p-value: 9.445e-07
#Use aov directive to get anova table for model
model<-aov(Growth~Genotype*Photoperiod)
#Summarize the model
summary(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Genotype 5 27.875 5.575 43.361 2.85e-07 ***
## Photoperiod 1 7.058 7.058 54.898 8.17e-06 ***
## Genotype:Photoperiod 5 3.149 0.630 4.898 0.0113 *
## Residuals 12 1.543 0.129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#use summary.aov to get ANOVA table
summary.aov(model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Genotype 5 27.875 5.575 43.361 2.85e-07 ***
## Photoperiod 1 7.058 7.058 54.898 8.17e-06 ***
## Genotype:Photoperiod 5 3.149 0.630 4.898 0.0113 *
## Residuals 12 1.543 0.129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Use summary.lm to get the parameters and standard error.
summary.lm(model)
##
## Call:
## aov(formula = Growth ~ Genotype * Photoperiod)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5429 -0.1786 0.0000 0.1143 0.6286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.28571 0.48865 2.631 0.02193 *
## GenotypeB 0.42857 0.69105 0.620 0.54674
## GenotypeC -0.42857 0.69105 -0.620 0.54674
## GenotypeD -0.85714 0.69105 -1.240 0.23855
## GenotypeE 0.71429 0.69105 1.034 0.32169
## GenotypeF -1.57143 0.69105 -2.274 0.04213 *
## Photoperiod 0.11429 0.03030 3.771 0.00267 **
## GenotypeB:Photoperiod 0.07143 0.04286 1.667 0.12145
## GenotypeC:Photoperiod -0.07143 0.04286 -1.667 0.12145
## GenotypeD:Photoperiod -0.04286 0.04286 -1.000 0.33705
## GenotypeE:Photoperiod -0.11429 0.04286 -2.667 0.02054 *
## GenotypeF:Photoperiod 0.02143 0.04286 0.500 0.62612
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3586 on 12 degrees of freedom
## Multiple R-squared: 0.9611, Adjusted R-squared: 0.9254
## F-statistic: 26.93 on 11 and 12 DF, p-value: 9.445e-07
#Ancova with different values of the covariates
ipomopsis<-read.csv("C:\\Users\\Sweta\\Documents\\Data\\ipomopsis.csv",header=T)
attach(ipomopsis)
names(ipomopsis)
## [1] "Root" "Fruit" "Grazing"
#Find sum of root and root^2
sum(Root);sum(Root^2)
## [1] 287.246
## [1] 2148.172
#Find sum of fruit and fruit^2
sum(Fruit);sum(Fruit^2)
## [1] 2376.42
## [1] 164928.1
#Find sum of root*fruit
sum(Root*Fruit)
## [1] 18263.16
#Let’s consider only grazed plant data
sum(Root[Grazing=="Grazed"]);sum(Root[Grazing=="Grazed"]^2)
## [1] 166.188
## [1] 1400.834
#Now consider ungrazed plant data
sum(Root[Grazing=="Ungrazed"]);sum(Root[Grazing=="Ungrazed"]^2)
## [1] 121.058
## [1] 747.3387
#Let’s check data on fruits for grazed plant data
sum(Fruit[Grazing=="Grazed"]);sum(Fruit[Grazing=="Grazed"]^2)
## [1] 1358.81
## [1] 104156
#Similarly fruit data for ungrazed plant
sum(Fruit[Grazing=="Ungrazed"]);sum(Fruit[Grazing=="Ungrazed"]^2)
## [1] 1017.61
## [1] 60772.11
#To get data for sum of products of root and fruit for grazed plant.
sum(Root[Grazing=="Grazed"]*Fruit[Grazing=="Grazed"])
## [1] 11753.64
#Similarly for ungrazed plant
sum(Root[Grazing=="Ungrazed"]*Fruit[Grazing=="Ungrazed"])
## [1] 6509.522
#Ancova in R using lm #Find the mean of fruit for grazed and ungrazed plant
tapply(Fruit,Grazing, mean)
## Grazed Ungrazed
## 67.9405 50.8805
#Let’s plot the graph
plot(Root,Fruit)
#Let’s do blank plot by type=n; #select grazed and ungrazed points for fruits and root;# Use plotting character = 16 for plotting the points
plot(Root,Fruit,type="n")
points(Root[Grazing=="Ungrazed"],Fruit[Grazing=="Ungrazed"])
points(Root[Grazing=="Grazed"],Fruit[Grazing=="Grazed"],pch=16)
#To get different slopes and inercepts of both variables, use * asterisk mark .
ancova<-lm(Fruit~Grazing*Root)
#Summarize the result
summary(ancova)
##
## Call:
## lm(formula = Fruit ~ Grazing * Root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.3177 -2.8320 0.1247 3.8511 17.1313
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -125.173 12.811 -9.771 1.15e-11 ***
## GrazingUngrazed 30.806 16.842 1.829 0.0757 .
## Root 23.240 1.531 15.182 < 2e-16 ***
## GrazingUngrazed:Root 0.756 2.354 0.321 0.7500
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.831 on 36 degrees of freedom
## Multiple R-squared: 0.9293, Adjusted R-squared: 0.9234
## F-statistic: 157.6 on 3 and 36 DF, p-value: < 2.2e-16
#ANOVA table for maximal model
anova(ancova)
## Analysis of Variance Table
##
## Response: Fruit
## Df Sum Sq Mean Sq F value Pr(>F)
## Grazing 1 2910.4 2910.4 62.3795 2.262e-09 ***
## Root 1 19148.9 19148.9 410.4201 < 2.2e-16 ***
## Grazing:Root 1 4.8 4.8 0.1031 0.75
## Residuals 36 1679.6 46.7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#update the ancova model by removing the interaction between grazing and roots
ancova2<-update(ancova, ~ . - Grazing:Root)
#Compare ancova and ancova2 model
anova(ancova,ancova2)
## Analysis of Variance Table
##
## Model 1: Fruit ~ Grazing * Root
## Model 2: Fruit ~ Grazing + Root
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 36 1679.7
## 2 37 1684.5 -1 -4.8122 0.1031 0.75
#Update ancova2 model by removing Grazing from the model
ancova3<-update(ancova2, ~ . - Grazing)
#Compare ancova2 and ancova3 model
anova(ancova2,ancova3)
## Analysis of Variance Table
##
## Model 1: Fruit ~ Grazing + Root
## Model 2: Fruit ~ Root
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 37 1684.5
## 2 38 6948.8 -1 -5264.4 115.63 6.107e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Summary of ancova2 model
summary(ancova2)
##
## Call:
## lm(formula = Fruit ~ Grazing + Root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.1920 -2.8224 0.3223 3.9144 17.3290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -127.829 9.664 -13.23 1.35e-15 ***
## GrazingUngrazed 36.103 3.357 10.75 6.11e-13 ***
## Root 23.560 1.149 20.51 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.747 on 37 degrees of freedom
## Multiple R-squared: 0.9291, Adjusted R-squared: 0.9252
## F-statistic: 242.3 on 2 and 37 DF, p-value: < 2.2e-16
#ANOVA table for ancova2 model
anova(ancova2)
## Analysis of Variance Table
##
## Response: Fruit
## Df Sum Sq Mean Sq F value Pr(>F)
## Grazing 1 2910.4 2910.4 63.929 1.397e-09 ***
## Root 1 19148.9 19148.9 420.616 < 2.2e-16 ***
## Residuals 37 1684.5 45.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Do model simplification using step directive called as automatic model-simplification directive
step(ancova)
## Start: AIC=157.5
## Fruit ~ Grazing * Root
##
## Df Sum of Sq RSS AIC
## - Grazing:Root 1 4.8122 1684.5 155.61
## <none> 1679.7 157.50
##
## Step: AIC=155.61
## Fruit ~ Grazing + Root
##
## Df Sum of Sq RSS AIC
## <none> 1684.5 155.61
## - Grazing 1 5264.4 6948.8 210.30
## - Root 1 19148.9 20833.4 254.22
##
## Call:
## lm(formula = Fruit ~ Grazing + Root)
##
## Coefficients:
## (Intercept) GrazingUngrazed Root
## -127.83 36.10 23.56
#Summary of ancova2
summary(ancova2)
##
## Call:
## lm(formula = Fruit ~ Grazing + Root)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.1920 -2.8224 0.3223 3.9144 17.3290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -127.829 9.664 -13.23 1.35e-15 ***
## GrazingUngrazed 36.103 3.357 10.75 6.11e-13 ***
## Root 23.560 1.149 20.51 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.747 on 37 degrees of freedom
## Multiple R-squared: 0.9291, Adjusted R-squared: 0.9252
## F-statistic: 242.3 on 2 and 37 DF, p-value: < 2.2e-16
#Use abline directive to get the line; calling plot directive again
plot(Root,Fruit)
points(Root[Grazing=="Ungrazed"],Fruit[Grazing=="Ungrazed"])
points(Root[Grazing=="Grazed"],Fruit[Grazing=="Grazed"],pch=16)
abline(-127.829 , 23.56)
abline(-127.829+36.1032 , 23.56)
#A more complex Ancova: 2 factors and 1 continuous covariate
gain<-read.table("C:\\Users\\Sweta\\Documents\\Data\\Gain.txt",header=T)
attach(gain)
## The following object is masked from photoperiod:
##
## Genotype
names(gain)
## [1] "Weight" "Sex" "Age" "Genotype" "Score"
#We begin by considering all interaction
m1<-lm(Weight~Sex*Age*Genotype)
#summarize m1 model
summary(m1)
##
## Call:
## lm(formula = Weight ~ Sex * Age * Genotype)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40218 -0.12043 -0.01065 0.12592 0.44687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.80053 0.24941 31.276 < 2e-16 ***
## Sexmale -0.51966 0.35272 -1.473 0.14936
## Age 0.34950 0.07520 4.648 4.39e-05 ***
## GenotypeCloneB 1.19870 0.35272 3.398 0.00167 **
## GenotypeCloneC -0.41751 0.35272 -1.184 0.24429
## GenotypeCloneD 0.95600 0.35272 2.710 0.01023 *
## GenotypeCloneE -0.81604 0.35272 -2.314 0.02651 *
## GenotypeCloneF 1.66851 0.35272 4.730 3.41e-05 ***
## Sexmale:Age -0.11283 0.10635 -1.061 0.29579
## Sexmale:GenotypeCloneB -0.31716 0.49882 -0.636 0.52891
## Sexmale:GenotypeCloneC -1.06234 0.49882 -2.130 0.04010 *
## Sexmale:GenotypeCloneD -0.73547 0.49882 -1.474 0.14906
## Sexmale:GenotypeCloneE -0.28533 0.49882 -0.572 0.57087
## Sexmale:GenotypeCloneF -0.19839 0.49882 -0.398 0.69319
## Age:GenotypeCloneB -0.10146 0.10635 -0.954 0.34643
## Age:GenotypeCloneC -0.20825 0.10635 -1.958 0.05799 .
## Age:GenotypeCloneD -0.01757 0.10635 -0.165 0.86970
## Age:GenotypeCloneE -0.03825 0.10635 -0.360 0.72123
## Age:GenotypeCloneF -0.05512 0.10635 -0.518 0.60743
## Sexmale:Age:GenotypeCloneB 0.15469 0.15040 1.029 0.31055
## Sexmale:Age:GenotypeCloneC 0.35322 0.15040 2.349 0.02446 *
## Sexmale:Age:GenotypeCloneD 0.19227 0.15040 1.278 0.20929
## Sexmale:Age:GenotypeCloneE 0.13203 0.15040 0.878 0.38585
## Sexmale:Age:GenotypeCloneF 0.08709 0.15040 0.579 0.56616
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2378 on 36 degrees of freedom
## Multiple R-squared: 0.9742, Adjusted R-squared: 0.9577
## F-statistic: 59.06 on 23 and 36 DF, p-value: < 2.2e-16
#Model simplication
step(m1)
## Start: AIC=-155.01
## Weight ~ Sex * Age * Genotype
##
## Df Sum of Sq RSS AIC
## - Sex:Age:Genotype 5 0.34912 2.3849 -155.51
## <none> 2.0358 -155.01
##
## Step: AIC=-155.51
## Weight ~ Sex + Age + Genotype + Sex:Age + Sex:Genotype + Age:Genotype
##
## Df Sum of Sq RSS AIC
## - Sex:Genotype 5 0.146901 2.5318 -161.92
## - Age:Genotype 5 0.168136 2.5531 -161.42
## - Sex:Age 1 0.048937 2.4339 -156.29
## <none> 2.3849 -155.51
##
## Step: AIC=-161.92
## Weight ~ Sex + Age + Genotype + Sex:Age + Age:Genotype
##
## Df Sum of Sq RSS AIC
## - Age:Genotype 5 0.168136 2.7000 -168.07
## - Sex:Age 1 0.048937 2.5808 -162.78
## <none> 2.5318 -161.92
##
## Step: AIC=-168.07
## Weight ~ Sex + Age + Genotype + Sex:Age
##
## Df Sum of Sq RSS AIC
## - Sex:Age 1 0.049 2.749 -168.989
## <none> 2.700 -168.066
## - Genotype 5 54.958 57.658 5.612
##
## Step: AIC=-168.99
## Weight ~ Sex + Age + Genotype
##
## Df Sum of Sq RSS AIC
## <none> 2.749 -168.989
## - Sex 1 10.374 13.122 -77.201
## - Age 1 10.770 13.519 -75.415
## - Genotype 5 54.958 57.707 3.662
##
## Call:
## lm(formula = Weight ~ Sex + Age + Genotype)
##
## Coefficients:
## (Intercept) Sexmale Age GenotypeCloneB GenotypeCloneC
## 7.9370 -0.8316 0.2996 0.9678 -1.0436
## GenotypeCloneD GenotypeCloneE GenotypeCloneF
## 0.8240 -0.8754 1.5346
#Consider only variables (Don’t consider interaction between the variables)
m2<-aov(Weight~Sex+Age+Genotype)
#SUmmary of model m2
summary(m2)
## Df Sum Sq Mean Sq F value Pr(>F)
## Sex 1 10.37 10.374 196.2 <2e-16 ***
## Age 1 10.77 10.770 203.7 <2e-16 ***
## Genotype 5 54.96 10.992 207.9 <2e-16 ***
## Residuals 52 2.75 0.053
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#To get standard error and other parameters
summary.lm(m2)
##
## Call:
## aov(formula = Weight ~ Sex + Age + Genotype)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40005 -0.15120 -0.01668 0.16953 0.49227
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.93701 0.10066 78.851 < 2e-16 ***
## Sexmale -0.83161 0.05937 -14.008 < 2e-16 ***
## Age 0.29958 0.02099 14.273 < 2e-16 ***
## GenotypeCloneB 0.96778 0.10282 9.412 8.07e-13 ***
## GenotypeCloneC -1.04361 0.10282 -10.149 6.21e-14 ***
## GenotypeCloneD 0.82396 0.10282 8.013 1.21e-10 ***
## GenotypeCloneE -0.87540 0.10282 -8.514 1.98e-11 ***
## GenotypeCloneF 1.53460 0.10282 14.925 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2299 on 52 degrees of freedom
## Multiple R-squared: 0.9651, Adjusted R-squared: 0.9604
## F-statistic: 205.7 on 7 and 52 DF, p-value: < 2.2e-16
#Factor level reduction
newgen<-factor(1+(Genotype=="CloneB")+(Genotype=="CloneD")+ 2*(Genotype=="CloneC") + 2*(Genotype=="CloneE") + 3*(Genotype=="CloneF"))
newgen
## [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4 1 1 1 1 1 2 2 2
## [39] 2 2 3 3 3 3 3 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4
## Levels: 1 2 3 4
#Use Sex, Age, newgen and define new model m3
m3<-lm(Weight~Sex+Age+newgen)
#Compare anova table for m2 and m3
anova(m2,m3)
## Analysis of Variance Table
##
## Model 1: Weight ~ Sex + Age + Genotype
## Model 2: Weight ~ Sex + Age + newgen
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 52 2.7489
## 2 54 2.9938 -2 -0.24489 2.3163 0.1087