Complete between subject ANCOVA Tutorial

For Any advice of suggestion plz feel free to Contact me : vet.m.mohamed@gmail.com

Find the Complete Tutorial and data sets here

Note: running the test although vilating some normality here since this analysis for tutorial only

loading the libraries

library(knitr)
library(tidyverse)
library(car)
library(effects)
library(multcomp)
library(MOTE)

loading the data

data<-read.csv("./data sets/bn ancova.csv")

head(data)

##   ï..subjno attdrug phyheal menheal psydrug    emplmnt      religion
## 1         1       8       5       8       0 UNEMPLOYED    PROTESTANT
## 2         2       7       4       6       0   EMPLOYED    PROTESTANT
## 3         3       8       3       4       2   EMPLOYED      CATHOLIC
## 4         4       9       2       2       3 UNEMPLOYED NONE OR OTHER
## 5         5       7       3       6      15 UNEMPLOYED        JEWISH
## 6         6       8       5       5       3   EMPLOYED      CATHOLIC

selecting the variables

data<-data[,c("ï..subjno","attdrug","phyheal","emplmnt","religion")]

head(data)

##   ï..subjno attdrug phyheal    emplmnt      religion
## 1         1       8       5 UNEMPLOYED    PROTESTANT
## 2         2       7       4   EMPLOYED    PROTESTANT
## 3         3       8       3   EMPLOYED      CATHOLIC
## 4         4       9       2 UNEMPLOYED NONE OR OTHER
## 5         5       7       3 UNEMPLOYED        JEWISH
## 6         6       8       5   EMPLOYED      CATHOLIC

the assumptions includes : Accuracy missing outliers aditivity Normality Linearity Homogeniety + homoscdasticity

accuracy

summary(data) #we have a problem in levels of religion

##    ï..subjno        attdrug          phyheal             emplmnt   
##  Min.   :  1.0   Min.   : 5.000   Min.   : 2.000   EMPLOYED  :246  
##  1st Qu.:137.0   1st Qu.: 7.000   1st Qu.: 3.000   UNEMPLOYED:219  
##  Median :314.0   Median : 8.000   Median : 5.000                   
##  Mean   :317.4   Mean   : 7.686   Mean   : 4.972                   
##  3rd Qu.:483.0   3rd Qu.: 9.000   3rd Qu.: 6.000                   
##  Max.   :758.0   Max.   :10.000   Max.   :15.000                   
##           religion  
##               :  3  
##  CATHOLIC     :119  
##  JEWISH       : 92  
##  NONE OR OTHER: 76  
##  PROTESTANT   :175  
##

data<-data%>%mutate(religion=factor(religion,levels = c("CATHOLIC",
                                                        "JEWISH",
                                                        "NONE OR OTHER",
                                                        "PROTESTANT"),labels = c("CATHOLIC",
                                                                                 "JEWISH",
                                                                                 "NONE",
                                                                                 "PROTESTANT")))
summary(data)

##    ï..subjno        attdrug          phyheal             emplmnt   
##  Min.   :  1.0   Min.   : 5.000   Min.   : 2.000   EMPLOYED  :246  
##  1st Qu.:137.0   1st Qu.: 7.000   1st Qu.: 3.000   UNEMPLOYED:219  
##  Median :314.0   Median : 8.000   Median : 5.000                   
##  Mean   :317.4   Mean   : 7.686   Mean   : 4.972                   
##  3rd Qu.:483.0   3rd Qu.: 9.000   3rd Qu.: 6.000                   
##  Max.   :758.0   Max.   :10.000   Max.   :15.000                   
##        religion  
##  CATHOLIC  :119  
##  JEWISH    : 92  
##  NONE      : 76  
##  PROTESTANT:175  
##  NA's      :  3  
##

missing –> we have only 3 cases in religion

outliers – > mahalanobis of CV and DV

mah<-mahalanobis(x = data[,c("attdrug","phyheal")],center = colMeans(data[,c("attdrug","phyheal")]),cov = cov(data[,c("attdrug","phyheal")]))
cut<-qchisq(p = .99,df = 2)

data<-data[which(mah<cut),]

sditivity

(cor(data$attdrug,data$phyheal))

## [1] 0.1494295

normality

chi<-rchisq(nrow(data),df = 7)

fake<-lm(chi~data$attdrug+data$phyheal)

stdresid<-rstudent(fake)
stdfit<-scale(fitted(fake))

plot(stdresid,stdfit)%>%abline(v = 0)%>%abline(h= 0)

we might face a problem in homogeniety but the homoscdacity is great

qqPlot(fake)  #the linearity is not so bad

## [1]  34 265

hist(stdresid)

homogeneity

leveneTest(data$attdrug~data$emplmnt*data$religion)  #the homogeneity is good

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   7   1.152 0.3294
##       446

now lets start in ANCOVA

anc<-data%>%with(lm(attdrug~emplmnt*religion+phyheal,data = data))

summary.aov(anc)

##                   Df Sum Sq Mean Sq F value  Pr(>F)   
## emplmnt            1    2.3   2.307   1.813 0.17888   
## religion           3    7.0   2.345   1.842 0.13872   
## phyheal            1   13.9  13.943  10.954 0.00101 **
## emplmnt:religion   3    9.5   3.155   2.478 0.06067 . 
## Residuals        445  566.5   1.273                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 3 observations deleted due to missingness

here we see that the Covariance effect of physical health is significant and this is an indicator to the adjust effect of physical health on the attitude another thing is the significance effect of the interaction which mean that the significant effect of employment group is dependent on the significance level of the religion group

main effect of every effect

physical health SS Variable/(SS residual + SS Variable)

SSR<-566.5
13.9/(SSR+13.9)

## [1] 0.023949

#Employment

2.3/(SSR+2.3)

## [1] 0.004043601

#religion
7/(SSR+7)

## [1] 0.01220575

#interaction
9.5/(SSR+9.5)

## [1] 0.01649306

getting the adjusted mean using effects library

effect("emplmnt",anc) #adjusted mean fot Employment

## NOTE: emplmnt is not a high-order term in the model

## 
##  emplmnt effect
## emplmnt
##   EMPLOYED UNEMPLOYED 
##   7.629864   7.737740

effect("religion",anc)#adjusted mean fot religion

## NOTE: religion is not a high-order term in the model

## 
##  religion effect
## religion
##   CATHOLIC     JEWISH       NONE PROTESTANT 
##   7.857499   7.679772   7.406089   7.680639

effect("emplmnt*religion",anc) #adjusted mean fot interaction

## 
##  emplmnt*religion effect
##             religion
## emplmnt      CATHOLIC   JEWISH     NONE PROTESTANT
##   EMPLOYED   7.706441 7.612309 7.657600   7.573605
##   UNEMPLOYED 8.028414 7.756103 7.121515   7.801744

now we will run post hoc test for interaction and to do it you have to take care of adjusted means so the pairwise t test is not appropite we will use tukyHSD using multcomp package

then we have to separate the data sets of each condition and run ancova for each part to do paireise comparison

set1<-data%>%filter(religion=="CATHOLIC")
set2<-data%>%filter(religion=="JEWISH")
set3<-data%>%filter(religion=="PROTESTANT")
set4<-data%>%filter(religion=="NONE")

fit1<-set1%>%with(lm(attdrug~phyheal+emplmnt))
fit2<-set2%>%with(lm(attdrug~phyheal+emplmnt))
fit3<-set3%>%with(lm(attdrug~phyheal+emplmnt))
fit4<-set4%>%with(lm(attdrug~phyheal+emplmnt))

Now lets use TukeyHSD for multiple comparison after adjusting for covariate using (general linear hypothesis testing package glht())

glht(fit1,linfct=mcp(emplmnt="Tukey"))%>%summary

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = attdrug ~ phyheal + emplmnt)
## 
## Linear Hypotheses:
##                            Estimate Std. Error t value Pr(>|t|)  
## UNEMPLOYED - EMPLOYED == 0   0.3257     0.1918   1.698   0.0921 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

glht(fit2,linfct=mcp(emplmnt="Tukey"))%>%summary

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = attdrug ~ phyheal + emplmnt)
## 
## Linear Hypotheses:
##                            Estimate Std. Error t value Pr(>|t|)
## UNEMPLOYED - EMPLOYED == 0   0.1428     0.2452   0.582    0.562
## (Adjusted p values reported -- single-step method)

glht(fit3,linfct=mcp(emplmnt="Tukey"))%>%summary

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = attdrug ~ phyheal + emplmnt)
## 
## Linear Hypotheses:
##                            Estimate Std. Error t value Pr(>|t|)
## UNEMPLOYED - EMPLOYED == 0   0.2243     0.1702   1.318    0.189
## (Adjusted p values reported -- single-step method)

glht(fit4,linfct=mcp(emplmnt="Tukey"))%>%summary

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: lm(formula = attdrug ~ phyheal + emplmnt)
## 
## Linear Hypotheses:
##                            Estimate Std. Error t value Pr(>|t|)  
## UNEMPLOYED - EMPLOYED == 0  -0.5513     0.3065  -1.799   0.0762 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Now lets look at the effect size of multiple comparisons of the interaction using MOTE package

data%>%group_by(emplmnt,religion)%>%summarize(sd=sd(attdrug),
                                              Num=n())

## # A tibble: 10 x 4
## # Groups:   emplmnt [?]
##    emplmnt    religion        sd   Num
##    <fct>      <fct>        <dbl> <int>
##  1 EMPLOYED   CATHOLIC     0.988    62
##  2 EMPLOYED   JEWISH       1.11     44
##  3 EMPLOYED   NONE         1.35     46
##  4 EMPLOYED   PROTESTANT   1.04     89
##  5 EMPLOYED   <NA>       NaN         1
##  6 UNEMPLOYED CATHOLIC     1.09     56
##  7 UNEMPLOYED JEWISH       1.21     46
##  8 UNEMPLOYED NONE         1.18     30
##  9 UNEMPLOYED PROTESTANT   1.20     81
## 10 UNEMPLOYED <NA>         0.707     2

we will depend on the mean estimated from effect package wich adjusted for Covariance

d.ind.t(m1 = 8.04, sd1 = 1.09, n1 = 56,
        m2 = 7.71, sd2 =  .99, n2 = 62,
        a = .05)$d #catholic Employed Vs Catholic unemployed

## [1] 0.3177309

d.ind.t(m1 = 7.78, sd1 = 1.21, n1 = 47,
        m2 = 7.61, sd2 = 1.11, n2 = 44,
        a = .05)$d #jwish Employed Vs jwish unemployed

## [1] 0.1462039

d.ind.t(m1 = 7.82, sd1 = 1.19, n1 = 83,
        m2 = 7.50, sd2 =  1.08, n2 = 92,
        a = .05)$d #protestant Employed Vs protestant unemployed

## [1] 0.2823188

d.ind.t(m1 = 7.12, sd1 = 1.18, n1 = 30,
        m2 = 7.67, sd2 =  1.35, n2 = 46,
        a = .05)$d #none Employed Vs none unemployed

## [1] -0.4276632

Complete between subject ANCOVA Tutorial

Mo’men Mohamed

April 4, 2019