ANOVA (1) when assumptions are met

1. Data

str(dat)

## 'data.frame':    167 obs. of  2 variables:
##  $ Group: Factor w/ 4 levels "A","B","C","D": 2 3 2 2 3 1 1 1 3 4 ...
##  $ X    : num  60.8 94.8 70.8 55.9 54.1 63.1 53.6 67.1 53.6 74.5 ...

par(mfrow=c(2,2))
qqnorm(datA$X,main="A") ;  qqline(datA$X,col="red")
qqnorm(datB$X,main="B") ;  qqline(datB$X,col="red")
qqnorm(datC$X,main="C") ;  qqline(datC$X,col="red")
qqnorm(datD$X,main="D") ;  qqline(datD$X,col="red")

p1=ggplot(dat,aes(x=X,fill=Group,alpha=0.3))+geom_density()
p2=ggplot(dat,aes(x=Group,y=X, fill=Group))+geom_boxplot()
grid.arrange(p1,p2,nrow=1)

library(dplyr)
dat %>% group_by(Group) %>% summarise_each(funs(n=length,min, mean,median,max,sd),X)

## # A tibble: 4 × 7
##    Group     n   min     mean median   max        sd
##   <fctr> <int> <dbl>    <dbl>  <dbl> <dbl>     <dbl>
## 1      A    48  39.8 58.93750   59.0  85.9  9.921921
## 2      B    55  41.4 64.00727   62.2  92.4 12.013308
## 3      C    43  48.1 67.13721   64.1  94.8 11.401901
## 4      D    21  50.4 67.41905   69.5  83.3  9.828154

2. Assumption check (Levene’s test)

Assumption for ANOVA

Normal distribution (qqnorm, skew,kurtoris, Shapiro-Wilk test…)
Data variance are equal (similar) across groups (Levene’s test): Yes if test is non-significant / No if test is significant –> Welch’s F test
Data are independent

library(car)
leveneTest(dat$X, dat$Group, center = median)

## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group   3  0.9085 0.4383
##       163

Levene’s test is not-significant–> data variance is equal across groups (assumption is met)

2. Main effect

model0=lm(X~1,data=dat)       #Model without predictor
model1=lm(X~Group,data=dat)   # Model with predictor as "Group"

anova(model0,model1)          #Compare model0 and model1

## Analysis of Variance Table
## 
## Model 1: X ~ 1
## Model 2: X ~ Group
##   Res.Df   RSS Df Sum of Sq      F   Pr(>F)   
## 1    166 21703                                
## 2    163 19812  3    1891.2 5.1864 0.001899 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(model1)                 #Compare model0 and model1

## Analysis of Variance Table
## 
## Response: X
##            Df  Sum Sq Mean Sq F value   Pr(>F)   
## Group       3  1891.2  630.39  5.1864 0.001899 **
## Residuals 163 19812.1  121.55                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(aov(X~Group,data=dat))   #Compare model0 and model1

##              Df Sum Sq Mean Sq F value Pr(>F)   
## Group         3   1891   630.4   5.186 0.0019 **
## Residuals   163  19812   121.5                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

There is a difference in X across groups (p=0.002)

summary(model1)

## 
## Call:
## lm(formula = X ~ Group, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.607  -9.072  -1.007   7.278  28.393 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   58.938      1.591  37.037  < 2e-16 ***
## GroupB         5.070      2.178   2.328 0.021136 *  
## GroupC         8.200      2.315   3.542 0.000518 ***
## GroupD         8.482      2.884   2.940 0.003755 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 163 degrees of freedom
## Multiple R-squared:  0.08714,    Adjusted R-squared:  0.07034 
## F-statistic: 5.186 on 3 and 163 DF,  p-value: 0.001899

There is a difference in X across groups (p=0.002)

list=c("X")
CreateContTable(list,strata = "Group",data=dat)

##                Stratified by Group
##                 A            B             C             D           
##   n             48           55            43            21          
##   X (mean (sd)) 58.94 (9.92) 64.01 (12.01) 67.14 (11.40) 67.42 (9.83)
##                Stratified by Group
##                 p      test
##   n                        
##   X (mean (sd))  0.002

There is a difference in X across groups (p=0.002)

2.3 Contrast

1st time

contrast1=c(-3,1,1,1)     #A vs (B-C-D)
contrast2=c(-1,-1,1,1)    #A-B vs C-D
contrast3=c(-1,-1,2,0)    #A-B vs C
contrasts(dat$Group)=cbind(contrast1,contrast2,contrast3)
model1=lm(X~Group,data=dat)
summary(model1)

## 
## Call:
## lm(formula = X ~ Group, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.607  -9.072  -1.007   7.278  28.393 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     64.3753     0.9137  70.458   <2e-16 ***
## Groupcontrast1   1.2674     0.5444   2.328   0.0211 *  
## Groupcontrast2   1.7763     1.9962   0.890   0.3749    
## Groupcontrast3  -0.1409     1.4675  -0.096   0.9236    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 163 degrees of freedom
## Multiple R-squared:  0.08714,    Adjusted R-squared:  0.07034 
## F-statistic: 5.186 on 3 and 163 DF,  p-value: 0.001899

Result: only A is lower significant

2nd time

contrast1=c(-1,1,0,0)     #A vs B
contrast2=c(-1,-1,0,2)    #A-B vs D
contrast3=c(-1,-1,-1,3)    #A-B-C vs D
contrasts(dat$Group)=cbind(contrast1,contrast2,contrast3)
model1=lm(X~Group,data=dat)
summary(model1)

## 
## Call:
## lm(formula = X ~ Group, data = dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.607  -9.072  -1.007   7.278  28.393 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     64.3753     0.9137  70.458  < 2e-16 ***
## Groupcontrast1   2.5349     1.0888   2.328  0.02114 *  
## Groupcontrast2   5.6648     2.0031   2.828  0.00527 ** 
## Groupcontrast3  -2.7620     1.4994  -1.842  0.06728 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.02 on 163 degrees of freedom
## Multiple R-squared:  0.08714,    Adjusted R-squared:  0.07034 
## F-statistic: 5.186 on 3 and 163 DF,  p-value: 0.001899

Result: only A is lower significant

2.4 Post-hoc test

Tukey HSD

model1=lm(X~Group,data=dat)
TukeyHSD(aov(model1))

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = model1)
## 
## $Group
##          diff        lwr       upr     p adj
## B-A 5.0697727 -0.5828277 10.722373 0.0958975
## C-A 8.1997093  2.1907746 14.208644 0.0028864
## D-A 8.4815476  0.9942359 15.968859 0.0194620
## C-B 3.1299366 -2.6955084  8.955382 0.5045073
## D-B 3.4117749 -3.9290936 10.752643 0.6236776
## D-C 0.2818383 -7.3368069  7.900484 0.9996816

Bonferroni

pairwise.t.test(dat$X,dat$Group,paired=FALSE,p.adjust.methods="bonferroni")

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  dat$X and dat$Group 
## 
##   A      B      C     
## B 0.0845 -      -     
## C 0.0031 0.4951 -     
## D 0.0188 0.4951 0.9236
## 
## P value adjustment method: holm

Note: TukeyHSD and Bonferroni post-hoc test provide similar results that A < C-D

Contrast method seems to be bias

3. Effect size

etaSquare (= Rsquared)
Omega squared (see Power analysis)

4. Residual check

Ref: https://rpubs.com/therimalaya/43190

ANOVA (1) when assumptions are met

bigcat

March 20, 2017

1. Data

2. Assumption check (Levene’s test)

2. Main effect

2.3 Contrast

1st time

2nd time

2.4 Post-hoc test

Tukey HSD

Bonferroni

3. Effect size

4. Residual check