Moths Trap Experiment

Sameer Mathur

Two-way ANOVA

---

READING AND DESCRIBING DATA

Reading Data

# reading data into R
mothTrap.df <- read.csv(paste("moth-trap-experiment.csv"))
# attaching data columns of the dataframe
attach(mothTrap.df)
# dimension of the dataframe
dim(mothTrap.df)
[1] 60  3

Number of Moths Trapped by Location Type and Type of Lure

# number of moths trapped by location type and type of lure
addmargins(table(location, type.of.lure))
        type.of.lure
location Chemical Scent Sugar Sum
  Ground        5     5     5  15
  Lower         5     5     5  15
  Middle        5     5     5  15
  Top           5     5     5  15
  Sum          20    20    20  60

Descriptive Statistics by Location Type

# descriptive statistics of  number of moths by location type
library(data.table)
dt <- data.table(mothTrap.df)
dt[, list(Count = .N,
        mean = round(mean(number.of.moths), 3),
        sd = round(mean(number.of.moths), 3),
        median = round(median(number.of.moths), 3),
        min = min(number.of.moths),
        max = max(number.of.moths)),
   by = list(location)]
   location Count   mean     sd median min max
1:      Top    15 23.333 23.333     21  13  35
2:   Middle    15 31.000 31.000     36  12  44
3:    Lower    15 33.333 33.333     34  17  44
4:   Ground    15 19.067 19.067     18  12  29

Descriptive Statistics by Type of Lure

# descriptive statistics of  number of moths by type of lure
library(data.table)
dt <- data.table(mothTrap.df)
dt[, list(Count = .N,
        mean = round(mean(number.of.moths), 3),
        sd = round(mean(number.of.moths), 3),
        median = round(median(number.of.moths), 3),
        min = min(number.of.moths),
        max = max(number.of.moths)),
   by = list(type.of.lure)]
   type.of.lure Count  mean    sd median min max
1:     Chemical    20 27.50 27.50   28.5  14  41
2:        Sugar    20 27.80 27.80   28.0  15  44
3:        Scent    20 24.75 24.75   22.0  12  44

Boxplot of Number of Moths Trapped by Location Type

# box plot of location type
boxplot(number.of.moths ~ location, data = mothTrap.df,
        main = "Boxplot of Location Type",
        xlab = "Location Type", ylab = "Number of Moths Trapaped")

plot of chunk unnamed-chunk-6

Boxplot of Number of Moths Trapped by Type of Lure

# box plot of type of lure
boxplot(number.of.moths ~ type.of.lure, data = mothTrap.df,
        main = "Boxplot of Type of Lure",
        xlab = "Type of Lure", ylab = "Number of Moths Trapaped")

plot of chunk unnamed-chunk-8

Boxplot of Number of Moths Trapped by Location Type and Type of Lure

# box plot of location type and type of lure
library(lattice)
bwplot(number.of.moths ~ location | type.of.lure, data = mothTrap.df,
        main = "Boxplot of Location Type and Type of Lure",
        ylab = "Number of Moths Trapaped",
        col = "black")

plot of chunk unnamed-chunk-10

Mean Plot of Number of Moths Trapped by Location Type

# mean plot by location type
library(gplots)
plotmeans(number.of.moths ~ location, data = mothTrap.df,
          xlab = "Location Type", ylab = "Number of Moths Trapped",
          digits=2, col = "black", ccol = "blue", barwidth = 2,
          legends = TRUE, mean.labels = TRUE, frame = TRUE)

plot of chunk unnamed-chunk-12

Mean Plot of Number of Moths Trapped by Type of Lure

# mean plot by type of lure
library(gplots)
plotmeans(number.of.moths ~ type.of.lure, data = mothTrap.df,
          xlab = "Type of Lure", ylab = "Number of Moths Trapped",
          digits=2, col = "black", ccol = "blue", barwidth = 2,
          legends = TRUE, mean.labels = TRUE, frame = TRUE)

plot of chunk unnamed-chunk-14

Interaction Plot of Location Type and Type of Lure

# interaction plot of location type and type of lure
interaction.plot(location, type.of.lure, number.of.moths,
                 type = "b", col = c(1:3), leg.bty = "o",
                 leg.bg = "beige", lwd = 2, pch = c(18, 24, 22),
                 xlab = "Location Type", ylab = "Log of Number of Moths Trapped",
                 main = "Interaction Plot of Location Type and Type of Lure")

plot of chunk unnamed-chunk-16

Two-way ANOVA

# two-way ANOVA
twoWayfit <- aov(number.of.moths ~ location * type.of.lure, data = mothTrap.df)
# summary of the ANOVA model
summary(twoWayfit)
                      Df Sum Sq Mean Sq F value   Pr(>F)    
location               3   1981   660.5  10.450 2.09e-05 ***
type.of.lure           2    113    56.5   0.894    0.416    
location:type.of.lure  6    115    19.2   0.303    0.932    
Residuals             48   3034    63.2                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOVA ASSUMPTIONS

Normality of Residuals

VISUAL INSPECTION: Normality of Residuals

# normal Q-Q plot
plot(twoWayfit, 2)

plot of chunk unnamed-chunk-19

STATISTICAL TEST 1: Normality of Residuals

# Anderson-Darling normality test
library(nortest)
ad.test(number.of.moths)

    Anderson-Darling normality test

data:  number.of.moths
A = 1.0008, p-value = 0.01138

STATISTICAL TEST 2: Normality of Residuals

# Shapiro-Wilk test of nirmality
shapiro.test(number.of.moths)

    Shapiro-Wilk normality test

data:  number.of.moths
W = 0.94533, p-value = 0.009448

Statistical Test Inference

Null hypothesis (\( H_0 \)): The data is normally distributed.

Both the tests are statistically significant, we reject the null hypothesis. Hence, we cannot assume normality in the data.

Homogeneity of Variance

VISUAL INSPECTION: Homogeneity of Variance

# residual versus fitted plot
plot(twoWayfit, 1)

plot of chunk unnamed-chunk-23

STATISTICAL TEST: Homogeneity of Variance

# Levene test for homogeneity of variance
library(car)
leveneTest(number.of.moths ~ location * type.of.lure, data = mothTrap.df)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group 11  0.6377 0.7875
      48               

Statistical Test Inference

Null hypothesis (\( H_0 \)): The variance of number of moths trapped are homogenous between groups.

The test is not statistically significant, we fail to reject the null hypothesis. Hence, we can assume the homogeneity of variance between the groups.

RECTIFY VIOLATION OF ANOVA ASSUMPTIONS

We use log transformation to the dependent variable (number of moths) and again check for the normality.

Log-Transformation of Variable Number of Moths

# change number of moths to log of number og moths
mothTrap.df$log.number.of.moths <- log(mothTrap.df$number.of.moths)
# attacjing data columns
attach(mothTrap.df)
# first few rows of the dataframe
head(mothTrap.df)
  number.of.moths location type.of.lure log.number.of.moths
1              32      Top     Chemical            3.465736
2              29      Top     Chemical            3.367296
3              16      Top     Chemical            2.772589
4              18      Top     Chemical            2.890372
5              20      Top     Chemical            2.995732
6              37   Middle     Chemical            3.610918

Two-way ANOVA After Log-Transformation

# two-way ANOVA after log-transformation
twoWayTransfit <- aov(log.number.of.moths ~ location * type.of.lure, data = mothTrap.df)
# summary of the ANOVA model
summary(twoWayTransfit)
                      Df Sum Sq Mean Sq F value   Pr(>F)    
location               3  2.966  0.9886   9.828 3.64e-05 ***
type.of.lure           2  0.262  0.1308   1.301    0.282    
location:type.of.lure  6  0.196  0.0326   0.324    0.921    
Residuals             48  4.828  0.1006                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interpretation of Two-way ANOVA Test Output

The p-value < 0.05 for the location type, we can conclude that there are significant differences in number of moths trapped between the location type.

For type of lure and interaction of the location and type of lure, we didn't find any significant differences.

COMPARING ANOVA ASSUMPTIONS BEFORE-AFTER LOG TRANSFORMATION

NORMALITY ASSUMTION: Comparing Normality of Residuals

Before Log-transformation

# normal Q-Q plot before log-transformation
plot(twoWayfit, 2)

plot of chunk unnamed-chunk-27

After Log-transformation

# normal Q-Q plot after log-transformation
plot(twoWayTransfit, 2)

plot of chunk unnamed-chunk-28

NORMALITY ASSUMTION TEST 1: Comapring Statistical Tests for Normality

Before Log-transformation

# Anderson-Darling normality test before log-transformation
library(nortest)
ad.test(number.of.moths)

    Anderson-Darling normality test

data:  number.of.moths
A = 1.0008, p-value = 0.01138

After Log-transformation

# Anderson-Darling normality test after log-transformation
library(nortest)
ad.test(mothTrap.df$log.number.of.moths)

    Anderson-Darling normality test

data:  mothTrap.df$log.number.of.moths
A = 0.97034, p-value = 0.01356

NORMALITY ASSUMTION TEST 2: Comapring Statistical Tests for Normality

Before Log-transformation

# Shapiro-Wilk test of normality before log-transformation
shapiro.test(number.of.moths)

    Shapiro-Wilk normality test

data:  number.of.moths
W = 0.94533, p-value = 0.009448

After Log-transformation

# Shapiro-Wilk test of normality after log-transformation
shapiro.test(mothTrap.df$log.number.of.moths)

    Shapiro-Wilk normality test

data:  mothTrap.df$log.number.of.moths
W = 0.94746, p-value = 0.01185

HOMOGENEITY OF VARIANCES: Comparing Plots for the Homogeneity of Variance

Before Log-transformations

# residual versus fitted plot before log-transformation
plot(twoWayfit, 1)

plot of chunk unnamed-chunk-33

After Log-transformations

# residual versus fitted plot after log-transformation
plot(twoWayTransfit, 1)

plot of chunk unnamed-chunk-34

HOMOGENEITY OF VARIANCES: Comparing Statistical Test for the Homogeneity of Variances

Before Log-transformation

# test homogeneity of variance after log-transformation
library(car)
leveneTest(number.of.moths ~ location * type.of.lure, data = mothTrap.df)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group 11  0.6377 0.7875
      48               

After Log-transformation

# test homogeneity of variance after log-transformation
library(car)
leveneTest(log.number.of.moths ~ location * type.of.lure, data = mothTrap.df)
Levene's Test for Homogeneity of Variance (center = median)
      Df F value Pr(>F)
group 11  0.5978 0.8211
      48               

Inference After Log-Transformation

After log transformation of the dependent variable number of moth trapped, we don't have heterogeneity problem i.e. we have equal variances between the groups.

But for normality assumption, we fail to accept the null hypothesis. That concludes, we cannot assume the normality of the data after the transformation.

Hence, we use non-parametric Kruskal-Wallis rank sum test.

Kruskal-Wallis Rank Sum Test

A non-parametric alternative to ANOVA is Kruskal-Wallis rank sum test, which can be used when ANOVA assumptions are not met.

Kruskal-Wallis Rank Sum Test

# Kruskal-Wallis rank sum test
kruskal.test(log.number.of.moths ~ interaction(location, type.of.lure), data = mothTrap.df)

    Kruskal-Wallis rank sum test

data:  log.number.of.moths by interaction(location, type.of.lure)
Kruskal-Wallis chi-squared = 24.704, df = 11, p-value = 0.01007

Pairwise-Comparison Test

pairwise.t.test(log.number.of.moths, interaction(location, type.of.lure), data = mothTrap.df, p.adjust.method = "BH", pool.sd = FALSE)

    Pairwise comparisons using t tests with non-pooled SD 

data:  log.number.of.moths and interaction(location, type.of.lure) 

                Ground.Chemical Lower.Chemical Middle.Chemical
Lower.Chemical  0.055           -              -              
Middle.Chemical 0.158           0.587          -              
Top.Chemical    0.587           0.135          0.350          
Ground.Scent    0.605           0.055          0.106          
Lower.Scent     0.106           0.602          0.900          
Middle.Scent    0.551           0.418          0.650          
Top.Scent       0.829           0.158          0.285          
Ground.Sugar    0.788           0.106          0.231          
Lower.Sugar     0.187           0.565          0.940          
Middle.Sugar    0.106           0.646          0.837          
Top.Sugar       0.365           0.213          0.565          
                Top.Chemical Ground.Scent Lower.Scent Middle.Scent
Lower.Chemical  -            -            -           -           
Middle.Chemical -            -            -           -           
Top.Chemical    -            -            -           -           
Ground.Scent    0.350        -            -           -           
Lower.Scent     0.260        0.083        -           -           
Middle.Scent    0.776        0.351        0.600       -           
Top.Scent       0.788        0.587        0.231       0.610       
Ground.Sugar    0.776        0.556        0.187       0.604       
Lower.Sugar     0.397        0.127        0.836       0.716       
Middle.Sugar    0.231        0.083        0.940       0.587       
Top.Sugar       0.747        0.222        0.420       0.940       
                Top.Scent Ground.Sugar Lower.Sugar Middle.Sugar
Lower.Chemical  -         -            -           -           
Middle.Chemical -         -            -           -           
Top.Chemical    -         -            -           -           
Ground.Scent    -         -            -           -           
Lower.Scent     -         -            -           -           
Middle.Scent    -         -            -           -           
Top.Scent       -         -            -           -           
Ground.Sugar    0.987     -            -           -           
Lower.Sugar     0.344     0.277        -           -           
Middle.Sugar    0.222     0.175        0.788       -           
Top.Sugar       0.591     0.587        0.587       0.397       

P value adjustment method: BH