Sameer Mathur
Two-way ANOVA
---
# reading data into R
mothTrap.df <- read.csv(paste("moth-trap-experiment.csv"))
# attaching data columns of the dataframe
attach(mothTrap.df)
# dimension of the dataframe
dim(mothTrap.df)
[1] 60 3
# number of moths trapped by location type and type of lure
addmargins(table(location, type.of.lure))
type.of.lure
location Chemical Scent Sugar Sum
Ground 5 5 5 15
Lower 5 5 5 15
Middle 5 5 5 15
Top 5 5 5 15
Sum 20 20 20 60
# descriptive statistics of number of moths by location type
library(data.table)
dt <- data.table(mothTrap.df)
dt[, list(Count = .N,
mean = round(mean(number.of.moths), 3),
sd = round(mean(number.of.moths), 3),
median = round(median(number.of.moths), 3),
min = min(number.of.moths),
max = max(number.of.moths)),
by = list(location)]
location Count mean sd median min max
1: Top 15 23.333 23.333 21 13 35
2: Middle 15 31.000 31.000 36 12 44
3: Lower 15 33.333 33.333 34 17 44
4: Ground 15 19.067 19.067 18 12 29
# descriptive statistics of number of moths by type of lure
library(data.table)
dt <- data.table(mothTrap.df)
dt[, list(Count = .N,
mean = round(mean(number.of.moths), 3),
sd = round(mean(number.of.moths), 3),
median = round(median(number.of.moths), 3),
min = min(number.of.moths),
max = max(number.of.moths)),
by = list(type.of.lure)]
type.of.lure Count mean sd median min max
1: Chemical 20 27.50 27.50 28.5 14 41
2: Sugar 20 27.80 27.80 28.0 15 44
3: Scent 20 24.75 24.75 22.0 12 44
# box plot of location type
boxplot(number.of.moths ~ location, data = mothTrap.df,
main = "Boxplot of Location Type",
xlab = "Location Type", ylab = "Number of Moths Trapaped")
# box plot of type of lure
boxplot(number.of.moths ~ type.of.lure, data = mothTrap.df,
main = "Boxplot of Type of Lure",
xlab = "Type of Lure", ylab = "Number of Moths Trapaped")
# box plot of location type and type of lure
library(lattice)
bwplot(number.of.moths ~ location | type.of.lure, data = mothTrap.df,
main = "Boxplot of Location Type and Type of Lure",
ylab = "Number of Moths Trapaped",
col = "black")
# mean plot by location type
library(gplots)
plotmeans(number.of.moths ~ location, data = mothTrap.df,
xlab = "Location Type", ylab = "Number of Moths Trapped",
digits=2, col = "black", ccol = "blue", barwidth = 2,
legends = TRUE, mean.labels = TRUE, frame = TRUE)
# mean plot by type of lure
library(gplots)
plotmeans(number.of.moths ~ type.of.lure, data = mothTrap.df,
xlab = "Type of Lure", ylab = "Number of Moths Trapped",
digits=2, col = "black", ccol = "blue", barwidth = 2,
legends = TRUE, mean.labels = TRUE, frame = TRUE)
# interaction plot of location type and type of lure
interaction.plot(location, type.of.lure, number.of.moths,
type = "b", col = c(1:3), leg.bty = "o",
leg.bg = "beige", lwd = 2, pch = c(18, 24, 22),
xlab = "Location Type", ylab = "Log of Number of Moths Trapped",
main = "Interaction Plot of Location Type and Type of Lure")
# two-way ANOVA
twoWayfit <- aov(number.of.moths ~ location * type.of.lure, data = mothTrap.df)
# summary of the ANOVA model
summary(twoWayfit)
Df Sum Sq Mean Sq F value Pr(>F)
location 3 1981 660.5 10.450 2.09e-05 ***
type.of.lure 2 113 56.5 0.894 0.416
location:type.of.lure 6 115 19.2 0.303 0.932
Residuals 48 3034 63.2
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# normal Q-Q plot
plot(twoWayfit, 2)
# Anderson-Darling normality test
library(nortest)
ad.test(number.of.moths)
Anderson-Darling normality test
data: number.of.moths
A = 1.0008, p-value = 0.01138
# Shapiro-Wilk test of nirmality
shapiro.test(number.of.moths)
Shapiro-Wilk normality test
data: number.of.moths
W = 0.94533, p-value = 0.009448
Null hypothesis (\( H_0 \)): The data is normally distributed.
Both the tests are statistically significant, we reject the null hypothesis. Hence, we cannot assume normality in the data.
# residual versus fitted plot
plot(twoWayfit, 1)
# Levene test for homogeneity of variance
library(car)
leveneTest(number.of.moths ~ location * type.of.lure, data = mothTrap.df)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 11 0.6377 0.7875
48
Null hypothesis (\( H_0 \)): The variance of number of moths trapped are homogenous between groups.
The test is not statistically significant, we fail to reject the null hypothesis. Hence, we can assume the homogeneity of variance between the groups.
We use log transformation to the dependent variable (number of moths) and again check for the normality.
# change number of moths to log of number og moths
mothTrap.df$log.number.of.moths <- log(mothTrap.df$number.of.moths)
# attacjing data columns
attach(mothTrap.df)
# first few rows of the dataframe
head(mothTrap.df)
number.of.moths location type.of.lure log.number.of.moths
1 32 Top Chemical 3.465736
2 29 Top Chemical 3.367296
3 16 Top Chemical 2.772589
4 18 Top Chemical 2.890372
5 20 Top Chemical 2.995732
6 37 Middle Chemical 3.610918
# two-way ANOVA after log-transformation
twoWayTransfit <- aov(log.number.of.moths ~ location * type.of.lure, data = mothTrap.df)
# summary of the ANOVA model
summary(twoWayTransfit)
Df Sum Sq Mean Sq F value Pr(>F)
location 3 2.966 0.9886 9.828 3.64e-05 ***
type.of.lure 2 0.262 0.1308 1.301 0.282
location:type.of.lure 6 0.196 0.0326 0.324 0.921
Residuals 48 4.828 0.1006
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The p-value < 0.05 for the location type, we can conclude that there are significant differences in number of moths trapped between the location type.
For type of lure and interaction of the location and type of lure, we didn't find any significant differences.
Before Log-transformation
# normal Q-Q plot before log-transformation
plot(twoWayfit, 2)
After Log-transformation
# normal Q-Q plot after log-transformation
plot(twoWayTransfit, 2)
Before Log-transformation
# Anderson-Darling normality test before log-transformation
library(nortest)
ad.test(number.of.moths)
Anderson-Darling normality test
data: number.of.moths
A = 1.0008, p-value = 0.01138
After Log-transformation
# Anderson-Darling normality test after log-transformation
library(nortest)
ad.test(mothTrap.df$log.number.of.moths)
Anderson-Darling normality test
data: mothTrap.df$log.number.of.moths
A = 0.97034, p-value = 0.01356
Before Log-transformation
# Shapiro-Wilk test of normality before log-transformation
shapiro.test(number.of.moths)
Shapiro-Wilk normality test
data: number.of.moths
W = 0.94533, p-value = 0.009448
After Log-transformation
# Shapiro-Wilk test of normality after log-transformation
shapiro.test(mothTrap.df$log.number.of.moths)
Shapiro-Wilk normality test
data: mothTrap.df$log.number.of.moths
W = 0.94746, p-value = 0.01185
Before Log-transformations
# residual versus fitted plot before log-transformation
plot(twoWayfit, 1)
After Log-transformations
# residual versus fitted plot after log-transformation
plot(twoWayTransfit, 1)
Before Log-transformation
# test homogeneity of variance after log-transformation
library(car)
leveneTest(number.of.moths ~ location * type.of.lure, data = mothTrap.df)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 11 0.6377 0.7875
48
After Log-transformation
# test homogeneity of variance after log-transformation
library(car)
leveneTest(log.number.of.moths ~ location * type.of.lure, data = mothTrap.df)
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 11 0.5978 0.8211
48
After log transformation of the dependent variable number of moth trapped, we don't have heterogeneity problem i.e. we have equal variances between the groups.
But for normality assumption, we fail to accept the null hypothesis. That concludes, we cannot assume the normality of the data after the transformation.
Hence, we use non-parametric Kruskal-Wallis rank sum test.
A non-parametric alternative to ANOVA is Kruskal-Wallis rank sum test, which can be used when ANOVA assumptions are not met.
# Kruskal-Wallis rank sum test
kruskal.test(log.number.of.moths ~ interaction(location, type.of.lure), data = mothTrap.df)
Kruskal-Wallis rank sum test
data: log.number.of.moths by interaction(location, type.of.lure)
Kruskal-Wallis chi-squared = 24.704, df = 11, p-value = 0.01007
pairwise.t.test(log.number.of.moths, interaction(location, type.of.lure), data = mothTrap.df, p.adjust.method = "BH", pool.sd = FALSE)
Pairwise comparisons using t tests with non-pooled SD
data: log.number.of.moths and interaction(location, type.of.lure)
Ground.Chemical Lower.Chemical Middle.Chemical
Lower.Chemical 0.055 - -
Middle.Chemical 0.158 0.587 -
Top.Chemical 0.587 0.135 0.350
Ground.Scent 0.605 0.055 0.106
Lower.Scent 0.106 0.602 0.900
Middle.Scent 0.551 0.418 0.650
Top.Scent 0.829 0.158 0.285
Ground.Sugar 0.788 0.106 0.231
Lower.Sugar 0.187 0.565 0.940
Middle.Sugar 0.106 0.646 0.837
Top.Sugar 0.365 0.213 0.565
Top.Chemical Ground.Scent Lower.Scent Middle.Scent
Lower.Chemical - - - -
Middle.Chemical - - - -
Top.Chemical - - - -
Ground.Scent 0.350 - - -
Lower.Scent 0.260 0.083 - -
Middle.Scent 0.776 0.351 0.600 -
Top.Scent 0.788 0.587 0.231 0.610
Ground.Sugar 0.776 0.556 0.187 0.604
Lower.Sugar 0.397 0.127 0.836 0.716
Middle.Sugar 0.231 0.083 0.940 0.587
Top.Sugar 0.747 0.222 0.420 0.940
Top.Scent Ground.Sugar Lower.Sugar Middle.Sugar
Lower.Chemical - - - -
Middle.Chemical - - - -
Top.Chemical - - - -
Ground.Scent - - - -
Lower.Scent - - - -
Middle.Scent - - - -
Top.Scent - - - -
Ground.Sugar 0.987 - - -
Lower.Sugar 0.344 0.277 - -
Middle.Sugar 0.222 0.175 0.788 -
Top.Sugar 0.591 0.587 0.587 0.397
P value adjustment method: BH