# One-way ANOVA
# Null hypothesis: the population means are equal
# Alternative hypothesis: at least two population means are not equal
# Alpha: 0.05
# Load the one-way ANOVA example data set (columns: Group, Length).
# `header = TRUE` is spelled out in full: `h` relied on partial argument
# matching and `T` is an ordinary variable that can be reassigned.
dataoneway <- read.table(
  "https://rstatisticsandresearch.weebly.com/uploads/1/0/2/6/1026585/onewayanova.txt",
  header = TRUE
)
# Preview the first rows of the data
head(dataoneway)
## Group Length
## 1 1 19.0
## 2 1 18.6
## 3 1 18.3
## 4 1 18.0
## 5 1 18.2
## 6 1 18.6
#Assumption 1: All samples are independent and are collected in more than two independent categorical groups. Label the groups and set them as categorical factors.
# Treat the group code as a categorical factor instead of a number
dataoneway[["Group"]] <- as.factor(dataoneway[["Group"]])
# Count the observations in each group
table(dataoneway[["Group"]])
##
## 1 2 3
## 35 35 35
# Assumption 2: the dependent variable is continuous (inspect column types)
str(dataoneway)
## 'data.frame': 105 obs. of 2 variables:
## $ Group : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
## $ Length: num 19 18.6 18.3 18 18.2 18.6 18.5 18.2 18.4 18.9 ...
#Assumption 3: Normal distributions of each group, no major outliers
# Replace the group codes with species names; labels are applied in the
# order of the existing factor levels.
dataoneway$Group <- factor(
  dataoneway$Group,
  labels = c("Wall lizard", "Viviparous lizard", "Snake-eyed lizard")
)
# One data frame per species, reused by the normality checks
Group1 <- subset(dataoneway, Group == "Wall lizard")
Group2 <- subset(dataoneway, Group == "Viviparous lizard")
Group3 <- subset(dataoneway, Group == "Snake-eyed lizard")
# Normal Q-Q plot with reference line for every group. The third plot
# previously repeated Group2, so Group3 was never inspected; each plot is
# now titled so the three can be told apart.
qqnorm(Group1$Length, main = "Normal Q-Q Plot: Wall lizard")
qqline(Group1$Length)
qqnorm(Group2$Length, main = "Normal Q-Q Plot: Viviparous lizard")
qqline(Group2$Length)
qqnorm(Group3$Length, main = "Normal Q-Q Plot: Snake-eyed lizard")
qqline(Group3$Length)
# Shapiro-Wilk normality test, run separately on the Length of each group.
# Ho: the data are normally distributed
# Ha: the data are not normally distributed
# If the p-value is greater than 0.05 we fail to reject Ho, i.e. there is no
# evidence that the data depart from normality.
shapiro.test(Group1$Length)
##
## Shapiro-Wilk normality test
##
## data: Group1$Length
## W = 0.98269, p-value = 0.8425
shapiro.test(Group2$Length)
##
## Shapiro-Wilk normality test
##
## data: Group2$Length
## W = 0.97817, p-value = 0.6986
shapiro.test(Group3$Length)
##
## Shapiro-Wilk normality test
##
## data: Group3$Length
## W = 0.97219, p-value = 0.5063
# Assumption 4: Homogeneity of variances - equal variance among the three groups.
# Bartlett's test of Length across the levels of Group
# (Ho: the group variances are equal).
bartlett.test(Length ~ Group, data = dataoneway)
##
## Bartlett test of homogeneity of variances
##
## data: Length by Group
## Bartlett's K-squared = 0.43292, df = 2, p-value = 0.8054
# Since the p-value is greater than 0.05, we fail to reject the null
# hypothesis: there is no evidence of unequal variances among the groups.
# One-way ANOVA: test whether the means of the k populations are equal.
# Fit Length as a function of Group with lm(), then print the ANOVA table.
# `<-` replaces `=` for assignment, per R convention.
model <- lm(Length ~ Group, data = dataoneway)
anova(model)
## Analysis of Variance Table
##
## Response: Length
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 2 10.615 5.3074 7.0982 0.0013 **
## Residuals 102 76.267 0.7477
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Command 2: the same one-way ANOVA fitted directly with aov().
# `<-` replaces `=` for assignment, per R convention.
model_1 <- aov(Length ~ Group, data = dataoneway)
summary(model_1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Group 2 10.61 5.307 7.098 0.0013 **
## Residuals 102 76.27 0.748
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Since the p-value (0.0013) is less than 0.05, we reject Ho: at least two of the group means differ.
# Post-hoc test (Tukey's honest significant difference): identify which
# pairs of groups have different means.
# `model` is wrapped in aov() before being handed to TukeyHSD(); the call is
# left exactly as written because its text is echoed in the "Fit:" line of
# the printed result.
TukeyHSD(aov(model))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = model)
##
## $Group
## diff lwr upr p adj
## Viviparous lizard-Wall lizard -0.7200000 -1.2116284 -0.2283716 0.0020955
## Snake-eyed lizard-Wall lizard -0.1028571 -0.5944855 0.3887713 0.8726158
## Snake-eyed lizard-Viviparous lizard 0.6171429 0.1255145 1.1087713 0.0098353
# Command 2: Tukey HSD computed from the aov fit `model_1`.
# The comparison is computed once and reused for both the printed table and
# the confidence-interval plot, instead of calling TukeyHSD() twice.
tukey_result <- TukeyHSD(model_1)
tukey_result
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Length ~ Group, data = dataoneway)
##
## $Group
## diff lwr upr p adj
## Viviparous lizard-Wall lizard -0.7200000 -1.2116284 -0.2283716 0.0020955
## Snake-eyed lizard-Wall lizard -0.1028571 -0.5944855 0.3887713 0.8726158
## Snake-eyed lizard-Viviparous lizard 0.6171429 0.1255145 1.1087713 0.0098353
# Plot the family-wise confidence intervals of the pairwise differences
plot(tukey_result)
library("ggplot2")
## Warning: package 'ggplot2' was built under R version 4.0.5
# Box plot of Length for each level of Group
ggplot(data = dataoneway, mapping = aes(x = Group, y = Length)) +
  geom_boxplot(colour = "black", fill = "grey80") +
  scale_x_discrete() +
  labs(x = "Treatment Group", y = "Length (cm)")