Invertebrate ANOVA for 2022-2023 Shiawassee Master’s Project
Team
University of Michigan School for Environment and sustainability
Primary sheet(s) used: “CORRECTED_ANOVA_Full.csv”
if(!require("car")) install.packages("car")
if(!require("dplyr")) install.packages("dplyr")
library(car)
library(dplyr)
Sys.setenv(LANG = "en")
setwd("~/Desktop/SNWR/UM-SEAS_2021 R Code/Inverts/Corrected Data")
(i.e., Total Species Count / Effort) # Note - CPUE for Inverts is total number of inverts at site (i.e., “UE” = 1); CPUE for Fyke Nets is total number of individual fish at site per net night (i.e., “UE” = 2). CPUE for Electrofishing is total number of individual fish per site per second or minute (i.e., “UE” = ~600 seconds or 10 minutes)
#Load data.
#Columns: siteID, unit, month, veg, cpue
anovadata <- read.csv("~/Desktop/SNWR/UM-SEAS_2021 R Code/Inverts/Corrected Data/CORRECTED_ANOVA_Full.csv")
# renaming months, removing extra row from data frame
anovadata$edate[anovadata$edate == "5232022"] = "May"
anovadata$edate[anovadata$edate == "5242022"] = "May"
anovadata$edate[anovadata$edate == "6222022"] = "June"
anovadata$edate[anovadata$edate == "7112022"] = "July"
anovadata$edate[anovadata$edate == "8112022"] = "August"
anovadata$Vegetation.Type[anovadata$Vegetation.Type ==
"Submerged Aquatic Vegetation"] = "SAV"
anovadata_sub = subset(anovadata[,1:5])
# Box plots of each variable with CPUE (count)
boxplot(SumOfCount ~ Vegetation.Type, data = anovadata_sub, xlab = "Vegetation",
ylab = "Count", par(cex.axis = .75), horizontal = FALSE, las = 1)
boxplot(SumOfCount ~ Unit, data = anovadata_sub, xlab = "Unit", ylab = "Count")
anovadata_sub$edate = factor(anovadata_sub$edate,
levels = c("May","June","July","August"))
boxplot(SumOfCount ~ edate, data = anovadata_sub, xlab = "Month", ylab = "Count")
boxplot_dat = read.csv("~/Desktop/SNWR/UM-SEAS_2021 R Code/Inverts/Corrected Data/PERMANOVA_correct2.csv")
bp = boxplot_dat[,1:6]
bp$month[bp$month == "5232022"] = "May"
bp$month[bp$month == "5242022"] = "May"
bp$month[bp$month == "6222022"] = "June"
bp$month[bp$month == "7112022"] = "July"
bp$month[bp$month == "8112022"] = "August"
bp$veg[bp$veg == "Submerged Aquatic Vegetation"] = "SAV"
bp$month = factor(bp$month, levels = c("May","June","July","August"))
boxplot(CPUE ~ veg, data = bp, xlab = "Vegetation", ylab = "CPUE",
par(cex.axis = .75), horizontal = FALSE, las = 1)
boxplot(CPUE ~ Unit, data = bp, xlab = "Unit", ylab = "CPUE")
boxplot(CPUE ~ month, data = bp, xlab = "Month", ylab = "CPUE")
# Testing normality of all CPUE data with Shapiro-Wilks
shapiro.test(anovadata_sub$SumOfCount) # W = 0.84, p = 2.6e-6 # data normal
##
## Shapiro-Wilk normality test
##
## data: anovadata_sub$SumOfCount
## W = 0.83505, p-value = 2.617e-06
# ANOVA - all variables, residuals
ANOVA_FULL = aov(SumOfCount ~ Vegetation.Type + Unit + edate,
data = anovadata_sub)
summary(ANOVA_FULL)
## Df Sum Sq Mean Sq F value Pr(>F)
## Vegetation.Type 7 13374 1911 1.708 0.13397
## Unit 3 7975 2658 2.377 0.08383 .
## edate 3 15337 5112 4.571 0.00749 **
## Residuals 41 45853 1118
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_FULL)) # Checking for normal distribution. If it does not
## [1] 18 33
# meet assumptions, mention it in report
# ANOVA of vegetation and count # tukey of ANOVA # Levene for normal variance
# in data (is variation significantly different?)
ANOVA_VEG = aov(SumOfCount ~ Vegetation.Type, data = anovadata_sub)
summary(ANOVA_VEG)
## Df Sum Sq Mean Sq F value Pr(>F)
## Vegetation.Type 7 13374 1910 1.298 0.272
## Residuals 47 69166 1472
qqPlot(resid(ANOVA_VEG))
## [1] 18 33
TukeyHSD(ANOVA_VEG)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SumOfCount ~ Vegetation.Type, data = anovadata_sub)
##
## $Vegetation.Type
## diff lwr upr p adj
## Mixed Emergent-Forest 53.583333 -24.94471 132.11137 0.3913511
## Nymphaea-Forest 46.250000 -39.77316 132.27316 0.6840122
## Open Water-Forest 12.416667 -80.49896 105.33230 0.9998698
## Phalaris-Forest 23.250000 -82.10642 128.60642 0.9966072
## River Bulrush-Forest 17.750000 -87.60642 123.10642 0.9993957
## SAV-Forest 27.171053 -39.75381 94.09592 0.8989433
## Typha-Forest 9.416667 -59.04249 77.87582 0.9998421
## Nymphaea-Mixed Emergent -7.333333 -85.86137 71.19471 0.9999886
## Open Water-Mixed Emergent -41.166667 -127.18982 44.85649 0.7941635
## Phalaris-Mixed Emergent -30.333333 -129.66432 68.99765 0.9767157
## River Bulrush-Mixed Emergent -35.833333 -135.16432 63.49765 0.9432612
## SAV-Mixed Emergent -26.412281 -83.38251 30.55795 0.8188969
## Typha-Mixed Emergent -44.166667 -102.93167 14.59834 0.2731347
## Open Water-Nymphaea -33.833333 -126.74896 59.08230 0.9405084
## Phalaris-Nymphaea -23.000000 -128.35642 82.35642 0.9968282
## River Bulrush-Nymphaea -28.500000 -133.85642 76.85642 0.9883675
## SAV-Nymphaea -19.078947 -86.00381 47.84592 0.9842275
## Typha-Nymphaea -36.833333 -105.29249 31.62582 0.6832395
## Phalaris-Open Water 10.833333 -100.22209 121.88875 0.9999846
## River Bulrush-Open Water 5.333333 -105.72209 116.38875 0.9999999
## SAV-Open Water 14.754386 -60.82516 90.33393 0.9984340
## Typha-Open Water -3.000000 -79.94145 73.94145 1.0000000
## River Bulrush-Phalaris -5.500000 -127.15512 116.15512 0.9999999
## SAV-Phalaris 3.921053 -86.51638 94.35849 0.9999999
## Typha-Phalaris -13.833333 -105.41198 77.74531 0.9997054
## SAV-River Bulrush 9.421053 -81.01638 99.85849 0.9999759
## Typha-River Bulrush -8.333333 -99.91198 83.24531 0.9999904
## Typha-SAV -17.754386 -59.77355 24.26478 0.8786246
leveneTest(SumOfCount ~ Vegetation.Type, data = anovadata_sub)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
# ANOVA of month and count # tukey of ANOVA # levene test
ANOVA_MONTH = aov(SumOfCount ~ edate, data = anovadata_sub)
summary(ANOVA_MONTH)
## Df Sum Sq Mean Sq F value Pr(>F)
## edate 3 13858 4619 3.43 0.0237 *
## Residuals 51 68682 1347
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_MONTH))
## [1] 18 33
TukeyHSD(ANOVA_MONTH)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SumOfCount ~ edate, data = anovadata_sub)
##
## $edate
## diff lwr upr p adj
## June-May 43.554945 6.01638 81.093510 0.0169396
## July-May 16.340659 -21.19791 53.879225 0.6568804
## August-May 12.197802 -25.34076 49.736368 0.8237911
## July-June -27.214286 -64.05113 9.622562 0.2158410
## August-June -31.357143 -68.19399 5.479705 0.1209836
## August-July -4.142857 -40.97971 32.693991 0.9906257
leveneTest(SumOfCount ~ edate, data = anovadata_sub)
# ANOVA of unit and count # tukey of ANOVA # levene test
ANOVA_UNIT = aov(SumOfCount ~ Unit, data = anovadata_sub)
summary(ANOVA_UNIT)
## Df Sum Sq Mean Sq F value Pr(>F)
## Unit 3 4046 1349 0.876 0.46
## Residuals 51 78494 1539
qqPlot(resid(ANOVA_UNIT))
## [1] 18 33
TukeyHSD(ANOVA_UNIT)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = SumOfCount ~ Unit, data = anovadata_sub)
##
## $Unit
## diff lwr upr p adj
## MN-MC 13.142045 -27.66679 53.95088 0.8276320
## MS-MC 22.312500 -14.52445 59.14945 0.3829590
## P1A-MC 9.854167 -29.93428 49.64261 0.9123340
## MS-MN 9.170455 -31.63838 49.97929 0.9325770
## P1A-MN -3.287879 -46.77950 40.20374 0.9970974
## P1A-MS -12.458333 -52.24678 27.33011 0.8392165
leveneTest(SumOfCount ~ Unit, data = anovadata_sub)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
If you need to “factorize” month (i.e., tell R to read in the month column as a factor rather than as a number) Check to see what “form” your data values are in by running the str() function on your data frame