Invertebrate ANOVA for 2022-2023 Shiawassee Master’s Project Team
University of Michigan School for Environment and sustainability
Primary sheet(s) used: “CORRECTED_ANOVA_Full.csv”

Loading packages

if(!require("car")) install.packages("car")
if(!require("dplyr")) install.packages("dplyr")
library(car)
library(dplyr)

Set up environment and output language

Sys.setenv(LANG = "en")

setwd("~/Desktop/SNWR/UM-SEAS_2021 R Code/Inverts/Corrected Data")

ANOVA of Vegetation, Unit, Month on TOTAL LOTU CPUE

(i.e., Total Species Count / Effort) # Note - CPUE for Inverts is total number of inverts at site (i.e., “UE” = 1); CPUE for Fyke Nets is total number of individual fish at site per net night (i.e., “UE” = 2). CPUE for Electrofishing is total number of individual fish per site per second or minute (i.e., “UE” = ~600 seconds or 10 minutes)

#Load data.
#Columns: siteID, unit, month, veg, cpue
anovadata <- read.csv("~/Desktop/SNWR/UM-SEAS_2021 R Code/Inverts/Corrected Data/CORRECTED_ANOVA_Full.csv")

# renaming months, removing extra row from data frame
anovadata$edate[anovadata$edate == "5232022"] = "May"
anovadata$edate[anovadata$edate == "5242022"] = "May"
anovadata$edate[anovadata$edate == "6222022"] = "June"
anovadata$edate[anovadata$edate == "7112022"] = "July"
anovadata$edate[anovadata$edate == "8112022"] = "August"
anovadata$Vegetation.Type[anovadata$Vegetation.Type == 
                            "Submerged Aquatic Vegetation"] = "SAV"
anovadata_sub = subset(anovadata[,1:5])

# Box plots of each variable with CPUE (count)
boxplot(SumOfCount ~ Vegetation.Type, data = anovadata_sub, xlab = "Vegetation", 
        ylab = "Count", par(cex.axis = .75), horizontal = FALSE, las = 1)

boxplot(SumOfCount ~ Unit, data = anovadata_sub, xlab = "Unit", ylab = "Count")

anovadata_sub$edate = factor(anovadata_sub$edate, 
                             levels = c("May","June","July","August"))
boxplot(SumOfCount ~ edate, data = anovadata_sub, xlab = "Month", ylab = "Count")

boxplot_dat = read.csv("~/Desktop/SNWR/UM-SEAS_2021 R Code/Inverts/Corrected Data/PERMANOVA_correct2.csv")
bp = boxplot_dat[,1:6]

bp$month[bp$month == "5232022"] = "May"
bp$month[bp$month == "5242022"] = "May"
bp$month[bp$month == "6222022"] = "June"
bp$month[bp$month == "7112022"] = "July"
bp$month[bp$month == "8112022"] = "August"
bp$veg[bp$veg == "Submerged Aquatic Vegetation"] = "SAV"

bp$month = factor(bp$month, levels = c("May","June","July","August"))

boxplot(CPUE ~ veg, data = bp, xlab = "Vegetation", ylab = "CPUE", 
        par(cex.axis = .75), horizontal = FALSE, las = 1)

boxplot(CPUE ~ Unit, data = bp, xlab = "Unit", ylab = "CPUE")

boxplot(CPUE ~ month, data = bp, xlab = "Month", ylab = "CPUE")

# Testing normality of all CPUE data with Shapiro-Wilks
shapiro.test(anovadata_sub$SumOfCount) # W = 0.84, p = 2.6e-6 # data normal
## 
##  Shapiro-Wilk normality test
## 
## data:  anovadata_sub$SumOfCount
## W = 0.83505, p-value = 2.617e-06
# ANOVA - all variables, residuals
ANOVA_FULL = aov(SumOfCount ~ Vegetation.Type + Unit + edate, 
                 data = anovadata_sub)
summary(ANOVA_FULL)
##                 Df Sum Sq Mean Sq F value  Pr(>F)   
## Vegetation.Type  7  13374    1911   1.708 0.13397   
## Unit             3   7975    2658   2.377 0.08383 . 
## edate            3  15337    5112   4.571 0.00749 **
## Residuals       41  45853    1118                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_FULL)) # Checking for normal distribution. If it does not 

## [1] 18 33
# meet assumptions, mention it in report

# ANOVA of vegetation and count  # tukey of ANOVA  # Levene for normal variance 
# in data (is variation significantly different?)
ANOVA_VEG = aov(SumOfCount ~ Vegetation.Type, data = anovadata_sub)
summary(ANOVA_VEG)
##                 Df Sum Sq Mean Sq F value Pr(>F)
## Vegetation.Type  7  13374    1910   1.298  0.272
## Residuals       47  69166    1472
qqPlot(resid(ANOVA_VEG))

## [1] 18 33
TukeyHSD(ANOVA_VEG)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SumOfCount ~ Vegetation.Type, data = anovadata_sub)
## 
## $Vegetation.Type
##                                    diff        lwr       upr     p adj
## Mixed Emergent-Forest         53.583333  -24.94471 132.11137 0.3913511
## Nymphaea-Forest               46.250000  -39.77316 132.27316 0.6840122
## Open Water-Forest             12.416667  -80.49896 105.33230 0.9998698
## Phalaris-Forest               23.250000  -82.10642 128.60642 0.9966072
## River Bulrush-Forest          17.750000  -87.60642 123.10642 0.9993957
## SAV-Forest                    27.171053  -39.75381  94.09592 0.8989433
## Typha-Forest                   9.416667  -59.04249  77.87582 0.9998421
## Nymphaea-Mixed Emergent       -7.333333  -85.86137  71.19471 0.9999886
## Open Water-Mixed Emergent    -41.166667 -127.18982  44.85649 0.7941635
## Phalaris-Mixed Emergent      -30.333333 -129.66432  68.99765 0.9767157
## River Bulrush-Mixed Emergent -35.833333 -135.16432  63.49765 0.9432612
## SAV-Mixed Emergent           -26.412281  -83.38251  30.55795 0.8188969
## Typha-Mixed Emergent         -44.166667 -102.93167  14.59834 0.2731347
## Open Water-Nymphaea          -33.833333 -126.74896  59.08230 0.9405084
## Phalaris-Nymphaea            -23.000000 -128.35642  82.35642 0.9968282
## River Bulrush-Nymphaea       -28.500000 -133.85642  76.85642 0.9883675
## SAV-Nymphaea                 -19.078947  -86.00381  47.84592 0.9842275
## Typha-Nymphaea               -36.833333 -105.29249  31.62582 0.6832395
## Phalaris-Open Water           10.833333 -100.22209 121.88875 0.9999846
## River Bulrush-Open Water       5.333333 -105.72209 116.38875 0.9999999
## SAV-Open Water                14.754386  -60.82516  90.33393 0.9984340
## Typha-Open Water              -3.000000  -79.94145  73.94145 1.0000000
## River Bulrush-Phalaris        -5.500000 -127.15512 116.15512 0.9999999
## SAV-Phalaris                   3.921053  -86.51638  94.35849 0.9999999
## Typha-Phalaris               -13.833333 -105.41198  77.74531 0.9997054
## SAV-River Bulrush              9.421053  -81.01638  99.85849 0.9999759
## Typha-River Bulrush           -8.333333  -99.91198  83.24531 0.9999904
## Typha-SAV                    -17.754386  -59.77355  24.26478 0.8786246
leveneTest(SumOfCount ~ Vegetation.Type, data = anovadata_sub)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
# ANOVA of month and count  # tukey of ANOVA  # levene test
ANOVA_MONTH = aov(SumOfCount ~ edate, data = anovadata_sub)
summary(ANOVA_MONTH)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## edate        3  13858    4619    3.43 0.0237 *
## Residuals   51  68682    1347                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_MONTH))

## [1] 18 33
TukeyHSD(ANOVA_MONTH)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SumOfCount ~ edate, data = anovadata_sub)
## 
## $edate
##                   diff       lwr       upr     p adj
## June-May     43.554945   6.01638 81.093510 0.0169396
## July-May     16.340659 -21.19791 53.879225 0.6568804
## August-May   12.197802 -25.34076 49.736368 0.8237911
## July-June   -27.214286 -64.05113  9.622562 0.2158410
## August-June -31.357143 -68.19399  5.479705 0.1209836
## August-July  -4.142857 -40.97971 32.693991 0.9906257
leveneTest(SumOfCount ~ edate, data = anovadata_sub)
# ANOVA of unit and count  # tukey of ANOVA  # levene test
ANOVA_UNIT = aov(SumOfCount ~ Unit, data = anovadata_sub)
summary(ANOVA_UNIT)
##             Df Sum Sq Mean Sq F value Pr(>F)
## Unit         3   4046    1349   0.876   0.46
## Residuals   51  78494    1539
qqPlot(resid(ANOVA_UNIT))

## [1] 18 33
TukeyHSD(ANOVA_UNIT)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SumOfCount ~ Unit, data = anovadata_sub)
## 
## $Unit
##              diff       lwr      upr     p adj
## MN-MC   13.142045 -27.66679 53.95088 0.8276320
## MS-MC   22.312500 -14.52445 59.14945 0.3829590
## P1A-MC   9.854167 -29.93428 49.64261 0.9123340
## MS-MN    9.170455 -31.63838 49.97929 0.9325770
## P1A-MN  -3.287879 -46.77950 40.20374 0.9970974
## P1A-MS -12.458333 -52.24678 27.33011 0.8392165
leveneTest(SumOfCount ~ Unit, data = anovadata_sub)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.

If you need to “factorize” month (i.e., tell R to read in the month column as a factor rather than as a number) Check to see what “form” your data values are in by running the str() function on your data frame