Loading packages

if(!require("car")) install.packages("car")
if(!require("dplyr")) install.packages("dplyr")
library(car)
library(dplyr)

Set up environment and output language

ANOVA of Vegetation, Unit, month on TOTAL LOTU CPUE (i.e., Total Species Count / Effort)

# Note - CPUE for Inverts is total number of inverts at site (i.e., “UE” = 1); CPUE for Fyke Nets is total number of individual fish at site per net night (i.e., “UE” = 2). CPUE for Electrofishing is total number of individual fish per site per second or minute (i.e., “UE” = ~600 seconds or 10 minutes)

# Load the electrofishing CPUE data.
# Columns per the original note: siteID, Unit, month, veg, cpue.
# NOTE(review): every model and plot in this script refers to the vegetation
# column as `Vegetation.Type`, not `veg` -- confirm the header in the CSV.
ef_csv_path <- "C:/Users/hayde/OneDrive/Desktop/UMichigan/Masters project/code and anlysis/Electrofishing/ANOVA/final versions/EF_anova_SNWR_final.csv"
Data_EF <- read.csv(ef_csv_path)

# Recode the month codes 6, 7 and 8 to month names; any other value is left
# untouched. The column ends up as a character vector, so grouping functions
# order the months alphabetically (August, July, June), which is the order
# seen in the recorded Tukey output for month.
month_labels <- c("6" = "June", "7" = "July", "8" = "August")
for (month_code in names(month_labels)) {
  Data_EF$month[Data_EF$month == month_code] <- month_labels[[month_code]]
}

# Exploratory box plots of CPUE (count) against each grouping variable.
# Names are the grouping columns in Data_EF; values are the x-axis labels.
axis_labels <- c(
  Vegetation.Type = "Vegetation type",
  Unit = "Unit",
  month = "Month"
)

# One plot per grouping column, drawn in the order listed above.
for (grouping in names(axis_labels)) {
  boxplot(
    reformulate(grouping, response = "cpue"),
    data = Data_EF,
    xlab = axis_labels[[grouping]],
    ylab = "Count"
  )
}

# Shapiro-Wilk normality test on the pooled CPUE values (all rows together).
# The recorded result below (W = 0.81967, p-value = 0.0001529) rejects
# normality at the 0.05 level.
# NOTE(review): this tests the raw response; the ANOVA assumption concerns the
# model residuals, which the Q-Q plots further down inspect -- confirm which
# check the report should cite.
shapiro.test(Data_EF$cpue)
## 
##  Shapiro-Wilk normality test
## 
## data:  Data_EF$cpue
## W = 0.81967, p-value = 0.0001529
# ANOVA - all variables: additive model of CPUE on vegetation type, unit and
# month (main effects only, no interaction terms), followed by residual checks.
# NOTE(review): summary() on an aov fit reports sequential sums of squares, so
# with unequal group sizes the table depends on the order of the terms --
# confirm whether car::Anova() (Type II) is the table the report should use.
ANOVA_FULL <- aov(cpue ~ Vegetation.Type + Unit + month, data = Data_EF)
# car is already attached at the top of the script; repeating the call is a
# no-op and is kept only so this section can be run on its own.
library(car)

summary(ANOVA_FULL)
##                 Df Sum Sq Mean Sq F value Pr(>F)  
## Vegetation.Type  1   1.35   1.347   0.630 0.4358  
## Unit             4  25.62   6.404   2.997 0.0408 *
## month            2  11.37   5.684   2.660 0.0923 .
## Residuals       22  47.01   2.137                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Normality check 1: Q-Q plot of the raw residuals. If the residuals do not
# meet the normality assumption, mention it in the report. The printed numbers
# are the row indices of the two most extreme points.
qqPlot(resid(ANOVA_FULL))

## [1] 9 1
# Normality check 2: qqPlot() applied to the model object itself.
# NOTE(review): per the car documentation the model method plots studentized
# residuals against t quantiles, so this is not a duplicate of the plot above.
qqPlot(ANOVA_FULL)

## [1] 1 9
# (A second, identical summary(ANOVA_FULL) call and its duplicate table were
# removed here; the table is printed once above.)
# One-way ANOVA of CPUE by vegetation type, then a Q-Q plot of the residuals,
# Tukey HSD comparisons, and Levene's test for homogeneity of variance
# (i.e., do the group variances differ significantly?).
ANOVA_VEG <- aov(cpue ~ Vegetation.Type, data = Data_EF)
summary(ANOVA_VEG)
##                 Df Sum Sq Mean Sq F value Pr(>F)
## Vegetation.Type  1   1.35   1.347   0.449  0.508
## Residuals       28  83.99   3.000
qqPlot(resid(ANOVA_VEG))

## [1] 1 9
# Only two vegetation types are present, so there is a single pairwise
# comparison and its adjusted p-value matches the ANOVA p-value above.
TukeyHSD(ANOVA_VEG)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cpue ~ Vegetation.Type, data = Data_EF)
## 
## $Vegetation.Type
##                                              diff        lwr      upr     p adj
## Submerged Aquatic Vegetation-Open Water 0.4237164 -0.8717463 1.719179 0.5083541
# factor() makes the grouping variable an explicit factor, so leveneTest() no
# longer emits the "group coerced to factor" warning. The test is unchanged.
leveneTest(cpue ~ factor(Vegetation.Type), data = Data_EF)
# One-way ANOVA of CPUE by month, then a Q-Q plot of the residuals, Tukey HSD
# comparisons, and Levene's test for homogeneity of variance.
ANOVA_month <- aov(cpue ~ month, data = Data_EF)
summary(ANOVA_month)
##             Df Sum Sq Mean Sq F value Pr(>F)
## month        2  12.26   6.130   2.265  0.123
## Residuals   27  73.08   2.707
qqPlot(resid(ANOVA_month))

## [1] 1 9
# month is a character column, so its levels are ordered alphabetically
# (August, July, June); each "A-B" row below is mean(A) minus mean(B).
TukeyHSD(ANOVA_month)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cpue ~ month, data = Data_EF)
## 
## $month
##                   diff        lwr       upr     p adj
## July-August -1.0159359 -2.8493449 0.8174731 0.3683270
## June-August  0.5785143 -1.2037629 2.3607916 0.7033060
## June-July    1.5944502 -0.2797566 3.4686570 0.1067658
# factor() makes the grouping variable an explicit factor, so leveneTest() no
# longer emits the "group coerced to factor" warning. The test is unchanged.
leveneTest(cpue ~ factor(month), data = Data_EF)
# One-way ANOVA of CPUE by unit, then a Q-Q plot of the residuals, Tukey HSD
# comparisons, and Levene's test for homogeneity of variance.
ANOVA_Unit <- aov(cpue ~ Unit, data = Data_EF)
summary(ANOVA_Unit)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Unit         4  25.86   6.466   2.718 0.0525 .
## Residuals   25  59.48   2.379                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_Unit))

## [1] 1 9
# All ten pairwise comparisons among the five units; each "A-B" row is
# mean(A) minus mean(B) with a family-wise 95% confidence interval.
TukeyHSD(ANOVA_Unit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = cpue ~ Unit, data = Data_EF)
## 
## $Unit
##               diff       lwr        upr     p adj
## MN-MC   -1.0434913 -3.570122 1.48313979 0.7440745
## MS-MC   -2.4694665 -4.996098 0.05716456 0.0576830
## SHR-MC  -1.2743722 -3.801003 1.25225889 0.5834387
## SPD-MC  -2.0852079 -4.472650 0.30223403 0.1080782
## MS-MN   -1.4259752 -4.290906 1.43895513 0.5954205
## SHR-MN  -0.2308809 -3.095811 2.63404946 0.9992551
## SPD-MN  -1.0417166 -3.784679 1.70124534 0.7969757
## SHR-MS   1.1950943 -1.669836 4.06002469 0.7371729
## SPD-MS   0.3842586 -2.358703 3.12722058 0.9935968
## SPD-SHR -0.8108357 -3.553798 1.93212625 0.9058090
# factor() makes the grouping variable an explicit factor, so leveneTest() no
# longer emits the "group coerced to factor" warning. The test is unchanged.
leveneTest(cpue ~ factor(Unit), data = Data_EF)
##str(Data_EF)

If you need to “factorize” month (i.e., tell R to treat the month column as a factor rather than as a number), convert it with as.factor(), as in the commented-out example below.

Check to see what “form” your data values are in by running the str() function on your data frame

#Factorized month ("factmo") to perform ANOVA, otherwise R treats as numerical data.
# Can be prevented by the way data is managed in Access

#factorized_month = as.factor(monthCPUE$month)
##factomonth = data.frame(factorized_month, monthCPUE)
##head(factomonth)