Set up

setwd("C:/Users/16mfr/OneDrive/Desktop/Capstone/Data and Code/Without compromised")
#Load data.

if(!require("car")) install.packages("car")
if(!require("dplyr")) install.packages("dplyr")
library(car)
library(dplyr)
library(ggplot2)

Read and prep data

#Columns: siteID, unit, month, veg, cpue
Site.CPUE <- read.csv("Site_CPUE.csv")

#log transform CPUE
Site.CPUE$logCPUE <- log(Site.CPUE$CPUE+1)
shapiro.test(Site.CPUE$logCPUE)
## 
##  Shapiro-Wilk normality test
## 
## data:  Site.CPUE$logCPUE
## W = 0.95506, p-value = 0.004245
#Change month from number to short text
Site.CPUE$Month[Site.CPUE$Month == "5"] = "May"
Site.CPUE$Month[Site.CPUE$Month == "6"] = "June"
Site.CPUE$Month[Site.CPUE$Month == "7"] = "July"
Site.CPUE$Month[Site.CPUE$Month == "8"] = "August"
Site.CPUE$Month[Site.CPUE$Month == "10"] = "October"

#put months in order (not alphabetical)
Site.CPUE$Month <- factor(Site.CPUE$Month , levels=c('May', 'June', 'July', 'August', 'October'))

#Change veg type from long to short text
Site.CPUE$Vegetation.Type[Site.CPUE$Vegetation.Type == "Open Water"] = "OW"
Site.CPUE$Vegetation.Type[Site.CPUE$Vegetation.Type == "Submerged Aquatic Vegetation"] = "SAV"
Site.CPUE$Vegetation.Type[Site.CPUE$Vegetation.Type == "Mixed Emergent"] = "ME"
Site.CPUE$Vegetation.Type[Site.CPUE$Vegetation.Type == "River Bulrush"] = "RB"

Visualize and Test data

# Box plots of each variable with CPUE (count)
boxplot(logCPUE ~ Vegetation.Type, data = Site.CPUE, xlab = "Vegetation Type", ylab = "logCPUE")

boxplot(logCPUE ~ Unit, data = Site.CPUE, xlab = "Unit", ylab = "logCPUE")

boxplot(logCPUE ~ Month, data = Site.CPUE, aes(x=factor(Month, level=c('May','June','July','August','October'), xlab = "Month", ylab = "logCPUE")))

# Testing normality of all CPUE data with Shapiro-Wilks
shapiro.test(Site.CPUE$logCPUE)
## 
##  Shapiro-Wilk normality test
## 
## data:  Site.CPUE$logCPUE
## W = 0.95506, p-value = 0.004245
# ANOVA - all variables, residuals
ANOVA_FULL = aov(logCPUE ~ Vegetation.Type + Unit + Month, data = Site.CPUE)
summary(ANOVA_FULL)
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## Vegetation.Type  5  26.20   5.239   3.137  0.01277 *  
## Unit             4  24.77   6.193   3.708  0.00838 ** 
## Month            4  59.91  14.977   8.966 6.02e-06 ***
## Residuals       73 121.94   1.670                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_FULL)) # Checking for normal distribution. If it does not meet assumptions, mention it in report

## [1] 85  9
summary(ANOVA_FULL)
##                 Df Sum Sq Mean Sq F value   Pr(>F)    
## Vegetation.Type  5  26.20   5.239   3.137  0.01277 *  
## Unit             4  24.77   6.193   3.708  0.00838 ** 
## Month            4  59.91  14.977   8.966 6.02e-06 ***
## Residuals       73 121.94   1.670                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of vegetation and count  # tukey of ANOVA  # Levene for normal variance in data (is variation significantly different?)
ANOVA_VEG = aov(logCPUE ~ Vegetation.Type, data = Site.CPUE)
summary(ANOVA_VEG)
##                 Df Sum Sq Mean Sq F value Pr(>F)  
## Vegetation.Type  5   26.2   5.239   2.054 0.0797 .
## Residuals       81  206.6   2.551                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_VEG))

## [1] 85 80
TukeyHSD(ANOVA_VEG)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = logCPUE ~ Vegetation.Type, data = Site.CPUE)
## 
## $Vegetation.Type
##                     diff          lwr       upr     p adj
## ME-Forest     1.65202679 -1.039624689 4.3436783 0.4768803
## OW-Forest     1.74011034 -0.457613889 3.9378346 0.2016105
## RB-Forest     1.88765349 -3.147965335 6.9232723 0.8823193
## SAV-Forest    2.02069337  0.001954754 4.0394320 0.0496341
## Typha-Forest  1.00685133 -1.510958089 3.5246607 0.8509461
## OW-ME         0.08808355 -2.109640684 2.2858078 0.9999968
## RB-ME         0.23562670 -4.799992130 5.2712455 0.9999931
## SAV-ME        0.36866657 -1.650072041 2.3874052 0.9946533
## Typha-ME     -0.64517547 -3.162984884 1.8726339 0.9751303
## RB-OW         0.14754315 -4.642285771 4.9373721 0.9999991
## SAV-OW        0.28058302 -1.007947025 1.5691131 0.9879670
## Typha-OW     -0.73325902 -2.714260872 1.2477428 0.8877685
## SAV-RB        0.13303987 -4.577350223 4.8434300 0.9999994
## Typha-RB     -0.88080217 -5.825681694 4.0640774 0.9952425
## Typha-SAV    -1.01384204 -2.794202152 0.7665181 0.5603715
leveneTest(CPUE ~ Vegetation.Type, data = Site.CPUE)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.
# ANOVA of month and count  # tukey of ANOVA  # levene test
ANOVA_MONTH = aov(logCPUE ~ Month, data = Site.CPUE)
summary(ANOVA_MONTH)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## Month        4  61.64  15.410   7.383 3.96e-05 ***
## Residuals   82 171.17   2.087                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_MONTH))

## [1] 85 50
TukeyHSD(ANOVA_MONTH)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = logCPUE ~ Month, data = Site.CPUE)
## 
## $Month
##                       diff        lwr        upr     p adj
## June-May        0.36206651 -0.9001131  1.6242462 0.9298306
## July-May        2.33797534  0.9601528  3.7157979 0.0000877
## August-May      1.49515614  0.2329765  2.7573358 0.0119970
## October-May     0.42757606 -0.9822593  1.8374114 0.9153183
## July-June       1.97590882  0.5564149  3.3954028 0.0018987
## August-June     1.13308963 -0.1744522  2.4406314 0.1210227
## October-June    0.06550954 -1.3850779  1.5160969 0.9999420
## August-July    -0.84281919 -2.2623131  0.5766747 0.4665436
## October-July   -1.91039928 -3.4626561 -0.3581425 0.0081240
## October-August -1.06758008 -2.5181675  0.3830073 0.2506608
leveneTest(CountOfSite.Number ~ Month, data = Site.CPUE)
# ANOVA of unit and count  # tukey of ANOVA  # levene test
ANOVA_UNIT = aov(logCPUE ~ Unit, data = Site.CPUE)
summary(ANOVA_UNIT)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Unit         4  32.16   8.039   3.285  0.015 *
## Residuals   82 200.65   2.447                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_UNIT))

## [1] 85 80
TukeyHSD(ANOVA_UNIT)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = logCPUE ~ Unit, data = Site.CPUE)
## 
## $Unit
##                diff        lwr       upr     p adj
## MN-MC   -0.24567367 -1.7092113 1.2178640 0.9899684
## MS-MC   -1.19549842 -2.6291648 0.2381680 0.1471773
## P1A-MC  -1.11611791 -2.8750712 0.6428354 0.3979769
## SHR-MC   0.32879366 -1.1517624 1.8093497 0.9715864
## MS-MN   -0.94982475 -2.2979366 0.3982871 0.2921422
## P1A-MN  -0.87044424 -2.5603919 0.8195035 0.6059952
## SHR-MN   0.57446732 -0.8234070 1.9723417 0.7813912
## P1A-MS   0.07938052 -1.5847649 1.7435260 0.9999279
## SHR-MS   1.52429208  0.1577235 2.8908606 0.0210144
## SHR-P1A  1.44491156 -0.2597958 3.1496189 0.1357174
leveneTest(CountOfSite.Number ~ Unit, data = Site.CPUE)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.

Is unit still significant when SHR is excluded?

##WITHOUT SHR
wetland_unit <- subset(Site.CPUE, Unit %in% c('MC','MN','MS','P1A'))
boxplot(logCPUE ~ Unit, data = wetland_unit, xlab = "Unit", ylab = "logCPUE")

# ANOVA of unit (without SHR) and count  # tukey of ANOVA  # levene test
ANOVA_UNIT_2 = aov(logCPUE ~ Unit, data = wetland_unit)
summary(ANOVA_UNIT_2)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Unit         3   18.7   6.233   3.307 0.0256 *
## Residuals   64  120.6   1.885                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qqPlot(resid(ANOVA_UNIT_2))

## [1]  3 41
TukeyHSD(ANOVA_UNIT_2)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = logCPUE ~ Unit, data = wetland_unit)
## 
## $Unit
##               diff       lwr          upr     p adj
## MN-MC  -0.24567367 -1.460377  0.969029888 0.9505801
## MS-MC  -1.19549842 -2.385410 -0.005587342 0.0485065
## P1A-MC -1.11611791 -2.576010  0.343774081 0.1926483
## MS-MN  -0.94982475 -2.068728  0.169078016 0.1237256
## P1A-MN -0.87044424 -2.273063  0.532174614 0.3655428
## P1A-MS  0.07938052 -1.301823  1.460584081 0.9987453
leveneTest(CountOfSite.Number ~ Unit, data = wetland_unit)
## Warning in leveneTest.default(y = y, group = group, ...): group coerced to
## factor.