This code compiles the field microplankton flow cytometry data into dinoflagellates and diatoms. It then performs an anova comparing microplankton biovolume by inlet, location, year, and month.

Report is located here: https://rpubs.com/HailaSchultz/field_ANOVA-microplankton

load packages

library(janitor)
library(reshape2)
library(ggplot2)
library(dplyr)
library(stringr)
library(forcats)

Data wrangling

Load data

load raw biovolume data - make sure to read without headers for easier wrangling in the future

data_2019 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_biovolume_2019.csv", header=FALSE)

data_2020 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_biovolume_2020.csv", header=FALSE)

data_Jul_2021 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_biovolume_July_2021.csv", header=FALSE)

data_Aug_2021 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_biovolume_August_2021.csv", header=FALSE)

Load taxa tables - for 2020 and 2021, these files match the taxa to either a diatom or dinoflagellate category

taxa_2020 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_2020_taxa.csv")

taxa_2021 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_2021_taxa.csv")

2019

extract taxa category information

#remove excess columns
taxa_2019 <- data_2019[, -c(1:9)]
#remove excess rows
taxa_2019 <- taxa_2019[-(3:45),]

#change from wide to long format
taxa_2019<-t(taxa_2019)

#remove row names
row.names(taxa_2019) <- NULL

#rename columns
colnames(taxa_2019)[1] ="group"
colnames(taxa_2019)[2] ="taxa"

#convert to dataframe
taxa_2019<-as.data.frame(taxa_2019)

reformat dataframe

#remove first row
data_2019_reformat <- data_2019[-(1),]

#make first row column names
data_2019_reformat<-data_2019_reformat %>%
  row_to_names(row_number = 1)

#wide to long format
data_2019_reformat<-data_2019_reformat %>%
  melt(id.vars=c(1:9))

subset

data_2019_reformat<-subset(data_2019_reformat,Type=="Field")

#remove unnecessary columns
data_2019_reformat <- data_2019_reformat[, -c(2,4,6:8)]

Add category

data_2019_reformat$group <- taxa_2019$group[match(data_2019_reformat$variable, taxa_2019$taxa)]

#recode so all DF categories are the same
data_2019_reformat$group <- gsub("DF-all", "DF", data_2019_reformat$group)
data_2019_reformat$group <- gsub("DF-h", "DF", data_2019_reformat$group)
data_2019_reformat$group <- gsub("DF-m", "DF", data_2019_reformat$group)

reformat

#split date and time into two columns
data_2019_reformat[c('Date', 'Time')] <- str_split_fixed(data_2019_reformat$COLLECTDATE, ' ', 2)
data_2019_reformat <- data_2019_reformat[, -c(4)]

#add month
data_2019_reformat<-data_2019_reformat %>%
  mutate(Month = case_when(
    startsWith(Date, "9") ~ "Sep",
    startsWith(Date, "8") ~ "Aug"
    ))

#rename columns
colnames(data_2019_reformat)[1] ="Station"
colnames(data_2019_reformat)[2] ="Inlet"
colnames(data_2019_reformat)[3] ="Location"
colnames(data_2019_reformat)[4] ="taxa"
colnames(data_2019_reformat)[5] ="biovolume"

#add year
data_2019_reformat$Year="2019"

#reorder columns
data_2019_reformat <- data_2019_reformat[, c(1,2, 3,10, 9, 7,8,6, 4,5)]

#recode location
data_2019_reformat$Location <- gsub("Out of smack", "OS", data_2019_reformat$Location)
data_2019_reformat$Location <- gsub("In smack", "IN", data_2019_reformat$Location)
data_2019_reformat$Location <- gsub("Outside bay", "OB", data_2019_reformat$Location)

#recode inlets
data_2019_reformat$Inlet <- gsub("Quartermaster", "QM", data_2019_reformat$Inlet)
data_2019_reformat$Inlet <- gsub("Sinclair", "SC", data_2019_reformat$Inlet)

remove incomplete rows

data_2019_reformat <- data_2019_reformat[-which(data_2019_reformat$biovolume == ""), ]

2020

reformat taxa table

#remove excess columns
taxa_2020 <- taxa_2020[, -c(2,3,5:26)]
#remove excess rows
taxa_2020 <- taxa_2020[-c(1:7,55:999),]

#rename columns
colnames(taxa_2020)[1] ="taxa"
colnames(taxa_2020)[2] ="group"

reformat dataframe

#remove excess rows at bottom of dataframe
data_2020_reformat <- data_2020[-(27:1000),]

#remove first row
data_2020_reformat <- data_2020_reformat[-(1),]

#make first row column names
data_2020_reformat<-data_2020_reformat %>%
  row_to_names(row_number = 1)

#wide to long format
data_2020_reformat<-data_2020_reformat %>%
  melt(id.vars=c(1:4))

remove unnecessary columns

data_2020_reformat <- data_2020_reformat[, -c(1,2)]

list of station names from database

Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")

#subset
FieldData<-subset(Database, Trial.Type=="Field")
#remove commas from jelly density
FieldData$Jelly.Density <- as.numeric(gsub(",","",FieldData$Jelly.Density..g.m3.))


Stations<-FieldData %>%
  group_by(Site,Location,Station,Sample.Date,Sample.Time,Sample.Year,Latitude, Longitude)%>%
  summarise(jelly_biomass=mean(Jelly.Density))

rename stations

data_2020_reformat$Station <-recode(data_2020_reformat$Station, 
                                 'Budd 1S'='BUDD1s',
                                 'Budd 2C'='BUDD2a',
                                 'Budd 2S'='BUDD2s',
                                 'Budd 3C'='BUDD3',
                                 'Budd 3S'='BUDD3s',
                                 'Budd 4C'='BUDD4',
                                 'Budd 4S'='BUDD4s',
                                 'Budd 5C'='BUDD5',
                                 'Budd 7C'='BUDD7',
                                 'Eld 1C'='Eld1',
                                 'Eld 1S'='Eld1s',
                                 'Eld 2C'='Eld2',
                                 'Eld 2S'='Eld2s',
                                 'Eld 3C'='Eld3',
                                 'Eld 3S'='Eld3s',
                                 'Eld 4C'='Eld4',
                                 'Eld 4S'='Eld4s',
                                 'Eld 5C'='Eld5',
                                 'Qmaster 3C'='QM3a',
                                 'Qmaster 4C'='QM4a',
                                 'Qmaster 5C'='QM5a',
                                 'Qmaster 7C'='QM7a',
                                 'Qmaster 8C'='QM8a',
                                 'Totten 1S'='Totten'
                                )

add inlet and location

#Inlet
data_2020_reformat$Inlet <- Stations$Site[match(data_2020_reformat$Station, Stations$Station)]
data_2020_reformat$Location <- Stations$Location[match(data_2020_reformat$Station, Stations$Station)]

Add category

data_2020_reformat$group <- taxa_2020$group[match(data_2020_reformat$variable, taxa_2020$taxa)]

#recode so all DF categories are the same
data_2020_reformat$group <- gsub("DF-all", "DF", data_2020_reformat$group)
data_2020_reformat$group <- gsub("DF-h", "DF", data_2020_reformat$group)
data_2020_reformat$group <- gsub("DF-m", "DF", data_2020_reformat$group)

reformat

#split date and time into two columns
data_2020_reformat[c('Date', 'Time')] <- str_split_fixed(data_2020_reformat$COLLECTDATE, ' ', 2)
data_2020_reformat <- data_2020_reformat[, -c(2)]

#add month
data_2020_reformat<-data_2020_reformat %>%
  mutate(Month = case_when(
    startsWith(Date, "9") ~ "Sep",
    startsWith(Date, "8") ~ "Aug",
    startsWith(Date,"7") ~ "Jul"
    ))

#add year
data_2020_reformat$Year="2020"

#remove station
data_2020_reformat <- data_2020_reformat[, -c(1)]

#rename columns
colnames(data_2020_reformat)[1] ="taxa"
colnames(data_2020_reformat)[2] ="biovolume"

#reorder columns
data_2020_reformat <- data_2020_reformat[, c(3,4,5,9,10,7,8,6,1,2)]

subset to Quartermaster

data_2020_reformat<-subset(data_2020_reformat,Inlet=="QM")

remove incomplete rows

data_2020_reformat <- data_2020_reformat[-which(data_2020_reformat$biovolume == ""), ]

2021

reformat taxa table

#remove excess columns
taxa_2021 <- taxa_2021[, -c(2:3)]
#remove excess rows
taxa_2021 <- taxa_2021[-c(1:7),]

#rename columns
colnames(taxa_2021)[1] ="taxa"
colnames(taxa_2021)[2] ="group"

reformat dataframes

July

#remove first row
data_2021_Jul_reformat <- data_Jul_2021[-(1),]

#make first row column names
data_2021_Jul_reformat<-data_2021_Jul_reformat %>%
  row_to_names(row_number = 1)

#wide to long format
data_2021_Jul_reformat<-data_2021_Jul_reformat %>%
  melt(id.vars=c(1:2))

Aug

#remove first row
data_2021_Aug_reformat <- data_Aug_2021[-(1),]

#make first row column names
data_2021_Aug_reformat<-data_2021_Aug_reformat %>%
  row_to_names(row_number = 1)

#wide to long format
data_2021_Aug_reformat<-data_2021_Aug_reformat %>%
  melt(id.vars=c(1:2))

I decided only to use August sampes (because of poor-quality samples in July)

data_2021_reformat<-data_2021_Aug_reformat

match with stations

#import data
Stations_2021 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Microplankton/Microplankton_2021_Stations.csv")

data_2021_reformat$Station <- Stations_2021$Station[match(data_2021_reformat$LABSAMPLENUM, Stations_2021$Sample_num)]

rename stations

data_2021_reformat$Station <-recode(data_2021_reformat$Station, 
                                 'QM 10'='QM10',
                                 'QM 5'='QM5c',
                                 'QM 6'='QM6c',
                                 'QM 8'='QM8c',
                                 'QM 9'='QM9',
                                 'QMH 1'='QM1',
                                 'QMH 3'='QM3',
                                 'QMH 5'='QM5',
                                 'SC 4'='SC4c',
                                 'SC 5'='SC5d',
                                 'SC 6'='SC6c',
                                 'SC 7'='SC7',
                                 'SC 8'='SC8',
                                 'Sinclair 1'='SC1a',
                                 'Sinclair 3'='SC3a',
                                 'Sinclair 6'='SC6a'
                                )

remove old station column

data_2021_reformat <- data_2021_reformat[, -c(1)]

add inlet and location

#Inlet
data_2021_reformat$Inlet <- Stations$Site[match(data_2021_reformat$Station, Stations$Station)]
data_2021_reformat$Location <- Stations$Location[match(data_2021_reformat$Station, Stations$Station)]

Add category

data_2021_reformat$group <- taxa_2021$group[match(data_2021_reformat$variable, taxa_2021$taxa)]

#recode so all DF categories are the same
data_2021_reformat$group <- gsub("DF-all", "DF", data_2021_reformat$group)
data_2021_reformat$group <- gsub("DF-h", "DF", data_2021_reformat$group)
data_2021_reformat$group <- gsub("DF-m", "DF", data_2021_reformat$group)

#make NA in group DF
data_2021_reformat$group <- replace(data_2021_reformat$group, is.na(data_2021_reformat$group), "DF")

reformat

#split date and time into two columns
data_2021_reformat[c('Date', 'Time')] <- str_split_fixed(data_2021_reformat$COLLECTDATE, ' ', 2)
data_2021_reformat <- data_2021_reformat[, -c(1)]

#add month
data_2021_reformat<-data_2021_reformat %>%
  mutate(Month = case_when(
    startsWith(Date, "9") ~ "Sep",
    startsWith(Date, "8") ~ "Aug",
    startsWith(Date,"7") ~ "Jul"
    ))

#add year
data_2021_reformat$Year="2021"

#rename columns
colnames(data_2021_reformat)[1] ="taxa"
colnames(data_2021_reformat)[2] ="biovolume"

#reorder columns
data_2021_reformat <- data_2021_reformat[, c(3,4,5,9,10,7,8,6,1,2)]

remove incomplete rows

data_2021_reformat <- data_2021_reformat[-which(data_2021_reformat$biovolume == ""), ]
data_2021_reformat <- data_2021_reformat[complete.cases(data_2021_reformat), ]

Add years together

Microplankton_QMSC<-rbind(data_2019_reformat,data_2020_reformat,data_2021_reformat)

2-way ANOVA:location and group

subset

#just diatoms and dinoflagellates
Microplankton_QMSC<-subset(Microplankton_QMSC, group==c("DF","DT"))

add up diatom and dinoflagellates per Location

Microplankton_QMSC$biovolume<-as.numeric(Microplankton_QMSC$biovolume)
Microplankton_QMSC_sum<-aggregate(data=Microplankton_QMSC, biovolume~Year+Month+Inlet+Location+Station+group, FUN=sum)

unransformed model

Micro_mod1 <- aov(biovolume ~ group+Location+Month+Inlet+Year+group:Location+Location:Month+Location:Inlet+Location:Year, data = Microplankton_QMSC_sum)

summary(Micro_mod1)

##                Df Sum Sq Mean Sq F value Pr(>F)  
## group           1   82.5   82.52   2.144  0.156  
## Location        2  358.8  179.42   4.662  0.019 *
## Month           1   60.9   60.88   1.582  0.220  
## Inlet           1   21.9   21.88   0.568  0.458  
## Year            2   23.3   11.67   0.303  0.741  
## group:Location  2   24.5   12.25   0.318  0.730  
## Location:Month  2   40.5   20.23   0.526  0.598  
## Location:Inlet  2   51.6   25.79   0.670  0.521  
## Location:Year   3   36.2   12.06   0.313  0.816  
## Residuals      25  962.2   38.49                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(Micro_mod1, which = "Location")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = biovolume ~ group + Location + Month + Inlet + Year + group:Location + Location:Month + Location:Inlet + Location:Year, data = Microplankton_QMSC_sum)
## 
## $Location
##            diff        lwr        upr     p adj
## OB-IN -6.321283 -12.785583  0.1430165 0.0561636
## OS-IN -5.545330 -10.930047 -0.1606130 0.0426505
## OS-OB  0.775953  -6.072694  7.6246000 0.9571216

check normality assumptions

ggplot(Microplankton_QMSC_sum, aes(x=biovolume)) + geom_histogram()

qqnorm(Micro_mod1$residuals)
qqline(Micro_mod1$residuals)

shapiro.test(Micro_mod1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  Micro_mod1$residuals
## W = 0.91217, p-value = 0.0034

Histogram is logarithmic, qqplot looks weird, and shapiro test is significant, need to transform

sqrt transformation

Micro_mod2 <- aov(sqrt(biovolume) ~ group+Location+Month+Inlet+Year+group:Location+Location:Month+Location:Inlet+Location+Year, data = Microplankton_QMSC_sum)

check normality assumptions

ggplot(Microplankton_QMSC_sum, aes(x=sqrt(biovolume))) + geom_histogram()

qqnorm(Micro_mod2$residuals)
qqline(Micro_mod2$residuals)

shapiro.test(Micro_mod2$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  Micro_mod2$residuals
## W = 0.96136, p-value = 0.1652

boxplot(sqrt(biovolume) ~ Location, xlab='Location', ylab='Density', data=Microplankton_QMSC_sum)

boxplot(sqrt(biovolume) ~ group, xlab='group', ylab='Density', data=Microplankton_QMSC_sum)

looks good!!!

summary(Micro_mod2)

##                Df Sum Sq Mean Sq F value  Pr(>F)   
## group           1   3.74   3.736   2.829 0.10369   
## Location        2  18.45   9.226   6.986 0.00346 **
## Month           1   3.95   3.949   2.990 0.09480 . 
## Inlet           1   1.78   1.785   1.351 0.25487   
## Year            2   0.09   0.046   0.035 0.96607   
## group:Location  2   0.62   0.311   0.236 0.79156   
## Location:Month  2   1.97   0.983   0.744 0.48421   
## Location:Inlet  2   3.67   1.837   1.391 0.26544   
## Residuals      28  36.98   1.321                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(Micro_mod2, which = "Location")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Location + Month + Inlet + Year + group:Location + Location:Month + Location:Inlet + Location + Year, data = Microplankton_QMSC_sum)
## 
## $Location
##             diff       lwr        upr     p adj
## OB-IN -1.4481444 -2.637702 -0.2585872 0.0146027
## OS-IN -1.2462532 -2.237146 -0.2553602 0.0114726
## OS-OB  0.2018912 -1.058393  1.4621758 0.9172959

none of the interaction terms are significant, so it looks like we can interpret main effects. Biovolume is higher inside aggregation than outside of aggregation and outside of bay.

without interaction terms

Micro_mod3 <- aov(sqrt(biovolume) ~ group+Location+Month+Inlet+Year, data = Microplankton_QMSC_sum)

check normality assumptions

qqnorm(Micro_mod3$residuals)
qqline(Micro_mod3$residuals)

shapiro.test(Micro_mod3$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  Micro_mod3$residuals
## W = 0.97317, p-value = 0.4194

summary(Micro_mod3)

##             Df Sum Sq Mean Sq F value  Pr(>F)   
## group        1   3.74   3.736   2.938 0.09564 . 
## Location     2  18.45   9.226   7.254 0.00238 **
## Month        1   3.95   3.949   3.105 0.08705 . 
## Inlet        1   1.78   1.785   1.403 0.24441   
## Year         2   0.09   0.046   0.036 0.96478   
## Residuals   34  43.24   1.272                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(Micro_mod3, which = "Location")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Location + Month + Inlet + Year, data = Microplankton_QMSC_sum)
## 
## $Location
##             diff       lwr        upr     p adj
## OB-IN -1.4481444 -2.604215 -0.2920737 0.0113916
## OS-IN -1.2462532 -2.209252 -0.2832543 0.0087819
## OS-OB  0.2018912 -1.022916  1.4266982 0.9142241

make plot

get sample sizes

#count sample sizes per category
Microplankton_QMSC_sum %>%
  group_by(group,Location) %>%
  summarize(count = n())

## # A tibble: 6 × 3
## # Groups:   group [2]
##   group Location count
##   <chr> <chr>    <int>
## 1 DF    IN          10
## 2 DF    OB           4
## 3 DF    OS           7
## 4 DT    IN          10
## 5 DT    OB           4
## 6 DT    OS           7

#recode stations
Microplankton_QMSC_sum <- Microplankton_QMSC_sum %>%
                   mutate(Location = fct_recode(Location, 
                                     "Inside Aggregation" = "IN", 
                                     "Outside Aggregation" = "OS",
                                     "Outside Bay"="OB"))
#recode group
Microplankton_QMSC_sum <- Microplankton_QMSC_sum %>%
                   mutate(group = fct_recode(group, 
                                     "Dinoflagellates" = "DF", 
                                     "Diatoms" = "DT"))

Microplankton_QMSC_sum$Location <- factor(Microplankton_QMSC_sum$Location,
                         c("Inside Aggregation", "Outside Aggregation", "Outside Bay"))

micro_plot<- ggplot(Microplankton_QMSC_sum, aes(x=group, y=sqrt(biovolume), fill=Location)) + 
  geom_boxplot() +
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank(), axis.line = element_line(colour = "black"))+theme_classic()+
  xlab(bquote('Microplankton Taxon'))+ 
  ylab(expression(plain('sqrt (Biovolume)') ~ plain('(' ~ mm^3 ~ '/L)')))+
  scale_fill_manual(values = c("Inside Aggregation" = "#619CFF", "Outside Aggregation" = "#F8766D", "Outside Bay" = "#00BA38"))+
  theme(axis.text.y=element_text(size=15,colour="black"),
        legend.position=c(0.28, 0.85),
        legend.text=element_text(size=15,colour="black"),
        legend.title=element_text(size=15,colour="black"),
        axis.title.y=element_text(size=20,colour="black"),
        axis.line = element_line(colour = "black"),
        axis.title.x=element_blank(),
        axis.text.x=element_text(size=15,colour="black"))+
  annotate("text",x=0.75, y=4.1, label= "10",size=6)+
  annotate("text",x=1, y=1.4, label= "7",size=6)+
  annotate("text",x=1.25, y=1.3, label= "4",size=6)+
  annotate("text",x=1.75, y=5.7, label= "10",size=6)+
  annotate("text",x=2, y=3.4, label= "7",size=6)+
  annotate("text",x=2.25, y=1.7, label= "4",size=6)+ 
  annotate("text",x=0.75, y=3.7, label= "*",size=10)+
  annotate("text",x=1.75, y=5.3, label= "*",size=10)+ 
  scale_y_continuous(breaks=seq(0,7,1),expand = c(0, 0), limits = c(0, 7))+
  theme(plot.margin=margin(0.5, 0.5, 0.5, 0.5, unit = "cm"))
micro_plot

save plot

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
ggsave(filename = "Field_Microplankton_ANOVA.png", plot = micro_plot, width = 164, height = 164, units="mm", device='png', dpi=600)
ggsave(filename = "Field_Microplankton_ANOVA.tif", plot = micro_plot, width = 164, height = 164, units="mm", device='tiff', dpi=600)

Other factors

Inlet

ANOVA

#remove 2019
Micro_mod3 <- aov(sqrt(biovolume) ~ group+Inlet, data = Microplankton_QMSC_sum)

summary(Micro_mod3)

##             Df Sum Sq Mean Sq F value Pr(>F)
## group        1   3.74   3.736   2.294  0.138
## Inlet        1   3.99   3.994   2.452  0.125
## Residuals   39  63.53   1.629

TukeyHSD(Micro_mod3, which = "Inlet")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Inlet, data = Microplankton_QMSC_sum)
## 
## $Inlet
##            diff      lwr      upr     p adj
## SC-QM 0.6350403 -0.18522 1.455301 0.1254383

TukeyHSD(Micro_mod3, which = "group")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Inlet, data = Microplankton_QMSC_sum)
## 
## $group
##                              diff        lwr      upr    p adj
## Diatoms-Dinoflagellates 0.5965339 -0.2001372 1.393205 0.137948

Month

ANOVA

#remove 2019
Micro_mod4 <- aov(sqrt(biovolume) ~ group+Month, data = Microplankton_QMSC_sum)

summary(Micro_mod4)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        1   3.74   3.736   2.380 0.1310  
## Month        1   6.30   6.298   4.012 0.0522 .
## Residuals   39  61.22   1.570                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(Micro_mod4, which = "Month")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Month, data = Microplankton_QMSC_sum)
## 
## $Month
##              diff       lwr        upr     p adj
## Sep-Aug -1.106594 -2.224105 0.01091744 0.0521703

TukeyHSD(Micro_mod4, which = "group")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Month, data = Microplankton_QMSC_sum)
## 
## $group
##                              diff       lwr      upr     p adj
## Diatoms-Dinoflagellates 0.5965339 -0.185561 1.378629 0.1309593

Year

ANOVA

#remove 2019
Micro_mod3 <- aov(sqrt(biovolume) ~ group+Year, data = Microplankton_QMSC_sum)

summary(Micro_mod3)

##             Df Sum Sq Mean Sq F value Pr(>F)
## group        1   3.74   3.736   2.184  0.148
## Year         2   2.52   1.260   0.736  0.485
## Residuals   38  65.00   1.711

TukeyHSD(Micro_mod3, which = "Year")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Year, data = Microplankton_QMSC_sum)
## 
## $Year
##                diff        lwr      upr     p adj
## 2020-2019 0.1673280 -1.1533304 1.487986 0.9487996
## 2021-2019 0.5483470 -0.5882947 1.684989 0.4741963
## 2021-2020 0.3810191 -0.8770128 1.639051 0.7422302

TukeyHSD(Micro_mod3, which = "group")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = sqrt(biovolume) ~ group + Year, data = Microplankton_QMSC_sum)
## 
## $group
##                              diff        lwr      upr     p adj
## Diatoms-Dinoflagellates 0.5965339 -0.2205534 1.413621 0.1476607

16_field_ANOVA-microplankton

2023-08-28

Data wrangling

Load data

2019

2020

2021

Add years together

2-way ANOVA:location and group

unransformed model

sqrt transformation

without interaction terms

make plot

Other factors

Inlet

Month

Year