load packages
library(reshape2)
library(dplyr)
library(ggplot2)
library(forcats)
library(stringr)
Read the database into R. If the current database changes, just update the file path.
# Location of the current database export; point this at the new file
# whenever the database is replaced.
database_path <- "/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv"
Database <- read.csv(file = database_path)
Subset to field stations and sites inside of inlets but out of jellyfish aggregations (OS). These stations serve as a “baseline” zooplankton abundance.
# Keep only rows recorded as field trials at "OS" locations. subset() treats
# an NA condition as FALSE, so rows with a missing Trial.Type or Location are
# dropped as well.
Field_data <- subset(
  Database,
  subset = Trial.Type == "Field" & Location == "OS"
)
Combine species and life history stage
# Build one "<Genus.species>_<Life.History.Stage>" label for every row.
Field_data[["Species_lifestage"]] <- with(
  Field_data,
  paste(Genus.species, Life.History.Stage, sep = "_")
)
Combine the following taxa:
- Acartia and small calanoida are added to medium calanoida
- Calanus pacificus Female Adult, Calanus pacificus Male Adult, and Calanus pacificus C5-adult are combined into Calanus pacificus
- Ditrichocorycaeus anglicus large and small are combined
# Lookup of labels to merge, written as "new label" = "existing label".
# Several existing labels that share a new label collapse into one level.
taxa_to_merge <- c(
  "CALANOIDA_Medium" = "ACARTIA_Copepodite",
  "CALANOIDA_Medium" = "ACARTIA_Female, Adult",
  "CALANUS PACIFICUS" = "CALANUS PACIFICUS_C5-adult",
  "CALANUS PACIFICUS" = "CALANUS PACIFICUS_Female, Adult",
  "CALANUS PACIFICUS" = "CALANUS PACIFICUS_Male, Adult",
  "DITRICHOCORYCAEUS ANGLICUS" = "DITRICHOCORYCAEUS ANGLICUS_Large",
  "DITRICHOCORYCAEUS ANGLICUS" = "DITRICHOCORYCAEUS ANGLICUS_Small"
)

# Store the merged labels in a new factor column; labels that are not listed
# above are carried over unchanged.
Field_data$Species_lifestage_combined <- fct_recode(
  Field_data$Species_lifestage,
  !!!taxa_to_merge
)
Remove centric diatoms
# Drop the rows labelled "Diatom-Centric". which() discards NA comparisons, so
# rows with a missing Genus.species are removed too.
not_centric_diatom <- Field_data$Genus.species != "Diatom-Centric"
Field_data <- Field_data[which(not_centric_diatom), , drop = FALSE]
Sum grouped taxa
# Sum densities over rows that share every grouping column on the right-hand
# side of the formula, so taxa merged into one label become a single row per
# sample.
density_by_taxon <- Density....m3. ~ Sample.Code + Sample.Year + Sample.Date +
  Site + Station + Broad.Group + Species_lifestage_combined
Field_data_sum <- aggregate(density_by_taxon, data = Field_data, FUN = sum)
Deal with duplicate station names
# Pair each station name with its sample date (space-separated) so that a
# station name reused on another date still gets its own key.
Field_data_sum[["Station_unique"]] <- with(
  Field_data_sum,
  paste(Station, Sample.Date)
)
# Print the distinct station/date keys for inspection.
unique(Field_data_sum[["Station_unique"]])
## [1] "BUDD1a 07/28/2021" "BUDD1s 08/27/2020" "BUDD2a 08/27/2020"
## [4] "BUDD2s 07/28/2021" "BUDD2s 08/27/2020" "BUDD3 08/27/2021"
## [7] "BUDD3s 08/27/2020" "BUDD4 08/27/2020" "BUDD4s 08/27/2020"
## [10] "BUDD5 08/27/2020" "BUDD6 08/27/2020" "Eld1b 07/28/2021"
## [13] "Eld1s 08/28/2020" "Eld2 08/28/2020" "Eld2b 07/28/2021"
## [16] "Eld2s 08/28/2020" "Eld3b 07/28/2021" "Eld3s 08/28/2020"
## [19] "Eld4 08/28/2020" "Eld4s 08/28/2020" "QM10 08/24/2021"
## [22] "QM11 08/24/2021" "QM3 07/24/2019" "QM3 08/28/2019"
## [25] "QM3a 08/29/2020" "QM3b 07/29/2021" "QM4 07/24/2019"
## [28] "QM4 08/28/2019" "QM4b 07/29/2021" "QM4c 08/22/2021"
## [31] "QM7a 08/29/2020" "QM9 08/23/2021" "SC12 08/26/2021"
## [34] "SC17 08/27/2021" "SC3 09/25/2019" "SC3a 07/27/2021"
## [37] "SC4 09/25/2019" "SC4a 07/27/2021" "SC4c 08/25/2021"
rename duplicate stations
# Give three station/date keys a distinct station label. Only the station part
# of each replacement value matters, because the trailing date is stripped
# immediately afterwards.
Field_data_sum$Station_unique <- recode(
  Field_data_sum$Station_unique,
  "BUDD2s 08/27/2020" = "BUDD2s_2020 08/27/2020",
  "QM3 07/24/2019" = "QM3s 08/27/2020",
  "QM4 07/24/2019" = "QM4s 08/27/2020"
)

# Remove the date together with the space that separates it from the station
# name. The previous str_sub(end = -11) kept that separator, leaving a trailing
# space on every label (e.g. "QM3s " instead of "QM3s").
Field_data_sum$Station_unique <- str_remove(
  Field_data_sum$Station_unique,
  " \\d{1,2}/\\d{1,2}/\\d{4}$"
)
Summarize by experiment, taxon, and life history stage.
# Summary statistics of density for every Site x Sample.Year x Broad.Group x
# taxon combination:
#   mean - mean density
#   sd   - sample standard deviation (NA when a group has one observation)
#   n    - number of rows in the group
#   se   - standard error, sd / sqrt(n) (NA whenever sd is NA)
# .groups = "drop" returns an ungrouped table, so later steps do not silently
# run per group, and it silences summarise()'s regrouping message.
Field_data_averaged <- Field_data_sum %>%
  group_by(Site, Sample.Year, Broad.Group, Species_lifestage_combined) %>%
  summarise(
    mean = mean(Density....m3.),
    sd = sd(Density....m3.),
    n = n(),
    se = sd / sqrt(n),
    .groups = "drop"
  )
Mean table
# Reshape the means to wide form: one row per Broad.Group and taxon, one
# column per Site and Sample.Year combination.
Field_data_mean <- dcast(
  data = Field_data_averaged,
  formula = Broad.Group + Species_lifestage_combined ~ Site + Sample.Year,
  value.var = "mean"
)
Standard Error table
# Reshape the standard errors to wide form with the same layout as the mean
# table: one row per Broad.Group and taxon, one column per Site and
# Sample.Year combination.
Field_data_SE <- dcast(
  data = Field_data_averaged,
  formula = Broad.Group + Species_lifestage_combined ~ Site + Sample.Year,
  value.var = "se"
)
# Round every value column (all but the two leading identifier columns) of
# both tables to two decimal places.
value_columns <- -c(1, 2)
Field_data_mean[, value_columns] <- round(
  Field_data_mean[, value_columns],
  digits = 2
)
Field_data_SE[, value_columns] <- round(
  Field_data_SE[, value_columns],
  digits = 2
)
# Names of the value columns: everything except the two leading identifier
# columns (Broad.Group and Species_lifestage_combined).
columns_to_combine <- names(Field_data_mean)[-c(1, 2)]

# Build one "mean ± SE" text column per value column.
# - lapply() always returns a list with one element per column, whereas
#   sapply() collapses to a plain vector when the tables have a single row,
#   which breaks the data.frame() assembly below.
# - paste0() is used because " ± " already carries its own spaces; paste()
#   with the default separator produced doubled spaces ("1.2  ±  0.3").
mean_se_columns <- lapply(columns_to_combine, function(col) {
  paste0(Field_data_mean[[col]], " ± ", Field_data_SE[[col]])
})
names(mean_se_columns) <- columns_to_combine

# Identifier columns followed by the combined text columns.
Field_densities <- data.frame(
  Field_data_mean[, c(1, 2)],
  mean_se_columns
)
reformat species
# Replace the underscores in the taxon labels with spaces for display.
Field_densities$Species_lifestage_combined <- gsub(
  pattern = "_",
  replacement = " ",
  x = Field_densities$Species_lifestage_combined,
  fixed = TRUE
)

# Write the finished table to the project's output folder, without row names.
densities_csv <- "/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_densities.csv"
write.csv(Field_densities, file = densities_csv, row.names = FALSE)
From this point on, the table can be edited in Excel to match the required table format.
# Stacked bar chart of taxon composition by year, one panel per site.
# position = "fill" rescales every bar to the same total height, so the bars
# show each taxon's share of the summed means rather than absolute values.
# NOTE(review): the y-axis label still reads "Mean Density" although the
# plotted quantity is a proportion - confirm whether the label should change.
Field_stacked_barplot <- ggplot(
  data = Field_data_averaged,
  mapping = aes(
    x = Sample.Year,
    y = mean,
    fill = Species_lifestage_combined
  )
) +
  geom_bar(position = "fill", stat = "identity") +
  facet_grid(cols = vars(Site)) +
  labs(x = "Year", y = "Mean Density") +
  theme_minimal()

# Display the plot.
Field_stacked_barplot
save plot as image
# Switch the working directory to the project's output folder and save the
# plot there as a 20 x 10 PNG at 300 dpi.
# NOTE(review): setwd() changes the working directory for everything that runs
# afterwards; it is kept here because later code may rely on it.
output_dir <- "/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output"
setwd(output_dir)
ggsave(
  filename = "Field_stacked_barplot.png",
  plot = Field_stacked_barplot,
  width = 20,
  height = 10,
  dpi = 300
)