This code calculates the means for environmental data collected from the CTD and creates Table I in the manuscript. The original CTD files were converted from original .hex format using SeaBird software outside of R. They were then trimmed individually and the small boat file fluorescence values were calibrated using this code: https://rpubs.com/HailaSchultz/field_CTD-profiles
This code also provides a correlation between salinity and temperature in sinclair inlet and quartermaster harbor, as a justification for excluding salinity from generalized models.
This code also conducts an ANOVA of fluorescence inside and outside of aggeregations
The final purpose of this code is to add nutrient data to the environmental spreadsheet, primarily for use in NMDS plots.
Report is here: https://rpubs.com/HailaSchultz/field_CTD-table
load packages
library(tidyverse)
library(dplyr)
library(ggpubr)
library(janitor)
list of station names from database and calculate jelly density
Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")
#subset
FieldData<-subset(Database, Trial.Type=="Field")
#remove commas from jelly density
FieldData$Jelly.Density <- as.numeric(gsub(",","",FieldData$Jelly.Density..g.m3.))
Stations<-FieldData %>%
group_by(Site,Location,Station,Sample.Date,Sample.Year,Latitude, Longitude)%>%
summarise(jelly_biomass=mean(Jelly.Density))
import CTD downcasts into one file
#set working directory to folder with CTD files
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/CTD/CTD_Downcasts-edited")
CTDCombined <-
list.files(pattern = "*.csv") %>%
map_df(~read_csv(.))
loop through stations: interpolate to equal intervals and take mean
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$salinity, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Smean = mean(y),
Smin = min(y),
Smax = max(y),
Ssd = sd(y)
)
results_list[[factor_value]] <- summarize
}
salinity <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Temp, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Tmean = mean(y),
Tmin = min(y),
Tmax = max(y),
Tsd = sd(y)
)
results_list[[factor_value]] <- summarize
}
Temp <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Oxygen, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Omean = mean(y),
Omin = min(y),
Omax = max(y),
Osd = sd(y)
)
results_list[[factor_value]] <- summarize
}
Oxygen <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$pH, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
pHmean = mean(y),
pHmin = min(y),
pHmax = max(y),
pHsd = sd(y)
)
results_list[[factor_value]] <- summarize
}
pH <- bind_rows(results_list)
results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$fluorescence, method = "linear", n = 100))
summarize <- table %>%
summarise(
FactorValue = factor_value,
Fmean = mean(y),
Fmin = min(y),
Fmax = max(y),
Fsd = sd(y)
)
results_list[[factor_value]] <- summarize
}
fluorescence <- bind_rows(results_list)
Env<-merge(salinity, Oxygen,by=c("FactorValue"))
Env1<-merge(Env,Temp,by=c("FactorValue"),all.x = TRUE)
Env2<-merge(Env1,pH,by=c("FactorValue"))
Env3<-merge(Env2, fluorescence,by=c("FactorValue"))
#rename station column
Env3 <- Env3 %>%
rename("Station" = "FactorValue")
get station names to see if they match up
unique(Stations$Station)
## [1] "BUDD1" "BUDD3a" "BUDD4a" "BUDD7" "BUDD1a" "BUDD1s" "BUDD2a" "BUDD2s"
## [9] "BUDD3" "BUDD3s" "BUDD4" "BUDD4s" "BUDD5" "BUDD6" "Eld1" "Eld1b"
## [17] "Eld1s" "Eld2" "Eld2b" "Eld2s" "Eld3b" "Eld3s" "Eld4" "Eld4s"
## [25] "QM1" "QM1a" "QM1b" "QM1c" "QM2" "QM2a" "QM2b" "QM2c"
## [33] "QM3c" "QM4a" "QM5a" "QM5c" "QM6c" "QM12" "QM5" "QM5b"
## [41] "QM6" "QM6b" "QM8a" "QM10" "QM11" "QM3" "QM3a" "QM3b"
## [49] "QM4" "QM4b" "QM4c" "QM7a" "QM9" "SC1" "SC10" "SC11"
## [57] "SC14" "SC16" "SC1a" "SC2" "SC2a" "SC2c" "SC3c" "SC5c"
## [65] "SC5d" "SC6c" "SC7" "SC5" "SC5a" "SC6" "SC6a" "SC8"
## [73] "SC12" "SC17" "SC3" "SC3a" "SC4" "SC4a" "SC4c"
unique(Env3$Station)
## [1] "2020_BUDD2s" "BUDD1" "BUDD1s" "BUDD2a" "BUDD3"
## [6] "BUDD3s" "BUDD4" "BUDD4s" "BUDD5" "BUDD6"
## [11] "BUDD7" "Eld1" "Eld1s" "Eld2" "Eld2s"
## [16] "Eld3" "Eld3s" "Eld4" "Eld4s" "Eld5"
## [21] "Gig1" "Gig1a" "Gig2" "Gig2a" "Gig3"
## [26] "Gig4" "QM10" "QM11" "QM12" "QM1a"
## [31] "QM1c" "QM2a" "QM2c" "QM3a" "QM3c"
## [36] "QM4a" "QM4c" "QM5a" "QM5c" "QM6c"
## [41] "QM7a" "QM8_8-23-21" "QM8a" "QM9" "SC10"
## [46] "SC11" "SC12" "SC14" "SC16" "SC17"
## [51] "SC2c" "SC3c" "SC4c" "SC5a" "SC5c"
## [56] "SC6c" "SC7" "SC8"
Rename stations so the match between spreadsheets
#deal with duplicate station names
Stations$Station[9]="BUDD2s_2020"
Stations$Station[26]="QM1s"
Stations$Station[31]="QM2s"
Stations$Station[42]="QM5s"
Stations$Station[45]="QM6s"
Stations$Station[51]="QM3s"
Stations$Station[55]="QM4s"
recode to match database names
Env3$Station <-recode(Env3$Station, '2020_BUDD2s'='BUDD2s_2020', 'Budd1a'='BUDD1a', 'Budd2s'='BUDD2s','Budd3a'='BUDD3a','Budd4a'='BUDD4a')
add station info and jelly biomass
CTD_Summary <- merge(Stations,Env3,by.x = "Station", all.x = TRUE)
remove stations that didn’t get full water column profile due to low battery or some other reason
CTD_Summary_Sub<-subset(CTD_Summary, ! Station %in% c("BUDD1a","BUDD2s","BUDD3a","BUDD4a","Eld1b","Eld2b","Eld3b","QM1b","QM2b","QM3b","QM4b","QM5b","QM6b"))
note that July 2021 sinclair inlet are also missing due - I can’t remember exactly why, but correigh never added them to the folder
remove rows with missing CTD data
CTD_Summary_Sub <- na.omit(CTD_Summary_Sub)
Subset to QM and SC for paper table
CTD_SCQM<-subset(CTD_Summary_Sub, Site %in% c("QM","SC"))
#remove OB
CTD_SCQM<-subset(CTD_SCQM, Location!="OB")
Keep only means for each variable
colnames(CTD_SCQM)
## [1] "Station" "Site" "Location" "Sample.Date"
## [5] "Sample.Year" "Latitude" "Longitude" "jelly_biomass"
## [9] "Smean" "Smin" "Smax" "Ssd"
## [13] "Omean" "Omin" "Omax" "Osd"
## [17] "Tmean" "Tmin" "Tmax" "Tsd"
## [21] "pHmean" "pHmin" "pHmax" "pHsd"
## [25] "Fmean" "Fmin" "Fmax" "Fsd"
CTD_table1 = subset(CTD_SCQM, select = c(Site, Sample.Year,Station,Location,Latitude,Longitude,jelly_biomass,Smean,Omean,Tmean,pHmean,Fmean))
round to 2 decimal points
CTD_table1[, -c(1:5)]<-round(CTD_table1[, -c(1:5)],2)
remove station column
CTD_table1 <- CTD_table1[,-3]
sort by inlet, year, and jelly biomass
CTD_table1<-arrange(CTD_table1, Site, Location, Sample.Year,jelly_biomass)
CTD_table1
## Site Sample.Year Location Latitude Longitude jelly_biomass Smean Omean Tmean
## 1 QM 2020 IN 47.37897 -122.45 35.65 29.71 4.26 15.26
## 2 QM 2020 IN 47.37268 -122.47 42.79 29.78 9.53 14.21
## 3 QM 2020 IN 47.38730 -122.44 584.74 29.72 9.61 14.89
## 4 QM 2020 IN 47.37792 -122.46 749.91 29.71 9.70 14.65
## 5 QM 2021 IN 47.38870 -122.88 38.08 29.73 7.89 14.89
## 6 QM 2021 IN 47.37800 -122.45 51.43 29.77 8.58 14.67
## 7 QM 2021 IN 47.38750 -122.46 156.51 29.68 8.32 15.42
## 8 QM 2021 IN 47.38370 -122.46 218.75 29.76 8.25 14.71
## 9 QM 2021 IN 47.38890 -122.44 251.65 29.71 7.16 14.98
## 10 QM 2020 OS 47.37687 -122.46 0.00 29.69 10.93 15.24
## 11 QM 2020 OS 47.36708 -122.47 0.00 29.69 3.54 15.10
## 12 QM 2021 OS 47.36390 -122.48 0.00 29.93 6.91 13.57
## 13 QM 2021 OS 47.37780 -122.46 0.00 29.83 7.24 14.11
## 14 QM 2021 OS 47.37540 -122.47 0.00 29.90 6.83 13.71
## 15 QM 2021 OS 47.37680 -122.46 40.90 29.83 8.16 14.30
## 16 SC 2021 IN 47.50420 -122.69 48.03 29.78 7.12 15.04
## 17 SC 2021 IN 47.54380 -122.66 67.64 29.75 10.07 15.49
## 18 SC 2021 IN 47.54450 -122.64 82.52 29.74 9.25 15.41
## 19 SC 2021 IN 47.54190 -122.66 429.60 29.77 9.10 15.28
## 20 SC 2021 IN 47.54170 -122.67 534.71 29.81 5.68 14.75
## 21 SC 2021 IN 47.54440 -122.67 536.49 29.81 6.73 14.82
## 22 SC 2021 IN 47.54130 -122.65 652.29 29.85 6.91 14.76
## 23 SC 2021 IN 47.54280 -122.66 735.44 29.78 5.48 14.79
## 24 SC 2021 IN 47.54290 -122.67 1382.86 29.76 9.40 15.37
## 25 SC 2021 OS 47.56480 -122.95 0.00 29.90 7.25 14.63
## 26 SC 2021 OS 47.57470 -122.58 0.00 29.96 6.76 14.18
## 27 SC 2021 OS 47.55200 -122.61 0.00 29.78 7.67 14.97
## pHmean Fmean
## 1 8.68 10.34
## 2 8.54 9.99
## 3 8.51 9.55
## 4 8.57 12.08
## 5 8.68 6.99
## 6 8.71 8.90
## 7 8.79 8.04
## 8 8.73 7.03
## 9 8.63 5.93
## 10 8.69 10.43
## 11 8.66 12.96
## 12 8.58 3.81
## 13 8.64 7.02
## 14 8.54 3.05
## 15 8.68 6.73
## 16 8.63 13.44
## 17 8.81 14.10
## 18 8.75 8.16
## 19 8.72 10.68
## 20 8.49 7.66
## 21 8.59 4.67
## 22 8.59 5.31
## 23 8.47 7.23
## 24 8.75 9.51
## 25 8.62 1.79
## 26 8.59 1.45
## 27 8.69 2.04
Save table as csv
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
write.csv(CTD_table1,"/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_CTD_Data.csv", row.names = FALSE)
ggscatter(CTD_table1, x = "Tmean", y = "Smean",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab = "Mean Temperature", ylab = "Mean Salinity")
## Normality checks
shapiro.test(CTD_table1$Tmean)
##
## Shapiro-Wilk normality test
##
## data: CTD_table1$Tmean
## W = 0.936, p-value = 0.09708
shapiro.test(CTD_table1$Smean)
##
## Shapiro-Wilk normality test
##
## data: CTD_table1$Smean
## W = 0.93028, p-value = 0.07019
ggqqplot(CTD_table1$Tmean)
ggqqplot(CTD_table1$Smean)
The Temperature distribution is on the verge, but I’m going to take it
as okay qqplots look okay
cor.test(CTD_table1$Tmean, CTD_table1$Smean,
method = "pearson")
##
## Pearson's product-moment correlation
##
## data: CTD_table1$Tmean and CTD_table1$Smean
## t = -5.7049, df = 25, p-value = 6.093e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.8804378 -0.5208896
## sample estimates:
## cor
## -0.7520406
#rsquared
(-0.756)^2
## [1] 0.571536
visualize data
ggboxplot(CTD_table1, x = "Location", y = "Fmean",
xlab = "Location", ylab = "Mean Fluorescence")
run ANOVA
# Compute the analysis of variance
F.aov <- aov(Fmean ~ Location, data = CTD_table1)
# Summary of the analysis
summary(F.aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## Location 1 69.02 69.02 6.845 0.0149 *
## Residuals 25 252.10 10.08
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
get means and se
group_by(CTD_table1, Location) %>%
summarise(
n = n(),
mean = mean(Fmean, na.rm = TRUE),
sd = sd(Fmean, na.rm = TRUE),
se = sd / sqrt(n)
)
## # A tibble: 2 × 5
## Location n mean sd se
## <chr> <int> <dbl> <dbl> <dbl>
## 1 IN 18 8.87 2.63 0.619
## 2 OS 9 5.48 4.10 1.37
Import data
Nutrients1 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Nutrients/jk2101.csv")
Nutrients2 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Nutrients/jk2102.csv")
reformat dataframes
#remove header rows
Nutrients1<-Nutrients1[-(1:19),]
Nutrients2<-Nutrients2[-(1:19),]
#convert first row into column names
Nutrients1<-Nutrients1 %>%
row_to_names(row_number = 1)
Nutrients2<-Nutrients2 %>%
row_to_names(row_number = 1)
#remove unnecessary rows and columns
Nutrients1<-Nutrients1[-(1:4),]
Nutrients2<-Nutrients2[-(1:4),]
Nutrients1<-Nutrients1[-(85:88),]
Nutrients2<-Nutrients2[-(57:73),]
Nutrients1 <- Nutrients1[ -c(1,3:5) ]
Nutrients2 <- Nutrients2[ -c(1,3:5) ]
#remove rows with empty cells
Nutrients1<-Nutrients1[!apply(Nutrients1 == "", 1, all), ]
Nutrients2<-Nutrients2[!apply(Nutrients2 == "", 1, all), ]
#combine two dataframes
Nutrients<-rbind(Nutrients1, Nutrients2)
add station info
Nutrient_stations <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Nutrients/Nutrients_station-match.csv")
Nutrients<-merge(Nutrient_stations, Nutrients, by.x = "Bottle..",by.y = "Bottle#")
summarize by station
#add depth category
Nutrients$Depth_category<-factor(ifelse(Nutrients$Depth<5,"shallow","deep"))
#subset to shallow depths
Nutrients_shallow<- subset(Nutrients, Depth_category =="shallow")
#remove replicates
Nutrients_shallow<- subset(Nutrients_shallow, Notes !="replicate 2")
recode station names
# combine station and date for unique stations
Nutrients_shallow$Station_Date <- paste(Nutrients_shallow$Station, Nutrients_shallow$Date)
unique(Nutrients_shallow$Station_Date)
## [1] "Budd 2 8/27/20" "Budd 3 8/27/20" "Budd 3S 8/27/20"
## [4] "Eld 4S 8/28/20" "Budd 4S 8/27/20" "Budd 1S 8/27/20"
## [7] "Eld 2S 8/28/20" "Eld 1S 8/28/20" "Budd 2S 8/27/20"
## [10] "Eld 3S 8/28/20" "SC 5 8/25/21" "SC 8 8/25/21"
## [13] "SC 3 8/24/21" "SC 7 8/25/21" "SC 6 8/25/21"
## [16] "SC 12 8/26/21" "SC 17 8/27/21" "SC 2 8/24/21"
## [19] "SC 10 8/26/21" "SC 1S 7/27/21" "SC 3S 7/27/21"
## [22] "SC 6S 7/27/21" "Eld 1S 7/28/21" "Eld 2S 7/28/21"
## [25] "Qmaster 1S 7/29/21" "Qmaster 3S 7/29/21" "Qmaster 5S 7/29/21"
## [28] "Qmaster 1 8/22/21" "Qmaster 2 8/22/21" "Qmaster 3 8/22/21"
## [31] "Qmaster 4 8/23/21" "Qmaster 5 8/23/21" "Qmaster 6 8/23/21"
## [34] "Qmaster 9 8/23/21" "Qmaster 10 8/23/21" "Qmaster 11 8/24/21"
## [37] "Qmaster 12 8/24/21" "SC 4 8/25/21" "Budd 4 8/27/20"
## [40] "Budd 5 8/27/20" "Budd 7 8/27/20" "Eld 1 8/28/20"
## [43] "Eld 2 8/28/20" "Eld 3 8/28/20" "Eld 4 8/28/20"
## [46] "Eld 5 8/28/20" "Qmaster 1 8/28/20" "Qmaster 2 8/28/20"
## [49] "Qmaster 3 8/29/20" "Qmaster 4 8/29/20" "Qmaster 5 8/29/20"
## [52] "Qmaster 7 8/29/20" "Qmaster 8 8/29/20"
Nutrients_shallow$Station_Date <-recode(Nutrients_shallow$Station_Date,
'Budd 2 8/27/20'='BUDD2a',
'Budd 3 8/27/20'='BUDD3',
'Budd 3S 8/27/20'='BUDD3s',
'Eld 4S 8/28/20'='Eld4s',
'Budd 4S 8/27/20'='BUDD4s',
'Budd 1S 8/27/20'='BUDD1s',
'Eld 2S 8/28/20'='Eld2s',
'Eld 1S 8/28/20'='Eld1s',
'Budd 2S 8/27/20'='BUDD2s_2020',
'Eld 3S 8/28/20'='Eld3s',
'SC 5 8/25/21'='SC5c',
'SC 8 8/25/21'='SC8',
'SC 3 8/24/21'='SC3c',
'SC 7 8/25/21'='SC7',
'SC 6 8/25/21'='SC6c',
'SC 12 8/26/21'='SC12',
'SC 17 8/27/21'='SC17',
'SC 2 8/24/21'='SC2c',
'SC 10 8/26/21'='SC10',
'SC 1S 7/27/21'='SC1a',
'SC 3S 7/27/21'='SC3a',
'SC 6S 7/27/21'='SC6a',
'Eld 1S 7/28/21'='Eld1b',
'Eld 2S 7/28/21'='Eld2b',
'Qmaster 1S 7/29/21'='QM1b',
'Qmaster 3S 7/29/21'='QM3b',
'Qmaster 5S 7/29/21'='QM5b',
'Qmaster 1 8/22/21'='QM1c',
'Qmaster 2 8/22/21'='QM2c',
'Qmaster 3 8/22/21'='QM3c',
'Qmaster 4 8/23/21'='QM4c',
'Qmaster 5 8/23/21'='QM5c',
'Qmaster 6 8/23/21'='QM6c',
'Qmaster 9 8/23/21'='QM9',
'Qmaster 10 8/23/21'='QM10',
'Qmaster 11 8/24/21'='QM11',
'Qmaster 12 8/24/21'='QM12',
'SC 4 8/25/21'='SC4c',
'Budd 4 8/27/20'='BUDD4',
'Budd 5 8/27/20'='BUDD5',
'Budd 7 8/27/20'='BUDD7',
'Eld 1 8/28/20'='Eld1',
'Eld 2 8/28/20'='Eld2',
'Eld 3 8/28/20'='Eld3',
'Eld 4 8/28/20'='Eld4',
'Eld 5 8/28/20'='Eld5',
'Qmaster 1 8/28/20'='QM1a',
'Qmaster 2 8/28/20'='QM2a',
'Qmaster 3 8/29/20'='QM3a',
'Qmaster 4 8/29/20'='QM4a',
'Qmaster 5 8/29/20'='QM5a',
'Qmaster 7 8/29/20'='QM7a',
'Qmaster 8 8/29/20'='QM8a')
cut down columns and rename
Nutrients_shallow <- Nutrients_shallow[ -c(1:6,12) ]
colnames(Nutrients_shallow)
## [1] "[ PO4 ]" "[ Si(OH)4 ]" "[ NO3 ]" "[ NO2 ]" "[ NH4 ]"
## [6] "Station_Date"
Nutrients_shallow <- Nutrients_shallow %>%
rename("PO4" = "[ PO4 ]",
"SiOH4" = "[ Si(OH)4 ]",
"NO3" = "[ NO3 ]",
"NO2" = "[ NO2 ]",
"NH4" = "[ NH4 ]",
"Station" = "Station_Date")
CTD_Nutrients <- merge(CTD_Summary,Nutrients_shallow,by.x = "Station", all.x = TRUE)
#keep only stations with CTD and Nutrient values
CTD_Nutrients_full <- na.omit(CTD_Nutrients)
save environmental data
write.csv(CTD_Nutrients_full,"/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_Environmental_Data.csv", row.names = FALSE)