This code calculates the means for environmental data collected from the CTD and creates Table I in the manuscript. The original CTD files were converted from original .hex format using SeaBird software outside of R. They were then trimmed individually and the small boat file fluorescence values were calibrated using this code: https://rpubs.com/HailaSchultz/field_CTD-profiles

This code also provides a correlation between salinity and temperature in sinclair inlet and quartermaster harbor, as a justification for excluding salinity from generalized models.

This code also conducts an ANOVA of fluorescence inside and outside of aggeregations

The final purpose of this code is to add nutrient data to the environmental spreadsheet, primarily for use in NMDS plots.

Report is here: https://rpubs.com/HailaSchultz/field_CTD-table

load packages

library(tidyverse)
library(dplyr)
library(ggpubr)
library(janitor)

CTD

import data

list of station names from database and calculate jelly density

Database <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Final_Aurelia_Database_Jan11_2023.csv")

#subset
FieldData<-subset(Database, Trial.Type=="Field")
#remove commas from jelly density
FieldData$Jelly.Density <- as.numeric(gsub(",","",FieldData$Jelly.Density..g.m3.))


Stations<-FieldData %>%
  group_by(Site,Location,Station,Sample.Date,Sample.Year,Latitude, Longitude)%>%
  summarise(jelly_biomass=mean(Jelly.Density))

import CTD downcasts into one file

#set working directory to folder with CTD files
setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/CTD/CTD_Downcasts-edited")

CTDCombined <-
    list.files(pattern = "*.csv") %>% 
    map_df(~read_csv(.))

Calculate means per station

salinity

loop through stations: interpolate to equal intervals and take mean

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$salinity, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Smean = mean(y), 
      Smin = min(y),
      Smax = max(y),
      Ssd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

salinity <- bind_rows(results_list)

Temperature

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Temp, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Tmean = mean(y), 
      Tmin = min(y),
      Tmax = max(y),
      Tsd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

Temp <- bind_rows(results_list)

Oxygen

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$Oxygen, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Omean = mean(y), 
      Omin = min(y),
      Omax = max(y),
      Osd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

Oxygen <- bind_rows(results_list)

pH

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$pH, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      pHmean = mean(y), 
      pHmin = min(y),
      pHmax = max(y),
      pHsd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

pH <- bind_rows(results_list)

fluorescence

results_list <- list()
unique_factors <-unique(CTDCombined$Station)
for (factor_value in unique_factors) {
  subset_data <- CTDCombined[CTDCombined$Station == factor_value, ]
   table <- as.data.frame(approx(x = subset_data$Depth, y = subset_data$fluorescence, method = "linear", n = 100))
    summarize <- table %>%
    summarise(
      FactorValue = factor_value,
      Fmean = mean(y), 
      Fmin = min(y),
      Fmax = max(y),
      Fsd = sd(y)
    )
    results_list[[factor_value]] <- summarize
}

fluorescence <- bind_rows(results_list)

combine dataframes

Env<-merge(salinity, Oxygen,by=c("FactorValue"))
Env1<-merge(Env,Temp,by=c("FactorValue"),all.x = TRUE)
Env2<-merge(Env1,pH,by=c("FactorValue"))
Env3<-merge(Env2, fluorescence,by=c("FactorValue"))
#rename station column
Env3 <- Env3 %>% 
       rename("Station" = "FactorValue")

get station names to see if they match up

unique(Stations$Station)
##  [1] "BUDD1"  "BUDD3a" "BUDD4a" "BUDD7"  "BUDD1a" "BUDD1s" "BUDD2a" "BUDD2s"
##  [9] "BUDD3"  "BUDD3s" "BUDD4"  "BUDD4s" "BUDD5"  "BUDD6"  "Eld1"   "Eld1b" 
## [17] "Eld1s"  "Eld2"   "Eld2b"  "Eld2s"  "Eld3b"  "Eld3s"  "Eld4"   "Eld4s" 
## [25] "QM1"    "QM1a"   "QM1b"   "QM1c"   "QM2"    "QM2a"   "QM2b"   "QM2c"  
## [33] "QM3c"   "QM4a"   "QM5a"   "QM5c"   "QM6c"   "QM12"   "QM5"    "QM5b"  
## [41] "QM6"    "QM6b"   "QM8a"   "QM10"   "QM11"   "QM3"    "QM3a"   "QM3b"  
## [49] "QM4"    "QM4b"   "QM4c"   "QM7a"   "QM9"    "SC1"    "SC10"   "SC11"  
## [57] "SC14"   "SC16"   "SC1a"   "SC2"    "SC2a"   "SC2c"   "SC3c"   "SC5c"  
## [65] "SC5d"   "SC6c"   "SC7"    "SC5"    "SC5a"   "SC6"    "SC6a"   "SC8"   
## [73] "SC12"   "SC17"   "SC3"    "SC3a"   "SC4"    "SC4a"   "SC4c"
unique(Env3$Station)
##  [1] "2020_BUDD2s" "BUDD1"       "BUDD1s"      "BUDD2a"      "BUDD3"      
##  [6] "BUDD3s"      "BUDD4"       "BUDD4s"      "BUDD5"       "BUDD6"      
## [11] "BUDD7"       "Eld1"        "Eld1s"       "Eld2"        "Eld2s"      
## [16] "Eld3"        "Eld3s"       "Eld4"        "Eld4s"       "Eld5"       
## [21] "Gig1"        "Gig1a"       "Gig2"        "Gig2a"       "Gig3"       
## [26] "Gig4"        "QM10"        "QM11"        "QM12"        "QM1a"       
## [31] "QM1c"        "QM2a"        "QM2c"        "QM3a"        "QM3c"       
## [36] "QM4a"        "QM4c"        "QM5a"        "QM5c"        "QM6c"       
## [41] "QM7a"        "QM8_8-23-21" "QM8a"        "QM9"         "SC10"       
## [46] "SC11"        "SC12"        "SC14"        "SC16"        "SC17"       
## [51] "SC2c"        "SC3c"        "SC4c"        "SC5a"        "SC5c"       
## [56] "SC6c"        "SC7"         "SC8"

Rename stations so the match between spreadsheets

#deal with duplicate station names
Stations$Station[9]="BUDD2s_2020"
Stations$Station[26]="QM1s"
Stations$Station[31]="QM2s"
Stations$Station[42]="QM5s"
Stations$Station[45]="QM6s"
Stations$Station[51]="QM3s"
Stations$Station[55]="QM4s"

recode to match database names

Env3$Station <-recode(Env3$Station, '2020_BUDD2s'='BUDD2s_2020', 'Budd1a'='BUDD1a', 'Budd2s'='BUDD2s','Budd3a'='BUDD3a','Budd4a'='BUDD4a')

add station info and jelly biomass

CTD_Summary <- merge(Stations,Env3,by.x = "Station", all.x = TRUE)

remove stations that didn’t get full water column profile due to low battery or some other reason

CTD_Summary_Sub<-subset(CTD_Summary, ! Station %in% c("BUDD1a","BUDD2s","BUDD3a","BUDD4a","Eld1b","Eld2b","Eld3b","QM1b","QM2b","QM3b","QM4b","QM5b","QM6b"))

note that July 2021 sinclair inlet are also missing due - I can’t remember exactly why, but correigh never added them to the folder

remove rows with missing CTD data

CTD_Summary_Sub <- na.omit(CTD_Summary_Sub)

format for table 1

Subset to QM and SC for paper table

CTD_SCQM<-subset(CTD_Summary_Sub, Site %in% c("QM","SC"))
#remove OB
CTD_SCQM<-subset(CTD_SCQM, Location!="OB")

Keep only means for each variable

colnames(CTD_SCQM)
##  [1] "Station"       "Site"          "Location"      "Sample.Date"  
##  [5] "Sample.Year"   "Latitude"      "Longitude"     "jelly_biomass"
##  [9] "Smean"         "Smin"          "Smax"          "Ssd"          
## [13] "Omean"         "Omin"          "Omax"          "Osd"          
## [17] "Tmean"         "Tmin"          "Tmax"          "Tsd"          
## [21] "pHmean"        "pHmin"         "pHmax"         "pHsd"         
## [25] "Fmean"         "Fmin"          "Fmax"          "Fsd"
CTD_table1 = subset(CTD_SCQM, select = c(Site, Sample.Year,Station,Location,Latitude,Longitude,jelly_biomass,Smean,Omean,Tmean,pHmean,Fmean))

round to 2 decimal points

CTD_table1[, -c(1:5)]<-round(CTD_table1[, -c(1:5)],2)

remove station column

CTD_table1 <- CTD_table1[,-3]

sort by inlet, year, and jelly biomass

CTD_table1<-arrange(CTD_table1, Site, Location, Sample.Year,jelly_biomass)
CTD_table1
##    Site Sample.Year Location Latitude Longitude jelly_biomass Smean Omean Tmean
## 1    QM        2020       IN 47.37897   -122.45         35.65 29.71  4.26 15.26
## 2    QM        2020       IN 47.37268   -122.47         42.79 29.78  9.53 14.21
## 3    QM        2020       IN 47.38730   -122.44        584.74 29.72  9.61 14.89
## 4    QM        2020       IN 47.37792   -122.46        749.91 29.71  9.70 14.65
## 5    QM        2021       IN 47.38870   -122.88         38.08 29.73  7.89 14.89
## 6    QM        2021       IN 47.37800   -122.45         51.43 29.77  8.58 14.67
## 7    QM        2021       IN 47.38750   -122.46        156.51 29.68  8.32 15.42
## 8    QM        2021       IN 47.38370   -122.46        218.75 29.76  8.25 14.71
## 9    QM        2021       IN 47.38890   -122.44        251.65 29.71  7.16 14.98
## 10   QM        2020       OS 47.37687   -122.46          0.00 29.69 10.93 15.24
## 11   QM        2020       OS 47.36708   -122.47          0.00 29.69  3.54 15.10
## 12   QM        2021       OS 47.36390   -122.48          0.00 29.93  6.91 13.57
## 13   QM        2021       OS 47.37780   -122.46          0.00 29.83  7.24 14.11
## 14   QM        2021       OS 47.37540   -122.47          0.00 29.90  6.83 13.71
## 15   QM        2021       OS 47.37680   -122.46         40.90 29.83  8.16 14.30
## 16   SC        2021       IN 47.50420   -122.69         48.03 29.78  7.12 15.04
## 17   SC        2021       IN 47.54380   -122.66         67.64 29.75 10.07 15.49
## 18   SC        2021       IN 47.54450   -122.64         82.52 29.74  9.25 15.41
## 19   SC        2021       IN 47.54190   -122.66        429.60 29.77  9.10 15.28
## 20   SC        2021       IN 47.54170   -122.67        534.71 29.81  5.68 14.75
## 21   SC        2021       IN 47.54440   -122.67        536.49 29.81  6.73 14.82
## 22   SC        2021       IN 47.54130   -122.65        652.29 29.85  6.91 14.76
## 23   SC        2021       IN 47.54280   -122.66        735.44 29.78  5.48 14.79
## 24   SC        2021       IN 47.54290   -122.67       1382.86 29.76  9.40 15.37
## 25   SC        2021       OS 47.56480   -122.95          0.00 29.90  7.25 14.63
## 26   SC        2021       OS 47.57470   -122.58          0.00 29.96  6.76 14.18
## 27   SC        2021       OS 47.55200   -122.61          0.00 29.78  7.67 14.97
##    pHmean Fmean
## 1    8.68 10.34
## 2    8.54  9.99
## 3    8.51  9.55
## 4    8.57 12.08
## 5    8.68  6.99
## 6    8.71  8.90
## 7    8.79  8.04
## 8    8.73  7.03
## 9    8.63  5.93
## 10   8.69 10.43
## 11   8.66 12.96
## 12   8.58  3.81
## 13   8.64  7.02
## 14   8.54  3.05
## 15   8.68  6.73
## 16   8.63 13.44
## 17   8.81 14.10
## 18   8.75  8.16
## 19   8.72 10.68
## 20   8.49  7.66
## 21   8.59  4.67
## 22   8.59  5.31
## 23   8.47  7.23
## 24   8.75  9.51
## 25   8.62  1.79
## 26   8.59  1.45
## 27   8.69  2.04

Save table as csv

setwd("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output")
write.csv(CTD_table1,"/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_CTD_Data.csv", row.names = FALSE)

Salinity/temp correlation

plot

ggscatter(CTD_table1, x = "Tmean", y = "Smean", 
          add = "reg.line", conf.int = TRUE, 
          cor.coef = TRUE, cor.method = "pearson",
          xlab = "Mean Temperature", ylab = "Mean Salinity")

## Normality checks

shapiro.test(CTD_table1$Tmean)
## 
##  Shapiro-Wilk normality test
## 
## data:  CTD_table1$Tmean
## W = 0.936, p-value = 0.09708
shapiro.test(CTD_table1$Smean) 
## 
##  Shapiro-Wilk normality test
## 
## data:  CTD_table1$Smean
## W = 0.93028, p-value = 0.07019
ggqqplot(CTD_table1$Tmean)

ggqqplot(CTD_table1$Smean)

The Temperature distribution is on the verge, but I’m going to take it as okay qqplots look okay

correlation

cor.test(CTD_table1$Tmean, CTD_table1$Smean, 
                    method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  CTD_table1$Tmean and CTD_table1$Smean
## t = -5.7049, df = 25, p-value = 6.093e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.8804378 -0.5208896
## sample estimates:
##        cor 
## -0.7520406
#rsquared
(-0.756)^2
## [1] 0.571536

Fluorescence comparison

visualize data

ggboxplot(CTD_table1, x = "Location", y = "Fmean",
          xlab = "Location", ylab = "Mean Fluorescence")

run ANOVA

# Compute the analysis of variance
F.aov <- aov(Fmean ~ Location, data = CTD_table1)
# Summary of the analysis
summary(F.aov)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Location     1  69.02   69.02   6.845 0.0149 *
## Residuals   25 252.10   10.08                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

get means and se

group_by(CTD_table1, Location) %>%
  summarise(
    n = n(),
    mean = mean(Fmean, na.rm = TRUE),
    sd = sd(Fmean, na.rm = TRUE),
    se = sd / sqrt(n)
  )
## # A tibble: 2 × 5
##   Location     n  mean    sd    se
##   <chr>    <int> <dbl> <dbl> <dbl>
## 1 IN          18  8.87  2.63 0.619
## 2 OS           9  5.48  4.10 1.37

Nutrients

prepare data

Import data

Nutrients1 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Nutrients/jk2101.csv")
Nutrients2 <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Nutrients/jk2102.csv")

reformat dataframes

#remove header rows
Nutrients1<-Nutrients1[-(1:19),]
Nutrients2<-Nutrients2[-(1:19),]

#convert first row into column names
Nutrients1<-Nutrients1 %>%
  row_to_names(row_number = 1)
Nutrients2<-Nutrients2 %>%
  row_to_names(row_number = 1)

#remove unnecessary rows and columns
Nutrients1<-Nutrients1[-(1:4),]
Nutrients2<-Nutrients2[-(1:4),]

Nutrients1<-Nutrients1[-(85:88),]
Nutrients2<-Nutrients2[-(57:73),]

Nutrients1 <- Nutrients1[ -c(1,3:5) ]
Nutrients2 <- Nutrients2[ -c(1,3:5) ]

#remove rows with empty cells
Nutrients1<-Nutrients1[!apply(Nutrients1 == "", 1, all), ] 
Nutrients2<-Nutrients2[!apply(Nutrients2 == "", 1, all), ]

#combine two dataframes
Nutrients<-rbind(Nutrients1, Nutrients2)

add station info

Nutrient_stations <- read.csv("/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/data/current_data/Nutrients/Nutrients_station-match.csv")

Nutrients<-merge(Nutrient_stations, Nutrients, by.x = "Bottle..",by.y = "Bottle#")

summarize by station

#add depth category
Nutrients$Depth_category<-factor(ifelse(Nutrients$Depth<5,"shallow","deep"))

#subset to shallow depths
Nutrients_shallow<- subset(Nutrients, Depth_category =="shallow")

#remove replicates
Nutrients_shallow<- subset(Nutrients_shallow, Notes !="replicate 2")

recode station names

# combine station and date for unique stations
Nutrients_shallow$Station_Date <- paste(Nutrients_shallow$Station, Nutrients_shallow$Date)
unique(Nutrients_shallow$Station_Date)
##  [1] "Budd 2 8/27/20"     "Budd 3 8/27/20"     "Budd 3S 8/27/20"   
##  [4] "Eld 4S 8/28/20"     "Budd 4S 8/27/20"    "Budd 1S 8/27/20"   
##  [7] "Eld 2S 8/28/20"     "Eld 1S 8/28/20"     "Budd 2S 8/27/20"   
## [10] "Eld 3S 8/28/20"     "SC 5 8/25/21"       "SC 8 8/25/21"      
## [13] "SC 3 8/24/21"       "SC 7 8/25/21"       "SC 6 8/25/21"      
## [16] "SC 12 8/26/21"      "SC 17 8/27/21"      "SC 2 8/24/21"      
## [19] "SC 10 8/26/21"      "SC 1S 7/27/21"      "SC 3S 7/27/21"     
## [22] "SC 6S 7/27/21"      "Eld 1S 7/28/21"     "Eld 2S 7/28/21"    
## [25] "Qmaster 1S 7/29/21" "Qmaster 3S 7/29/21" "Qmaster 5S 7/29/21"
## [28] "Qmaster 1 8/22/21"  "Qmaster 2 8/22/21"  "Qmaster 3 8/22/21" 
## [31] "Qmaster 4 8/23/21"  "Qmaster 5 8/23/21"  "Qmaster 6 8/23/21" 
## [34] "Qmaster 9 8/23/21"  "Qmaster 10 8/23/21" "Qmaster 11 8/24/21"
## [37] "Qmaster 12 8/24/21" "SC 4 8/25/21"       "Budd 4 8/27/20"    
## [40] "Budd 5 8/27/20"     "Budd 7 8/27/20"     "Eld 1 8/28/20"     
## [43] "Eld 2 8/28/20"      "Eld 3 8/28/20"      "Eld 4 8/28/20"     
## [46] "Eld 5 8/28/20"      "Qmaster 1 8/28/20"  "Qmaster 2 8/28/20" 
## [49] "Qmaster 3 8/29/20"  "Qmaster 4 8/29/20"  "Qmaster 5 8/29/20" 
## [52] "Qmaster 7 8/29/20"  "Qmaster 8 8/29/20"
Nutrients_shallow$Station_Date <-recode(Nutrients_shallow$Station_Date, 
                                        'Budd 2 8/27/20'='BUDD2a', 
                                        'Budd 3 8/27/20'='BUDD3',
                                        'Budd 3S 8/27/20'='BUDD3s',
                                        'Eld 4S 8/28/20'='Eld4s',
                                        'Budd 4S 8/27/20'='BUDD4s',
                                        'Budd 1S 8/27/20'='BUDD1s',
                                        'Eld 2S 8/28/20'='Eld2s',
                                        'Eld 1S 8/28/20'='Eld1s',
                                        'Budd 2S 8/27/20'='BUDD2s_2020',
                                        'Eld 3S 8/28/20'='Eld3s',
                                        'SC 5 8/25/21'='SC5c',
                                        'SC 8 8/25/21'='SC8',
                                        'SC 3 8/24/21'='SC3c',
                                        'SC 7 8/25/21'='SC7',
                                        'SC 6 8/25/21'='SC6c',
                                        'SC 12 8/26/21'='SC12',
                                        'SC 17 8/27/21'='SC17',
                                        'SC 2 8/24/21'='SC2c',
                                        'SC 10 8/26/21'='SC10',
                                        'SC 1S 7/27/21'='SC1a',
                                        'SC 3S 7/27/21'='SC3a',
                                        'SC 6S 7/27/21'='SC6a',
                                        'Eld 1S 7/28/21'='Eld1b',
                                        'Eld 2S 7/28/21'='Eld2b',
                                        'Qmaster 1S 7/29/21'='QM1b',
                                        'Qmaster 3S 7/29/21'='QM3b',
                                        'Qmaster 5S 7/29/21'='QM5b',
                                        'Qmaster 1 8/22/21'='QM1c',
                                        'Qmaster 2 8/22/21'='QM2c',
                                        'Qmaster 3 8/22/21'='QM3c',
                                        'Qmaster 4 8/23/21'='QM4c',
                                        'Qmaster 5 8/23/21'='QM5c',
                                        'Qmaster 6 8/23/21'='QM6c',
                                        'Qmaster 9 8/23/21'='QM9',
                                        'Qmaster 10 8/23/21'='QM10',
                                        'Qmaster 11 8/24/21'='QM11',
                                        'Qmaster 12 8/24/21'='QM12',
                                        'SC 4 8/25/21'='SC4c',
                                        'Budd 4 8/27/20'='BUDD4',
                                        'Budd 5 8/27/20'='BUDD5',
                                        'Budd 7 8/27/20'='BUDD7',
                                        'Eld 1 8/28/20'='Eld1',
                                        'Eld 2 8/28/20'='Eld2',
                                        'Eld 3 8/28/20'='Eld3',
                                        'Eld 4 8/28/20'='Eld4',
                                        'Eld 5 8/28/20'='Eld5',
                                        'Qmaster 1 8/28/20'='QM1a',
                                        'Qmaster 2 8/28/20'='QM2a',
                                        'Qmaster 3 8/29/20'='QM3a',
                                        'Qmaster 4 8/29/20'='QM4a',
                                        'Qmaster 5 8/29/20'='QM5a',
                                        'Qmaster 7 8/29/20'='QM7a',
                                        'Qmaster 8 8/29/20'='QM8a')

cut down columns and rename

Nutrients_shallow <- Nutrients_shallow[ -c(1:6,12) ]
colnames(Nutrients_shallow)
## [1] "[ PO4 ]"      "[ Si(OH)4 ]"  "[ NO3 ]"      "[ NO2 ]"      "[ NH4 ]"     
## [6] "Station_Date"
Nutrients_shallow <- Nutrients_shallow %>% 
                    rename("PO4" = "[ PO4 ]",
                           "SiOH4" = "[ Si(OH)4 ]",
                           "NO3" = "[ NO3 ]",
                           "NO2" = "[ NO2 ]",
                           "NH4" = "[ NH4 ]",
                           "Station" = "Station_Date")

merge with CTD

CTD_Nutrients <- merge(CTD_Summary,Nutrients_shallow,by.x = "Station", all.x = TRUE)

#keep only stations with CTD and Nutrient values
CTD_Nutrients_full <- na.omit(CTD_Nutrients)

save environmental data

write.csv(CTD_Nutrients_full,"/Users/hailaschultz/Dropbox/Other studies/Aurelia project/Data Analysis/output/Field_Environmental_Data.csv", row.names = FALSE)