Run Correlations bw MLD and biogeochem variables

Goal for this R code: Compile a data frame with the MLD (based on delta rho of 0.01) that Craig and Nick Calculated (9.10.2019) with the KM dataset.

Things to note: There are repeated stations (often for a bunch of 10m water) and I’m using the following to use the cast with the biogeochem suite of samples:

SS212 - use C02 SS216 - use C01 SS228 - use C01

(I’ve added ‘B’ and ‘C’ labels in the KM df to the casts that I am not using so that it is clear which cast was used)

Brief outline of code:

  1. Load data sets and wrangle data
  2. Run Model II Regression using RMA

Set working directory:

setwd("~/Desktop/km_working_folder/2019")

1. Load dataset & Wrangle:

Load dfs

First, load the MLD dataset from Craig and Nick:

mld.p01 <- read_csv(file="KM_mlds_estimates_CC_2019_R.csv")
## Parsed with column specification:
## cols(
##   STATION = col_character(),
##   cast = col_double(),
##   stat_numb_2019 = col_double(),
##   `matlab serial time` = col_double(),
##   Longitude = col_double(),
##   Latitude = col_double(),
##   mld_n2_boyancy_freq = col_double(),
##   `b_avg_mld_n2_boyancy _freq` = col_double(),
##   mld_p01_kgm3_from2pt5db = col_double(),
##   b_ave_mld_p01_kgm3_from2pt5db = col_double(),
##   MLD_MLD_p02_kgm3_from2pt5db = col_double(),
##   b_ave_MLD_MLD_p02_kgm3_from2pt5db = col_double()
## )

Next, load the KM biogeochem data set:

biogeochem.data <- read_csv(file="KM14-16_biogeochem_data.v4.FINAL.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   CRUISE = col_character(),
##   STATION = col_character(),
##   TYPE = col_character(),
##   `yyyy-mm-ddThh:mm` = col_character(),
##   `DATE LOCAL` = col_character(),
##   `TIME LOCAL` = col_time(format = ""),
##   `fluor [ug/L]` = col_logical(),
##   `NTURT [upoly]` = col_logical(),
##   turbidity_NTU = col_logical(),
##   VO_trans_volts = col_logical(),
##   V3_turbidity_volts = col_logical(),
##   `Silicate [uM]` = col_character(),
##   `CALC BP:PP` = col_character(),
##   `CALC BACT HNA:TOTAL [cells/mL]` = col_character(),
##   mag_wind_stress_24hr = col_logical(),
##   LOCATION = col_character(),
##   near_bw_other = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 425 parsing failures.
##  row       col               expected actual                                   file
## 1200 STATION # no trailing characters      B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1201 STATION # no trailing characters      B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1202 STATION # no trailing characters      B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1203 STATION # no trailing characters      B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1204 STATION # no trailing characters      B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## .... ......... ...................... ...... ......................................
## See problems(...) for more details.
# Remove -999 and set to Nan:
# biogeochem.data <- as.data.frame(na_if(biogeochem.data, -999))

Merge dfs

Now, merge the 2 dfs by station

mld.biogeochem <- merge(mld.p01, biogeochem.data, by = "STATION")

Wrangle Data

Add conversions

Need to calculate conversions of interest and add to merged df:

# First, calculate conversions
# NPP from mgC_m-3_d-1 to µmolC_L-1_d-1
mld.biogeochem$NPP_umolC_L_d <- with(mld.biogeochem, Primary_Production_mg_m3_day / 12.011)

# BP from pmol_L-1_hr-1 to umolC_L-1_d-1
mld.biogeochem$BP_umolC_L_d <- with(mld.biogeochem, (BP_pmol_L_hr * 1.5 * 24)/(12.011*1000))

# BC from yield corrected bacterial abundance (cells/mL)
mld.biogeochem$BC_umolC_L <- with(mld.biogeochem, (YIELD_CORR_BACT_cells_mL * 10/(1000000*12.011)))

# Next, calculate log(10) transformations of NPP, Chl, BP, BC, POC, and DOC
mld.biogeochem$NPP_umolC_L_d_log10 <- with(mld.biogeochem,log10(NPP_umolC_L_d))
mld.biogeochem$Chla_ug_L_log10 <- with(mld.biogeochem,log10(Chla_ug_L))
mld.biogeochem$BC_umolC_L_log10 <- with(mld.biogeochem,log10(BC_umolC_L))
mld.biogeochem$BP_umolC_L_d_log10 <- with(mld.biogeochem,log10(BP_umolC_L_d))
## Warning in eval(substitute(expr), data, enclos = parent.frame()): NaNs
## produced
mld.biogeochem$POC_umol_L_log10 <- with(mld.biogeochem,log10(POC_umol_L))
mld.biogeochem$DOC_uM_C_log10 <- with(mld.biogeochem,log10(DOC_uM_C))

Add depth-bin ids

Add depth bin identifiers in a column called depth_bin to merged df:

depth_bin IDs: upper_euphotic is 0-75m, lower_euphotic is 75-150m, and upper_mesopelagic is 150-300m

Note, here: I’ve gone back-and-forth regarding how to id specific depth bins to avoid using the same data for multiple depth bins, but also to include the data bw our identified depth bins (e.g. if you say <75 m, essentially this excludes depths between 50 and 75 bc 50 is the closest value less than 75). BUT: doesn’t really matter bc the correlations I will run will be in the upper euphotic and upper mesopelagic, and I’ve sent these to <= to include as much data as possible.

mld.biogeochem <- mld.biogeochem %>%
  mutate(depth_bin = case_when(NOM_DEPTH_m <= 75 ~ "upper_euphotic",
                             NOM_DEPTH_m > 75 & NOM_DEPTH_m < 150 ~ "lower_euphotic",
                             NOM_DEPTH_m >= 150 & NOM_DEPTH_m <= 300 ~ "upper_mesopelagic"))

Calculate means and sds

Calculate means and sds by STATION and depth_bin:

means.sd <- as.data.frame(mld.biogeochem %>% 
                               group_by(STATION, depth_bin) %>% 
                               summarise_at(vars("NPP_umolC_L_d", "Chla_ug_L", "BC_umolC_L", "BP_umolC_L_d", "POC_umol_L", "DOC_uM_C", "NPP_umolC_L_d_log10", "Chla_ug_L_log10", "BC_umolC_L_log10", "BP_umolC_L_d_log10", "POC_umol_L_log10", "DOC_uM_C_log10", "mld_p01_kgm3_from2pt5db"), funs(mean, sd), na.rm=TRUE))

Remove NA rows from depth_bin:

means.sd <- filter(means.sd, depth_bin != "NA")

Write to csv and save to working directory:

write.csv(means.sd, "means.sd.mld.p01.biogeochem.9.2019.csv") # write results to csv and save in working directory

2. Run Correlations

Do this using Model II Regression code developed by Legendre 2018 (https://cran.r-project.org/web/packages/lmodel2/lmodel2.pdf)

lmodel2(formula, data = NULL, range.y=NULL, range.x=NULL, nperm=0) HAVE TO REMOVE INF VALUES

control shift m for %>%

Upper Euphotic (0 to 75m) Correlations

To get at the question: do pulses of key biogeochem variables in the upper euphotic zone correlate with MLD at those stations, compare log(10) transformed npp, chl, bc, bp, poc, and doc to log(10) tranformed mlds.

Isolate upper_euphotic means

upper.euphotic <- filter(means.sd, depth_bin == "upper_euphotic")

Make -Inf values NA

upper.euphotic[ upper.euphotic == "-Inf" ] <- NA

Run regression model

NPP vs MLD:

lmodel2(NPP_umolC_L_d_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
## 
## Model II regression
## 
## Call: lmodel2(formula = NPP_umolC_L_d_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
## 
## n = 35   r = -0.500935   r-square = 0.2509359 
## Parametric P-values:   2-tailed = 0.002175979    1-tailed = 0.001087989 
## Angle between the two OLS regression lines = 0.7053418 degrees
## 
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
## 
## Regression results
##   Method  Intercept        Slope Angle (degrees) P-perm (1-tailed)
## 1    OLS -0.6626691 -0.004124509      -0.2363156              0.03
## 2     MA -0.6626625 -0.004124718      -0.2363276              0.03
## 3    SMA -0.5333025 -0.008233620      -0.4717410                NA
## 4    RMA -0.5985303 -0.006161768      -0.3530388              0.03
## 
## Confidence intervals
##   Method 2.5%-Intercept 97.5%-Intercept   2.5%-Slope  97.5%-Slope
## 1    OLS     -0.7659117      -0.5594264 -0.006648305 -0.001600712
## 2     MA     -0.7421223      -0.5832010 -0.006648677 -0.001600812
## 3    SMA     -0.6008546      -0.4419419 -0.011135535 -0.006087942
## 4    RMA     -0.7058310      -0.4549259 -0.010723121 -0.002753543
## 
## Eigenvalues: 702.3464 0.03566458 
## 
## H statistic used for computing C.I. of MA: 6.369978e-06
# RMA p-perm (1-tailed): 0.01

Chl vs. MLD:

lmodel2(Chla_ug_L_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
## 
## Model II regression
## 
## Call: lmodel2(formula = Chla_ug_L_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
## 
## n = 41   r = -0.05801434   r-square = 0.003365663 
## Parametric P-values:   2-tailed = 0.7186312    1-tailed = 0.3593156 
## Angle between the two OLS regression lines = 4.920752 degrees
## 
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
## 
## Regression results
##   Method  Intercept         Slope Angle (degrees) P-perm (1-tailed)
## 1    OLS -0.9614034 -0.0002907530     -0.01665892              0.34
## 2     MA -0.9614032 -0.0002907603     -0.01665934              0.34
## 3    SMA -0.7965948 -0.0050117434     -0.28714934                NA
## 4    RMA -0.9382952 -0.0009526958     -0.05458543              0.34
## 
## Confidence intervals
##   Method 2.5%-Intercept 97.5%-Intercept   2.5%-Slope  97.5%-Slope
## 1    OLS     -1.0326755      -0.8901313 -0.001911271  0.001329765
## 2     MA     -1.0179765      -0.9048297 -0.001911322  0.001329800
## 3    SMA     -0.8442479      -0.7311041 -0.006887743 -0.003646706
## 4    RMA     -1.2288696      -0.4261570 -0.015623042  0.007370892
## 
## Eigenvalues: 733.532 0.01836253 
## 
## H statistic used for computing C.I. of MA: 2.626211e-06
# RMA p-perm (1-tailed): 0.34

BC vs. MLD:

lmodel2(BC_umolC_L_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
## 
## Model II regression
## 
## Call: lmodel2(formula = BC_umolC_L_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
## 
## n = 40   r = 0.05047749   r-square = 0.002547977 
## Parametric P-values:   2-tailed = 0.7570769    1-tailed = 0.3785384 
## Angle between the two OLS regression lines = 3.135568 degrees
## 
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
## 
## Regression results
##   Method  Intercept        Slope Angle (degrees) P-perm (1-tailed)
## 1    OLS -0.2610067 0.0001399376     0.008017831              0.41
## 2     MA -0.2610067 0.0001399386     0.008017893              0.41
## 3    SMA -0.3522699 0.0027722762     0.158839322                NA
## 4    RMA -0.2638718 0.0002225765     0.012752695              0.41
## 
## Confidence intervals
##   Method 2.5%-Intercept 97.5%-Intercept    2.5%-Slope 97.5%-Slope
## 1    OLS     -0.3009848      -0.2210286 -0.0007693176 0.001049193
## 2     MA     -0.2925309      -0.2294826 -0.0007693238 0.001049201
## 3    SMA     -0.3888313      -0.3257836  0.0020083227 0.003826833
## 4    RMA     -0.3176356      -0.2118458 -0.0012780279 0.001773307
## 
## Eigenvalues: 749.9232 0.005748861 
## 
## H statistic used for computing C.I. of MA: 8.267576e-07
# RMA p-perm (1-tailed): 0.4

BP vs. MLD:

lmodel2(BP_umolC_L_d_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
## 
## Model II regression
## 
## Call: lmodel2(formula = BP_umolC_L_d_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
## 
## n = 39   r = -0.2676165   r-square = 0.0716186 
## Parametric P-values:   2-tailed = 0.09954016    1-tailed = 0.04977008 
## Angle between the two OLS regression lines = 0.8437079 degrees
## 
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
## 
## Regression results
##   Method Intercept        Slope Angle (degrees) P-perm (1-tailed)
## 1    OLS -1.293325 -0.001136078     -0.06509244              0.05
## 2     MA -1.293324 -0.001136097     -0.06509353              0.05
## 3    SMA -1.190358 -0.004245171     -0.24322893                NA
## 4    RMA -1.280262 -0.001530508     -0.08769157              0.05
## 
## Confidence intervals
##   Method 2.5%-Intercept 97.5%-Intercept   2.5%-Slope   97.5%-Slope
## 1    OLS      -1.350354       -1.236296 -0.002498583  0.0002264276
## 2     MA      -1.338449       -1.248200 -0.002498629  0.0002264305
## 3    SMA      -1.228418       -1.138171 -0.005820969 -0.0030959583
## 4    RMA      -1.339991       -1.215039 -0.003499938  0.0002729942
## 
## Eigenvalues: 672.3508 0.01124894 
## 
## H statistic used for computing C.I. of MA: 1.856478e-06
# RMA p-perm (1-tailed): 0.09

POC vs. MLD:

lmodel2(POC_umol_L_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
## 
## Model II regression
## 
## Call: lmodel2(formula = POC_umol_L_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
## 
## n = 39   r = -0.3021468   r-square = 0.09129269 
## Parametric P-values:   2-tailed = 0.0615568    1-tailed = 0.0307784 
## Angle between the two OLS regression lines = 0.3222401 degrees
## 
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
## 
## Regression results
##   Method Intercept         Slope Angle (degrees) P-perm (1-tailed)
## 1    OLS 0.2500394 -0.0005650346     -0.03237409              0.02
## 2     MA 0.2500394 -0.0005650364     -0.03237420              0.02
## 3    SMA 0.2954879 -0.0018700664     -0.10714679                NA
## 4    RMA 0.2725182 -0.0012105019     -0.06935662              0.02
## 
## Confidence intervals
##   Method 2.5%-Intercept 97.5%-Intercept   2.5%-Slope   97.5%-Slope
## 1    OLS      0.2238649       0.2762139 -0.001158846  2.877729e-05
## 2     MA      0.2293595       0.2707194 -0.001158850  2.877728e-05
## 3    SMA      0.2780125       0.3193723 -0.002555893 -1.368269e-03
## 4    RMA      0.2332980       0.3383745 -0.003101531 -8.431634e-05
## 
## Eigenvalues: 749.329 0.002381279 
## 
## H statistic used for computing C.I. of MA: 3.526146e-07
# RMA p-perm (1-tailed): 0.02

Finally, DOC vs. MLD:

lmodel2(DOC_uM_C_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
## 
## Model II regression
## 
## Call: lmodel2(formula = DOC_uM_C_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
## 
## n = 41   r = 0.2277968   r-square = 0.0518914 
## Parametric P-values:   2-tailed = 0.1520253    1-tailed = 0.07601263 
## Angle between the two OLS regression lines = 0.1158677 degrees
## 
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
## 
## Regression results
##   Method Intercept        Slope Angle (degrees) P-perm (1-tailed)
## 1    OLS  1.840913 0.0001106822     0.006341625              0.06
## 2     MA  1.840913 0.0001106823     0.006341627              0.06
## 3    SMA  1.827815 0.0004858814     0.027838949                NA
## 4    RMA  1.837626 0.0002048466     0.011736844              0.06
## 
## Confidence intervals
##   Method 2.5%-Intercept 97.5%-Intercept    2.5%-Slope  97.5%-Slope
## 1    OLS       1.834174        1.847652 -4.255224e-05 0.0002639167
## 2     MA       1.835564        1.846262 -4.255225e-05 0.0002639168
## 3    SMA       1.821642        1.832341  3.562373e-04 0.0006627063
## 4    RMA       1.825396        1.847154 -6.809853e-05 0.0005551816
## 
## Eigenvalues: 733.5319 0.0001641865 
## 
## H statistic used for computing C.I. of MA: 2.348082e-08
# RMA p-perm (1-tailed): 0.09