Goal for this R code: Compile a data frame with the MLD (based on delta rho of 0.01) that Craig and Nick Calculated (9.10.2019) with the KM dataset.
Things to note: There are repeated stations (often for a bunch of 10m water) and I’m using the following to use the cast with the biogeochem suite of samples:
SS212 - use C02 SS216 - use C01 SS228 - use C01
(I’ve added ‘B’ and ‘C’ labels in the KM df to the casts that I am not using so that it is clear which cast was used)
Set working directory:
setwd("~/Desktop/km_working_folder/2019")
First, load the MLD dataset from Craig and Nick:
mld.p01 <- read_csv(file="KM_mlds_estimates_CC_2019_R.csv")
## Parsed with column specification:
## cols(
## STATION = col_character(),
## cast = col_double(),
## stat_numb_2019 = col_double(),
## `matlab serial time` = col_double(),
## Longitude = col_double(),
## Latitude = col_double(),
## mld_n2_boyancy_freq = col_double(),
## `b_avg_mld_n2_boyancy _freq` = col_double(),
## mld_p01_kgm3_from2pt5db = col_double(),
## b_ave_mld_p01_kgm3_from2pt5db = col_double(),
## MLD_MLD_p02_kgm3_from2pt5db = col_double(),
## b_ave_MLD_MLD_p02_kgm3_from2pt5db = col_double()
## )
Next, load the KM biogeochem data set:
biogeochem.data <- read_csv(file="KM14-16_biogeochem_data.v4.FINAL.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## CRUISE = col_character(),
## STATION = col_character(),
## TYPE = col_character(),
## `yyyy-mm-ddThh:mm` = col_character(),
## `DATE LOCAL` = col_character(),
## `TIME LOCAL` = col_time(format = ""),
## `fluor [ug/L]` = col_logical(),
## `NTURT [upoly]` = col_logical(),
## turbidity_NTU = col_logical(),
## VO_trans_volts = col_logical(),
## V3_turbidity_volts = col_logical(),
## `Silicate [uM]` = col_character(),
## `CALC BP:PP` = col_character(),
## `CALC BACT HNA:TOTAL [cells/mL]` = col_character(),
## mag_wind_stress_24hr = col_logical(),
## LOCATION = col_character(),
## near_bw_other = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 425 parsing failures.
## row col expected actual file
## 1200 STATION # no trailing characters B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1201 STATION # no trailing characters B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1202 STATION # no trailing characters B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1203 STATION # no trailing characters B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## 1204 STATION # no trailing characters B 'KM14-16_biogeochem_data.v4.FINAL.csv'
## .... ......... ...................... ...... ......................................
## See problems(...) for more details.
# Remove -999 and set to Nan:
# biogeochem.data <- as.data.frame(na_if(biogeochem.data, -999))
Now, merge the 2 dfs by station
mld.biogeochem <- merge(mld.p01, biogeochem.data, by = "STATION")
Need to calculate conversions of interest and add to merged df:
# First, calculate conversions
# NPP from mgC_m-3_d-1 to µmolC_L-1_d-1
mld.biogeochem$NPP_umolC_L_d <- with(mld.biogeochem, Primary_Production_mg_m3_day / 12.011)
# BP from pmol_L-1_hr-1 to umolC_L-1_d-1
mld.biogeochem$BP_umolC_L_d <- with(mld.biogeochem, (BP_pmol_L_hr * 1.5 * 24)/(12.011*1000))
# BC from yield corrected bacterial abundance (cells/mL)
mld.biogeochem$BC_umolC_L <- with(mld.biogeochem, (YIELD_CORR_BACT_cells_mL * 10/(1000000*12.011)))
# Next, calculate log(10) transformations of NPP, Chl, BP, BC, POC, and DOC
mld.biogeochem$NPP_umolC_L_d_log10 <- with(mld.biogeochem,log10(NPP_umolC_L_d))
mld.biogeochem$Chla_ug_L_log10 <- with(mld.biogeochem,log10(Chla_ug_L))
mld.biogeochem$BC_umolC_L_log10 <- with(mld.biogeochem,log10(BC_umolC_L))
mld.biogeochem$BP_umolC_L_d_log10 <- with(mld.biogeochem,log10(BP_umolC_L_d))
## Warning in eval(substitute(expr), data, enclos = parent.frame()): NaNs
## produced
mld.biogeochem$POC_umol_L_log10 <- with(mld.biogeochem,log10(POC_umol_L))
mld.biogeochem$DOC_uM_C_log10 <- with(mld.biogeochem,log10(DOC_uM_C))
Add depth bin identifiers in a column called depth_bin
to merged df:
depth_bin IDs: upper_euphotic is 0-75m, lower_euphotic is 75-150m, and upper_mesopelagic is 150-300m
Note, here: I’ve gone back-and-forth regarding how to id specific depth bins to avoid using the same data for multiple depth bins, but also to include the data bw our identified depth bins (e.g. if you say <75 m, essentially this excludes depths between 50 and 75 bc 50 is the closest value less than 75). BUT: doesn’t really matter bc the correlations I will run will be in the upper euphotic and upper mesopelagic, and I’ve sent these to <= to include as much data as possible.
mld.biogeochem <- mld.biogeochem %>%
mutate(depth_bin = case_when(NOM_DEPTH_m <= 75 ~ "upper_euphotic",
NOM_DEPTH_m > 75 & NOM_DEPTH_m < 150 ~ "lower_euphotic",
NOM_DEPTH_m >= 150 & NOM_DEPTH_m <= 300 ~ "upper_mesopelagic"))
Calculate means and sds by STATION
and depth_bin
:
means.sd <- as.data.frame(mld.biogeochem %>%
group_by(STATION, depth_bin) %>%
summarise_at(vars("NPP_umolC_L_d", "Chla_ug_L", "BC_umolC_L", "BP_umolC_L_d", "POC_umol_L", "DOC_uM_C", "NPP_umolC_L_d_log10", "Chla_ug_L_log10", "BC_umolC_L_log10", "BP_umolC_L_d_log10", "POC_umol_L_log10", "DOC_uM_C_log10", "mld_p01_kgm3_from2pt5db"), funs(mean, sd), na.rm=TRUE))
Remove NA rows from depth_bin
:
means.sd <- filter(means.sd, depth_bin != "NA")
Write to csv and save to working directory:
write.csv(means.sd, "means.sd.mld.p01.biogeochem.9.2019.csv") # write results to csv and save in working directory
Do this using Model II Regression code developed by Legendre 2018 (https://cran.r-project.org/web/packages/lmodel2/lmodel2.pdf)
lmodel2(formula, data = NULL, range.y=NULL, range.x=NULL, nperm=0) HAVE TO REMOVE INF VALUES
control shift m for %>%
To get at the question: do pulses of key biogeochem variables in the upper euphotic zone correlate with MLD at those stations, compare log(10) transformed npp, chl, bc, bp, poc, and doc to log(10) tranformed mlds.
Isolate upper_euphotic means
upper.euphotic <- filter(means.sd, depth_bin == "upper_euphotic")
Make -Inf values NA
upper.euphotic[ upper.euphotic == "-Inf" ] <- NA
Run regression model
NPP vs MLD:
lmodel2(NPP_umolC_L_d_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
##
## Model II regression
##
## Call: lmodel2(formula = NPP_umolC_L_d_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
##
## n = 35 r = -0.500935 r-square = 0.2509359
## Parametric P-values: 2-tailed = 0.002175979 1-tailed = 0.001087989
## Angle between the two OLS regression lines = 0.7053418 degrees
##
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
##
## Regression results
## Method Intercept Slope Angle (degrees) P-perm (1-tailed)
## 1 OLS -0.6626691 -0.004124509 -0.2363156 0.03
## 2 MA -0.6626625 -0.004124718 -0.2363276 0.03
## 3 SMA -0.5333025 -0.008233620 -0.4717410 NA
## 4 RMA -0.5985303 -0.006161768 -0.3530388 0.03
##
## Confidence intervals
## Method 2.5%-Intercept 97.5%-Intercept 2.5%-Slope 97.5%-Slope
## 1 OLS -0.7659117 -0.5594264 -0.006648305 -0.001600712
## 2 MA -0.7421223 -0.5832010 -0.006648677 -0.001600812
## 3 SMA -0.6008546 -0.4419419 -0.011135535 -0.006087942
## 4 RMA -0.7058310 -0.4549259 -0.010723121 -0.002753543
##
## Eigenvalues: 702.3464 0.03566458
##
## H statistic used for computing C.I. of MA: 6.369978e-06
# RMA p-perm (1-tailed): 0.01
Chl vs. MLD:
lmodel2(Chla_ug_L_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
##
## Model II regression
##
## Call: lmodel2(formula = Chla_ug_L_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
##
## n = 41 r = -0.05801434 r-square = 0.003365663
## Parametric P-values: 2-tailed = 0.7186312 1-tailed = 0.3593156
## Angle between the two OLS regression lines = 4.920752 degrees
##
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
##
## Regression results
## Method Intercept Slope Angle (degrees) P-perm (1-tailed)
## 1 OLS -0.9614034 -0.0002907530 -0.01665892 0.34
## 2 MA -0.9614032 -0.0002907603 -0.01665934 0.34
## 3 SMA -0.7965948 -0.0050117434 -0.28714934 NA
## 4 RMA -0.9382952 -0.0009526958 -0.05458543 0.34
##
## Confidence intervals
## Method 2.5%-Intercept 97.5%-Intercept 2.5%-Slope 97.5%-Slope
## 1 OLS -1.0326755 -0.8901313 -0.001911271 0.001329765
## 2 MA -1.0179765 -0.9048297 -0.001911322 0.001329800
## 3 SMA -0.8442479 -0.7311041 -0.006887743 -0.003646706
## 4 RMA -1.2288696 -0.4261570 -0.015623042 0.007370892
##
## Eigenvalues: 733.532 0.01836253
##
## H statistic used for computing C.I. of MA: 2.626211e-06
# RMA p-perm (1-tailed): 0.34
BC vs. MLD:
lmodel2(BC_umolC_L_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
##
## Model II regression
##
## Call: lmodel2(formula = BC_umolC_L_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
##
## n = 40 r = 0.05047749 r-square = 0.002547977
## Parametric P-values: 2-tailed = 0.7570769 1-tailed = 0.3785384
## Angle between the two OLS regression lines = 3.135568 degrees
##
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
##
## Regression results
## Method Intercept Slope Angle (degrees) P-perm (1-tailed)
## 1 OLS -0.2610067 0.0001399376 0.008017831 0.41
## 2 MA -0.2610067 0.0001399386 0.008017893 0.41
## 3 SMA -0.3522699 0.0027722762 0.158839322 NA
## 4 RMA -0.2638718 0.0002225765 0.012752695 0.41
##
## Confidence intervals
## Method 2.5%-Intercept 97.5%-Intercept 2.5%-Slope 97.5%-Slope
## 1 OLS -0.3009848 -0.2210286 -0.0007693176 0.001049193
## 2 MA -0.2925309 -0.2294826 -0.0007693238 0.001049201
## 3 SMA -0.3888313 -0.3257836 0.0020083227 0.003826833
## 4 RMA -0.3176356 -0.2118458 -0.0012780279 0.001773307
##
## Eigenvalues: 749.9232 0.005748861
##
## H statistic used for computing C.I. of MA: 8.267576e-07
# RMA p-perm (1-tailed): 0.4
BP vs. MLD:
lmodel2(BP_umolC_L_d_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
##
## Model II regression
##
## Call: lmodel2(formula = BP_umolC_L_d_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
##
## n = 39 r = -0.2676165 r-square = 0.0716186
## Parametric P-values: 2-tailed = 0.09954016 1-tailed = 0.04977008
## Angle between the two OLS regression lines = 0.8437079 degrees
##
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
##
## Regression results
## Method Intercept Slope Angle (degrees) P-perm (1-tailed)
## 1 OLS -1.293325 -0.001136078 -0.06509244 0.05
## 2 MA -1.293324 -0.001136097 -0.06509353 0.05
## 3 SMA -1.190358 -0.004245171 -0.24322893 NA
## 4 RMA -1.280262 -0.001530508 -0.08769157 0.05
##
## Confidence intervals
## Method 2.5%-Intercept 97.5%-Intercept 2.5%-Slope 97.5%-Slope
## 1 OLS -1.350354 -1.236296 -0.002498583 0.0002264276
## 2 MA -1.338449 -1.248200 -0.002498629 0.0002264305
## 3 SMA -1.228418 -1.138171 -0.005820969 -0.0030959583
## 4 RMA -1.339991 -1.215039 -0.003499938 0.0002729942
##
## Eigenvalues: 672.3508 0.01124894
##
## H statistic used for computing C.I. of MA: 1.856478e-06
# RMA p-perm (1-tailed): 0.09
POC vs. MLD:
lmodel2(POC_umol_L_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
##
## Model II regression
##
## Call: lmodel2(formula = POC_umol_L_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
##
## n = 39 r = -0.3021468 r-square = 0.09129269
## Parametric P-values: 2-tailed = 0.0615568 1-tailed = 0.0307784
## Angle between the two OLS regression lines = 0.3222401 degrees
##
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
##
## Regression results
## Method Intercept Slope Angle (degrees) P-perm (1-tailed)
## 1 OLS 0.2500394 -0.0005650346 -0.03237409 0.02
## 2 MA 0.2500394 -0.0005650364 -0.03237420 0.02
## 3 SMA 0.2954879 -0.0018700664 -0.10714679 NA
## 4 RMA 0.2725182 -0.0012105019 -0.06935662 0.02
##
## Confidence intervals
## Method 2.5%-Intercept 97.5%-Intercept 2.5%-Slope 97.5%-Slope
## 1 OLS 0.2238649 0.2762139 -0.001158846 2.877729e-05
## 2 MA 0.2293595 0.2707194 -0.001158850 2.877728e-05
## 3 SMA 0.2780125 0.3193723 -0.002555893 -1.368269e-03
## 4 RMA 0.2332980 0.3383745 -0.003101531 -8.431634e-05
##
## Eigenvalues: 749.329 0.002381279
##
## H statistic used for computing C.I. of MA: 3.526146e-07
# RMA p-perm (1-tailed): 0.02
Finally, DOC vs. MLD:
lmodel2(DOC_uM_C_log10_mean ~ mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y = "interval", range.x = "interval", nperm = 99)
##
## Model II regression
##
## Call: lmodel2(formula = DOC_uM_C_log10_mean ~
## mld_p01_kgm3_from2pt5db_mean, data = upper.euphotic, range.y =
## "interval", range.x = "interval", nperm = 99)
##
## n = 41 r = 0.2277968 r-square = 0.0518914
## Parametric P-values: 2-tailed = 0.1520253 1-tailed = 0.07601263
## Angle between the two OLS regression lines = 0.1158677 degrees
##
## Permutation tests of OLS, MA, RMA slopes: 1-tailed, tail corresponding to sign
## A permutation test of r is equivalent to a permutation test of the OLS slope
## P-perm for SMA = NA because the SMA slope cannot be tested
##
## Regression results
## Method Intercept Slope Angle (degrees) P-perm (1-tailed)
## 1 OLS 1.840913 0.0001106822 0.006341625 0.06
## 2 MA 1.840913 0.0001106823 0.006341627 0.06
## 3 SMA 1.827815 0.0004858814 0.027838949 NA
## 4 RMA 1.837626 0.0002048466 0.011736844 0.06
##
## Confidence intervals
## Method 2.5%-Intercept 97.5%-Intercept 2.5%-Slope 97.5%-Slope
## 1 OLS 1.834174 1.847652 -4.255224e-05 0.0002639167
## 2 MA 1.835564 1.846262 -4.255225e-05 0.0002639168
## 3 SMA 1.821642 1.832341 3.562373e-04 0.0006627063
## 4 RMA 1.825396 1.847154 -6.809853e-05 0.0005551816
##
## Eigenvalues: 733.5319 0.0001641865
##
## H statistic used for computing C.I. of MA: 2.348082e-08
# RMA p-perm (1-tailed): 0.09