Import data, calculate seacarb pco2 values

Import raw bottle data. Remove all samples whose replicate deltas was >5 and/or had visiual precipitaiton of calcium carbonate in bottles. Send the remaining coupled DIC/TA samples through Seacarb.. append column to raw bottle data indicating if the calculated pco2 was greater than 1000, 2000 or 5000 (yes or no)

# Read the original raw data
forseacarb_raw <- read.csv('data2/bottle_raw.csv', stringsAsFactors = TRUE)


# Filter out samples where delta > 5, precip == "yes", and missing DIC
forseacarb_filtered <- forseacarb_raw %>%
  filter(!delta >= 5) %>%
  filter(!precip == "yes") %>%
  filter(!is.na(DIC) & DIC != "")

# Run seacarb to compute carbonate chemistry
seacarb.output.all = carb(
  flag = 15,
  var1 = forseacarb_filtered$Avg.TA / 10^6,  # Convert TA from umol/kg to mol/kg
  var2 = forseacarb_filtered$DIC / 10^6,  # Convert DIC from umol/kg to mol/kg
  S = forseacarb_filtered$Salinity,  # Salinity (psu)
  T = forseacarb_filtered$temp,  # Temperature (°C)
  P = forseacarb_filtered$dbar / 10  # Pressure (bar)
)

# Add pCO2 results to the filtered dataframe
forseacarb_filtered <- forseacarb_filtered %>%
  mutate(
    pCO2 = seacarb.output.all$pCO2,
    pco1000 = ifelse(pCO2 > 1000, "yes", "no"),
    pco2000 = ifelse(pCO2 > 2000, "yes", "no"),
    pco5000 = ifelse(pCO2 > 5000, "yes", "no")
  )

# Extract only the columns needed for merging
pco2_data <- forseacarb_filtered %>%
  dplyr::select(sample_id, pCO2, pco1000, pco2000, pco5000)

# Merge pCO2 columns back into the original dataset (including missing dic rows)
bottle_all <- forseacarb_raw %>%
  filter(!delta >= 5)%>%
  filter(!precip == "yes")%>%
  left_join(pco2_data, by = "sample_id")

Create filtered bottle sample data sets

Make filtered datasets to calculate average TA for each coral. So now we have:
bottle_all <- no filtering based on pco2 values
bottle_5000 <- anything higher than 5000 removed
bottle_2000 <- anything higher than 2000 removed
bottle_1000 <- anything higher than 1000 removed
bottle_feb16 <- no filtering based on pco2, but samples before feb 16 removed. Feb 19th being the date that initial bw and LE measurements taken.

bottle_1000 <- bottle_all %>%
  filter(pco1000 != 'yes' | is.na(pco1000))

bottle_2000 <- bottle_all %>%
  filter(pco2000 != 'yes' | is.na(pco2000))

bottle_5000 <- bottle_all %>%
  filter(pco5000 != 'yes' | is.na(pco5000))

bottle_feb16 <- bottle_all %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%Y")) %>%  # Ensure proper conversion
  filter(Date >= as.Date("2024-02-16"))%>%
  filter(pco1000 != "yes"| is.na(pco1000))

Calculate coral TA stats

Generate mean, sd, sem TA for each coral based on filtered bottle data.

#Calculate the average ta and standard deviation and standard error for each coral across all dates
ta_coral_all <- bottle_all %>%
  group_by(Coral) %>%
  summarise(
     n = sum(!is.na(Avg.TA)),  # Count of non-NA values
    avg_ta = mean(Avg.TA, na.rm = TRUE),
    ta_sd = sd(Avg.TA, na.rm = TRUE),
    ta_se = sd(Avg.TA, na.rm = TRUE) / sqrt(n())
  )

#filtering out pco2 >1000
ta_coral_1000 <- bottle_1000 %>%
  group_by(Coral) %>%
  summarise(
     n = sum(!is.na(Avg.TA)),  # Count of non-NA values
    avg_ta = mean(Avg.TA, na.rm = TRUE),
    ta_sd = sd(Avg.TA, na.rm = TRUE),
    ta_se = sd(Avg.TA, na.rm = TRUE) / sqrt(n())
  )

ta_coral_2000 <- bottle_2000 %>%
  group_by(Coral) %>%
  summarise(
    n = sum(!is.na(Avg.TA)),  # Count of non-NA values
    avg_ta = mean(Avg.TA, na.rm = TRUE),
    ta_sd = sd(Avg.TA, na.rm = TRUE),
    ta_se = sd(Avg.TA, na.rm = TRUE) / sqrt(n())
  )

ta_coral_5000 <- bottle_5000 %>%
  group_by(Coral) %>%
  summarise(
    n = sum(!is.na(Avg.TA)),  # Count of non-NA values
    avg_ta = mean(Avg.TA, na.rm = TRUE),
    ta_sd = sd(Avg.TA, na.rm = TRUE),
    ta_se = sd(Avg.TA, na.rm = TRUE) / sqrt(n())
  )

ta_coral_feb16 <- bottle_feb16 %>%
  group_by(Coral) %>%
  summarise(
    n = sum(!is.na(Avg.TA)),  # Count of non-NA values
    avg_ta = mean(Avg.TA, na.rm = TRUE),
    ta_sd = sd(Avg.TA, na.rm = TRUE),
    ta_se = sd(Avg.TA, na.rm = TRUE) / sqrt(n())
  )

Growth dataframes

Import ta stats from above filtered data sets into growth dataframes. Create five based on different filtering of chem data.
growth_all
growth_5000
growth_2000
growth_1000
growth_feb16

Alkalinity~LinearExtension models/regression

All bottle data

T1-T2

linear model, stats:

#LE mixed effect linear model t1-t3
lmer_all_t1t2 <- lmerTest::lmer(le_t1t2e ~ avg_ta_mmol + (1|geno) + (1|tank), data = growth_all)
summary(lmer_all_t1t2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: le_t1t2e ~ avg_ta_mmol + (1 | geno) + (1 | tank)
##    Data: growth_all
## 
## REML criterion at convergence: -108.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.0752 -0.5686 -0.1306  0.5990  2.5116 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  tank     (Intercept) 2.659e-21 5.156e-11
##  geno     (Intercept) 6.012e-04 2.452e-02
##  Residual             2.596e-03 5.095e-02
## Number of obs: 40, groups:  tank, 4; geno, 3
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)   
## (Intercept)  0.010414   0.040460 30.359327   0.257   0.7986   
## avg_ta_mmol  0.030729   0.009936 36.334860   3.093   0.0038 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## avg_ta_mmol -0.914
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

step(lmer_all_t1t2)

## Backward reduced random-effect table:
## 
##            Eliminated npar logLik      AIC    LRT Df Pr(>Chisq)  
## <none>                   5 54.349  -98.698                       
## (1 | tank)          1    4 54.349 -100.698 0.0000  1    1.00000  
## (1 | geno)          0    3 52.626  -99.251 3.4469  1    0.06337 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Backward reduced fixed-effect table:
## Degrees of freedom method: Satterthwaite 
## 
##             Eliminated   Sum Sq  Mean Sq NumDF  DenDF F value   Pr(>F)   
## avg_ta_mmol          0 0.024828 0.024828     1 36.335  9.5638 0.003802 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Model found:
## le_t1t2e ~ avg_ta_mmol + (1 | geno)

if you don’t consider 0.06 significant?.. no genotype effect, simplest linear model:

lm_all_t1t2 <- lm(le_t1t2e ~ avg_ta_mmol, data = growth_all)
summary(lm_all_t1t2)

## 
## Call:
## lm(formula = le_t1t2e ~ avg_ta_mmol, data = growth_all)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.099648 -0.040722 -0.000967  0.035276  0.128585 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.01754    0.04077   0.430  0.66944   
## avg_ta_mmol  0.02915    0.01074   2.715  0.00991 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05527 on 38 degrees of freedom
## Multiple R-squared:  0.1625, Adjusted R-squared:  0.1405 
## F-statistic: 7.373 on 1 and 38 DF,  p-value: 0.009907

sl_t1t2 <- coef(lm_all_t1t2)["avg_ta_mmol"]
intercept_t1t2 <- coef(lm_all_t1t2)[1]  # Intercept
summary_model_t1t2 <- summary(lm_all_t1t2)
r_squared_t1t2 <- summary(lm_all_t1t2)$r.squared
pvalue_t1t2 <- summary(lm_all_t1t2)$coefficients[2, 4]

T2 - T3

For simplicities sake, have not shown linear mixed models/linear model output for the rest of the analysis, but p value, slope and Rsquared for regression analysis will be on the plots