Cortisol Statistics Pt 1

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.0     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(rstatix)

## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter

library(ggpubr)
library(lme4)

## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car

library(lmerTest)

## 
## Attaching package: 'lmerTest'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## The following object is masked from 'package:stats':
## 
##     step

library(dplyr)

data <- readxl::read_excel("CORT_Research.xlsx")

# Clean column names (remove spaces)
data_clean <- data %>%
  rename(
    Site = "Site",
    Temperature = "Temperature",
    pH = "pH",
    Turbidity = "Turbidity")

# Convert to factors
data_clean <- data_clean %>%
  mutate(
    Fish_ID = factor(Fish_ID),
    Site = factor(Site),
    Month = factor(Month, levels = c("May","June","July","August","September","October")),
    Sample_Time = factor(Sample_Time, levels = c("Initial","A","B"))
  )

data_clean <- data_clean %>%
  filter(!is.na(Cortisol))

data_swab <- data_clean %>%
  group_by(Fish_ID) %>%
  filter(all(c("Initial", "A", "B") %in% Sample_Time)) %>%  # ensures all 3 exist
  distinct(Fish_ID, Sample_Time, .keep_all = TRUE) %>%      # removes duplicates
  ungroup()

model_swab <- lmer(Cortisol ~ Sample_Time + (1 | Fish_ID), data = data_clean)

anova(model_swab)

## Type III Analysis of Variance Table with Satterthwaite's method
##              Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## Sample_Time 5676207 2838103     2 321.85  7.2839 0.0008057 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

library(emmeans)

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

emmeans(model_swab, pairwise ~ Sample_Time, adjust = "bonferroni")

## $emmeans
##  Sample_Time emmean   SE  df lower.CL upper.CL
##  Initial       1120 78.8 427      965     1275
##  A              850 62.5 311      727      973
##  B              836 66.0 340      706      966
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast    estimate   SE  df t.ratio p.value
##  Initial - A    270.5 79.4 327   3.408  0.0022
##  Initial - B    284.3 80.9 321   3.515  0.0015
##  A - B           13.8 67.1 305   0.206  1.0000
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: bonferroni method for 3 tests

ggplot(data_clean, aes(x = Sample_Time, y = Cortisol)) +
  geom_boxplot(fill = "lightblue") +
  geom_jitter(width = 0.15, alpha = 0.4) +
  stat_summary(fun = mean, geom = "point", size = 2, color = "pink") +
  labs(title = "Cortisol Across Sampling Times",
       x = "Swab Time",
       y = "Cortisol (pg/mL)") +
  theme_classic()

kruskal_test(Cortisol ~ Site, data = data_clean)

## # A tibble: 1 × 6
##   .y.          n statistic    df         p method        
## * <chr>    <int>     <dbl> <int>     <dbl> <chr>         
## 1 Cortisol   465      27.0     4 0.0000194 Kruskal-Wallis

data_clean %>%
  dunn_test(Cortisol ~ Site, p.adjust.method = "bonferroni")

## # A tibble: 10 × 9
##    .y.      group1     group2    n1    n2 statistic       p   p.adj p.adj.signif
##  * <chr>    <chr>      <chr>  <int> <int>     <dbl>   <dbl>   <dbl> <chr>       
##  1 Cortisol Clyde Riv… Dunk …   110    94     0.627 5.31e-1 1   e+0 ns          
##  2 Cortisol Clyde Riv… Pierr…   110    56    -1.01  3.12e-1 1   e+0 ns          
##  3 Cortisol Clyde Riv… West …   110    97    -0.649 5.16e-1 1   e+0 ns          
##  4 Cortisol Clyde Riv… Wilmo…   110   108    -4.19  2.73e-5 2.73e-4 ***         
##  5 Cortisol Dunk River Pierr…    94    56    -1.51  1.32e-1 1   e+0 ns          
##  6 Cortisol Dunk River West …    94    97    -1.23  2.18e-1 1   e+0 ns          
##  7 Cortisol Dunk River Wilmo…    94   108    -4.65  3.28e-6 3.28e-5 ****        
##  8 Cortisol Pierre Ja… West …    56    97     0.451 6.52e-1 1   e+0 ns          
##  9 Cortisol Pierre Ja… Wilmo…    56   108    -2.44  1.46e-2 1.46e-1 ns          
## 10 Cortisol West River Wilmo…    97   108    -3.42  6.36e-4 6.36e-3 **

ggplot(data_clean, aes(x = Site, y = Cortisol)) +
  geom_boxplot(fill = "lightblue") +
  geom_jitter(width = 0.2, alpha = 0.4) +
  labs(title = "Cortisol Across River Sites",
       x = "River Sites",
       y = "Cortisol (pg/mL)") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

kruskal_test(Cortisol ~ Month, data = data_clean)

## # A tibble: 1 × 6
##   .y.          n statistic    df        p method        
## * <chr>    <int>     <dbl> <int>    <dbl> <chr>         
## 1 Cortisol   465      206.     5 1.75e-42 Kruskal-Wallis

data_clean %>%
  dunn_test(Cortisol ~ Month, p.adjust.method = "bonferroni")

## # A tibble: 15 × 9
##    .y.      group1   group2    n1    n2 statistic        p    p.adj p.adj.signif
##  * <chr>    <chr>    <chr>  <int> <int>     <dbl>    <dbl>    <dbl> <chr>       
##  1 Cortisol May      June      19    56    -5.39  7.24e- 8 1.09e- 6 ****        
##  2 Cortisol May      July      19    72    -0.469 6.39e- 1 1   e+ 0 ns          
##  3 Cortisol May      August    19   107     1.31  1.90e- 1 1   e+ 0 ns          
##  4 Cortisol May      Septe…    19   106     0.936 3.49e- 1 1   e+ 0 ns          
##  5 Cortisol May      Octob…    19   105    -4.19  2.83e- 5 4.24e- 4 ***         
##  6 Cortisol June     July      56    72     7.34  2.06e-13 3.09e-12 ****        
##  7 Cortisol June     August    56   107    10.6   1.83e-26 2.75e-25 ****        
##  8 Cortisol June     Septe…    56   106    10.1   7.85e-24 1.18e-22 ****        
##  9 Cortisol June     Octob…    56   105     2.33  1.97e- 2 2.96e- 1 ns          
## 10 Cortisol July     August    72   107     2.93  3.35e- 3 5.03e- 2 ns          
## 11 Cortisol July     Septe…    72   106     2.32  2.04e- 2 3.06e- 1 ns          
## 12 Cortisol July     Octob…    72   105    -6.03  1.63e- 9 2.44e- 8 ****        
## 13 Cortisol August   Septe…   107   106    -0.678 4.98e- 1 1   e+ 0 ns          
## 14 Cortisol August   Octob…   107   105    -9.97  2.00e-23 3.01e-22 ****        
## 15 Cortisol Septemb… Octob…   106   105    -9.27  1.78e-20 2.68e-19 ****

ggplot(data_clean, aes(x = Month, y = Cortisol)) +
  geom_boxplot(fill = "lightblue") +
  geom_jitter(width = 0.2, alpha = 0.4) +
  labs(title = "Seasonal Variation in Cortisol",
       x = "Month",
       y = "Cortisol (pg/mL)") +
  theme_classic()

model_env <- lmer(Cortisol ~ Temperature + DO + SPC + pH + Turbidity + (1|Site), data = data_clean)

summary(model_env)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Cortisol ~ Temperature + DO + SPC + pH + Turbidity + (1 | Site)
##    Data: data_clean
## 
## REML criterion at convergence: 7385.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.1948 -0.4604 -0.0972  0.2763 10.1440 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 237195   487.0   
##  Residual             571649   756.1   
## Number of obs: 461, groups:  Site, 5
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept) -8060.843   2383.866   451.489  -3.381 0.000784 ***
## Temperature    55.481     16.856   452.934   3.291 0.001075 ** 
## DO             75.973     11.190   453.868   6.790 3.53e-11 ***
## SPC             4.762      1.792    74.226   2.657 0.009650 ** 
## pH            -14.791    300.504   454.717  -0.049 0.960764    
## Turbidity      -2.604      2.700   453.423  -0.965 0.335255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) Tmprtr DO     SPC    pH    
## Temperature -0.097                            
## DO          -0.171 -0.425                     
## SPC         -0.273 -0.082  0.270              
## pH          -0.860  0.226 -0.296 -0.064       
## Turbidity   -0.271 -0.061  0.044  0.075  0.238

anova(model_env)

## Type III Analysis of Variance Table with Satterthwaite's method
##               Sum Sq  Mean Sq NumDF  DenDF F value    Pr(>F)    
## Temperature  6193078  6193078     1 452.93 10.8337  0.001075 ** 
## DO          26351570 26351570     1 453.87 46.0974 3.535e-11 ***
## SPC          4035287  4035287     1  74.23  7.0590  0.009650 ** 
## pH              1385     1385     1 454.72  0.0024  0.960764    
## Turbidity     531904   531904     1 453.42  0.9305  0.335255    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Temperature example
ggplot(data_clean, aes(x = Temperature, y = Cortisol)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Cortisol vs Temperature")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data_clean, aes(x = DO, y = Cortisol)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Cortisol vs DO")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data_clean, aes(x = pH, y = Cortisol)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Cortisol vs pH")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data_clean, aes(x = SPC, y = Cortisol)) +
  geom_point() +
  geom_smooth(method = "lm") +
  theme_classic() +
  labs(title = "Cortisol vs SPC")

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

data_clean <- data_clean %>%
  mutate(Species = factor(Fish_Species))

data_clean$Species <- factor(data_clean$Species,
                             levels = c("Brook Trout", "Rainbow Trout"))

wilcox_test(Cortisol ~ Species, data = data_clean)

## # A tibble: 1 × 7
##   .y.      group1      group2           n1    n2 statistic     p
## * <chr>    <chr>       <chr>         <int> <int>     <dbl> <dbl>
## 1 Cortisol Brook Trout Rainbow Trout   263   202    27188. 0.664

library(rstatix)

species_stats <- data_clean %>%
  wilcox_test(Cortisol ~ Species) %>%
  mutate(
    y.position = max(data_clean$Cortisol, na.rm = TRUE) * 1.1
  )

library(ggpubr)

ggplot(data_clean, aes(x = Species, y = Cortisol, fill = Species)) +
  geom_boxplot(alpha = 0.7) +
  geom_jitter(width = 0.15, alpha = 0.4) +
  labs(title = "Differences in Cortisol Between Trout Species",
       x = "Species",
       y = "Cortisol (pg/mL)") +
  theme_classic() +
  theme(legend.position = "none")

model_species <- lmer(Cortisol ~ Species + (1|Site) + (1|Fish_ID), data = data_clean)

anova(model_species)

## Type III Analysis of Variance Table with Satterthwaite's method
##         Sum Sq Mean Sq NumDF  DenDF F value Pr(>F)
## Species 182953  182953     1 171.33  0.4582 0.4994

emmeans::emmeans(model_species, pairwise ~ Species)

## $emmeans
##  Species       emmean  SE   df lower.CL upper.CL
##  Brook Trout      914 104 5.91      659     1170
##  Rainbow Trout    841 116 7.71      572     1109
## 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95 
## 
## $contrasts
##  contrast                    estimate  SE  df t.ratio p.value
##  Brook Trout - Rainbow Trout     73.9 111 166   0.666  0.5065
## 
## Degrees-of-freedom method: kenward-roger

p1 <- ggplot(data_clean, aes(x = Temperature, y = Cortisol)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  labs(title = "Temperature", x = "Temperature (°C)", y = "Cortisol") +
  theme_classic()

p2 <- ggplot(data_clean, aes(x = DO, y = Cortisol)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  labs(title = "Dissolved Oxygen", x = "DO (mg/L)", y = "Cortisol") +
  theme_classic()

p3 <- ggplot(data_clean, aes(x = SPC, y = Cortisol)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm") +
  labs(title = "Specific Conductivity", x = "SPC", y = "Cortisol") +
  theme_classic()

p4 <- ggplot(data_clean, aes(x = pH, y = Cortisol)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm")  +
  labs(title = "pH", x = "pH", y = "Cortisol") +
  theme_classic()

ggarrange(p1, p2, p3, p4,
          ncol = 2, nrow = 3,
          labels = c("A", "B", "C", "D"))

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggarrange(p1, p2, p3, p4,
          ncol = 2, nrow = 2,
          labels = c("A", "B", "C", "D"))

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).

## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

## `geom_smooth()` using formula = 'y ~ x'

## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

Cortisol Statistics Pt 1

2026-03-23