library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.0 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(ggpubr)
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Registered S3 method overwritten by 'lme4':
## method from
## na.action.merMod car
library(lmerTest)
##
## Attaching package: 'lmerTest'
##
## The following object is masked from 'package:lme4':
##
## lmer
##
## The following object is masked from 'package:stats':
##
## step
library(dplyr)
data <- readxl::read_excel("CORT_Research.xlsx")
# Clean column names (remove spaces)
data_clean <- data %>%
rename(
Site = "Site",
Temperature = "Temperature",
pH = "pH",
Turbidity = "Turbidity")
# Convert to factors
data_clean <- data_clean %>%
mutate(
Fish_ID = factor(Fish_ID),
Site = factor(Site),
Month = factor(Month, levels = c("May","June","July","August","September","October")),
Sample_Time = factor(Sample_Time, levels = c("Initial","A","B"))
)
data_clean <- data_clean %>%
filter(!is.na(Cortisol))
data_swab <- data_clean %>%
group_by(Fish_ID) %>%
filter(all(c("Initial", "A", "B") %in% Sample_Time)) %>% # ensures all 3 exist
distinct(Fish_ID, Sample_Time, .keep_all = TRUE) %>% # removes duplicates
ungroup()
model_swab <- lmer(Cortisol ~ Sample_Time + (1 | Fish_ID), data = data_clean)
anova(model_swab)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Sample_Time 5676207 2838103 2 321.85 7.2839 0.0008057 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
emmeans(model_swab, pairwise ~ Sample_Time, adjust = "bonferroni")
## $emmeans
## Sample_Time emmean SE df lower.CL upper.CL
## Initial 1120 78.8 427 965 1275
## A 850 62.5 311 727 973
## B 836 66.0 340 706 966
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## Initial - A 270.5 79.4 327 3.408 0.0022
## Initial - B 284.3 80.9 321 3.515 0.0015
## A - B 13.8 67.1 305 0.206 1.0000
##
## Degrees-of-freedom method: kenward-roger
## P value adjustment: bonferroni method for 3 tests
ggplot(data_clean, aes(x = Sample_Time, y = Cortisol)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.15, alpha = 0.4) +
stat_summary(fun = mean, geom = "point", size = 2, color = "pink") +
labs(title = "Cortisol Across Sampling Times",
x = "Swab Time",
y = "Cortisol (pg/mL)") +
theme_classic()

kruskal_test(Cortisol ~ Site, data = data_clean)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Cortisol 465 27.0 4 0.0000194 Kruskal-Wallis
data_clean %>%
dunn_test(Cortisol ~ Site, p.adjust.method = "bonferroni")
## # A tibble: 10 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Cortisol Clyde Riv… Dunk … 110 94 0.627 5.31e-1 1 e+0 ns
## 2 Cortisol Clyde Riv… Pierr… 110 56 -1.01 3.12e-1 1 e+0 ns
## 3 Cortisol Clyde Riv… West … 110 97 -0.649 5.16e-1 1 e+0 ns
## 4 Cortisol Clyde Riv… Wilmo… 110 108 -4.19 2.73e-5 2.73e-4 ***
## 5 Cortisol Dunk River Pierr… 94 56 -1.51 1.32e-1 1 e+0 ns
## 6 Cortisol Dunk River West … 94 97 -1.23 2.18e-1 1 e+0 ns
## 7 Cortisol Dunk River Wilmo… 94 108 -4.65 3.28e-6 3.28e-5 ****
## 8 Cortisol Pierre Ja… West … 56 97 0.451 6.52e-1 1 e+0 ns
## 9 Cortisol Pierre Ja… Wilmo… 56 108 -2.44 1.46e-2 1.46e-1 ns
## 10 Cortisol West River Wilmo… 97 108 -3.42 6.36e-4 6.36e-3 **
ggplot(data_clean, aes(x = Site, y = Cortisol)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.2, alpha = 0.4) +
labs(title = "Cortisol Across River Sites",
x = "River Sites",
y = "Cortisol (pg/mL)") +
theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

kruskal_test(Cortisol ~ Month, data = data_clean)
## # A tibble: 1 × 6
## .y. n statistic df p method
## * <chr> <int> <dbl> <int> <dbl> <chr>
## 1 Cortisol 465 206. 5 1.75e-42 Kruskal-Wallis
data_clean %>%
dunn_test(Cortisol ~ Month, p.adjust.method = "bonferroni")
## # A tibble: 15 × 9
## .y. group1 group2 n1 n2 statistic p p.adj p.adj.signif
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl> <dbl> <chr>
## 1 Cortisol May June 19 56 -5.39 7.24e- 8 1.09e- 6 ****
## 2 Cortisol May July 19 72 -0.469 6.39e- 1 1 e+ 0 ns
## 3 Cortisol May August 19 107 1.31 1.90e- 1 1 e+ 0 ns
## 4 Cortisol May Septe… 19 106 0.936 3.49e- 1 1 e+ 0 ns
## 5 Cortisol May Octob… 19 105 -4.19 2.83e- 5 4.24e- 4 ***
## 6 Cortisol June July 56 72 7.34 2.06e-13 3.09e-12 ****
## 7 Cortisol June August 56 107 10.6 1.83e-26 2.75e-25 ****
## 8 Cortisol June Septe… 56 106 10.1 7.85e-24 1.18e-22 ****
## 9 Cortisol June Octob… 56 105 2.33 1.97e- 2 2.96e- 1 ns
## 10 Cortisol July August 72 107 2.93 3.35e- 3 5.03e- 2 ns
## 11 Cortisol July Septe… 72 106 2.32 2.04e- 2 3.06e- 1 ns
## 12 Cortisol July Octob… 72 105 -6.03 1.63e- 9 2.44e- 8 ****
## 13 Cortisol August Septe… 107 106 -0.678 4.98e- 1 1 e+ 0 ns
## 14 Cortisol August Octob… 107 105 -9.97 2.00e-23 3.01e-22 ****
## 15 Cortisol Septemb… Octob… 106 105 -9.27 1.78e-20 2.68e-19 ****
ggplot(data_clean, aes(x = Month, y = Cortisol)) +
geom_boxplot(fill = "lightblue") +
geom_jitter(width = 0.2, alpha = 0.4) +
labs(title = "Seasonal Variation in Cortisol",
x = "Month",
y = "Cortisol (pg/mL)") +
theme_classic()

model_env <- lmer(Cortisol ~ Temperature + DO + SPC + pH + Turbidity + (1|Site), data = data_clean)
summary(model_env)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Cortisol ~ Temperature + DO + SPC + pH + Turbidity + (1 | Site)
## Data: data_clean
##
## REML criterion at convergence: 7385.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.1948 -0.4604 -0.0972 0.2763 10.1440
##
## Random effects:
## Groups Name Variance Std.Dev.
## Site (Intercept) 237195 487.0
## Residual 571649 756.1
## Number of obs: 461, groups: Site, 5
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -8060.843 2383.866 451.489 -3.381 0.000784 ***
## Temperature 55.481 16.856 452.934 3.291 0.001075 **
## DO 75.973 11.190 453.868 6.790 3.53e-11 ***
## SPC 4.762 1.792 74.226 2.657 0.009650 **
## pH -14.791 300.504 454.717 -0.049 0.960764
## Turbidity -2.604 2.700 453.423 -0.965 0.335255
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Tmprtr DO SPC pH
## Temperature -0.097
## DO -0.171 -0.425
## SPC -0.273 -0.082 0.270
## pH -0.860 0.226 -0.296 -0.064
## Turbidity -0.271 -0.061 0.044 0.075 0.238
anova(model_env)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Temperature 6193078 6193078 1 452.93 10.8337 0.001075 **
## DO 26351570 26351570 1 453.87 46.0974 3.535e-11 ***
## SPC 4035287 4035287 1 74.23 7.0590 0.009650 **
## pH 1385 1385 1 454.72 0.0024 0.960764
## Turbidity 531904 531904 1 453.42 0.9305 0.335255
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Temperature example
ggplot(data_clean, aes(x = Temperature, y = Cortisol)) +
geom_point() +
geom_smooth(method = "lm") +
theme_classic() +
labs(title = "Cortisol vs Temperature")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data_clean, aes(x = DO, y = Cortisol)) +
geom_point() +
geom_smooth(method = "lm") +
theme_classic() +
labs(title = "Cortisol vs DO")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data_clean, aes(x = pH, y = Cortisol)) +
geom_point() +
geom_smooth(method = "lm") +
theme_classic() +
labs(title = "Cortisol vs pH")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(data_clean, aes(x = SPC, y = Cortisol)) +
geom_point() +
geom_smooth(method = "lm") +
theme_classic() +
labs(title = "Cortisol vs SPC")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

data_clean <- data_clean %>%
mutate(Species = factor(Fish_Species))
data_clean$Species <- factor(data_clean$Species,
levels = c("Brook Trout", "Rainbow Trout"))
wilcox_test(Cortisol ~ Species, data = data_clean)
## # A tibble: 1 × 7
## .y. group1 group2 n1 n2 statistic p
## * <chr> <chr> <chr> <int> <int> <dbl> <dbl>
## 1 Cortisol Brook Trout Rainbow Trout 263 202 27188. 0.664
library(rstatix)
species_stats <- data_clean %>%
wilcox_test(Cortisol ~ Species) %>%
mutate(
y.position = max(data_clean$Cortisol, na.rm = TRUE) * 1.1
)
library(ggpubr)
ggplot(data_clean, aes(x = Species, y = Cortisol, fill = Species)) +
geom_boxplot(alpha = 0.7) +
geom_jitter(width = 0.15, alpha = 0.4) +
labs(title = "Differences in Cortisol Between Trout Species",
x = "Species",
y = "Cortisol (pg/mL)") +
theme_classic() +
theme(legend.position = "none")

model_species <- lmer(Cortisol ~ Species + (1|Site) + (1|Fish_ID), data = data_clean)
anova(model_species)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Species 182953 182953 1 171.33 0.4582 0.4994
emmeans::emmeans(model_species, pairwise ~ Species)
## $emmeans
## Species emmean SE df lower.CL upper.CL
## Brook Trout 914 104 5.91 659 1170
## Rainbow Trout 841 116 7.71 572 1109
##
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## Brook Trout - Rainbow Trout 73.9 111 166 0.666 0.5065
##
## Degrees-of-freedom method: kenward-roger
p1 <- ggplot(data_clean, aes(x = Temperature, y = Cortisol)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(title = "Temperature", x = "Temperature (°C)", y = "Cortisol") +
theme_classic()
p2 <- ggplot(data_clean, aes(x = DO, y = Cortisol)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(title = "Dissolved Oxygen", x = "DO (mg/L)", y = "Cortisol") +
theme_classic()
p3 <- ggplot(data_clean, aes(x = SPC, y = Cortisol)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(title = "Specific Conductivity", x = "SPC", y = "Cortisol") +
theme_classic()
p4 <- ggplot(data_clean, aes(x = pH, y = Cortisol)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm") +
labs(title = "pH", x = "pH", y = "Cortisol") +
theme_classic()
ggarrange(p1, p2, p3, p4,
ncol = 2, nrow = 3,
labels = c("A", "B", "C", "D"))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggarrange(p1, p2, p3, p4,
ncol = 2, nrow = 2,
labels = c("A", "B", "C", "D"))
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4 rows containing non-finite outside the scale range (`stat_smooth()`).
## Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
