Blotching in Caribbean Reef Sharks

Author

Alice Louise

Initial Housekeeping

library(tidyverse)
Warning: package 'ggplot2' was built under R version 4.4.2
Warning: package 'lubridate' was built under R version 4.4.2
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(knitr)
Warning: package 'knitr' was built under R version 4.4.2
library(readxl)
Warning: package 'readxl' was built under R version 4.4.2
library(ggplot2)
library(tidyr)

________________________________________________________________________________________________________________

Assignment Context

Background: This assignment is submitted as part of the ‘Research Methods and Data Analysis’ module for the MSc in Endangered Species Recovery and Conservation at Nottingham Trent University. The data set provided concerns the phenomenon of ‘blotching’ and whether it is a stress response caused by the capture and tagging of the Caribbean Reef Shark (Carcharhinus perezi).

________________________________________________________________________________________________________________

Air and Water

Question One: Is there a correlation between the variables of air temperature and water temperature?

Answer:

# Load the air/water temperature readings from the "Spearman" worksheet.
CodeR_Water_Air_ <- read_excel(
  path = "C:/Users/alice/Downloads/Temp R File/CodeR - Water Air .xlsx",
  sheet = "Spearman"
)

# Distribution of air temperature: 30 bins, with a bin edge aligned to 0.
ggplot(data = CodeR_Water_Air_, mapping = aes(x = Air)) +
  geom_histogram(
    bins = 30,
    boundary = 0,
    fill = "ivory3",
    color = "seashell3",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram of Air Temperature",
    x = "Air Temperature (°C)",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of water temperature: 30 bins, with a bin edge aligned to 0.
# Reuses the CodeR_Water_Air_ data frame loaded from the "Spearman" sheet
# earlier in this section, so the workbook is not read from disk again.
ggplot(CodeR_Water_Air_, aes(x = Water)) +
  geom_histogram(
    bins = 30,
    fill = "lightblue",
    color = "skyblue",
    alpha = 0.7,
    boundary = 0
  ) +
  labs(
    title = "Histogram of Water Temperature",
    x = "Water Temperature (°C)",
    y = "Frequency"
  ) +
  theme_minimal()

# Spearman rank correlation between air and water temperature.
# Uses the CodeR_Water_Air_ data frame already loaded earlier in this section
# (same workbook, same "Spearman" sheet), so no further read is needed.
cor.test(CodeR_Water_Air_$Air, CodeR_Water_Air_$Water, method = "spearman")

    Spearman's rank correlation rho

data:  CodeR_Water_Air_$Air and CodeR_Water_Air_$Water
S = 22007692, p-value = 0.2082
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.05637344 
# Scatter of water temperature against air temperature, overlaid with a
# straight-line (lm) trend drawn without its confidence band.
ggplot(data = CodeR_Water_Air_, mapping = aes(x = Air, y = Water)) +
  geom_point(size = 2, color = "skyblue1") +
  geom_smooth(method = "lm", se = FALSE, color = "slategray3") +
  labs(
    title = "Spearman Rank Correlation: Air vs Water Temperature",
    x = "Air Temperature (°C)",
    y = "Water Temperature (°C)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

________________________________________________________________________________________________________________

Recapture Data

Question Two: Does being captured on a second occasion have an impact on the time it takes for blotching to occur?

Answer:

# Load the recapture workbook (first sheet, the read_excel default).
R_Code_Recapture_Time <- read_excel(
  path = "C:/Users/alice/Downloads/Temp R File/R Code - Recapture Time.xlsx"
)

# List the column names so the exact spellings can be referenced below.
names(R_Code_Recapture_Time)
[1] "Initial Capture" "Re-Capture"     
# Pull each capture column out of the tibble as a plain vector.
initial_capture_data <- R_Code_Recapture_Time[["Initial Capture"]]
recapture_data <- R_Code_Recapture_Time[["Re-Capture"]]

# Shapiro-Wilk normality test on each sample; results are stored so they can
# be printed under their own headings.
shapiro_initial_capture <- shapiro.test(initial_capture_data)
shapiro_recapture <- shapiro.test(recapture_data)

# Heading for the first test result.
cat("Shapiro-Wilk Test for Initial Capture Data:\n")
Shapiro-Wilk Test for Initial Capture Data:
# Print the stored Shapiro-Wilk result (W statistic and p-value) for the
# initial-capture sample.
print(shapiro_initial_capture)

    Shapiro-Wilk normality test

data:  initial_capture_data
W = 0.97958, p-value = 0.5345
# Heading for the re-capture test result.
cat("Shapiro-Wilk Test for Re-Capture Data:\n")
Shapiro-Wilk Test for Re-Capture Data:
# Print the stored Shapiro-Wilk result (W statistic and p-value) for the
# re-capture sample.
print(shapiro_recapture)

    Shapiro-Wilk normality test

data:  recapture_data
W = 0.97936, p-value = 0.5255
# Histogram of the initial-capture values, using 0.2-wide bins.
# The vector is wrapped in a data frame because ggplot() requires one.
ggplot(data.frame(initial_capture_data), aes(x = initial_capture_data)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "thistle",
    color = "thistle4",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram showing how quickly blotching occurred during initial capture",
    x = "Capture Time",
    y = "Frequency"
  ) +
  theme_minimal()

# Same histogram for the re-capture values.
ggplot(data.frame(recapture_data), aes(x = recapture_data)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "seagreen3",
    color = "mediumseagreen",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram showing how quickly blotching occurred during re-capture",
    x = "Capture Time",
    y = "Frequency"
  ) +
  theme_minimal()

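Both Shapiro-Wilk tests above give p > 0.05 (initial capture p = 0.5345; re-capture p = 0.5255), so there is no evidence that either sample departs from a normal distribution. A parametric test is therefore appropriate, and I will run a Welch two-sample t-test.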

# Compare mean time between the initial-capture and re-capture columns.
# Reuses R_Code_Recapture_Time, which was loaded earlier in this section, so
# the workbook is not read from disk again.
# t.test() with two samples and default arguments runs Welch's test, which
# does not assume equal variances.
# NOTE(review): if each row holds the same shark's initial and re-capture
# times, a paired test (paired = TRUE) may be the appropriate design -- confirm
# against how the data were collected.
t.test(R_Code_Recapture_Time$`Initial Capture`, R_Code_Recapture_Time$`Re-Capture`)

    Welch Two Sample t-test

data:  R_Code_Recapture_Time$`Initial Capture` and R_Code_Recapture_Time$`Re-Capture`
t = -4.1143, df = 97.658, p-value = 8.113e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3782038 -0.4812731
sample estimates:
mean of x mean of y 
 35.03042  35.96016 
# Reshape to long format: one row per measurement, with the source column
# name in `capture_time` and the measured value in `time`.
data_long <- pivot_longer(
  R_Code_Recapture_Time,
  cols = c(`Initial Capture`, `Re-Capture`),
  names_to = "capture_time",
  values_to = "time"
)

# Side-by-side boxplots of the two capture occasions; the legend title is
# removed because the x axis already names the groups.
ggplot(
  data = data_long,
  mapping = aes(x = capture_time, y = time, fill = capture_time)
) +
  geom_boxplot(color = "peachpuff4") +
  labs(
    title = "Boxplot Comparison of Initial Capture and Re-Capture Times",
    x = "Capture Time",
    y = "Time"
  ) +
  scale_fill_manual(values = c("peachpuff2", "navajowhite3")) +
  theme_minimal() +
  guides(fill = guide_legend(title = NULL))

________________________________________________________________________________________________________________

Predicting Blotching Time

Question Three: Is it possible to predict the time it takes for blotching to occur?

Answer: Predicting Blotching

# Load the shark data set from CSV.
R_Code_CSV_Shark_Data_Set <- read.csv(
  file = "C:/Users/alice/Downloads/Temp R File/R Code - CSV Shark Data Set.csv"
)

# List the available variables before building the model.
names(R_Code_CSV_Shark_Data_Set)
[1] "sex"         "blotch.time" "BPM"         "weight"      "length"     
[6] "air.temp"    "water.temp"  "cortisol"    "depth"      
# Multiple linear regression of blotch time on every other recorded variable.
# Term order is kept fixed so the coefficient table lists them in this order.
model <- lm(
  blotch.time ~ BPM + weight + length + air.temp + water.temp + cortisol +
    depth + sex,
  data = R_Code_CSV_Shark_Data_Set
)

# Coefficient estimates, significance tests and overall fit statistics.
summary(model)

Call:
lm(formula = blotch.time ~ BPM + weight + length + air.temp + 
    water.temp + cortisol + depth + sex, data = R_Code_CSV_Shark_Data_Set)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.97715 -0.66193 -0.00841  0.64123  2.90395 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 11.1179728  1.8749828   5.930 5.73e-09 ***
BPM         -0.0020791  0.0031540  -0.659  0.51009    
weight       0.0017281  0.0033143   0.521  0.60231    
length       0.0013042  0.0009606   1.358  0.17517    
air.temp    -0.0310068  0.0315302  -0.983  0.32590    
water.temp  -0.0143878  0.0268112  -0.537  0.59176    
cortisol    -0.0011610  0.0025671  -0.452  0.65127    
depth        0.5034077  0.0220870  22.792  < 2e-16 ***
sexMale      0.3088617  0.0890602   3.468  0.00057 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9912 on 491 degrees of freedom
Multiple R-squared:  0.5256,    Adjusted R-squared:  0.5178 
F-statistic: 67.99 on 8 and 491 DF,  p-value: < 2.2e-16
# Blotch time plotted against depth, with a straight-line (lm) fit drawn
# without its confidence band.
ggplot(
  data = R_Code_CSV_Shark_Data_Set,
  mapping = aes(x = depth, y = blotch.time)
) +
  geom_point(size = 3, color = "thistle3") +
  geom_smooth(method = "lm", se = FALSE, color = "ivory4") +
  labs(
    title = "Linear Regression of Blotch Time vs Depth",
    x = "Depth",
    y = "Blotch Time"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

# Boxplots of blotch time for each sex. Both the fill and the outline colour
# are mapped to sex; the manual palettes are applied in the order of the sex
# levels.
ggplot(
  data = R_Code_CSV_Shark_Data_Set,
  mapping = aes(x = sex, y = blotch.time, fill = sex, color = sex)
) +
  geom_boxplot() +
  labs(
    title = "Box Plot showing Blotch Time by Sex",
    x = "Sex",
    y = "Blotch Time"
  ) +
  scale_fill_manual(values = c("pink", "lightskyblue")) +
  scale_color_manual(values = c("lightpink1", "skyblue3")) +
  theme_minimal()

________________________________________________________________________________________________________________