Shark Blotching Analysis

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

# loading the files dataset
library(readxl)
file_path <- "/Users/jamiegriffiths/Desktop/sharks.xlsx"
sharks <- read_excel(file_path)
# loading the necessary packages
options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("tidyverse")

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
install.packages("ggplot2")

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
install.packages(c("FactoMineR", "factoextra")) 

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
library(FactoMineR)
library(factoextra)
Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
install.packages("ggrepel")

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
library(ggrepel)
install.packages("plotly")

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
install.packages("effsize")

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
library(effsize)
install.packages("interactions")

The downloaded binary packages are in
    /var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//RtmpHGfiv1/downloaded_packages
library(interactions)
# Viewing the dataset 
head(sharks) # displaying the first few rows of the dataset 
# A tibble: 6 × 10
  ID    sex    blotch   BPM weight length   air water  meta depth
  <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH001 Female   37.2   148   74.7   187.  37.7  23.4  64.1  53.2
2 SH002 Female   34.5   158   73.4   189.  35.7  21.4  73.7  49.6
3 SH003 Female   36.3   125   71.8   284.  34.8  20.1  54.4  49.4
4 SH004 Male     35.3   161  105.    171.  36.2  21.6  86.3  50.3
5 SH005 Female   37.4   138   67.1   264.  33.6  21.8 108.   49.0
6 SH006 Male     33.5   126  110.    270.  36.4  20.9 109.   46.8
tail(sharks) # displaying the last few rows of the dataset
# A tibble: 6 × 10
  ID    sex    blotch   BPM weight length   air water  meta depth
  <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH495 Female   34.9   165  104.    149.  33.4  21.1  59.2  47.3
2 SH496 Male     34.6   150  109.    257.  34.9  23.7  60.4  48.5
3 SH497 Male     32.1   127   97.3   264.  33.4  24.9  54.1  45.4
4 SH498 Female   37.1   151   88.9   142.  35.9  22.0  82.4  53.1
5 SH499 Male     35.3   155   78.1   148.  33.3  22.1  99.2  52.1
6 SH500 Female   35.3   143  105.    159.  36.1  24.6  87.1  49.3
summary(sharks) # this provides values that are of potential importance such as min, max, mean and median
      ID                sex                blotch           BPM       
 Length:500         Length:500         Min.   :30.78   Min.   :119.0  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.:129.0  
 Mode  :character   Mode  :character   Median :35.05   Median :142.0  
                                       Mean   :35.13   Mean   :141.8  
                                       3rd Qu.:36.05   3rd Qu.:153.2  
                                       Max.   :40.08   Max.   :166.0  
     weight           length           air            water      
 Min.   : 65.10   Min.   :128.3   Min.   :33.00   Min.   :20.01  
 1st Qu.: 75.68   1st Qu.:172.0   1st Qu.:34.42   1st Qu.:21.55  
 Median : 87.82   Median :211.1   Median :35.43   Median :23.11  
 Mean   : 87.94   Mean   :211.0   Mean   :35.54   Mean   :23.02  
 3rd Qu.:100.40   3rd Qu.:251.8   3rd Qu.:36.71   3rd Qu.:24.37  
 Max.   :110.94   Max.   :291.0   Max.   :38.00   Max.   :25.99  
      meta            depth      
 Min.   : 50.03   Min.   :44.64  
 1st Qu.: 67.39   1st Qu.:48.90  
 Median : 82.45   Median :50.14  
 Mean   : 82.04   Mean   :50.14  
 3rd Qu.: 95.97   3rd Qu.:51.35  
 Max.   :112.45   Max.   :56.83  
str(sharks) # this shows how the dataset is structured 
tibble [500 × 10] (S3: tbl_df/tbl/data.frame)
 $ ID    : chr [1:500] "SH001" "SH002" "SH003" "SH004" ...
 $ sex   : chr [1:500] "Female" "Female" "Female" "Male" ...
 $ blotch: num [1:500] 37.2 34.5 36.3 35.3 37.4 ...
 $ BPM   : num [1:500] 148 158 125 161 138 126 166 135 132 127 ...
 $ weight: num [1:500] 74.7 73.4 71.8 104.6 67.1 ...
 $ length: num [1:500] 187 189 284 171 264 ...
 $ air   : num [1:500] 37.7 35.7 34.8 36.2 33.6 ...
 $ water : num [1:500] 23.4 21.4 20.1 21.6 21.8 ...
 $ meta  : num [1:500] 64.1 73.7 54.4 86.3 108 ...
 $ depth : num [1:500] 53.2 49.6 49.4 50.3 49 ...
colSums(is.na(sharks)) # this identifies any potential missing values
    ID    sex blotch    BPM weight length    air  water   meta  depth 
     0      0      0      0      0      0      0      0      0      0 

Analysing the data set and identifying if blotching time can be predicted

#  Data Analysis of Blotching Time and Each Variable 

ggplot(sharks, aes(x = water, y = blotch, colour = sex)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Blotching Time vs. Water Temperature", x = "Water (°C)", y = "Blotch (seconds)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = sex, y = water, fill = sex)) +
  geom_boxplot() +
  labs(title = "Water Temperature by sex", x = "Sex", y = "Water temperature (°C)")

ggplot(sharks, aes(x = meta, y = blotch, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Blotching Time vs Cortisol Levels", x = "Meta (mcg/dl)", y = "Blotch (seconds)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = air, y = blotch, color = sex)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Blotching Time vs Air Temperature", y = "Blotch (seconds)", x = "Air (°C)") + 
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = sex, y = blotch, fill = sex)) +
  geom_boxplot() +
  labs(title = "Blotching Time by Sex", x = "Sex", y = "Blotch (seconds)") +
  theme_minimal()

ggplot(sharks, aes(x = weight, y = blotch, color = sex)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Blotching Time vs Weight", x = "Weight (Kg)", y = "Blotch (seconds)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = sex, y = weight, fill = sex)) +
  geom_boxplot() +
  labs(title = "Weight by Sex", x = "Sex", y = "Weight (Kg)") +
  theme_minimal()

ggplot(sharks, aes(x = depth, y = blotch, colour = sex)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Depth vs Blotching time", x = "Depth (Metres)", y = "Blotch (seconds)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = sex, y = depth, fill = sex)) +
  geom_boxplot() +
  labs(title = "Depth by Sex", x = "Sex", y = "Depth (Metres)") +
  theme_minimal() 

ggplot(sharks, aes(x = BPM, y = blotch, color = sex)) + 
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Beats Per Minute (BPM) vs Blotching Time", x = "BPM", y = "Blotch (Seconds)") + 
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = length, y = blotch, color = sex)) +
  geom_point(size = 2) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Length of Shark vs Blotching Time", x = "Length (cm)", y = "Blotch (seconds)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

blotching.lm <- lm(blotch ~ length + BPM + meta + weight + depth + water + air + sex, data = sharks)
summary(blotching.lm)

Call:
lm(formula = blotch ~ length + BPM + meta + weight + depth + 
    water + air + sex, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.97715 -0.66193 -0.00841  0.64123  2.90395 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 11.1179728  1.8749828   5.930 5.73e-09 ***
length       0.0013042  0.0009606   1.358  0.17517    
BPM         -0.0020791  0.0031540  -0.659  0.51009    
meta        -0.0011610  0.0025671  -0.452  0.65127    
weight       0.0017281  0.0033143   0.521  0.60231    
depth        0.5034077  0.0220870  22.792  < 2e-16 ***
water       -0.0143878  0.0268112  -0.537  0.59176    
air         -0.0310068  0.0315302  -0.983  0.32590    
sexMale      0.3088617  0.0890602   3.468  0.00057 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9912 on 491 degrees of freedom
Multiple R-squared:  0.5256,    Adjusted R-squared:  0.5178 
F-statistic: 67.99 on 8 and 491 DF,  p-value: < 2.2e-16
blotching.interaction1 <- lm(blotch ~ depth * length, data = sharks)
summary(blotching.interaction1)

Call:
lm(formula = blotch ~ depth * length, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.8201 -0.6498 -0.0215  0.6367  2.7734 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)  11.3192910  5.1583732   2.194   0.0287 *  
depth         0.4692827  0.1025259   4.577 5.96e-06 ***
length       -0.0076817  0.0237878  -0.323   0.7469    
depth:length  0.0001795  0.0004734   0.379   0.7047    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1 on 496 degrees of freedom
Multiple R-squared:  0.5121,    Adjusted R-squared:  0.5092 
F-statistic: 173.6 on 3 and 496 DF,  p-value: < 2.2e-16
blotching.interation2 <- lm(blotch ~ depth * water, data = sharks)
summary(blotching.interation2)

Call:
lm(formula = blotch ~ depth * water, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.84048 -0.64050  0.01152  0.60871  2.80332 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.936735  15.454846   0.837    0.403
depth        0.451254   0.308615   1.462    0.144
water       -0.134408   0.672499  -0.200    0.842
depth:water  0.002303   0.013432   0.171    0.864

Residual standard error: 1.002 on 496 degrees of freedom
Multiple R-squared:  0.5107,    Adjusted R-squared:  0.5077 
F-statistic: 172.5 on 3 and 496 DF,  p-value: < 2.2e-16
blotching.interaction3 <- lm(blotch ~ depth * meta, data = sharks)
summary(blotching.interaction3)

Call:
lm(formula = blotch ~ depth * meta, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.79278 -0.66090  0.00394  0.59395  2.79845 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.068423   5.503900  -0.194   0.8462    
depth        0.723949   0.109707   6.599 1.07e-10 ***
meta         0.132362   0.065555   2.019   0.0440 *  
depth:meta  -0.002665   0.001307  -2.040   0.0419 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9977 on 496 degrees of freedom
Multiple R-squared:  0.5144,    Adjusted R-squared:  0.5115 
F-statistic: 175.2 on 3 and 496 DF,  p-value: < 2.2e-16
blotching.interaction4 <- lm(blotch ~ depth * air, data = sharks)
summary(blotching.interaction4)

Call:
lm(formula = blotch ~ depth * air, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.86509 -0.66483  0.00611  0.60406  2.84002 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept) -52.10931   27.84438  -1.871  0.06187 . 
depth         1.76230    0.55568   3.171  0.00161 **
air           1.75187    0.78678   2.227  0.02642 * 
depth:air    -0.03557    0.01570  -2.265  0.02392 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9961 on 496 degrees of freedom
Multiple R-squared:  0.516, Adjusted R-squared:  0.513 
F-statistic: 176.2 on 3 and 496 DF,  p-value: < 2.2e-16
blotching.interaction5 <- lm(blotch ~ depth * weight, data = sharks)
summary(blotching.interaction5)

Call:
lm(formula = blotch ~ depth * weight, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.84521 -0.65358  0.00054  0.63162  2.80736 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  19.458715   7.580224   2.567   0.0105 *
depth         0.310076   0.150965   2.054   0.0405 *
weight       -0.108956   0.084754  -1.286   0.1992  
depth:weight  0.002200   0.001688   1.304   0.1930  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1 on 496 degrees of freedom
Multiple R-squared:  0.512, Adjusted R-squared:  0.509 
F-statistic: 173.4 on 3 and 496 DF,  p-value: < 2.2e-16
blotching.interaction6 <- lm(blotch ~ depth * BPM, data = sharks)
summary(blotching.interaction6)

Call:
lm(formula = blotch ~ depth * BPM, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.77396 -0.64941  0.00761  0.60376  2.82741 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)   
(Intercept)  2.899456  11.555496   0.251  0.80198   
depth        0.648798   0.230571   2.814  0.00509 **
BPM          0.048914   0.081163   0.603  0.54701   
depth:BPM   -0.001018   0.001620  -0.629  0.52979   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared:  0.5109,    Adjusted R-squared:  0.508 
F-statistic: 172.7 on 3 and 496 DF,  p-value: < 2.2e-16
blotching.interaction7 <- lm(blotch ~ depth * sex, data = sharks)
summary(blotching.interaction7)

Call:
lm(formula = blotch ~ depth * sex, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9614 -0.6388 -0.0179  0.5981  2.9815 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   10.14057    1.58486   6.398 3.64e-10 ***
depth          0.49510    0.03164  15.650  < 2e-16 ***
sexMale       -0.33657    2.20540  -0.153    0.879    
depth:sexMale  0.01277    0.04396   0.291    0.771    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9904 on 496 degrees of freedom
Multiple R-squared:  0.5215,    Adjusted R-squared:  0.5186 
F-statistic: 180.2 on 3 and 496 DF,  p-value: < 2.2e-16
depth_model <- lm(blotch ~ depth, data = sharks)
summary(depth_model)

Call:
lm(formula = blotch ~ depth, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81869 -0.65427 -0.01035  0.58825  2.83116 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.82178    1.11207   8.832   <2e-16 ***
depth        0.50467    0.02216  22.772   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1 on 498 degrees of freedom
Multiple R-squared:  0.5101,    Adjusted R-squared:  0.5091 
F-statistic: 518.6 on 1 and 498 DF,  p-value: < 2.2e-16
confint(depth_model)
                2.5 %     97.5 %
(Intercept) 7.6368581 12.0067017
depth       0.4611309  0.5482156
anova(depth_model)
Analysis of Variance Table

Response: blotch
           Df Sum Sq Mean Sq F value    Pr(>F)    
depth       1 518.70   518.7  518.57 < 2.2e-16 ***
Residuals 498 498.12     1.0                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
sex_model <- lm(blotch ~ sex, data = sharks)
summary(sex_model)

Call:
lm(formula = blotch ~ sex, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.1471 -0.9653 -0.0222  0.9889  4.7772 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 34.92294    0.09217 378.885  < 2e-16 ***
sexMale      0.38347    0.12685   3.023  0.00263 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.416 on 498 degrees of freedom
Multiple R-squared:  0.01802,   Adjusted R-squared:  0.01605 
F-statistic: 9.139 on 1 and 498 DF,  p-value: 0.002632
anova(sex_model)
Analysis of Variance Table

Response: blotch
           Df Sum Sq Mean Sq F value   Pr(>F)   
sex         1  18.32  18.323  9.1387 0.002632 **
Residuals 498 998.50   2.005                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
blotching.multi.interaction <- lm(blotch ~ depth * air * meta, data = sharks)
summary(blotching.multi.interaction)

Call:
lm(formula = blotch ~ depth * air * meta, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.83374 -0.66661  0.00818  0.61764  2.80251 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)  
(Intercept)    -2.643e+02  1.449e+02  -1.824   0.0687 .
depth           6.017e+00  2.889e+00   2.082   0.0378 *
air             7.476e+00  4.112e+00   1.818   0.0697 .
meta            2.558e+00  1.696e+00   1.508   0.1322  
depth:air      -1.503e-01  8.199e-02  -1.833   0.0674 .
depth:meta     -5.126e-02  3.380e-02  -1.517   0.1300  
air:meta       -6.895e-02  4.811e-02  -1.433   0.1524  
depth:air:meta  1.381e-03  9.584e-04   1.441   0.1501  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9939 on 492 degrees of freedom
Multiple R-squared:  0.522, Adjusted R-squared:  0.5152 
F-statistic: 76.76 on 7 and 492 DF,  p-value: < 2.2e-16
pca_data <- sharks[, !(names(sharks) %in% c("ID", "sex"))]
pca <- PCA(pca_data, scale.unit = TRUE, quanti.sup = which(names(pca_data)== "blotch"), graph = FALSE)
blotch_coords <- pca$quanti.sup$coord[1, 1:2]
pca_plot <- fviz_pca_var(pca, col.var = "contrib", repel = TRUE, labelsize = 5) +
  geom_point(data = data.frame(x = blotch_coords[1],
                               y = blotch_coords[2]),
             aes(x = x, y = y), color = "red", size = 3) +
  geom_text_repel(data = data.frame(x = blotch_coords[1], y = blotch_coords[2]),
                  aes(x = x, y = y, label = "blotch"), color = "red", size = 5) +
  ggtitle("Variable Contributions to Blotching Time") +
  theme_minimal()
print(pca_plot)

blotching.multi.interaction1 <- lm(blotch ~ depth * air + depth * meta, data = sharks)
summary(blotching.multi.interaction1)

Call:
lm(formula = blotch ~ depth * air + depth * meta, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.84274 -0.66770  0.02015  0.61320  2.82711 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -60.121062  28.066377  -2.142 0.032673 *  
depth         1.924737   0.560299   3.435 0.000642 ***
air           1.681381   0.787225   2.136 0.033184 *  
meta          0.127681   0.065506   1.949 0.051842 .  
depth:air    -0.034186   0.015706  -2.177 0.029980 *  
depth:meta   -0.002570   0.001305  -1.969 0.049485 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.994 on 494 degrees of freedom
Multiple R-squared:   0.52, Adjusted R-squared:  0.5151 
F-statistic:   107 on 5 and 494 DF,  p-value: < 2.2e-16
blotching.multi.interaction2 <- lm(blotch ~ depth * sex + air * sex, data = sharks)
summary(blotching.multi.interaction2)

Call:
lm(formula = blotch ~ depth * sex + air * sex, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.93824 -0.65805 -0.01876  0.60904  2.82219 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   13.06581    2.21240   5.906 6.54e-09 ***
depth          0.49819    0.03162  15.753  < 2e-16 ***
sexMale       -3.80126    3.13609  -1.212   0.2261    
air           -0.08678    0.04587  -1.892   0.0591 .  
depth:sexMale  0.01041    0.04397   0.237   0.8129    
sexMale:air    0.10092    0.06237   1.618   0.1063    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9888 on 494 degrees of freedom
Multiple R-squared:  0.525, Adjusted R-squared:  0.5202 
F-statistic: 109.2 on 5 and 494 DF,  p-value: < 2.2e-16
ggplot(sharks, aes(x = sex, y = air, fill = sex)) + geom_boxplot() +
  labs(title = "Air Teperature by Sex", x = "Sex", y = "Air (°C)") +
  theme_minimal()

sharks %>%
  group_by(sex) %>%
  summarise(mean_air = mean(air), sd_air = sd(air))
# A tibble: 2 × 3
  sex    mean_air sd_air
  <chr>     <dbl>  <dbl>
1 Female     35.5   1.41
2 Male       35.6   1.45
t.test(air ~ sex, data = sharks)

    Welch Two Sample t-test

data:  air by sex
t = -0.71294, df = 494.41, p-value = 0.4762
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.3421426  0.1599525
sample estimates:
mean in group Female   mean in group Male 
            35.48716             35.57826 
lm_air <- lm(air ~ sex, data = sharks)
summary(lm_air)

Call:
lm(formula = air ~ sex, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.57371 -1.13870 -0.06008  1.17279  2.50850 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 35.48716    0.09299 381.634   <2e-16 ***
sexMale      0.09110    0.12797   0.712    0.477    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared:  0.001016,  Adjusted R-squared:  -0.0009895 
F-statistic: 0.5067 on 1 and 498 DF,  p-value: 0.4769
model_full <- lm(blotch ~ depth + sex + air, data = sharks)
summary(model_full)

Call:
lm(formula = blotch ~ depth + sex + air, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.01554 -0.63019 -0.01043  0.62608  2.92694 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 10.98152    1.56709   7.008 7.95e-12 ***
depth        0.50142    0.02194  22.850  < 2e-16 ***
sexMale      0.30681    0.08875   3.457 0.000593 ***
air         -0.03260    0.03104  -1.050 0.294079    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9894 on 496 degrees of freedom
Multiple R-squared:  0.5225,    Adjusted R-squared:  0.5196 
F-statistic: 180.9 on 3 and 496 DF,  p-value: < 2.2e-16
ggplot(sharks, aes(x = depth, y = blotch, color = air, group = as.factor(air))) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  labs(title = "Interaction Between Depth and Air on Blotching Time",
       x = "Depth (meters)",
       y = "Blotching Time (seconds)",
       color = "Air (°C)") +
  theme_minimal()

ggplot(sharks, aes(x = depth, y = blotch, color = sex)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_manual(values = c("Female" = "lightblue", "Male" = "red")) +
  labs(title = "Interaction Between Depth and Sex on Blotching Time",
       x = "Depth (meters)",
       y = "Blotching Time (seconds)",
       color = "Sex") +
  theme_minimal() +
  facet_wrap(~sex)
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = depth, y = blotch, color = sex, group = sex)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x * meta, se = FALSE) + 
  scale_color_manual(values = c("Female" = "lightblue", "Male" = "red")) +
  labs(title = "Interaction Between Depth and sex on Blotching Time",
       x = "Depth (meters)",
       y = "Blotching Time (seconds)",
       color = "sex") +
  theme_minimal()
Warning: Failed to fit group 1.
Caused by error:
! object 'meta' not found
Warning: Failed to fit group 2.
Caused by error:
! object 'meta' not found

ggplot(sharks, aes(x = depth, y = blotch, color = meta, group = meta)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x * meta, se = FALSE) + 
  scale_color_gradient(low = "lightblue", high = "red") +
  labs(title = "Interaction Between Depth and Meta on Blotching Time",
       x = "Depth (meters)",
       y = "Blotching Time (seconds)",
       color = "Meta (mcg/dl)") +
  theme_minimal()

AIC(blotching.interaction3, blotching.interaction4, blotching.interaction7, blotching.multi.interaction, blotching.multi.interaction1)
                             df      AIC
blotching.interaction3        5 1422.642
blotching.interaction4        5 1421.045
blotching.interaction7        5 1415.313
blotching.multi.interaction   9 1422.772
blotching.multi.interaction1  7 1420.912
BIC(blotching.interaction3, blotching.interaction4, blotching.interaction7, blotching.multi.interaction, blotching.multi.interaction1) 
                             df      BIC
blotching.interaction3        5 1443.715
blotching.interaction4        5 1442.118
blotching.interaction7        5 1436.386
blotching.multi.interaction   9 1460.703
blotching.multi.interaction1  7 1450.414
ggplot(sharks, aes(x = depth, y = air, color = blotch, size = meta)) +
  geom_point(alpha = 0.7) +
  scale_color_gradient(low = "lightblue", high = "red") +
  scale_size(range = c(1, 4)) +
  labs(title = "Interaction of Depth x Air x Meta",
    x = "Depth",
    y = "Air",
    color = "Blotching Time",
    size = "Meta"
  ) +
  theme_minimal()

plot_ly(sharks,
        x = ~depth,
        y = ~air,
        z = ~blotch,
        type = "scatter3d",
        mode = "markers",
        marker = list(
          size = 3,
          color = ~blotch,
          colorscale = "viridis",
          colorbar = list(title = "Blotch Time")
        )) %>%
  layout(scene = list(
    xaxis = list(title = "Depth"),
    yaxis = list(title = "Air"),
    zaxis = list(title = "Blotch"),
    camera = list(
      eye = list(x = 1.5, y = 1.5, z = 0.7)
    )
  ))
data.predict <- data.frame(depth = c (45,50, 55), air = c(34, 36, 38))
predict(blotching.interaction4, data.predict, interval = "confidence")
       fit      lwr      upr
1 32.33199 31.99694 32.66703
2 35.04269 34.95046 35.13492
3 37.04195 36.55451 37.52940
data.predict2 <- data.frame(depth = c (45, 50, 55), sex = c("Male"))
predict(blotching.interaction7, data.predict2, interval = "confidence")
       fit      lwr      upr
1 32.65853 32.32377 32.99330
2 35.19793 35.07748 35.31838
3 37.73732 37.42634 38.04830
data.predictF <- data.frame(depth = c (45, 50, 55), sex = c("Female"))
predict(blotching.interaction7, data.predictF, interval = "confidence")
       fit      lwr      upr
1 32.42029 32.08152 32.75906
2 34.89581 34.76910 35.02253
3 37.37134 37.03888 37.70380
plot_ly(sharks,
        x = ~depth,
        y = ~sex,
        z = ~blotch,
        type = "scatter3d",
        mode = "markers",
        marker = list(
          size = 3,
          color = ~blotch,
          colorscale = "viridis",
          colorbar = list(title = "Blotch Time")
        )) %>%
  layout(scene = list(
    xaxis = list(title = "Depth"),
    yaxis = list(title = "Sex"),
    zaxis = list(title = "Blotch"),
    camera = list(
      eye = list(x = 1.5, y = 1.5, z = 0.7)
    )
  ))
data.predict3 <- data.frame(
  depth = c(45, 50, 55),
  sex = c("Male", "Female", "Male"))
predict(blotching.interaction7, data.predict3, interval = "confidence")
       fit      lwr      upr
1 32.65853 32.32377 32.99330
2 34.89581 34.76910 35.02253
3 37.73732 37.42634 38.04830
sharks$predicted_blotch <- predict(depth_model, newdata = sharks) # creates a new column in the dataset of predicted blotching times
head(sharks) # view the first 6 rows of the data to look at the new row created 
# A tibble: 6 × 11
  ID    sex    blotch   BPM weight length   air water  meta depth
  <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH001 Female   37.2   148   74.7   187.  37.7  23.4  64.1  53.2
2 SH002 Female   34.5   158   73.4   189.  35.7  21.4  73.7  49.6
3 SH003 Female   36.3   125   71.8   284.  34.8  20.1  54.4  49.4
4 SH004 Male     35.3   161  105.    171.  36.2  21.6  86.3  50.3
5 SH005 Female   37.4   138   67.1   264.  33.6  21.8 108.   49.0
6 SH006 Male     33.5   126  110.    270.  36.4  20.9 109.   46.8
# ℹ 1 more variable: predicted_blotch <dbl>
ggplot(sharks, aes(x = depth, y = blotch)) + 
  geom_point(color = "blue", alpha = 0.6, size = 2) +
  geom_line(aes(y = predicted_blotch), color = "red", size = 1) +
  labs(title = "Observed vs Predicted Blotching Time", 
       x = "Depth (meters)",
       y = "Blotching Time (seconds)") +
  theme_minimal()
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

cohen.d(sharks$blotch ~ sharks$sex)

Cohen's d

d estimate: -0.2708124 (small)
95 percent confidence interval:
      lower       upper 
-0.44762256 -0.09400216 
model_poly <- lm(blotch ~ poly(depth, 2) + sex, data = sharks)
summary(model_poly)

Call:
lm(formula = blotch ~ poly(depth, 2) + sex, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.93971 -0.65116 -0.01413  0.60168  3.00451 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     34.96448    0.06447 542.309  < 2e-16 ***
poly(depth, 2)1 22.64130    0.99076  22.852  < 2e-16 ***
poly(depth, 2)2  0.72019    0.99011   0.727 0.467335    
sexMale          0.30478    0.08877   3.434 0.000646 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.99 on 496 degrees of freedom
Multiple R-squared:  0.5219,    Adjusted R-squared:  0.519 
F-statistic: 180.5 on 3 and 496 DF,  p-value: < 2.2e-16
model_male <- lm(blotch ~ depth, data = subset(sharks, sex == "Male"))
model_female <- lm(blotch ~ depth, data = subset(sharks, sex == "Female"))
summary(model_male)

Call:
lm(formula = blotch ~ depth, data = subset(sharks, sex == "Male"))

Residuals:
     Min       1Q   Median       3Q      Max 
-2.96144 -0.69614 -0.04989  0.58563  2.61361 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.80400    1.57398   6.229 1.86e-09 ***
depth        0.50788    0.03132  16.215  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.016 on 262 degrees of freedom
Multiple R-squared:  0.5009,    Adjusted R-squared:  0.499 
F-statistic: 262.9 on 1 and 262 DF,  p-value: < 2.2e-16
summary(model_female)

Call:
lm(formula = blotch ~ depth, data = subset(sharks, sex == "Female"))

Residuals:
     Min       1Q   Median       3Q      Max 
-2.51461 -0.57180  0.02172  0.60537  2.98152 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 10.14057    1.53683   6.598 2.75e-10 ***
depth        0.49510    0.03068  16.139  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9604 on 234 degrees of freedom
Multiple R-squared:  0.5268,    Adjusted R-squared:  0.5247 
F-statistic: 260.5 on 1 and 234 DF,  p-value: < 2.2e-16
interaction_test <- lm(blotch ~ depth * sex, data = sharks)
summary(interaction_test)

Call:
lm(formula = blotch ~ depth * sex, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.9614 -0.6388 -0.0179  0.5981  2.9815 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   10.14057    1.58486   6.398 3.64e-10 ***
depth          0.49510    0.03164  15.650  < 2e-16 ***
sexMale       -0.33657    2.20540  -0.153    0.879    
depth:sexMale  0.01277    0.04396   0.291    0.771    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.9904 on 496 degrees of freedom
Multiple R-squared:  0.5215,    Adjusted R-squared:  0.5186 
F-statistic: 180.2 on 3 and 496 DF,  p-value: < 2.2e-16
par(mfrow = c(2, 2))
plot(blotching.interaction7)

par(mfrow = c(2, 2))
plot(model_male)

plot(model_female)

Assessing if there is a correlation between the variables air and water?

ggplot(sharks, aes(x = air, y = water)) +
  geom_point(size = 2, color = "red") +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Air Temperature vs Water Temperature", x = "Air Temperature (°C)", y = "Water Temperature (°C)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(sharks, aes(x = air, y = water)) +
  geom_point(size = 2, color = "red") +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Air Temperature vs Water Temperature by Sex", 
       x = "Air Temperature (°C)", 
       y = "Water Temperature (°C)") +
  theme_minimal() +
  facet_wrap(~sex)
`geom_smooth()` using formula = 'y ~ x'

correlation_test <- cor.test(sharks$air, sharks$water, method = "pearson")
print(correlation_test)

    Pearson's product-moment correlation

data:  sharks$air and sharks$water
t = -1.2346, df = 498, p-value = 0.2176
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.14224207  0.03260803
sample estimates:
        cor 
-0.05524051 
model <- lm(water ~ air, data = sharks)
summary(model)

Call:
lm(formula = water ~ air, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.03472 -1.47563  0.09925  1.38700  3.06356 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 25.31781    1.86221  13.596   <2e-16 ***
air         -0.06465    0.05236  -1.235    0.218    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.67 on 498 degrees of freedom
Multiple R-squared:  0.003052,  Adjusted R-squared:  0.00105 
F-statistic: 1.524 on 1 and 498 DF,  p-value: 0.2176

investigating if multiple capture can have an effect on blotching time?

file_path <- "/Users/jamiegriffiths/Desktop/sharksub.xlsx"
sharksub <- read_excel(file_path)
head(sharksub) # this displays the first few rows of the dataset 
# A tibble: 6 × 4
  ID    sex    blotch1 blotch2
  <chr> <chr>    <dbl>   <dbl>
1 SH269 Female    36.1    37.2
2 SH163 Female    33.4    34.4
3 SH008 Female    36.3    36.5
4 SH239 Female    35.0    36.0
5 SH332 Female    35.7    36.8
6 SH328 Female    34.9    35.9
tail(sharksub) # this displays the last few rows of the dataset 
# A tibble: 6 × 4
  ID    sex   blotch1 blotch2
  <chr> <chr>   <dbl>   <dbl>
1 SH260 Male     36.4    37.5
2 SH357 Male     33.5    34.5
3 SH199 Male     37.1    38.2
4 SH413 Male     34.3    35.3
5 SH004 Male     35.3    35.6
6 SH307 Male     34.5    34.1
summary(sharksub) # this provides values that have potential importance such as min, max, mean and median
      ID                sex               blotch1         blotch2     
 Length:50          Length:50          Min.   :32.49   Min.   :33.47  
 Class :character   Class :character   1st Qu.:34.38   1st Qu.:35.31  
 Mode  :character   Mode  :character   Median :34.94   Median :35.94  
                                       Mean   :35.03   Mean   :35.96  
                                       3rd Qu.:35.90   3rd Qu.:36.78  
                                       Max.   :37.07   Max.   :38.18  
str(sharksub) # this shows how the dataset is structured 
tibble [50 × 4] (S3: tbl_df/tbl/data.frame)
 $ ID     : chr [1:50] "SH269" "SH163" "SH008" "SH239" ...
 $ sex    : chr [1:50] "Female" "Female" "Female" "Female" ...
 $ blotch1: num [1:50] 36.1 33.4 36.3 35 35.7 ...
 $ blotch2: num [1:50] 37.2 34.4 36.5 36 36.8 ...
colSums(is.na(sharksub)) # this identifies any potential missing values
     ID     sex blotch1 blotch2 
      0       0       0       0 
sharksub$blotch_change <- sharksub$blotch2 - sharksub$blotch1 # preparing the data to calculate change in blotching time (blotch_change)

# A paired t-test showing a comparison of blotch1 and blotch2 
t_test_result <- t.test(sharksub$blotch1, sharksub$blotch2, paired = TRUE)
print(t_test_result) # showing the results from the t-test

    Paired t-test

data:  sharksub$blotch1 and sharksub$blotch2
t = -17.39, df = 49, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -1.037176 -0.822301
sample estimates:
mean difference 
     -0.9297384 
# showing summary statistics of both capture times and the 
summary_stats <- sharksub %>%
  summarize(
    mean_blotch1 = mean(blotch1, na.rm = TRUE),
    mean_blotch2 = mean(blotch2, na.rm = TRUE),
    mean_blotch_change = mean(blotch_change, na.rm = TRUE),
    sd_blotch_change = sd(blotch_change, na.rm = TRUE)
  )
print(summary_stats)
# A tibble: 1 × 4
  mean_blotch1 mean_blotch2 mean_blotch_change sd_blotch_change
         <dbl>        <dbl>              <dbl>            <dbl>
1         35.0         36.0              0.930            0.378
ggplot(sharksub, aes(x = factor(1), y = blotch1, fill = "Capture 1")) +
  geom_boxplot() +
  geom_boxplot(aes(x = factor(2), y = blotch2, fill = "Capture 2")) +
  scale_x_discrete(labels = c("Capture 1", "Capture 2")) +
  labs(title = "Blotching Times of First and Second Capture", x = "Capture", y = "Blotching time (seconds)") +
  theme_minimal()

ggplot(sharksub, aes(x = blotch1, y= blotch2)) +
  geom_point() +
  geom_smooth(method = lm, color = "blue") +
  labs(title = "First Capture vs Second capture",
       x = "Blotching Time (capture 1)",
       y = "Blotching Time(capture 2)") +
theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

model <- lm(blotch1 ~ blotch2, data = sharksub)
summary(model)

Call:
lm(formula = blotch1 ~ blotch2, data = sharksub)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.31595 -0.16130 -0.10971 -0.00299  1.27761 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  2.97912    1.59111   1.872   0.0673 .  
blotch2      0.89130    0.04422  20.154   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.36 on 48 degrees of freedom
Multiple R-squared:  0.8943,    Adjusted R-squared:  0.8921 
F-statistic: 406.2 on 1 and 48 DF,  p-value: < 2.2e-16