Shark Blotching

# Load the required packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(palmerpenguins)
library(ggpubr)
library(readr)

# Load the shark data, then draw a scatter plot of air temperature against water temperature

# Read the full shark dataset from CSV into a tibble
# (readr reports 500 rows and 10 columns for this file).
sharks1 <- read_csv(file = "/Volumes/University/sharks1.csv")
Rows: 500 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ID, sex
dbl (8): blotch, BPM, weight, length, air, water, meta, depth

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only when a person is at the console.
# View() is an interactive tool; calling it while knitting or running the
# script in batch mode can fail or has no useful effect.
if (interactive()) {
  View(sharks1)
}

# Scatter plot of air temperature (x) against water temperature (y), with a
# straight-line (lm) fit overlaid and the confidence band switched off.
sharks1 %>%
  ggplot(mapping = aes(x = air, y = water)) +
  geom_point() +
  geom_smooth(se = FALSE, method = "lm") +
  labs(
    title = "Air and Water Temperature Association",
    x = "Ambient Air Temperature°C",
    y = "Surface Water Temperature°C"
  )
`geom_smooth()` using formula = 'y ~ x'

# Spearman's rank correlation test command 

# Attach readr and read the shark dataset again.
# NOTE(review): this repeats the load performed earlier in the script with the
# same path; the object is simply overwritten with the same file's contents.
library(readr)
sharks1 <- read_csv(file = "/Volumes/University/sharks1.csv")
Rows: 500 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ID, sex
dbl (8): blotch, BPM, weight, length, air, water, meta, depth

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Only open the data viewer in an interactive session; View() is not
# meaningful (and may fail) when the script is knitted or run in batch mode.
if (interactive()) {
  View(sharks1)
}

# Spearman's rank correlation between air and water temperature.
# The columns are passed as sharks1$... so the "data:" line of the printed
# result names both the data frame and the column.
cor.test(sharks1$air, sharks1$water, method = "spearman")

    Spearman's rank correlation rho

data:  sharks1$air and sharks1$water
S = 22007692, p-value = 0.2082
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.05637344 
# Paired T-test command 

# Attach readr and read the repeat-capture subset
# (readr reports 50 rows and 4 columns: ID, sex, blotch1, blotch2).
library(readr)
sharksub2 <- read_csv(file = "/Volumes/University/sharksub2.csv")
Rows: 50 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ID, sex
dbl (2): blotch1, blotch2

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Two-sided paired t-test comparing each shark's first and second blotch time.
t.test(
  x = sharksub2$blotch1,
  y = sharksub2$blotch2,
  alternative = "two.sided",
  paired = TRUE
)

    Paired t-test

data:  sharksub2$blotch1 and sharksub2$blotch2
t = -17.39, df = 49, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -1.037176 -0.822301
sample estimates:
mean difference 
     -0.9297384 
# Attach readr and read the repeat-capture subset again.
# NOTE(review): this repeats the earlier load of the same file.
library(readr)
sharksub2 <- read_csv(file = "/Volumes/University/sharksub2.csv")
Rows: 50 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ID, sex
dbl (2): blotch1, blotch2

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Selecting only relevant data (blotch times) for ease of viewing

# Print only the two blotch-time columns; the result is not stored.
select(sharksub2, blotch1, blotch2)
# A tibble: 50 × 2
   blotch1 blotch2
     <dbl>   <dbl>
 1    36.1    37.2
 2    33.4    34.4
 3    36.3    36.5
 4    35.0    36.0
 5    35.7    36.8
 6    34.9    35.9
 7    33.1    34.1
 8    32.5    33.5
 9    34.3    35.3
10    35.5    36.6
# ℹ 40 more rows
#Blotch 1 Mean Calculation

# Blotch times for the first capture (50 values).
# NOTE(review): these appear to be hand-copied from sharksub2$blotch1 (the
# first rows match the tibble printed above) - consider computing
# mean(sharksub2$blotch1) directly to avoid transcription drift; TODO confirm.
blotch1_times <- c(
  36.07201, 33.38396, 36.29497, 34.98931, 35.70572,
  34.90283, 33.11113, 32.49322, 34.27203, 35.54855,
  35.99021, 34.88511, 36.83684, 34.79932, 34.41429,
  34.74200, 34.36868, 34.27528, 36.11878, 33.98649,
  34.45879, 34.93960, 35.81950, 34.47858, 33.26947,
  32.85958, 34.65632, 36.48702, 34.93594, 36.42572,
  36.50001, 36.37596, 35.17775, 34.57425, 36.68703,
  35.28866, 35.92057, 34.73333, 33.60120, 35.70079,
  35.02983, 35.63334, 34.24796, 35.34228, 36.42890,
  33.53000, 37.07165, 34.29493, 35.33881, 34.52245
)

# Store the result under its own name so that base::mean() is not shadowed
# by a numeric variable called `mean` (and utils::data() by `data`).
blotch1_mean <- mean(blotch1_times)
cat("Mean:", blotch1_mean, "\n")
Mean: 35.03042 
#Blotch 2 Mean Calculation

# Blotch times for the second capture (50 values).
# NOTE(review): these appear to be hand-copied from sharksub2$blotch2 (the
# first rows match the tibble printed above) - consider computing
# mean(sharksub2$blotch2) directly to avoid transcription drift; TODO confirm.
blotch2_times <- c(
  37.15417, 34.38548, 36.46102, 36.03899, 36.77689,
  35.94991, 34.10446, 33.46802, 35.30019, 36.61501,
  37.06992, 35.93166, 37.94195, 35.84330, 35.44672,
  34.39458, 35.39974, 35.30354, 37.20234, 35.00608,
  35.49255, 35.98779, 36.89408, 35.51294, 34.26755,
  33.84537, 35.69601, 37.58163, 35.98402, 37.51849,
  37.59501, 37.46724, 36.23308, 35.61148, 37.78764,
  36.34732, 35.52544, 35.77533, 34.60924, 36.77181,
  36.08072, 36.70234, 35.27540, 36.40255, 37.52177,
  34.53590, 38.18380, 35.32378, 35.60995, 34.07366
)

# Store the result under its own name so that base::mean() is not shadowed
# by a numeric variable called `mean` (and utils::data() by `data`).
blotch2_mean <- mean(blotch2_times)
cat("Mean:", blotch2_mean, "\n")
Mean: 35.96016 
# Creation of scatter plot to show data distribution for both blotch times

# Scatter plot of first-capture blotch time (x) against second-capture blotch
# time (y), with a straight-line (lm) fit and no confidence band.
ggplot(sharksub2, aes(x = blotch1, y = blotch2)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(
    x = "Capture 1",
    # Label names are case-sensitive: it must be lower-case `y` to match the
    # y aesthetic, otherwise the axis keeps the raw column name.
    y = "Capture 2",
    title = "Time for Blotching to Occur on 30% of the Ventral Surface (Seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'

#Creation of boxplot to determine if sex impacts blotch time

# Box plot of blotch time for each sex, coloured by sex with the Dark2
# palette; the legend is hidden because the x axis already names the groups.
sharks1 %>%
  ggplot(mapping = aes(x = sex, y = blotch, colour = sex)) +
  geom_boxplot(show.legend = FALSE) +
  scale_colour_brewer(palette = "Dark2") +
  theme_minimal() +
  labs(
    title = "Impact of Sex on Blotching Time",
    x = "Shark Sex",
    y = "Blotch Time in Seconds"
  )

# Unpaired (Welch) t-test comparing blotch time between sexes

# Print the rows for male sharks (for inspection only; nothing is stored).
sharks1 %>% filter(sex == "Male")
# A tibble: 264 × 10
   ID    sex   blotch   BPM weight length   air water  meta depth
   <chr> <chr>  <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
 1 SH004 Male    35.3   161  105.    171.  36.2  21.6  86.3  50.3
 2 SH006 Male    33.5   126  110.    270.  36.4  20.9 109.   46.8
 3 SH007 Male    36.7   166  101.    194.  33.1  21.8  99.7  49.1
 4 SH009 Male    35.4   132   95.0   269.  35.3  22.2  79.0  49.9
 5 SH013 Male    35.7   133  109.    244.  37.5  23.6 110.   50.3
 6 SH014 Male    39.8   121   90.9   193.  34.4  23.4  99.8  55.3
 7 SH016 Male    34.8   128   87.7   244.  33.9  24.8  85.7  50.7
 8 SH019 Male    33.5   154   97.4   168.  35.1  21.1 104.   50.9
 9 SH020 Male    35.1   125   93.3   291.  36.1  20.6  95.3  48.4
10 SH021 Male    35.8   136   67.2   269.  35.1  22.6  78.0  51.2
# ℹ 254 more rows
# Print the rows for female sharks (for inspection only; nothing is stored).
sharks1 %>% filter(sex == "Female")
# A tibble: 236 × 10
   ID    sex    blotch   BPM weight length   air water  meta depth
   <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
 1 SH001 Female   37.2   148   74.7   187.  37.7  23.4  64.1  53.2
 2 SH002 Female   34.5   158   73.4   189.  35.7  21.4  73.7  49.6
 3 SH003 Female   36.3   125   71.8   284.  34.8  20.1  54.4  49.4
 4 SH005 Female   37.4   138   67.1   264.  33.6  21.8 108.   49.0
 5 SH008 Female   36.3   135  101.    128.  36.8  21.3  96.3  50.6
 6 SH010 Female   36.0   127   71.3   163.  35.7  24.6  72.3  51.2
 7 SH011 Female   31.8   126   67.5   179.  35.7  24.5 109.   48.6
 8 SH012 Female   36.2   131   70.0   235.  33.7  20.2  62.0  50.6
 9 SH015 Female   37.0   166   67.7   253.  34.0  23.2 103.   53.5
10 SH017 Female   34.2   145  100.    181.  33.8  24.0 102.   50.8
# ℹ 226 more rows
# Two-sample t-test of blotch time by sex. The defaults are spelled out:
# two-sided alternative and unequal variances (Welch).
t.test(
  sharks1$blotch ~ sharks1$sex,
  alternative = "two.sided",
  var.equal = FALSE
)

    Welch Two Sample t-test

data:  sharks1$blotch by sharks1$sex
t = -3.0282, df = 494.67, p-value = 0.002589
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6322714 -0.1346620
sample estimates:
mean in group Female   mean in group Male 
            34.92294             35.30641 
#Linear Model Command + Summary

# Multiple linear regression of blotch time on all seven numeric predictors.
# The formula is written inline so the "Call:" line of the summary shows it.
model_lm <- lm(
  formula = blotch ~ BPM + weight + length + air + water + meta + depth,
  data = sharks1
)
summary(model_lm)

Call:
lm(formula = blotch ~ BPM + weight + length + air + water + meta + 
    depth, data = sharks1)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.83745 -0.66117 -0.00702  0.60110  2.74108 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 11.1405851  1.8958668   5.876 7.74e-09 ***
BPM         -0.0019723  0.0031890  -0.618    0.537    
weight       0.0016283  0.0033511   0.486    0.627    
length       0.0012295  0.0009710   1.266    0.206    
air         -0.0281474  0.0318707  -0.883    0.378    
water       -0.0188934  0.0270782  -0.698    0.486    
meta        -0.0009712  0.0025951  -0.374    0.708    
depth        0.5061285  0.0223191  22.677  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.002 on 492 degrees of freedom
Multiple R-squared:  0.514, Adjusted R-squared:  0.507 
F-statistic: 74.32 on 7 and 492 DF,  p-value: < 2.2e-16
#Creation of Scatter Plot Displaying Linear Model Fitted to Data

# Scatter plot of capture depth (x) against blotch time (y) with a
# straight-line (lm) fit; geom_smooth() keeps its default confidence band.
sharks1 %>%
  ggplot(mapping = aes(x = depth, y = blotch)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Linear Model Fitted to Data",
    x = "Depth of Capture (Metres)",
    y = "Blotching Time (Seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'

#Command for Linear Regression Model & Summary

# Simple linear regression of blotch time on capture depth alone.
model <- lm(formula = blotch ~ depth, data = sharks1)
summary(model)

Call:
lm(formula = blotch ~ depth, data = sharks1)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81869 -0.65427 -0.01035  0.58825  2.83116 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.82178    1.11207   8.832   <2e-16 ***
depth        0.50467    0.02216  22.772   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1 on 498 degrees of freedom
Multiple R-squared:  0.5101,    Adjusted R-squared:  0.5091 
F-statistic: 518.6 on 1 and 498 DF,  p-value: < 2.2e-16