Research Methods and Data Analysis Summative Assessment - Supplementary R Code

Long term physiological effects of catch and release practices – A study of the Caribbean reef shark, Biodiversity and Conservation, Tamara Walker, Nottingham Trent University, Brackenhurst Campus, Nottingham, UK, tamara.walker2024@my.ntu.ac.uk

Annotated R code provided to replicate the data and media produced in the Biodiversity and Conservation summative assessment.

# Attach the packages used by the analysis before any data are read.
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(knitr)
library(tibble)

# Read both datasets from the current working directory.
sharks <- read.csv("sharks.csv")
sharksub <- read.csv("sharksub.csv")

# Inspect the structure of sharks: number of observations, column names and
# column types.
str(sharks)
'data.frame':   500 obs. of  10 variables:
 $ ID    : chr  "SH001" "SH002" "SH003" "SH004" ...
 $ sex   : chr  "Female" "Female" "Female" "Male" ...
 $ blotch: num  37.2 34.5 36.3 35.3 37.4 ...
 $ BPM   : int  148 158 125 161 138 126 166 135 132 127 ...
 $ weight: num  74.7 73.4 71.8 104.6 67.1 ...
 $ length: num  187 189 284 171 264 ...
 $ air   : num  37.7 35.7 34.8 36.2 33.6 ...
 $ water : num  23.4 21.4 20.1 21.6 21.8 ...
 $ meta  : num  64.1 73.7 54.4 86.3 108 ...
 $ depth : num  53.2 49.6 49.4 50.3 49 ...
str(sharksub) # Displays the structure of sharksub: number of observations, column names and column types
'data.frame':   50 obs. of  4 variables:
 $ ID     : chr  "SH269" "SH163" "SH008" "SH239" ...
 $ sex    : chr  "Female" "Female" "Female" "Female" ...
 $ blotch1: num  36.1 33.4 36.3 35 35.7 ...
 $ blotch2: num  37.2 34.4 36.5 36 36.8 ...
colSums(is.na(sharks)) # Counts the missing (NA) values in each column of sharks; a count of 0 in every column means no data are missing
    ID    sex blotch    BPM weight length    air  water   meta  depth 
     0      0      0      0      0      0      0      0      0      0 
colSums(is.na(sharksub)) # Counts the missing (NA) values in each column of sharksub; a count of 0 in every column means no data are missing
     ID     sex blotch1 blotch2 
      0       0       0       0 
# Fit a simple linear regression of surface water temperature (water) on
# ambient air temperature (air).
mod1 <- lm(water ~ air, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod1)

Call:
lm(formula = water ~ air, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.03472 -1.47563  0.09925  1.38700  3.06356 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 25.31781    1.86221  13.596   <2e-16 ***
air         -0.06465    0.05236  -1.235    0.218    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.67 on 498 degrees of freedom
Multiple R-squared:  0.003052,  Adjusted R-squared:  0.00105 
F-statistic: 1.524 on 1 and 498 DF,  p-value: 0.2176
# Scatter plot of surface water temperature against ambient air temperature.
ggplot(sharks, aes(x = air, y = water)) +
  # Blue points; alpha = .3 sets 30% opacity (transparency, not point size) so
  # overlapping observations remain distinguishable.
  geom_point(colour = "blue", alpha = .3) +
  # Overlay the fitted linear-model line without its confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # Axis labels.
  labs(x = "Ambient air temperature (C)", y = "Surface water temperature (C)")
# The former scale_fill_brewer() layer was dropped: no fill aesthetic is mapped
# in this plot, so the fill scale had no effect on the figure.
`geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of time taken to blotch (blotch) on ambient
# air temperature (air).
mod2 <- lm(blotch ~ air, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod2)

Call:
lm(formula = blotch ~ air, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3900 -0.9459 -0.0506  0.9423  4.9134 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 36.46184    1.59219   22.90   <2e-16 ***
air         -0.03761    0.04477   -0.84    0.401    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared:  0.001415,  Adjusted R-squared:  -0.0005902 
F-statistic: 0.7057 on 1 and 498 DF,  p-value: 0.4013
# Scatter plot of time taken to blotch against ambient air temperature.
ggplot(sharks, aes(x = air, y = blotch)) +
  # Blue points; alpha = .3 sets 30% opacity (transparency, not point size) so
  # overlapping observations remain distinguishable.
  geom_point(colour = "blue", alpha = .3) +
  # Overlay the fitted linear-model line without its confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # Axis labels.
  labs(x = "Ambient air temp (C)", y = "Time taken to blotch (s)")
# The former scale_fill_brewer() layer was dropped: no fill aesthetic is mapped
# in this plot, so the fill scale had no effect on the figure.
`geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of time taken to blotch (blotch) on surface
# water temperature (water).
mod3 <- lm(blotch ~ water, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod3)

Call:
lm(formula = blotch ~ water, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3051 -0.9629 -0.0495  0.9471  5.0216 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 36.14125    0.88240  40.958   <2e-16 ***
water       -0.04413    0.03823  -1.154    0.249    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.427 on 498 degrees of freedom
Multiple R-squared:  0.002668,  Adjusted R-squared:  0.0006654 
F-statistic: 1.332 on 1 and 498 DF,  p-value: 0.249
# Scatter plot of time taken to blotch against surface water temperature.
ggplot(sharks, aes(x = water, y = blotch)) +
  # Blue points; alpha = .3 sets 30% opacity (transparency, not point size) so
  # overlapping observations remain distinguishable.
  geom_point(colour = "blue", alpha = .3) +
  # Overlay the fitted linear-model line without its confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # Axis labels.
  labs(x = "Surface water temperature (C)", y = "Time taken to blotch (s)")
# The former scale_fill_brewer() layer was dropped: no fill aesthetic is mapped
# in this plot, so the fill scale had no effect on the figure.
`geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of time taken to blotch (blotch) on heart
# rate (BPM).
mod6 <- lm(blotch ~ BPM, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod6)

Call:
lm(formula = blotch ~ BPM, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3282 -0.9492 -0.0876  0.8940  4.9559 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 35.544599   0.644079  55.187   <2e-16 ***
BPM         -0.002957   0.004521  -0.654    0.513    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared:  0.0008583, Adjusted R-squared:  -0.001148 
F-statistic: 0.4278 on 1 and 498 DF,  p-value: 0.5134
# Fit a simple linear regression of time taken to blotch (blotch) on weight.
mod7 <- lm(blotch ~ weight, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod7)

Call:
lm(formula = blotch ~ weight, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3702 -0.9600 -0.0687  0.9328  4.9357 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 3.504e+01  4.227e-01  82.887   <2e-16 ***
weight      9.795e-04  4.752e-03   0.206    0.837    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared:  8.531e-05, Adjusted R-squared:  -0.001923 
F-statistic: 0.04249 on 1 and 498 DF,  p-value: 0.8368
# Fit a simple linear regression of time taken to blotch (blotch) on length.
mod8 <- lm(blotch ~ length, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod8)

Call:
lm(formula = blotch ~ length, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3280 -0.9663 -0.0678  0.9117  4.9545 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 35.2312817  0.2965354 118.810   <2e-16 ***
length      -0.0005017  0.0013721  -0.366    0.715    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared:  0.0002684, Adjusted R-squared:  -0.001739 
F-statistic: 0.1337 on 1 and 498 DF,  p-value: 0.7148
# Fit a simple linear regression of time taken to blotch (blotch) on cortisol
# level (meta).
mod9 <- lm(blotch ~ meta, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod9)

Call:
lm(formula = blotch ~ meta, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3635 -0.9639 -0.0793  0.9275  4.9411 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) 35.1892930  0.3075967 114.401   <2e-16 ***
meta        -0.0007787  0.0036674  -0.212    0.832    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared:  9.051e-05, Adjusted R-squared:  -0.001917 
F-statistic: 0.04508 on 1 and 498 DF,  p-value: 0.8319
# Fit a simple linear regression of time taken to blotch (blotch) on depth of
# capture (depth).
mod10 <- lm(blotch ~ depth, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod10)

Call:
lm(formula = blotch ~ depth, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81869 -0.65427 -0.01035  0.58825  2.83116 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  9.82178    1.11207   8.832   <2e-16 ***
depth        0.50467    0.02216  22.772   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1 on 498 degrees of freedom
Multiple R-squared:  0.5101,    Adjusted R-squared:  0.5091 
F-statistic: 518.6 on 1 and 498 DF,  p-value: < 2.2e-16
# Scatter plot of time taken to blotch against water depth of capture.
ggplot(sharks, aes(x = depth, y = blotch)) +
  # Blue points; alpha = .3 sets 30% opacity (transparency, not point size) so
  # overlapping observations remain distinguishable. The stray trailing comma
  # in the original call has been removed.
  geom_point(colour = "blue", alpha = .3) +
  # Overlay the fitted linear-model line without its confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # Axis labels.
  labs(x = "Water depth of capture (m)", y = "Time taken to blotch (s)")
# The former scale_fill_brewer() layer was dropped: no fill aesthetic is mapped
# in this plot, so the fill scale had no effect on the figure.
`geom_smooth()` using formula = 'y ~ x'

# Hypothetical depths of capture (m) at which to predict time to blotch.
# NOTE(review): the range(sharks$depth) output later in this file is
# 44.64-56.83, so 40, 42.5 and 58 lie outside the observed depths and their
# predictions are extrapolations.
new_depth <- data.frame(depth = c(40, 42.5, 55, 58))
# Add the blotching times predicted by mod10 (blotch ~ depth) as a new column
# of new_depth.
new_depth[["predicted"]] <- predict(mod10, newdata = new_depth)
# Render the predictions as a table with descriptive column headings.
kable(
  new_depth,
  col.names = c(
    "Hypothetical depth of capture (m)",
    "Predicted time to blotch (s)"
  )
)
Hypothetical depth of capture (m) Predicted time to blotch (s)
40.0 30.00871
42.5 31.27039
55.0 37.57881
58.0 39.09283
# Derive tvar: the difference between ambient air temperature and surface
# water temperature (air minus water) for each observation.
sharks[["tvar"]] <- with(sharks, air - water)
# Fit a simple linear regression of time taken to blotch (blotch) on that
# air/water temperature difference.
mod4 <- lm(blotch ~ tvar, data = sharks)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod4)

Call:
lm(formula = blotch ~ tvar, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3305 -0.9723 -0.0591  0.9231  4.9821 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 35.011079   0.360355  97.157   <2e-16 ***
tvar         0.009136   0.028338   0.322    0.747    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared:  0.0002086, Adjusted R-squared:  -0.001799 
F-statistic: 0.1039 on 1 and 498 DF,  p-value: 0.7473
# Scatter plot of time taken to blotch against tvar, the air-minus-water
# temperature difference.
# NOTE(review): the x-axis label says "variance", but tvar is a simple
# difference (air - water); consider rewording the label.
ggplot(sharks, aes(x = tvar, y = blotch)) +
  # Blue points; alpha = .3 sets 30% opacity (transparency, not point size) so
  # overlapping observations remain distinguishable.
  geom_point(colour = "blue", alpha = .3) +
  # Overlay the fitted linear-model line without its confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # Axis labels.
  labs(x = "Temperature variance air/water (C)", y = "Time taken to blotch (s)")
# The former scale_fill_brewer() layer was dropped: no fill aesthetic is mapped
# in this plot, so the fill scale had no effect on the figure.
`geom_smooth()` using formula = 'y ~ x'

# Fit a simple linear regression of time taken to blotch on second capture
# (blotch2) on time taken to blotch on first capture (blotch1).
mod5 <- lm(blotch2 ~ blotch1, data = sharksub)
# Print the residual summary, coefficient table and overall fit statistics.
summary(mod5)

Call:
lm(formula = blotch2 ~ blotch1, data = sharksub)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.37681  0.09513  0.11617  0.13911  0.17550 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.81111    1.74483   0.465    0.644    
blotch1      1.00339    0.04979  20.154   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3819 on 48 degrees of freedom
Multiple R-squared:  0.8943,    Adjusted R-squared:  0.8921 
F-statistic: 406.2 on 1 and 48 DF,  p-value: < 2.2e-16
# Scatter plot of time to blotch on second capture against first capture.
ggplot(sharksub, aes(x = blotch1, y = blotch2)) +
  # Blue points; alpha = .3 sets 30% opacity (transparency, not point size).
  # The stray trailing comma in the original call has been removed.
  geom_point(colour = "blue", alpha = .3) +
  # Overlay the fitted linear-model line without its confidence band.
  geom_smooth(method = "lm", se = FALSE) +
  # labs() is now closed after the title. Previously its closing parenthesis
  # sat after scale_fill_brewer(), so the title string was "added" to a scale
  # object inside labs() and the intended title was never applied.
  labs(
    x = "First capture",
    y = "Second capture",
    title = "Time taken to blotch (seconds)"
  )
# No scale_fill_brewer() layer is added: no fill aesthetic is mapped in this
# plot, so a fill scale would have no effect on the figure.
`geom_smooth()` using formula = 'y ~ x'

# Scatter plot of time to blotch on second capture against first capture,
# with points and regression lines coloured by sex.
ggplot(sharksub, aes(x = blotch1, y = blotch2, colour = sex)) +
  # alpha = .3 sets 30% opacity (transparency, not point size). The stray
  # trailing comma in the original call has been removed.
  geom_point(alpha = .3) +
  # One fitted linear-model line per sex, without confidence bands.
  geom_smooth(method = "lm", se = FALSE) +
  # labs() is now closed after the title. Previously its closing parenthesis
  # sat after the scale call, so the title string was "added" to a scale
  # object inside labs() and the intended title was never applied.
  labs(
    x = "First capture",
    y = "Second capture",
    title = "Time taken to blotch (seconds)"
  ) +
  # sex is mapped to colour (not fill), so the colour scale is the one that
  # must carry the colour-blind friendly "Dark2" palette.
  scale_colour_brewer(palette = "Dark2")
`geom_smooth()` using formula = 'y ~ x'

# Box plot of time taken to blotch for each sex, coloured by sex.
ggplot(sharks, aes(x = sex, y = blotch, colour = sex)) +
  geom_boxplot() +
  # Overlay the individual observations, jittered to reduce overlap;
  # alpha = .3 sets 30% opacity (transparency, not point size).
  geom_jitter(alpha = .3) +
  # labs() is now closed after the y label. Previously its closing parenthesis
  # sat after the scale call, so the y label string was "added" to a scale
  # object inside labs() and the intended axis title was never applied.
  labs(x = "Sex", y = "Time taken to blotch (s)") +
  # sex is mapped to colour (not fill), so the colour scale is the one that
  # must carry the colour-blind friendly "Dark2" palette.
  scale_colour_brewer(palette = "Dark2")

t.test(blotch ~ sex, data = sharks) # Produces a Welch two-sample t-test comparing mean time to blotch between females and males. Significant - on average males took longer to blotch (a delayed reaction to stress) than females

    Welch Two Sample t-test

data:  blotch by sex
t = -3.0282, df = 494.67, p-value = 0.002589
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6322714 -0.1346620
sample estimates:
mean in group Female   mean in group Male 
            34.92294             35.30641 
# Hypothetical first-capture blotching times (s) at which to predict.
new_blotch <- data.frame(blotch1 = c(36.5, 37.5, 38.5, 39.5))
# Add the second-capture times predicted by mod5 (blotch2 ~ blotch1) as a new
# column of new_blotch.
new_blotch[["predicted"]] <- predict(mod5, newdata = new_blotch)
# Render the predictions as a table with descriptive column headings.
kable(
  new_blotch,
  col.names = c(
    "Hypothetical time to blotch at first capture (s)",
    "Predicted time to blotch at second capture (s)"
  )
)
Hypothetical time to blotch at first capture (s) Predicted time to blotch at second capture (s)
36.5 37.43472
37.5 38.43810
38.5 39.44149
39.5 40.44487
# Descriptive statistics (mean, minimum, maximum) for the analysed variables;
# each call's printed result follows on the next line.
mean(sharks$blotch) # Mean time taken to blotch in sharks
[1] 35.12541
min(sharks$blotch) # Minimum time taken to blotch in sharks
[1] 30.77585
max(sharks$blotch) # Maximum time taken to blotch in sharks
[1] 40.08356
mean(sharks$air) # Mean ambient air temperature
[1] 35.53526
min(sharks$air) # Minimum ambient air temperature
[1] 33.00454
max(sharks$air) # Maximum ambient air temperature
[1] 37.99978
mean(sharks$water) # Mean surface water temperature
[1] 23.02052
min(sharks$water) # Minimum surface water temperature
[1] 20.00503
max(sharks$water) # Maximum surface water temperature
[1] 25.98523
mean(sharksub$blotch2) # Mean time taken to blotch on second capture (sharksub)
[1] 35.96016
min(sharksub$blotch2) # Minimum time taken to blotch on second capture (sharksub)
[1] 33.46802
max(sharksub$blotch2) # Maximum time taken to blotch on second capture (sharksub)
[1] 38.1838
mean(sharks$tvar) # Mean air-minus-water temperature difference
[1] 12.51474
min(sharks$tvar) # Minimum air-minus-water temperature difference
[1] 7.503214
max(sharks$tvar) # Maximum air-minus-water temperature difference
[1] 17.8354
range(sharks$depth) # Minimum and maximum depth of capture observed in sharks
[1] 44.64474 56.82916