# Read both input files from the working directory.
# sharks   : the full data set, one row per shark.
# sharksub : the subset of sharks with two blotching measurements (blotch1, blotch2).
sharks <- read.csv("sharks.csv") # Loads the sharks dataset from the working directory
sharksub <- read.csv("sharksub.csv") # Loads the sharksub dataset from the working directory
Research Methods and Data Analysis Summative Assessment - Supplementary R Code
Long term physiological effects of catch and release practices – A study of the Caribbean reef shark, Biodiversity and Conservation, Tamara Walker, Nottingham Trent University, Brackenhurst Campus, Nottingham, UK, tamara.walker2024@my.ntu.ac.uk
Annotated R code provided to replicate the data and media produced in the Biodiversity and Conservation summative assessment.
library(tidyverse)
library(ggplot2)
library(ggthemes)
library(knitr)
library(tibble)
# Loads the relevant libraries required to produce code
str(sharks) # Displays the structure of sharks: number of observations and variables, plus each column's type and first values. NOTE(review): str() does not itself verify that the IDs are unique
'data.frame': 500 obs. of 10 variables:
$ ID : chr "SH001" "SH002" "SH003" "SH004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ blotch: num 37.2 34.5 36.3 35.3 37.4 ...
$ BPM : int 148 158 125 161 138 126 166 135 132 127 ...
$ weight: num 74.7 73.4 71.8 104.6 67.1 ...
$ length: num 187 189 284 171 264 ...
$ air : num 37.7 35.7 34.8 36.2 33.6 ...
$ water : num 23.4 21.4 20.1 21.6 21.8 ...
$ meta : num 64.1 73.7 54.4 86.3 108 ...
$ depth : num 53.2 49.6 49.4 50.3 49 ...
str(sharksub) # Displays the structure of sharksub: number of observations and variables, plus each column's type and first values. NOTE(review): str() does not itself verify that the IDs are unique
'data.frame': 50 obs. of 4 variables:
$ ID : chr "SH269" "SH163" "SH008" "SH239" ...
$ sex : chr "Female" "Female" "Female" "Female" ...
$ blotch1: num 36.1 33.4 36.3 35 35.7 ...
$ blotch2: num 37.2 34.4 36.5 36 36.8 ...
colSums(is.na(sharks)) # Counts the missing (NA) values in each column of sharks; all zeros means no missing data
ID sex blotch BPM weight length air water meta depth
0 0 0 0 0 0 0 0 0 0
colSums(is.na(sharksub)) # Counts the missing (NA) values in each column of sharksub; all zeros means no missing data
ID sex blotch1 blotch2
0 0 0 0
mod1 <- lm(water ~ air, data = sharks) # Fits a linear model of surface water temperature (response) on ambient air temperature (predictor)
summary(mod1) # Provides a summary of residuals and coefficients
Call:
lm(formula = water ~ air, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-3.03472 -1.47563 0.09925 1.38700 3.06356
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.31781 1.86221 13.596 <2e-16 ***
air -0.06465 0.05236 -1.235 0.218
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.67 on 498 degrees of freedom
Multiple R-squared: 0.003052, Adjusted R-squared: 0.00105
F-statistic: 1.524 on 1 and 498 DF, p-value: 0.2176
ggplot(sharks, aes(x = air, y = water)) + # Produces a scatter plot of the variables air and water
  geom_point(colour = "blue", alpha = .3) + # Draws the points in blue at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds the fitted linear regression line without a confidence band
  labs(x = "Ambient air temperature (C)", y = "Surface water temperature (C)") + # Amends the label names
  scale_fill_brewer(palette = "Dark2") # NOTE: no fill aesthetic is mapped in this plot, so this palette has no visible effect
`geom_smooth()` using formula = 'y ~ x'
mod2 <- lm(blotch ~ air, data = sharks) # Fits a linear model of time taken to blotch (response) on ambient air temperature (predictor)
summary(mod2) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ air, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3900 -0.9459 -0.0506 0.9423 4.9134
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 36.46184 1.59219 22.90 <2e-16 ***
air -0.03761 0.04477 -0.84 0.401
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared: 0.001415, Adjusted R-squared: -0.0005902
F-statistic: 0.7057 on 1 and 498 DF, p-value: 0.4013
ggplot(sharks, aes(x = air, y = blotch)) + # Produces a scatter plot of the variables air and time taken to blotch
  geom_point(colour = "blue", alpha = .3) + # Draws the points in blue at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds the fitted linear regression line without a confidence band
  labs(x = "Ambient air temp (C)", y = "Time taken to blotch (s)") + # Amends the label names
  scale_fill_brewer(palette = "Dark2") # NOTE: no fill aesthetic is mapped in this plot, so this palette has no visible effect
`geom_smooth()` using formula = 'y ~ x'
mod3 <- lm(blotch ~ water, data = sharks) # Fits a linear model of time taken to blotch (response) on surface water temperature (predictor)
summary(mod3) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ water, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3051 -0.9629 -0.0495 0.9471 5.0216
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 36.14125 0.88240 40.958 <2e-16 ***
water -0.04413 0.03823 -1.154 0.249
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.427 on 498 degrees of freedom
Multiple R-squared: 0.002668, Adjusted R-squared: 0.0006654
F-statistic: 1.332 on 1 and 498 DF, p-value: 0.249
ggplot(sharks, aes(x = water, y = blotch)) + # Produces a scatter plot of the variables water and time taken to blotch
  geom_point(colour = "blue", alpha = .3) + # Draws the points in blue at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds the fitted linear regression line without a confidence band
  labs(x = "Surface water temperature (C)", y = "Time taken to blotch (s)") + # Amends the label names
  scale_fill_brewer(palette = "Dark2") # NOTE: no fill aesthetic is mapped in this plot, so this palette has no visible effect
`geom_smooth()` using formula = 'y ~ x'
mod6 <- lm(blotch ~ BPM, data = sharks) # Fits a linear model of time taken to blotch (response) on heart rate, BPM (predictor)
summary(mod6) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ BPM, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3282 -0.9492 -0.0876 0.8940 4.9559
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.544599 0.644079 55.187 <2e-16 ***
BPM -0.002957 0.004521 -0.654 0.513
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared: 0.0008583, Adjusted R-squared: -0.001148
F-statistic: 0.4278 on 1 and 498 DF, p-value: 0.5134
mod7 <- lm(blotch ~ weight, data = sharks) # Fits a linear model of time taken to blotch (response) on weight (predictor)
summary(mod7) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ weight, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3702 -0.9600 -0.0687 0.9328 4.9357
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.504e+01 4.227e-01 82.887 <2e-16 ***
weight 9.795e-04 4.752e-03 0.206 0.837
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared: 8.531e-05, Adjusted R-squared: -0.001923
F-statistic: 0.04249 on 1 and 498 DF, p-value: 0.8368
mod8 <- lm(blotch ~ length, data = sharks) # Fits a linear model of time taken to blotch (response) on length (predictor)
summary(mod8) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ length, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3280 -0.9663 -0.0678 0.9117 4.9545
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.2312817 0.2965354 118.810 <2e-16 ***
length -0.0005017 0.0013721 -0.366 0.715
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared: 0.0002684, Adjusted R-squared: -0.001739
F-statistic: 0.1337 on 1 and 498 DF, p-value: 0.7148
mod9 <- lm(blotch ~ meta, data = sharks) # Fits a linear model of time taken to blotch (response) on cortisol level, meta (predictor)
summary(mod9) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ meta, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3635 -0.9639 -0.0793 0.9275 4.9411
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.1892930 0.3075967 114.401 <2e-16 ***
meta -0.0007787 0.0036674 -0.212 0.832
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared: 9.051e-05, Adjusted R-squared: -0.001917
F-statistic: 0.04508 on 1 and 498 DF, p-value: 0.8319
mod10 <- lm(blotch ~ depth, data = sharks) # Fits a linear model of time taken to blotch (response) on depth of capture (predictor)
summary(mod10) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ depth, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.81869 -0.65427 -0.01035 0.58825 2.83116
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.82178 1.11207 8.832 <2e-16 ***
depth 0.50467 0.02216 22.772 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1 on 498 degrees of freedom
Multiple R-squared: 0.5101, Adjusted R-squared: 0.5091
F-statistic: 518.6 on 1 and 498 DF, p-value: < 2.2e-16
ggplot(sharks, aes(x = depth, y = blotch)) + # Produces a scatter plot of the variables depth and time taken to blotch
  geom_point(colour = "blue", alpha = .3) + # Draws the points in blue at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds the fitted linear regression line without a confidence band
  labs(x = "Water depth of capture (m)", y = "Time taken to blotch (s)") + # Amends the label names
  scale_fill_brewer(palette = "Dark2") # NOTE: no fill aesthetic is mapped in this plot, so this palette has no visible effect
`geom_smooth()` using formula = 'y ~ x'
# Predict time to blotch at four hypothetical capture depths using mod10 (blotch ~ depth).
# NOTE: 40, 42.5 and 58 m lie outside the observed depth range reported by
# range(sharks$depth) (about 44.6 to 56.8 m), so those predictions are extrapolations.
new_depth <- data.frame(depth = c(40, 42.5, 55, 58)) # Creates a data frame called new_depth, containing 1 column of user defined depths
new_depth$predicted <- predict(mod10, newdata = new_depth) # Creates a new column in new_depth called predicted and uses mod10 to calculate the value
kable(new_depth, col.names = c("Hypothetical depth of capture (m)", "Predicted time to blotch (s)")) # Creates a table and changes the column headings
Hypothetical depth of capture (m) | Predicted time to blotch (s) |
---|---|
40.0 | 30.00871 |
42.5 | 31.27039 |
55.0 | 37.57881 |
58.0 | 39.09283 |
sharks$tvar <- sharks$air - sharks$water # Creates a new column 'tvar' holding the difference between ambient air temperature and surface water temperature (air minus water)
mod4 <- lm(blotch ~ tvar, data = sharks) # Fits a linear model of time taken to blotch (response) on the air/water temperature difference (predictor)
summary(mod4) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch ~ tvar, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.3305 -0.9723 -0.0591 0.9231 4.9821
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 35.011079 0.360355 97.157 <2e-16 ***
tvar 0.009136 0.028338 0.322 0.747
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.429 on 498 degrees of freedom
Multiple R-squared: 0.0002086, Adjusted R-squared: -0.001799
F-statistic: 0.1039 on 1 and 498 DF, p-value: 0.7473
ggplot(sharks, aes(x = tvar, y = blotch)) + # Produces a scatter plot of the air/water temperature difference (tvar) and time taken to blotch
  geom_point(colour = "blue", alpha = .3) + # Draws the points in blue at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds the fitted linear regression line without a confidence band
  labs(x = "Temperature variance air/water (C)", y = "Time taken to blotch (s)") + # Amends the label names
  scale_fill_brewer(palette = "Dark2") # NOTE: no fill aesthetic is mapped in this plot, so this palette has no visible effect
`geom_smooth()` using formula = 'y ~ x'
mod5 <- lm(blotch2 ~ blotch1, data = sharksub) # Fits a linear model of time taken to blotch on second capture (response) on time taken to blotch on first capture (predictor)
summary(mod5) # Provides a summary of residuals and coefficients
Call:
lm(formula = blotch2 ~ blotch1, data = sharksub)
Residuals:
Min 1Q Median 3Q Max
-1.37681 0.09513 0.11617 0.13911 0.17550
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.81111 1.74483 0.465 0.644
blotch1 1.00339 0.04979 20.154 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.3819 on 48 degrees of freedom
Multiple R-squared: 0.8943, Adjusted R-squared: 0.8921
F-statistic: 406.2 on 1 and 48 DF, p-value: < 2.2e-16
ggplot(sharksub, aes(x = blotch1, y = blotch2)) + # Produces a scatter plot of the variables blotch1 and blotch2
  geom_point(colour = "blue", alpha = .3) + # Draws the points in blue at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds the fitted linear regression line without a confidence band
  labs(x = "First capture", y = "Second capture", title = "Time taken to blotch (seconds)") + # Amends the label names and adds a title
  scale_fill_brewer(palette = "Dark2") # NOTE: no fill aesthetic is mapped in this plot, so this palette has no visible effect
`geom_smooth()` using formula = 'y ~ x'
ggplot(sharksub, aes(x = blotch1, y = blotch2, colour = sex)) + # Produces a scatter plot of the variables blotch1 and blotch2, coloured by sex
  geom_point(alpha = .3) + # Draws the points at 30% opacity so overlapping points remain visible
  geom_smooth(method = "lm", se = FALSE) + # Adds one fitted linear regression line per sex, without confidence bands
  labs(x = "First capture", y = "Second capture", title = "Time taken to blotch (seconds)") + # Amends the label names and adds a title
  scale_colour_brewer(palette = "Dark2") # Applies the colour blind friendly Dark2 palette to the colour aesthetic (sex); a fill scale would not affect it
`geom_smooth()` using formula = 'y ~ x'
ggplot(sharks, aes(x = sex, y = blotch, colour = sex)) + # Creates a plot of time taken to blotch by sex
  geom_boxplot() + # Draws one boxplot per sex
  geom_jitter(alpha = .3) + # Overlays the individual observations, jittered and at 30% opacity
  labs(x = "Sex", y = "Time taken to blotch (s)") + # Changes the axis titles
  scale_colour_brewer(palette = "Dark2") # Applies the colour blind friendly Dark2 palette to the colour aesthetic (sex); a fill scale would not affect it
t.test(blotch ~ sex, data = sharks) # Produces a Welch Two Sample t-test for time to blotch between males and females. Significant - on average males showed a delayed reaction to stress compared to females
Welch Two Sample t-test
data: blotch by sex
t = -3.0282, df = 494.67, p-value = 0.002589
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.6322714 -0.1346620
sample estimates:
mean in group Female mean in group Male
34.92294 35.30641
# Predict time to blotch at second capture from four hypothetical first-capture
# times using mod5 (blotch2 ~ blotch1).
new_blotch <- data.frame(blotch1 = c(36.5, 37.5, 38.5, 39.5)) # Creates a data frame called new_blotch, containing 1 column and user defined values
new_blotch$predicted <- predict(mod5, newdata = new_blotch) # Creates a new column in new_blotch called predicted and uses mod5 to calculate the value
kable(new_blotch, col.names = c("Hypothetical time to blotch at first capture (s)", "Predicted time to blotch at second capture (s)")) # Creates a table and changes the column names
Hypothetical time to blotch at first capture (s) | Predicted time to blotch at second capture (s) |
---|---|
36.5 | 37.43472 |
37.5 | 38.43810 |
38.5 | 39.44149 |
39.5 | 40.44487 |
# Descriptive statistics (mean, minimum, maximum) for time taken to blotch in sharks
mean(sharks$blotch)
[1] 35.12541
min(sharks$blotch)
[1] 30.77585
max(sharks$blotch)
[1] 40.08356
# Descriptive statistics (mean, minimum, maximum) for ambient air temperature in sharks
mean(sharks$air)
[1] 35.53526
min(sharks$air)
[1] 33.00454
max(sharks$air)
[1] 37.99978
# Descriptive statistics (mean, minimum, maximum) for surface water temperature in sharks
mean(sharks$water)
[1] 23.02052
min(sharks$water)
[1] 20.00503
max(sharks$water)
[1] 25.98523
# Descriptive statistics (mean, minimum, maximum) for time taken to blotch at second capture in sharksub
mean(sharksub$blotch2)
[1] 35.96016
min(sharksub$blotch2)
[1] 33.46802
max(sharksub$blotch2)
[1] 38.1838
# Descriptive statistics (mean, minimum, maximum) for tvar, the air minus water temperature difference
mean(sharks$tvar)
[1] 12.51474
min(sharks$tvar)
[1] 7.503214
max(sharks$tvar)
[1] 17.8354
# Minimum and maximum observed depth of capture in sharks
range(sharks$depth)
[1] 44.64474 56.82916