# loading the files dataset
library(readxl)
file_path <- "/Users/jamiegriffiths/Desktop/sharks.xlsx"
sharks <- read_excel(file_path)Shark Blotching Analysis
Quarto
Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
# loading the necessary packages
options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("tidyverse")
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
install.packages("ggplot2")
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
install.packages(c("FactoMineR", "factoextra"))
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
library(FactoMineR)
library(factoextra)Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
install.packages("ggrepel")
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
library(ggrepel)
install.packages("plotly")
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
install.packages("effsize")
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
library(effsize)
install.packages("interactions")
The downloaded binary packages are in
/var/folders/kz/l0hrdr_932ncsvj1j3lwnf9r0000gn/T//Rtmpmp8nvR/downloaded_packages
library(interactions)# Viewing the dataset
head(sharks) # displaying the first few rows of the dataset # A tibble: 6 × 10
ID sex blotch BPM weight length air water meta depth
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH001 Female 37.2 148 74.7 187. 37.7 23.4 64.1 53.2
2 SH002 Female 34.5 158 73.4 189. 35.7 21.4 73.7 49.6
3 SH003 Female 36.3 125 71.8 284. 34.8 20.1 54.4 49.4
4 SH004 Male 35.3 161 105. 171. 36.2 21.6 86.3 50.3
5 SH005 Female 37.4 138 67.1 264. 33.6 21.8 108. 49.0
6 SH006 Male 33.5 126 110. 270. 36.4 20.9 109. 46.8
tail(sharks) # displaying the last few rows of the dataset# A tibble: 6 × 10
ID sex blotch BPM weight length air water meta depth
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH495 Female 34.9 165 104. 149. 33.4 21.1 59.2 47.3
2 SH496 Male 34.6 150 109. 257. 34.9 23.7 60.4 48.5
3 SH497 Male 32.1 127 97.3 264. 33.4 24.9 54.1 45.4
4 SH498 Female 37.1 151 88.9 142. 35.9 22.0 82.4 53.1
5 SH499 Male 35.3 155 78.1 148. 33.3 22.1 99.2 52.1
6 SH500 Female 35.3 143 105. 159. 36.1 24.6 87.1 49.3
summary(sharks) # this provides values that are of potential importance such as min, max, mean and median ID sex blotch BPM
Length:500 Length:500 Min. :30.78 Min. :119.0
Class :character Class :character 1st Qu.:34.16 1st Qu.:129.0
Mode :character Mode :character Median :35.05 Median :142.0
Mean :35.13 Mean :141.8
3rd Qu.:36.05 3rd Qu.:153.2
Max. :40.08 Max. :166.0
weight length air water
Min. : 65.10 Min. :128.3 Min. :33.00 Min. :20.01
1st Qu.: 75.68 1st Qu.:172.0 1st Qu.:34.42 1st Qu.:21.55
Median : 87.82 Median :211.1 Median :35.43 Median :23.11
Mean : 87.94 Mean :211.0 Mean :35.54 Mean :23.02
3rd Qu.:100.40 3rd Qu.:251.8 3rd Qu.:36.71 3rd Qu.:24.37
Max. :110.94 Max. :291.0 Max. :38.00 Max. :25.99
meta depth
Min. : 50.03 Min. :44.64
1st Qu.: 67.39 1st Qu.:48.90
Median : 82.45 Median :50.14
Mean : 82.04 Mean :50.14
3rd Qu.: 95.97 3rd Qu.:51.35
Max. :112.45 Max. :56.83
str(sharks) # this shows how the dataset is structured tibble [500 × 10] (S3: tbl_df/tbl/data.frame)
$ ID : chr [1:500] "SH001" "SH002" "SH003" "SH004" ...
$ sex : chr [1:500] "Female" "Female" "Female" "Male" ...
$ blotch: num [1:500] 37.2 34.5 36.3 35.3 37.4 ...
$ BPM : num [1:500] 148 158 125 161 138 126 166 135 132 127 ...
$ weight: num [1:500] 74.7 73.4 71.8 104.6 67.1 ...
$ length: num [1:500] 187 189 284 171 264 ...
$ air : num [1:500] 37.7 35.7 34.8 36.2 33.6 ...
$ water : num [1:500] 23.4 21.4 20.1 21.6 21.8 ...
$ meta : num [1:500] 64.1 73.7 54.4 86.3 108 ...
$ depth : num [1:500] 53.2 49.6 49.4 50.3 49 ...
colSums(is.na(sharks)) # this identifies any potential missing values ID sex blotch BPM weight length air water meta depth
0 0 0 0 0 0 0 0 0 0
Analysing the data set and identifying if blotching time can be predicted
# Data Analysis of Blotching Time and Each Variable
# A scatter plot of water aginst blotching time this was repeated many times with the x-axis variable changing
ggplot(sharks, aes(x = water, y = blotch, colour = sex)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Blotching Time vs. Water Temperature", x = "Water (°C)", y = "Blotch (seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
# assessing any correlation of sex against water, this was also repeated many times with the water variable changing
ggplot(sharks, aes(x = sex, y = water, fill = sex)) +
geom_boxplot() +
labs(title = "Water Temperature by sex", x = "Sex", y = "Water temperature (°C)")ggplot(sharks, aes(x = meta, y = blotch, color = sex)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Blotching Time vs Cortisol Levels", x = "Meta (mcg/dl)", y = "Blotch (seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
ggplot(sharks, aes(x = air, y = blotch, color = sex)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Blotching Time vs Air Temperature", y = "Blotch (seconds)", x = "Air (°C)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
ggplot(sharks, aes(x = sex, y = blotch, fill = sex)) +
geom_boxplot() +
labs(title = "Blotching Time by Sex", x = "Sex", y = "Blotch (seconds)") +
theme_minimal()ggplot(sharks, aes(x = weight, y = blotch, color = sex)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Blotching Time vs Weight", x = "Weight (Kg)", y = "Blotch (seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
ggplot(sharks, aes(x = sex, y = weight, fill = sex)) +
geom_boxplot() +
labs(title = "Weight by Sex", x = "Sex", y = "Weight (Kg)") +
theme_minimal()# predicitng blotching time
ggplot(sharks, aes(x = depth, y = blotch, colour = sex)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Depth vs Blotching time", x = "Depth (Metres)", y = "Blotch (seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
ggplot(sharks, aes(x = sex, y = depth, fill = sex)) +
geom_boxplot() +
labs(title = "Depth by Sex", x = "Sex", y = "Depth (Metres)") +
theme_minimal() ggplot(sharks, aes(x = BPM, y = blotch, color = sex)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Beats Per Minute (BPM) vs Blotching Time", x = "BPM", y = "Blotch (Seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
ggplot(sharks, aes(x = length, y = blotch, color = sex)) +
geom_point(size = 2) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Length of Shark vs Blotching Time", x = "Length (cm)", y = "Blotch (seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
# A linear regression model to identify how each variable impacts blotching time
blotching.lm <- lm(blotch ~ length + BPM + meta + weight + depth + water + air + sex, data = sharks)
summary(blotching.lm)
Call:
lm(formula = blotch ~ length + BPM + meta + weight + depth +
water + air + sex, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.97715 -0.66193 -0.00841 0.64123 2.90395
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.1179728 1.8749828 5.930 5.73e-09 ***
length 0.0013042 0.0009606 1.358 0.17517
BPM -0.0020791 0.0031540 -0.659 0.51009
meta -0.0011610 0.0025671 -0.452 0.65127
weight 0.0017281 0.0033143 0.521 0.60231
depth 0.5034077 0.0220870 22.792 < 2e-16 ***
water -0.0143878 0.0268112 -0.537 0.59176
air -0.0310068 0.0315302 -0.983 0.32590
sexMale 0.3088617 0.0890602 3.468 0.00057 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9912 on 491 degrees of freedom
Multiple R-squared: 0.5256, Adjusted R-squared: 0.5178
F-statistic: 67.99 on 8 and 491 DF, p-value: < 2.2e-16
# A linear regression model assessing the interaction of depth and length on blotch, this was conducted multiple times with different interaction variables.
blotching.interaction1 <- lm(blotch ~ depth * length, data = sharks)
summary(blotching.interaction1)
Call:
lm(formula = blotch ~ depth * length, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.8201 -0.6498 -0.0215 0.6367 2.7734
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.3192910 5.1583732 2.194 0.0287 *
depth 0.4692827 0.1025259 4.577 5.96e-06 ***
length -0.0076817 0.0237878 -0.323 0.7469
depth:length 0.0001795 0.0004734 0.379 0.7047
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1 on 496 degrees of freedom
Multiple R-squared: 0.5121, Adjusted R-squared: 0.5092
F-statistic: 173.6 on 3 and 496 DF, p-value: < 2.2e-16
blotching.interation2 <- lm(blotch ~ depth * water, data = sharks)
summary(blotching.interation2)
Call:
lm(formula = blotch ~ depth * water, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.84048 -0.64050 0.01152 0.60871 2.80332
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 12.936735 15.454846 0.837 0.403
depth 0.451254 0.308615 1.462 0.144
water -0.134408 0.672499 -0.200 0.842
depth:water 0.002303 0.013432 0.171 0.864
Residual standard error: 1.002 on 496 degrees of freedom
Multiple R-squared: 0.5107, Adjusted R-squared: 0.5077
F-statistic: 172.5 on 3 and 496 DF, p-value: < 2.2e-16
blotching.interaction3 <- lm(blotch ~ depth * meta, data = sharks)
summary(blotching.interaction3)
Call:
lm(formula = blotch ~ depth * meta, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.79278 -0.66090 0.00394 0.59395 2.79845
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.068423 5.503900 -0.194 0.8462
depth 0.723949 0.109707 6.599 1.07e-10 ***
meta 0.132362 0.065555 2.019 0.0440 *
depth:meta -0.002665 0.001307 -2.040 0.0419 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9977 on 496 degrees of freedom
Multiple R-squared: 0.5144, Adjusted R-squared: 0.5115
F-statistic: 175.2 on 3 and 496 DF, p-value: < 2.2e-16
blotching.interaction4 <- lm(blotch ~ depth * air, data = sharks)
summary(blotching.interaction4)
Call:
lm(formula = blotch ~ depth * air, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.86509 -0.66483 0.00611 0.60406 2.84002
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -52.10931 27.84438 -1.871 0.06187 .
depth 1.76230 0.55568 3.171 0.00161 **
air 1.75187 0.78678 2.227 0.02642 *
depth:air -0.03557 0.01570 -2.265 0.02392 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9961 on 496 degrees of freedom
Multiple R-squared: 0.516, Adjusted R-squared: 0.513
F-statistic: 176.2 on 3 and 496 DF, p-value: < 2.2e-16
blotching.interaction5 <- lm(blotch ~ depth * weight, data = sharks)
summary(blotching.interaction5)
Call:
lm(formula = blotch ~ depth * weight, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.84521 -0.65358 0.00054 0.63162 2.80736
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 19.458715 7.580224 2.567 0.0105 *
depth 0.310076 0.150965 2.054 0.0405 *
weight -0.108956 0.084754 -1.286 0.1992
depth:weight 0.002200 0.001688 1.304 0.1930
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1 on 496 degrees of freedom
Multiple R-squared: 0.512, Adjusted R-squared: 0.509
F-statistic: 173.4 on 3 and 496 DF, p-value: < 2.2e-16
blotching.interaction6 <- lm(blotch ~ depth * BPM, data = sharks)
summary(blotching.interaction6)
Call:
lm(formula = blotch ~ depth * BPM, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.77396 -0.64941 0.00761 0.60376 2.82741
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.899456 11.555496 0.251 0.80198
depth 0.648798 0.230571 2.814 0.00509 **
BPM 0.048914 0.081163 0.603 0.54701
depth:BPM -0.001018 0.001620 -0.629 0.52979
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared: 0.5109, Adjusted R-squared: 0.508
F-statistic: 172.7 on 3 and 496 DF, p-value: < 2.2e-16
blotching.interaction7 <- lm(blotch ~ depth * sex, data = sharks)
summary(blotching.interaction7)
Call:
lm(formula = blotch ~ depth * sex, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.9614 -0.6388 -0.0179 0.5981 2.9815
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.14057 1.58486 6.398 3.64e-10 ***
depth 0.49510 0.03164 15.650 < 2e-16 ***
sexMale -0.33657 2.20540 -0.153 0.879
depth:sexMale 0.01277 0.04396 0.291 0.771
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9904 on 496 degrees of freedom
Multiple R-squared: 0.5215, Adjusted R-squared: 0.5186
F-statistic: 180.2 on 3 and 496 DF, p-value: < 2.2e-16
# a linear regress model to closer view depth as an effect on blotch
depth_model <- lm(blotch ~ depth, data = sharks)
summary(depth_model)
Call:
lm(formula = blotch ~ depth, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.81869 -0.65427 -0.01035 0.58825 2.83116
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.82178 1.11207 8.832 <2e-16 ***
depth 0.50467 0.02216 22.772 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1 on 498 degrees of freedom
Multiple R-squared: 0.5101, Adjusted R-squared: 0.5091
F-statistic: 518.6 on 1 and 498 DF, p-value: < 2.2e-16
# calculation of confidence intervals for the model coefficients
confint(depth_model) 2.5 % 97.5 %
(Intercept) 7.6368581 12.0067017
depth 0.4611309 0.5482156
# an Anova test on the model to identify significance of depth
anova(depth_model)Analysis of Variance Table
Response: blotch
Df Sum Sq Mean Sq F value Pr(>F)
depth 1 518.70 518.7 518.57 < 2.2e-16 ***
Residuals 498 498.12 1.0
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# closer viewing the effect of sex on blotch
sex_model <- lm(blotch ~ sex, data = sharks)
summary(sex_model)
Call:
lm(formula = blotch ~ sex, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-4.1471 -0.9653 -0.0222 0.9889 4.7772
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 34.92294 0.09217 378.885 < 2e-16 ***
sexMale 0.38347 0.12685 3.023 0.00263 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.416 on 498 degrees of freedom
Multiple R-squared: 0.01802, Adjusted R-squared: 0.01605
F-statistic: 9.139 on 1 and 498 DF, p-value: 0.002632
# an anova test to identify significance of sex
anova(sex_model)Analysis of Variance Table
Response: blotch
Df Sum Sq Mean Sq F value Pr(>F)
sex 1 18.32 18.323 9.1387 0.002632 **
Residuals 498 998.50 2.005
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# assessing if there was an interaction between depth, air, and meta on blotch, this was done multiple times with the variables changing
blotching.multi.interaction <- lm(blotch ~ depth * air * meta, data = sharks)
summary(blotching.multi.interaction)
Call:
lm(formula = blotch ~ depth * air * meta, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.83374 -0.66661 0.00818 0.61764 2.80251
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -2.643e+02 1.449e+02 -1.824 0.0687 .
depth 6.017e+00 2.889e+00 2.082 0.0378 *
air 7.476e+00 4.112e+00 1.818 0.0697 .
meta 2.558e+00 1.696e+00 1.508 0.1322
depth:air -1.503e-01 8.199e-02 -1.833 0.0674 .
depth:meta -5.126e-02 3.380e-02 -1.517 0.1300
air:meta -6.895e-02 4.811e-02 -1.433 0.1524
depth:air:meta 1.381e-03 9.584e-04 1.441 0.1501
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9939 on 492 degrees of freedom
Multiple R-squared: 0.522, Adjusted R-squared: 0.5152
F-statistic: 76.76 on 7 and 492 DF, p-value: < 2.2e-16
# creating a pca graph to identify what was the biggest contributer to blotch
pca_data <- sharks[, !(names(sharks) %in% c("ID", "sex"))]
pca <- PCA(pca_data, scale.unit = TRUE, quanti.sup = which(names(pca_data)== "blotch"), graph = FALSE)
blotch_coords <- pca$quanti.sup$coord[1, 1:2]
pca_plot <- fviz_pca_var(pca, col.var = "contrib", repel = TRUE, labelsize = 5) +
geom_point(data = data.frame(x = blotch_coords[1],
y = blotch_coords[2]),
aes(x = x, y = y), color = "red", size = 3) +
geom_text_repel(data = data.frame(x = blotch_coords[1], y = blotch_coords[2]),
aes(x = x, y = y, label = "blotch"), color = "red", size = 5) +
ggtitle("Variable Contributions to Blotching Time") +
theme_minimal()
print(pca_plot)blotching.multi.interaction1 <- lm(blotch ~ depth * air + depth * meta, data = sharks)
summary(blotching.multi.interaction1)
Call:
lm(formula = blotch ~ depth * air + depth * meta, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.84274 -0.66770 0.02015 0.61320 2.82711
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -60.121062 28.066377 -2.142 0.032673 *
depth 1.924737 0.560299 3.435 0.000642 ***
air 1.681381 0.787225 2.136 0.033184 *
meta 0.127681 0.065506 1.949 0.051842 .
depth:air -0.034186 0.015706 -2.177 0.029980 *
depth:meta -0.002570 0.001305 -1.969 0.049485 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.994 on 494 degrees of freedom
Multiple R-squared: 0.52, Adjusted R-squared: 0.5151
F-statistic: 107 on 5 and 494 DF, p-value: < 2.2e-16
blotching.multi.interaction2 <- lm(blotch ~ depth * sex + air * sex, data = sharks)
summary(blotching.multi.interaction2)
Call:
lm(formula = blotch ~ depth * sex + air * sex, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.93824 -0.65805 -0.01876 0.60904 2.82219
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 13.06581 2.21240 5.906 6.54e-09 ***
depth 0.49819 0.03162 15.753 < 2e-16 ***
sexMale -3.80126 3.13609 -1.212 0.2261
air -0.08678 0.04587 -1.892 0.0591 .
depth:sexMale 0.01041 0.04397 0.237 0.8129
sexMale:air 0.10092 0.06237 1.618 0.1063
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9888 on 494 degrees of freedom
Multiple R-squared: 0.525, Adjusted R-squared: 0.5202
F-statistic: 109.2 on 5 and 494 DF, p-value: < 2.2e-16
# a scatter plot to assess the interaction of depth and sex on blotching time
ggplot(sharks, aes(x = depth, y = blotch, color = sex)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
scale_color_manual(values = c("Female" = "lightblue", "Male" = "red")) +
labs(title = "Interaction Between Depth and Sex on Blotching Time",
x = "Depth (meters)",
y = "Blotching Time (seconds)",
color = "Sex") +
theme_minimal() +
facet_wrap(~sex)`geom_smooth()` using formula = 'y ~ x'
# a scatter plot with sex data points overlayed to assess how the interaction of sex and depth differs in each sex
ggplot(sharks, aes(x = depth, y = blotch, color = sex, group = sex)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x * meta, se = FALSE) +
scale_color_manual(values = c("Female" = "lightblue", "Male" = "red")) +
labs(title = "Interaction Between Depth and sex on Blotching Time",
x = "Depth (meters)",
y = "Blotching Time (seconds)",
color = "sex") +
theme_minimal()Warning: Failed to fit group 1.
Caused by error:
! object 'meta' not found
Warning: Failed to fit group 2.
Caused by error:
! object 'meta' not found
# comparing the AIC values of different models
AIC(blotching.interaction3, blotching.interaction4, blotching.interaction7, blotching.multi.interaction, blotching.multi.interaction1) df AIC
blotching.interaction3 5 1422.642
blotching.interaction4 5 1421.045
blotching.interaction7 5 1415.313
blotching.multi.interaction 9 1422.772
blotching.multi.interaction1 7 1420.912
# comparing the BIC values from different models
BIC(blotching.interaction3, blotching.interaction4, blotching.interaction7, blotching.multi.interaction, blotching.multi.interaction1) df BIC
blotching.interaction3 5 1443.715
blotching.interaction4 5 1442.118
blotching.interaction7 5 1436.386
blotching.multi.interaction 9 1460.703
blotching.multi.interaction1 7 1450.414
# creating and then predicting blotch values for males with confidence intervals
data.predict2 <- data.frame(depth = c (45, 50, 55), sex = c("Male"))
predict(blotching.interaction7, data.predict2, interval = "confidence") fit lwr upr
1 32.65853 32.32377 32.99330
2 35.19793 35.07748 35.31838
3 37.73732 37.42634 38.04830
# creating and predicting blotch values for females with confidence intervals
data.predictF <- data.frame(depth = c (45, 50, 55), sex = c("Female"))
predict(blotching.interaction7, data.predictF, interval = "confidence") fit lwr upr
1 32.42029 32.08152 32.75906
2 34.89581 34.76910 35.02253
3 37.37134 37.03888 37.70380
# A 3D scatter plot to better visually assess the interaction of sex and depth on blotching time
plot_ly(sharks,
x = ~depth,
y = ~sex,
z = ~blotch,
type = "scatter3d",
mode = "markers",
marker = list(
size = 3,
color = ~blotch,
colorscale = "viridis",
colorbar = list(title = "Blotch Time")
)) %>%
layout(scene = list(
xaxis = list(title = "Depth"),
yaxis = list(title = "Sex"),
zaxis = list(title = "Blotch"),
camera = list(
eye = list(x = 1.5, y = 1.5, z = 0.7)
)
))data.predict3 <- data.frame(
depth = c(45, 50, 55),
sex = c("Male", "Female", "Male"))
predict(blotching.interaction7, data.predict3, interval = "confidence") fit lwr upr
1 32.65853 32.32377 32.99330
2 34.89581 34.76910 35.02253
3 37.73732 37.42634 38.04830
sharks$predicted_blotch <- predict(depth_model, newdata = sharks) # creates a new column in the dataset of predicted blotching times
head(sharks) # view the first 6 rows of the data to look at the new row created # A tibble: 6 × 11
ID sex blotch BPM weight length air water meta depth
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH001 Female 37.2 148 74.7 187. 37.7 23.4 64.1 53.2
2 SH002 Female 34.5 158 73.4 189. 35.7 21.4 73.7 49.6
3 SH003 Female 36.3 125 71.8 284. 34.8 20.1 54.4 49.4
4 SH004 Male 35.3 161 105. 171. 36.2 21.6 86.3 50.3
5 SH005 Female 37.4 138 67.1 264. 33.6 21.8 108. 49.0
6 SH006 Male 33.5 126 110. 270. 36.4 20.9 109. 46.8
# ℹ 1 more variable: predicted_blotch <dbl>
# creating a scatter graph of the observed times against the newly generated predicted values
ggplot(sharks, aes(x = depth, y = blotch)) +
geom_point(color = "blue", alpha = 0.6, size = 2) +
geom_line(aes(y = predicted_blotch), color = "red", size = 1) +
labs(title = "Observed vs Predicted Blotching Time",
x = "Depth (meters)",
y = "Blotching Time (seconds)") +
theme_minimal()Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
# conducting a cohne's d test for the differences between sexes
cohen.d(sharks$blotch ~ sharks$sex)
Cohen's d
d estimate: -0.2708124 (small)
95 percent confidence interval:
lower upper
-0.44762256 -0.09400216
# fitting a polynomial model with depth and sex
model_poly <- lm(blotch ~ poly(depth, 2) + sex, data = sharks)
summary(model_poly)
Call:
lm(formula = blotch ~ poly(depth, 2) + sex, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.93971 -0.65116 -0.01413 0.60168 3.00451
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 34.96448 0.06447 542.309 < 2e-16 ***
poly(depth, 2)1 22.64130 0.99076 22.852 < 2e-16 ***
poly(depth, 2)2 0.72019 0.99011 0.727 0.467335
sexMale 0.30478 0.08877 3.434 0.000646 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.99 on 496 degrees of freedom
Multiple R-squared: 0.5219, Adjusted R-squared: 0.519
F-statistic: 180.5 on 3 and 496 DF, p-value: < 2.2e-16
# fitting a model for males and females seperately
model_male <- lm(blotch ~ depth, data = subset(sharks, sex == "Male"))
model_female <- lm(blotch ~ depth, data = subset(sharks, sex == "Female"))
summary(model_male)
Call:
lm(formula = blotch ~ depth, data = subset(sharks, sex == "Male"))
Residuals:
Min 1Q Median 3Q Max
-2.96144 -0.69614 -0.04989 0.58563 2.61361
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.80400 1.57398 6.229 1.86e-09 ***
depth 0.50788 0.03132 16.215 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.016 on 262 degrees of freedom
Multiple R-squared: 0.5009, Adjusted R-squared: 0.499
F-statistic: 262.9 on 1 and 262 DF, p-value: < 2.2e-16
summary(model_female)
Call:
lm(formula = blotch ~ depth, data = subset(sharks, sex == "Female"))
Residuals:
Min 1Q Median 3Q Max
-2.51461 -0.57180 0.02172 0.60537 2.98152
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.14057 1.53683 6.598 2.75e-10 ***
depth 0.49510 0.03068 16.139 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9604 on 234 degrees of freedom
Multiple R-squared: 0.5268, Adjusted R-squared: 0.5247
F-statistic: 260.5 on 1 and 234 DF, p-value: < 2.2e-16
# running a interaction tes with depth and sex
interaction_test <- lm(blotch ~ depth * sex, data = sharks)
summary(interaction_test)
Call:
lm(formula = blotch ~ depth * sex, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.9614 -0.6388 -0.0179 0.5981 2.9815
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 10.14057 1.58486 6.398 3.64e-10 ***
depth 0.49510 0.03164 15.650 < 2e-16 ***
sexMale -0.33657 2.20540 -0.153 0.879
depth:sexMale 0.01277 0.04396 0.291 0.771
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.9904 on 496 degrees of freedom
Multiple R-squared: 0.5215, Adjusted R-squared: 0.5186
F-statistic: 180.2 on 3 and 496 DF, p-value: < 2.2e-16
# checking the residuals for the interaction of sex and depth
par(mfrow = c(2, 2))
plot(blotching.interaction7)# checking the residuals for both the male and female model
par(mfrow = c(2, 2))
plot(model_male)plot(model_female)Assessing if there is a correlation between the variables air and water?
# a scatter graph to assess the relationship between air and water temperature
ggplot(sharks, aes(x = air, y = water)) +
geom_point(size = 2, color = "red") +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Air Temperature vs Water Temperature", x = "Air Temperature (°C)", y = "Water Temperature (°C)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
# assessing if the relationship changes between sexes
ggplot(sharks, aes(x = air, y = water)) +
geom_point(size = 2, color = "red") +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(title = "Air Temperature vs Water Temperature by Sex",
x = "Air Temperature (°C)",
y = "Water Temperature (°C)") +
theme_minimal() +
facet_wrap(~sex)`geom_smooth()` using formula = 'y ~ x'
# a correlation test between air and water
correlation_test <- cor.test(sharks$air, sharks$water, method = "pearson")
print(correlation_test)
Pearson's product-moment correlation
data: sharks$air and sharks$water
t = -1.2346, df = 498, p-value = 0.2176
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.14224207 0.03260803
sample estimates:
cor
-0.05524051
# running a linear model to predict water from air
model <- lm(water ~ air, data = sharks)
summary(model)
Call:
lm(formula = water ~ air, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-3.03472 -1.47563 0.09925 1.38700 3.06356
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 25.31781 1.86221 13.596 <2e-16 ***
air -0.06465 0.05236 -1.235 0.218
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.67 on 498 degrees of freedom
Multiple R-squared: 0.003052, Adjusted R-squared: 0.00105
F-statistic: 1.524 on 1 and 498 DF, p-value: 0.2176
investigating if multiple capture can have an effect on blotching time?
# loading the necessary dataset for this research question
file_path <- "/Users/jamiegriffiths/Desktop/sharksub.xlsx"
sharksub <- read_excel(file_path)# a quick view and analysis of the data
head(sharksub) # this displays the first few rows of the dataset # A tibble: 6 × 4
ID sex blotch1 blotch2
<chr> <chr> <dbl> <dbl>
1 SH269 Female 36.1 37.2
2 SH163 Female 33.4 34.4
3 SH008 Female 36.3 36.5
4 SH239 Female 35.0 36.0
5 SH332 Female 35.7 36.8
6 SH328 Female 34.9 35.9
tail(sharksub) # this displays the last few rows of the dataset # A tibble: 6 × 4
ID sex blotch1 blotch2
<chr> <chr> <dbl> <dbl>
1 SH260 Male 36.4 37.5
2 SH357 Male 33.5 34.5
3 SH199 Male 37.1 38.2
4 SH413 Male 34.3 35.3
5 SH004 Male 35.3 35.6
6 SH307 Male 34.5 34.1
summary(sharksub) # this provides values that have potential importance such as min, max, mean and median ID sex blotch1 blotch2
Length:50 Length:50 Min. :32.49 Min. :33.47
Class :character Class :character 1st Qu.:34.38 1st Qu.:35.31
Mode :character Mode :character Median :34.94 Median :35.94
Mean :35.03 Mean :35.96
3rd Qu.:35.90 3rd Qu.:36.78
Max. :37.07 Max. :38.18
str(sharksub) # this shows how the dataset is structured tibble [50 × 4] (S3: tbl_df/tbl/data.frame)
$ ID : chr [1:50] "SH269" "SH163" "SH008" "SH239" ...
$ sex : chr [1:50] "Female" "Female" "Female" "Female" ...
$ blotch1: num [1:50] 36.1 33.4 36.3 35 35.7 ...
$ blotch2: num [1:50] 37.2 34.4 36.5 36 36.8 ...
colSums(is.na(sharksub)) # this identifies any potential missing values ID sex blotch1 blotch2
0 0 0 0
sharksub$blotch_change <- sharksub$blotch2 - sharksub$blotch1 # preparing the data to calculate change in blotching time (blotch_change)
# A paired t-test showing a comparison of blotch1 and blotch2
t_test_result <- t.test(sharksub$blotch1, sharksub$blotch2, paired = TRUE)
print(t_test_result) # showing the results from the t-test
Paired t-test
data: sharksub$blotch1 and sharksub$blotch2
t = -17.39, df = 49, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-1.037176 -0.822301
sample estimates:
mean difference
-0.9297384
# showing summary statistics of both capture times and the
summary_stats <- sharksub %>%
summarize(
mean_blotch1 = mean(blotch1, na.rm = TRUE),
mean_blotch2 = mean(blotch2, na.rm = TRUE),
mean_blotch_change = mean(blotch_change, na.rm = TRUE),
sd_blotch_change = sd(blotch_change, na.rm = TRUE)
)
print(summary_stats)# A tibble: 1 × 4
mean_blotch1 mean_blotch2 mean_blotch_change sd_blotch_change
<dbl> <dbl> <dbl> <dbl>
1 35.0 36.0 0.930 0.378
# creating a box plot to visually assess the differences of blotching time between captures
ggplot(sharksub, aes(x = factor(1), y = blotch1, fill = "Capture 1")) +
geom_boxplot() +
geom_boxplot(aes(x = factor(2), y = blotch2, fill = "Capture 2")) +
scale_x_discrete(labels = c("Capture 1", "Capture 2")) +
labs(title = "Blotching Times of First and Second Capture", x = "Capture", y = "Blotching time (seconds)") +
theme_minimal()# creating a scatter plot with a linear regression line blotch1 vs blotch2
ggplot(sharksub, aes(x = blotch1, y= blotch2)) +
geom_point() +
geom_smooth(method = lm, color = "blue") +
labs(title = "First Capture vs Second capture",
x = "Blotching Time (capture 1)",
y = "Blotching Time(capture 2)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
# creating a linear model predicting blotch1 from blotch2
model <- lm(blotch1 ~ blotch2, data = sharksub)
summary(model)
Call:
lm(formula = blotch1 ~ blotch2, data = sharksub)
Residuals:
Min 1Q Median 3Q Max
-0.31595 -0.16130 -0.10971 -0.00299 1.27761
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 2.97912 1.59111 1.872 0.0673 .
blotch2 0.89130 0.04422 20.154 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.36 on 48 degrees of freedom
Multiple R-squared: 0.8943, Adjusted R-squared: 0.8921
F-statistic: 406.2 on 1 and 48 DF, p-value: < 2.2e-16