RMDA Summative Assignment

Author

Emily Sankey

Blotching time in sharks

Is there a correlation between the variables air and water?

Code

# Set CRAN mirror
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install and load necessary packages
install.packages("tidyverse")

Installing package into 'C:/Users/sanke/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)

package 'tidyverse' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\sanke\AppData\Local\Temp\RtmpiqP3bM\downloaded_packages

Code

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Code

library(dplyr)
install.packages("readxl")

Installing package into 'C:/Users/sanke/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)

package 'readxl' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\sanke\AppData\Local\Temp\RtmpiqP3bM\downloaded_packages

Code

library(readxl)

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

# Summarise and inspect the dataset
sharks %>% summary()

      ID                sex                blotch           BPM       
 Length:500         Length:500         Min.   :30.78   Min.   :119.0  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.:129.0  
 Mode  :character   Mode  :character   Median :35.05   Median :142.0  
                                       Mean   :35.13   Mean   :141.8  
                                       3rd Qu.:36.05   3rd Qu.:153.2  
                                       Max.   :40.08   Max.   :166.0  
     weight           length           air            water      
 Min.   : 65.10   Min.   :128.3   Min.   :33.00   Min.   :20.01  
 1st Qu.: 75.68   1st Qu.:172.0   1st Qu.:34.42   1st Qu.:21.55  
 Median : 87.82   Median :211.1   Median :35.43   Median :23.11  
 Mean   : 87.94   Mean   :211.0   Mean   :35.54   Mean   :23.02  
 3rd Qu.:100.40   3rd Qu.:251.8   3rd Qu.:36.71   3rd Qu.:24.37  
 Max.   :110.94   Max.   :291.0   Max.   :38.00   Max.   :25.99  
      meta            depth      
 Min.   : 50.03   Min.   :44.64  
 1st Qu.: 67.39   1st Qu.:48.90  
 Median : 82.45   Median :50.14  
 Mean   : 82.04   Mean   :50.14  
 3rd Qu.: 95.97   3rd Qu.:51.35  
 Max.   :112.45   Max.   :56.83

Code

# Check the first few rows of the dataset
head(sharks)

# A tibble: 6 × 10
  ID    sex    blotch   BPM weight length   air water  meta depth
  <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH001 Female   37.2   148   74.7   187.  37.7  23.4  64.1  53.2
2 SH002 Female   34.5   158   73.4   189.  35.7  21.4  73.7  49.6
3 SH003 Female   36.3   125   71.8   284.  34.8  20.1  54.4  49.4
4 SH004 Male     35.3   161  105.    171.  36.2  21.6  86.3  50.3
5 SH005 Female   37.4   138   67.1   264.  33.6  21.8 108.   49.0
6 SH006 Male     33.5   126  110.    270.  36.4  20.9 109.   46.8

Code

# Check the structure of the dataset
str(sharks)

tibble [500 × 10] (S3: tbl_df/tbl/data.frame)
 $ ID    : chr [1:500] "SH001" "SH002" "SH003" "SH004" ...
 $ sex   : chr [1:500] "Female" "Female" "Female" "Male" ...
 $ blotch: num [1:500] 37.2 34.5 36.3 35.3 37.4 ...
 $ BPM   : num [1:500] 148 158 125 161 138 126 166 135 132 127 ...
 $ weight: num [1:500] 74.7 73.4 71.8 104.6 67.1 ...
 $ length: num [1:500] 187 189 284 171 264 ...
 $ air   : num [1:500] 37.7 35.7 34.8 36.2 33.6 ...
 $ water : num [1:500] 23.4 21.4 20.1 21.6 21.8 ...
 $ meta  : num [1:500] 64.1 73.7 54.4 86.3 108 ...
 $ depth : num [1:500] 53.2 49.6 49.4 50.3 49 ...

Code

class(sharks)

[1] "tbl_df"     "tbl"        "data.frame"

Code

names(sharks)

 [1] "ID"     "sex"    "blotch" "BPM"    "weight" "length" "air"    "water" 
 [9] "meta"   "depth"

Code

# Inspect the first few values of 'air' and 'water'
head(sharks$air)

[1] 37.73957 35.68413 34.79854 36.15973 33.61477 36.38343

Code

class(sharks$air)

[1] "numeric"

Code

class(sharks$water)

[1] "numeric"

Code

head(sharks$water)

[1] 23.37377 21.42088 20.05114 21.64319 21.76143 20.85200

Code

# Check column names to ensure they are correctly referenced
colnames(sharks)

 [1] "ID"     "sex"    "blotch" "BPM"    "weight" "length" "air"    "water" 
 [9] "meta"   "depth"

Code

# Load ggplot2 library
library(ggplot2)

# Create a customised scatter graph with theme
ggplot(sharks, aes(x = air, y = water)) +
  geom_point(color = "lightgreen", size = 3, shape = 16, alpha = 0.6) +
  geom_smooth(method = "lm", se = FALSE, color = "red", linetype = "dashed") +
  labs(
    title = "Relationship between air and water temperatures", 
    x = "Air temperature (°C)", 
    y = "Water temperature (°C)")

`geom_smooth()` using formula = 'y ~ x'

Code

# Calculate correlation between air and water
cor.test(sharks$air, sharks$water)


    Pearson's product-moment correlation

data:  sharks$air and sharks$water
t = -1.2346, df = 498, p-value = 0.2176
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.14224207  0.03260803
sample estimates:
        cor 
-0.05524051

These results indicate that there is no statistically significant difference between ambient air temperature and surface water temperature.

Does multiple capture have an effect on blotching time?

Code

sharksub <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharksub.xlsx")

sharksub %>% summary()

      ID                sex               blotch1         blotch2     
 Length:50          Length:50          Min.   :32.49   Min.   :33.47  
 Class :character   Class :character   1st Qu.:34.38   1st Qu.:35.31  
 Mode  :character   Mode  :character   Median :34.94   Median :35.94  
                                       Mean   :35.03   Mean   :35.96  
                                       3rd Qu.:35.90   3rd Qu.:36.78  
                                       Max.   :37.07   Max.   :38.18

Code

head(sharksub)

# A tibble: 6 × 4
  ID    sex    blotch1 blotch2
  <chr> <chr>    <dbl>   <dbl>
1 SH269 Female    36.1    37.2
2 SH163 Female    33.4    34.4
3 SH008 Female    36.3    36.5
4 SH239 Female    35.0    36.0
5 SH332 Female    35.7    36.8
6 SH328 Female    34.9    35.9

Code

str(sharksub)

tibble [50 × 4] (S3: tbl_df/tbl/data.frame)
 $ ID     : chr [1:50] "SH269" "SH163" "SH008" "SH239" ...
 $ sex    : chr [1:50] "Female" "Female" "Female" "Female" ...
 $ blotch1: num [1:50] 36.1 33.4 36.3 35 35.7 ...
 $ blotch2: num [1:50] 37.2 34.4 36.5 36 36.8 ...

Code

class(sharksub)

[1] "tbl_df"     "tbl"        "data.frame"

Code

colnames(sharksub)

[1] "ID"      "sex"     "blotch1" "blotch2"

Code

names(sharksub)

[1] "ID"      "sex"     "blotch1" "blotch2"

Code

library(ggplot2)
library(tidyr)  # For gathering the data into long format

# Convert data from wide to long format for ggplot2 compatibility
sharksub_long <- sharksub %>%
  gather(key = "blotch_type", value = "time", blotch1, blotch2)

# Boxplot of blotch1 vs blotch2
ggplot(sharksub_long, aes(x = blotch_type, y = time, fill = blotch_type)) +
  geom_boxplot() +
  labs(title = "Comparison of blotching times",
       x = "Blotches",
       y = "Time (seconds)") +
  theme_minimal() +
  scale_fill_manual(values = c("lightblue", "lightgreen"))

Code

# Perform independent t-test between blotch1 and blotch2
t.test(sharksub$blotch1, sharksub$blotch2)


    Welch Two Sample t-test

data:  sharksub$blotch1 and sharksub$blotch2
t = -4.1143, df = 97.658, p-value = 8.113e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3782038 -0.4812731
sample estimates:
mean of x mean of y 
 35.03042  35.96016

These results indicate that multiple capture has a statistically significant effect on blotching time in that blotching is increased when the sharks were captured more than once.

Is it possible to predict blotching time?

Blotch vs. Depth

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

# Perform linear regression
model <- lm(depth ~ blotch, data = sharks)

# Output the regression summary
summary(model)


Call:
lm(formula = depth ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3570 -0.9453 -0.0124  0.9863  4.7997 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 14.63435    1.56040   9.379   <2e-16 ***
blotch       1.01079    0.04439  22.772   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.415 on 498 degrees of freedom
Multiple R-squared:  0.5101,    Adjusted R-squared:  0.5091 
F-statistic: 518.6 on 1 and 498 DF,  p-value: < 2.2e-16

Code

# Plotting the data and regression line
ggplot(sharks, aes(x = blotch, y = depth)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  # Scatter plot with color and transparency
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  # Regression line with confidence interval
  labs(title = "Relationship between blotching and depth",
       x = "Blotch (seconds)",
       y = "Depth (metres)") +
  theme_minimal() +  # Minimal theme for cleaner look
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")  # Remove legend (not needed for this plot)

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'depth'
cor.test(sharks$blotch, sharks$depth)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$depth
t = 22.772, df = 498, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6683963 0.7546509
sample estimates:
      cor 
0.7142247

Blotch vs. Weight

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

model <- lm(weight ~ blotch, data = sharks)
summary(model)


Call:
lm(formula = weight ~ blotch, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-22.9687 -12.2943  -0.1632  12.3893  22.9622 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  84.8823    14.8545   5.714  1.9e-08 ***
blotch        0.0871     0.4225   0.206    0.837    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13.47 on 498 degrees of freedom
Multiple R-squared:  8.531e-05, Adjusted R-squared:  -0.001923 
F-statistic: 0.04249 on 1 and 498 DF,  p-value: 0.8368

Code

ggplot(sharks, aes(x = blotch, y = weight)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  
  labs(title = "Relationship between blotching and weight",
       x = "Blotch (seconds)",
       y = "Weight (Kg") +
  theme_minimal() +  
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'weight'
cor.test(sharks$blotch, sharks$weight)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$weight
t = 0.20613, df = 498, p-value = 0.8368
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.07851766  0.09684867
sample estimates:
        cor 
0.009236525

Blotch vs. Air Temperature

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

model <- lm(air ~ blotch, data = sharks)
summary(model)


Call:
lm(formula = air ~ blotch, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.54412 -1.10096 -0.09279  1.17905  2.51344 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 36.85684    1.57452   23.41   <2e-16 ***
blotch      -0.03762    0.04479   -0.84    0.401    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared:  0.001415,  Adjusted R-squared:  -0.0005902 
F-statistic: 0.7057 on 1 and 498 DF,  p-value: 0.4013

Code

ggplot(sharks, aes(x = blotch, y = air)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  
  labs(title = "Relationship between blotching and ambient air temperature",
       x = "Blotch (seconds)",
       y = "Air temperature (Celcius)") +
  theme_minimal() +  # Minimal theme for cleaner look
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'air'
cor.test(sharks$blotch, sharks$air)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$air
t = -0.84005, df = 498, p-value = 0.4013
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.12489535  0.05023956
sample estimates:
        cor 
-0.03761675

Blotch vs. BPM

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

model <- lm(BPM ~ blotch, data = sharks)
summary(model)


Call:
lm(formula = BPM ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-23.029 -13.030   0.441  11.674  24.796 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 151.9574    15.6007   9.740   <2e-16 ***
blotch       -0.2903     0.4438  -0.654    0.513    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.15 on 498 degrees of freedom
Multiple R-squared:  0.0008583, Adjusted R-squared:  -0.001148 
F-statistic: 0.4278 on 1 and 498 DF,  p-value: 0.5134

Code

ggplot(sharks, aes(x = blotch, y = BPM)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  
  labs(title = "Relationship between blotching and BPM",
       x = "Blotch (seconds)",
       y = "BPM (Beats per minute)") +
  theme_minimal() +  # Minimal theme for cleaner look
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'BPM'
cor.test(sharks$blotch, sharks$BPM)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$BPM
t = -0.65406, df = 498, p-value = 0.5134
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.11668743  0.05854438
sample estimates:
        cor 
-0.02929661

Blotch vs. Body Length

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

model <- lm(length ~ blotch, data = sharks)
summary(model)


Call:
lm(formula = length ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-83.303 -38.860  -0.192  40.659  80.596 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 229.8342    51.4343   4.468 9.76e-06 ***
blotch       -0.5349     1.4631  -0.366    0.715    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 46.65 on 498 degrees of freedom
Multiple R-squared:  0.0002684, Adjusted R-squared:  -0.001739 
F-statistic: 0.1337 on 1 and 498 DF,  p-value: 0.7148

Code

ggplot(sharks, aes(x = blotch, y = length)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  
  labs(title = "Relationship between blotching and total body length",
       x = "Blotch (seconds)",
       y = "length (cm)") +
  theme_minimal() +  
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'length'
cor.test(sharks$blotch, sharks$length)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$length
t = -0.36562, df = 498, p-value = 0.7148
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.1039230  0.0714115
sample estimates:
        cor 
-0.01638167

Blotch vs. Surface Water Temperature

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

model <- lm(water ~ blotch, data = sharks)
summary(model)


Call:
lm(formula = water ~ blotch, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.07227 -1.43903  0.07593  1.34741  3.04345 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 25.14433    1.84152  13.654   <2e-16 ***
blotch      -0.06046    0.05238  -1.154    0.249    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.67 on 498 degrees of freedom
Multiple R-squared:  0.002668,  Adjusted R-squared:  0.0006654 
F-statistic: 1.332 on 1 and 498 DF,  p-value: 0.249

Code

ggplot(sharks, aes(x = blotch, y = water)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  
  labs(title = "Relationship between blotching and surface water temperature",
       x = "Blotch (seconds)",
       y = "Surface water temperature (Celcius)") +
  theme_minimal() +  
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'surface water temperature'
cor.test(sharks$blotch, sharks$water)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$water
t = -1.1542, df = 498, p-value = 0.249
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.13871605  0.03620077
sample estimates:
        cor 
-0.05165379

Blotch vs. Cortisol levels

Code

sharks <- read_excel("C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx")

model <- lm(meta ~ blotch, data = sharks)
summary(model)


Call:
lm(formula = meta ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-31.856 -14.556   0.426  13.857  30.687 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  86.1257    19.2463   4.475 9.48e-06 ***
blotch       -0.1162     0.5475  -0.212    0.832    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.46 on 498 degrees of freedom
Multiple R-squared:  9.051e-05, Adjusted R-squared:  -0.001917 
F-statistic: 0.04508 on 1 and 498 DF,  p-value: 0.8319

Code

ggplot(sharks, aes(x = blotch, y = meta)) +
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +  
  geom_smooth(method = "lm", se = TRUE, color = "red", size = 1) +  
  labs(title = "Relationship between blotching and cortisol levels",
       x = "Blotch (seconds)",
       y = "Cortisol levels (mcg/dL)") +
  theme_minimal() +  
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linetype = "dashed", size = 0.5),
    panel.grid.minor = element_blank()
  ) +
  theme(legend.position = "none")

`geom_smooth()` using formula = 'y ~ x'

Code

# Compute the Pearson correlation coefficient between 'blotch' and 'cortisol levels'
cor.test(sharks$blotch, sharks$meta)


    Pearson's product-moment correlation

data:  sharks$blotch and sharks$meta
t = -0.21232, df = 498, p-value = 0.8319
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.09712341  0.07824201
sample estimates:
         cor 
-0.009513855

Blotch vs. Sex

Code

# Boxplot to compare blotching between male and female sharks
boxplot(sharks$blotch ~ sharks$sex, data = sharks,
        main = "Comparison of blotching in sharks by sex",
        xlab = "Sex",
        ylab = "Blotching (seconds)",
        col = c("lightpink", "lightblue"))

Code

# T-test to compare blotching between sexes
t_test_result <- t.test(sharks$blotch ~ sharks$sex, sharks = df)
print(t_test_result)


    Welch Two Sample t-test

data:  sharks$blotch by sharks$sex
t = -3.0282, df = 494.67, p-value = 0.002589
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6322714 -0.1346620
sample estimates:
mean in group Female   mean in group Male 
            34.92294             35.30641

These results indicate that there is statistically significant difference between blotching times in male and female sharks.