RMDA Summative Assignment

Author

Emily Sankey

Blotching time in sharks

  1. Is there a correlation between the variables air and water?

    Code
    # Set CRAN mirror
    options(repos = c(CRAN = "https://cran.rstudio.com/"))
    
    # Install tidyverse only when it is not already available, so that
    # re-rendering the document does not reinstall the package every time
    if (!requireNamespace("tidyverse", quietly = TRUE)) {
      install.packages("tidyverse")
    }
    Installing package into 'C:/Users/sanke/AppData/Local/R/win-library/4.2'
    (as 'lib' is unspecified)
    package 'tidyverse' successfully unpacked and MD5 sums checked
    
    The downloaded binary packages are in
        C:\Users\sanke\AppData\Local\Temp\RtmpiqP3bM\downloaded_packages
    Code
    library(tidyverse)
    ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
    ✔ dplyr     1.1.4     ✔ readr     2.1.5
    ✔ forcats   1.0.0     ✔ stringr   1.5.1
    ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
    ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
    ✔ purrr     1.0.2     
    ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
    ✖ dplyr::filter() masks stats::filter()
    ✖ dplyr::lag()    masks stats::lag()
    ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
    Code
    library(dplyr)
    # Install readxl only when it is missing (avoids a reinstall on every run)
    if (!requireNamespace("readxl", quietly = TRUE)) {
      install.packages("readxl")
    }
    Installing package into 'C:/Users/sanke/AppData/Local/R/win-library/4.2'
    (as 'lib' is unspecified)
    package 'readxl' successfully unpacked and MD5 sums checked
    
    The downloaded binary packages are in
        C:\Users\sanke\AppData\Local\Temp\RtmpiqP3bM\downloaded_packages
    Code
    library(readxl)
    
    # Import the shark measurements from the Excel workbook
    sharks <- read_excel(
      "C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharks.xlsx"
    )
    
    # Per-column summary statistics for a first look at the data
    summary(sharks)
          ID                sex                blotch           BPM       
     Length:500         Length:500         Min.   :30.78   Min.   :119.0  
     Class :character   Class :character   1st Qu.:34.16   1st Qu.:129.0  
     Mode  :character   Mode  :character   Median :35.05   Median :142.0  
                                           Mean   :35.13   Mean   :141.8  
                                           3rd Qu.:36.05   3rd Qu.:153.2  
                                           Max.   :40.08   Max.   :166.0  
         weight           length           air            water      
     Min.   : 65.10   Min.   :128.3   Min.   :33.00   Min.   :20.01  
     1st Qu.: 75.68   1st Qu.:172.0   1st Qu.:34.42   1st Qu.:21.55  
     Median : 87.82   Median :211.1   Median :35.43   Median :23.11  
     Mean   : 87.94   Mean   :211.0   Mean   :35.54   Mean   :23.02  
     3rd Qu.:100.40   3rd Qu.:251.8   3rd Qu.:36.71   3rd Qu.:24.37  
     Max.   :110.94   Max.   :291.0   Max.   :38.00   Max.   :25.99  
          meta            depth      
     Min.   : 50.03   Min.   :44.64  
     1st Qu.: 67.39   1st Qu.:48.90  
     Median : 82.45   Median :50.14  
     Mean   : 82.04   Mean   :50.14  
     3rd Qu.: 95.97   3rd Qu.:51.35  
     Max.   :112.45   Max.   :56.83  
    Code
    # Check the first few rows of the dataset
    head(sharks)
    # A tibble: 6 × 10
      ID    sex    blotch   BPM weight length   air water  meta depth
      <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
    1 SH001 Female   37.2   148   74.7   187.  37.7  23.4  64.1  53.2
    2 SH002 Female   34.5   158   73.4   189.  35.7  21.4  73.7  49.6
    3 SH003 Female   36.3   125   71.8   284.  34.8  20.1  54.4  49.4
    4 SH004 Male     35.3   161  105.    171.  36.2  21.6  86.3  50.3
    5 SH005 Female   37.4   138   67.1   264.  33.6  21.8 108.   49.0
    6 SH006 Male     33.5   126  110.    270.  36.4  20.9 109.   46.8
    Code
    # Check the structure of the dataset
    str(sharks)
    tibble [500 × 10] (S3: tbl_df/tbl/data.frame)
     $ ID    : chr [1:500] "SH001" "SH002" "SH003" "SH004" ...
     $ sex   : chr [1:500] "Female" "Female" "Female" "Male" ...
     $ blotch: num [1:500] 37.2 34.5 36.3 35.3 37.4 ...
     $ BPM   : num [1:500] 148 158 125 161 138 126 166 135 132 127 ...
     $ weight: num [1:500] 74.7 73.4 71.8 104.6 67.1 ...
     $ length: num [1:500] 187 189 284 171 264 ...
     $ air   : num [1:500] 37.7 35.7 34.8 36.2 33.6 ...
     $ water : num [1:500] 23.4 21.4 20.1 21.6 21.8 ...
     $ meta  : num [1:500] 64.1 73.7 54.4 86.3 108 ...
     $ depth : num [1:500] 53.2 49.6 49.4 50.3 49 ...
    Code
    class(sharks)
    [1] "tbl_df"     "tbl"        "data.frame"
    Code
    names(sharks)
     [1] "ID"     "sex"    "blotch" "BPM"    "weight" "length" "air"    "water" 
     [9] "meta"   "depth" 
    Code
    # Inspect the first few values of 'air' and 'water'
    head(sharks$air)
    [1] 37.73957 35.68413 34.79854 36.15973 33.61477 36.38343
    Code
    class(sharks$air)
    [1] "numeric"
    Code
    class(sharks$water)
    [1] "numeric"
    Code
    head(sharks$water)
    [1] 23.37377 21.42088 20.05114 21.64319 21.76143 20.85200
    Code
    # Check column names to ensure they are correctly referenced
    colnames(sharks)
     [1] "ID"     "sex"    "blotch" "BPM"    "weight" "length" "air"    "water" 
     [9] "meta"   "depth" 
    Code
    # Load ggplot2 library
    library(ggplot2)
    
    # Scatter plot of water against air temperature with a dashed linear fit
    ggplot(data = sharks, mapping = aes(x = air, y = water)) +
      geom_point(shape = 16, size = 3, alpha = 0.6, color = "lightgreen") +
      geom_smooth(
        method = "lm",
        se = FALSE,
        linetype = "dashed",
        color = "red"
      ) +
      labs(
        x = "Air temperature (°C)",
        y = "Water temperature (°C)",
        title = "Relationship between air and water temperatures"
      )
    `geom_smooth()` using formula = 'y ~ x'

Code
# Test the correlation between air and water temperature
# (cor.test() with default arguments reports Pearson's product-moment
# correlation with a two-sided alternative)
cor.test(sharks$air, sharks$water)

    Pearson's product-moment correlation

data:  sharks$air and sharks$water
t = -1.2346, df = 498, p-value = 0.2176
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.14224207  0.03260803
sample estimates:
        cor 
-0.05524051 

These results indicate that there is no statistically significant correlation between ambient air temperature and surface water temperature (r = -0.055, p = 0.218).

  2. Does multiple capture have an effect on blotching time?

    Code
    # Import the subset of sharks that has two blotching measurements each
    sharksub <- read_excel(
      "C:\\Users\\sanke\\OneDrive - Nottingham Trent University\\RMDA\\sharksub.xlsx"
    )
    
    # Per-column summary statistics
    summary(sharksub)
          ID                sex               blotch1         blotch2     
     Length:50          Length:50          Min.   :32.49   Min.   :33.47  
     Class :character   Class :character   1st Qu.:34.38   1st Qu.:35.31  
     Mode  :character   Mode  :character   Median :34.94   Median :35.94  
                                           Mean   :35.03   Mean   :35.96  
                                           3rd Qu.:35.90   3rd Qu.:36.78  
                                           Max.   :37.07   Max.   :38.18  
    Code
    head(sharksub)
    # A tibble: 6 × 4
      ID    sex    blotch1 blotch2
      <chr> <chr>    <dbl>   <dbl>
    1 SH269 Female    36.1    37.2
    2 SH163 Female    33.4    34.4
    3 SH008 Female    36.3    36.5
    4 SH239 Female    35.0    36.0
    5 SH332 Female    35.7    36.8
    6 SH328 Female    34.9    35.9
    Code
    str(sharksub)
    tibble [50 × 4] (S3: tbl_df/tbl/data.frame)
     $ ID     : chr [1:50] "SH269" "SH163" "SH008" "SH239" ...
     $ sex    : chr [1:50] "Female" "Female" "Female" "Female" ...
     $ blotch1: num [1:50] 36.1 33.4 36.3 35 35.7 ...
     $ blotch2: num [1:50] 37.2 34.4 36.5 36 36.8 ...
    Code
    class(sharksub)
    [1] "tbl_df"     "tbl"        "data.frame"
    Code
    colnames(sharksub)
    [1] "ID"      "sex"     "blotch1" "blotch2"
    Code
    names(sharksub)
    [1] "ID"      "sex"     "blotch1" "blotch2"
    Code
    library(ggplot2)
    library(tidyr)  # For gathering the data into long format
    
    # Reshape from wide (one blotch1 and one blotch2 column) to long format so
    # both measurements can be mapped to a single ggplot2 aesthetic.
    # pivot_longer() is the current tidyr replacement for the superseded gather().
    sharksub_long <- sharksub %>%
      pivot_longer(
        cols = c(blotch1, blotch2),
        names_to = "blotch_type",
        values_to = "time"
      )
    
    # Boxplot of blotch1 vs blotch2
    ggplot(sharksub_long, aes(x = blotch_type, y = time, fill = blotch_type)) +
      geom_boxplot() +
      labs(
        title = "Comparison of blotching times",
        x = "Blotches",
        y = "Time (seconds)"
      ) +
      theme_minimal() +
      scale_fill_manual(values = c("lightblue", "lightgreen"))

    Code
    # Perform independent t-test between blotch1 and blotch2
    # (t.test() with two vectors and default arguments runs a Welch two-sample
    # test, which treats the two columns as unrelated samples)
    # NOTE(review): each row of sharksub holds blotch1 and blotch2 for the same
    # shark ID, so the measurements look paired rather than independent —
    # confirm whether t.test(..., paired = TRUE) is the intended analysis.
    t.test(sharksub$blotch1, sharksub$blotch2)
    
        Welch Two Sample t-test
    
    data:  sharksub$blotch1 and sharksub$blotch2
    t = -4.1143, df = 97.658, p-value = 8.113e-05
    alternative hypothesis: true difference in means is not equal to 0
    95 percent confidence interval:
     -1.3782038 -0.4812731
    sample estimates:
    mean of x mean of y 
     35.03042  35.96016 

    These results indicate that multiple capture has a statistically significant effect on blotching time: blotching time increased when sharks were captured more than once (mean of 35.96 seconds for blotch2 versus 35.03 seconds for blotch1).

    3. Is it possible to predict blotching time?

    Blotch vs. Depth

    Code
    # Perform linear regression with depth as the response and blotch as the
    # predictor. The sharks data frame is already in memory from the initial
    # import, so the workbook is not read again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ depth) — confirm the intended direction.
    model <- lm(depth ~ blotch, data = sharks)
    
    # Output the regression summary
    summary(model)
    
    Call:
    lm(formula = depth ~ blotch, data = sharks)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -4.3570 -0.9453 -0.0124  0.9863  4.7997 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept) 14.63435    1.56040   9.379   <2e-16 ***
    blotch       1.01079    0.04439  22.772   <2e-16 ***
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 1.415 on 498 degrees of freedom
    Multiple R-squared:  0.5101,    Adjusted R-squared:  0.5091 
    F-statistic: 518.6 on 1 and 498 DF,  p-value: < 2.2e-16
    Code
    # Scatter plot of depth against blotching time with the fitted regression
    # line and its confidence band
    ggplot(sharks, aes(x = blotch, y = depth)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and depth",
        x = "Blotch (seconds)",
        y = "Depth (metres)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'depth'
    cor.test(sharks$blotch, sharks$depth)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$depth
    t = 22.772, df = 498, p-value < 2.2e-16
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     0.6683963 0.7546509
    sample estimates:
          cor 
    0.7142247 

    Blotch vs. Weight

    Code
    # Linear regression with weight as the response and blotch as the predictor.
    # The sharks data frame is already in memory from the initial import, so the
    # workbook is not read again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ weight) — confirm the intended direction.
    model <- lm(weight ~ blotch, data = sharks)
    summary(model)
    
    Call:
    lm(formula = weight ~ blotch, data = sharks)
    
    Residuals:
         Min       1Q   Median       3Q      Max 
    -22.9687 -12.2943  -0.1632  12.3893  22.9622 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept)  84.8823    14.8545   5.714  1.9e-08 ***
    blotch        0.0871     0.4225   0.206    0.837    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 13.47 on 498 degrees of freedom
    Multiple R-squared:  8.531e-05, Adjusted R-squared:  -0.001923 
    F-statistic: 0.04249 on 1 and 498 DF,  p-value: 0.8368
    Code
    # Scatter plot of weight against blotching time with the fitted regression
    # line and its confidence band
    ggplot(sharks, aes(x = blotch, y = weight)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and weight",
        x = "Blotch (seconds)",
        y = "Weight (kg)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'weight'
    cor.test(sharks$blotch, sharks$weight)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$weight
    t = 0.20613, df = 498, p-value = 0.8368
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     -0.07851766  0.09684867
    sample estimates:
            cor 
    0.009236525 

    Blotch vs. Air Temperature

    Code
    # Linear regression with air temperature as the response and blotch as the
    # predictor. The sharks data frame is already in memory from the initial
    # import, so the workbook is not read again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ air) — confirm the intended direction.
    model <- lm(air ~ blotch, data = sharks)
    summary(model)
    
    Call:
    lm(formula = air ~ blotch, data = sharks)
    
    Residuals:
         Min       1Q   Median       3Q      Max 
    -2.54412 -1.10096 -0.09279  1.17905  2.51344 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept) 36.85684    1.57452   23.41   <2e-16 ***
    blotch      -0.03762    0.04479   -0.84    0.401    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 1.428 on 498 degrees of freedom
    Multiple R-squared:  0.001415,  Adjusted R-squared:  -0.0005902 
    F-statistic: 0.7057 on 1 and 498 DF,  p-value: 0.4013
    Code
    # Scatter plot of air temperature against blotching time with the fitted
    # regression line and its confidence band
    ggplot(sharks, aes(x = blotch, y = air)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and ambient air temperature",
        x = "Blotch (seconds)",
        y = "Air temperature (Celsius)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'air'
    cor.test(sharks$blotch, sharks$air)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$air
    t = -0.84005, df = 498, p-value = 0.4013
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     -0.12489535  0.05023956
    sample estimates:
            cor 
    -0.03761675 

    Blotch vs. BPM

    Code
    # Linear regression with BPM as the response and blotch as the predictor.
    # The sharks data frame is already in memory from the initial import, so the
    # workbook is not read again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ BPM) — confirm the intended direction.
    model <- lm(BPM ~ blotch, data = sharks)
    summary(model)
    
    Call:
    lm(formula = BPM ~ blotch, data = sharks)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -23.029 -13.030   0.441  11.674  24.796 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept) 151.9574    15.6007   9.740   <2e-16 ***
    blotch       -0.2903     0.4438  -0.654    0.513    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 14.15 on 498 degrees of freedom
    Multiple R-squared:  0.0008583, Adjusted R-squared:  -0.001148 
    F-statistic: 0.4278 on 1 and 498 DF,  p-value: 0.5134
    Code
    # Scatter plot of BPM against blotching time with the fitted regression
    # line and its confidence band
    ggplot(sharks, aes(x = blotch, y = BPM)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and BPM",
        x = "Blotch (seconds)",
        y = "BPM (Beats per minute)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'BPM'
    cor.test(sharks$blotch, sharks$BPM)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$BPM
    t = -0.65406, df = 498, p-value = 0.5134
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     -0.11668743  0.05854438
    sample estimates:
            cor 
    -0.02929661 

    Blotch vs. Body Length

    Code
    # Linear regression with body length as the response and blotch as the
    # predictor. The sharks data frame is already in memory from the initial
    # import, so the workbook is not read again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ length) — confirm the intended direction.
    model <- lm(length ~ blotch, data = sharks)
    summary(model)
    
    Call:
    lm(formula = length ~ blotch, data = sharks)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -83.303 -38.860  -0.192  40.659  80.596 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept) 229.8342    51.4343   4.468 9.76e-06 ***
    blotch       -0.5349     1.4631  -0.366    0.715    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 46.65 on 498 degrees of freedom
    Multiple R-squared:  0.0002684, Adjusted R-squared:  -0.001739 
    F-statistic: 0.1337 on 1 and 498 DF,  p-value: 0.7148
    Code
    # Scatter plot of body length against blotching time with the fitted
    # regression line and its confidence band
    ggplot(sharks, aes(x = blotch, y = length)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and total body length",
        x = "Blotch (seconds)",
        y = "Length (cm)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'length'
    cor.test(sharks$blotch, sharks$length)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$length
    t = -0.36562, df = 498, p-value = 0.7148
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     -0.1039230  0.0714115
    sample estimates:
            cor 
    -0.01638167 

    Blotch vs. Surface Water Temperature

    Code
    # Linear regression with surface water temperature as the response and
    # blotch as the predictor. The sharks data frame is already in memory from
    # the initial import, so the workbook is not read again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ water) — confirm the intended direction.
    model <- lm(water ~ blotch, data = sharks)
    summary(model)
    
    Call:
    lm(formula = water ~ blotch, data = sharks)
    
    Residuals:
         Min       1Q   Median       3Q      Max 
    -3.07227 -1.43903  0.07593  1.34741  3.04345 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept) 25.14433    1.84152  13.654   <2e-16 ***
    blotch      -0.06046    0.05238  -1.154    0.249    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 1.67 on 498 degrees of freedom
    Multiple R-squared:  0.002668,  Adjusted R-squared:  0.0006654 
    F-statistic: 1.332 on 1 and 498 DF,  p-value: 0.249
    Code
    # Scatter plot of surface water temperature against blotching time with the
    # fitted regression line and its confidence band
    ggplot(sharks, aes(x = blotch, y = water)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and surface water temperature",
        x = "Blotch (seconds)",
        y = "Surface water temperature (Celsius)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'surface water temperature'
    cor.test(sharks$blotch, sharks$water)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$water
    t = -1.1542, df = 498, p-value = 0.249
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     -0.13871605  0.03620077
    sample estimates:
            cor 
    -0.05165379 

    Blotch vs. Cortisol levels

    Code
    # Linear regression with meta (labelled as cortisol levels in this section)
    # as the response and blotch as the predictor. The sharks data frame is
    # already in memory from the initial import, so the workbook is not read
    # again here.
    # NOTE(review): to predict blotching time, blotch would normally be the
    # response (blotch ~ meta) — confirm the intended direction.
    model <- lm(meta ~ blotch, data = sharks)
    summary(model)
    
    Call:
    lm(formula = meta ~ blotch, data = sharks)
    
    Residuals:
        Min      1Q  Median      3Q     Max 
    -31.856 -14.556   0.426  13.857  30.687 
    
    Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
    (Intercept)  86.1257    19.2463   4.475 9.48e-06 ***
    blotch       -0.1162     0.5475  -0.212    0.832    
    ---
    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
    
    Residual standard error: 17.46 on 498 degrees of freedom
    Multiple R-squared:  9.051e-05, Adjusted R-squared:  -0.001917 
    F-statistic: 0.04508 on 1 and 498 DF,  p-value: 0.8319
    Code
    # Scatter plot of meta (cortisol) against blotching time with the fitted
    # regression line and its confidence band
    ggplot(sharks, aes(x = blotch, y = meta)) +
      geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
      # Line thickness is set with linewidth; size is deprecated for lines
      # since ggplot2 3.4.0
      geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
      labs(
        title = "Relationship between blotching and cortisol levels",
        x = "Blotch (seconds)",
        y = "Cortisol levels (mcg/dL)"
      ) +
      theme_minimal() +
      theme(
        plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 12),
        panel.grid.major = element_line(
          color = "grey", linetype = "dashed", linewidth = 0.5
        ),
        panel.grid.minor = element_blank(),
        legend.position = "none"
      )
    `geom_smooth()` using formula = 'y ~ x'

    Code
    # Compute the Pearson correlation coefficient between 'blotch' and 'cortisol levels'
    cor.test(sharks$blotch, sharks$meta)
    
        Pearson's product-moment correlation
    
    data:  sharks$blotch and sharks$meta
    t = -0.21232, df = 498, p-value = 0.8319
    alternative hypothesis: true correlation is not equal to 0
    95 percent confidence interval:
     -0.09712341  0.07824201
    sample estimates:
             cor 
    -0.009513855 

Blotch vs. Sex

Code
# Boxplot to compare blotching between male and female sharks.
# The formula uses bare column names, which are looked up in `sharks` through
# the data argument; the axis titles are set explicitly below.
boxplot(
  blotch ~ sex,
  data = sharks,
  main = "Comparison of blotching in sharks by sex",
  xlab = "Sex",
  ylab = "Blotching (seconds)",
  col = c("lightpink", "lightblue")
)

Code
# T-test to compare blotching between sexes (t.test() defaults to the Welch
# two-sample test). The formula references the columns directly through
# sharks$, so no data argument is required.
t_test_result <- t.test(sharks$blotch ~ sharks$sex)
print(t_test_result)

    Welch Two Sample t-test

data:  sharks$blotch by sharks$sex
t = -3.0282, df = 494.67, p-value = 0.002589
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6322714 -0.1346620
sample estimates:
mean in group Female   mean in group Male 
            34.92294             35.30641 

These results indicate that there is a statistically significant difference between blotching times in male and female sharks.