RMDA Summative

Author

Kia Wright-Williams

Blotching time in sharks

Question 1: Is there a correlation between the variables air and water?

# Set CRAN mirror for document rendering
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Install tidyverse only when it is not already available, so rendering the
# document does not re-download the package on every run.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse) 
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
library(readxl)

# Import the two Excel workbooks used in this report: the main `sharks` table
# and the `sharksub` table that holds the blotch1 / blotch2 columns.
sharks <- read_excel(
  path = "~/Library/CloudStorage/OneDrive-NottinghamTrentUniversity/Data Analysis/Summative/sharks.xlsx"
)
sharksub <- read_excel(
  path = "~/Library/CloudStorage/OneDrive-NottinghamTrentUniversity/Data Analysis/Summative/sharksub.xlsx"
)

# Descriptive summary of every column in the main dataset
summary(sharks)
      ID                sex                blotch           BPM       
 Length:500         Length:500         Min.   :30.78   Min.   :119.0  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.:129.0  
 Mode  :character   Mode  :character   Median :35.05   Median :142.0  
                                       Mean   :35.13   Mean   :141.8  
                                       3rd Qu.:36.05   3rd Qu.:153.2  
                                       Max.   :40.08   Max.   :166.0  
     weight           length           air            water      
 Min.   : 65.10   Min.   :128.3   Min.   :33.00   Min.   :20.01  
 1st Qu.: 75.68   1st Qu.:172.0   1st Qu.:34.42   1st Qu.:21.55  
 Median : 87.82   Median :211.1   Median :35.43   Median :23.11  
 Mean   : 87.94   Mean   :211.0   Mean   :35.54   Mean   :23.02  
 3rd Qu.:100.40   3rd Qu.:251.8   3rd Qu.:36.71   3rd Qu.:24.37  
 Max.   :110.94   Max.   :291.0   Max.   :38.00   Max.   :25.99  
      meta            depth      
 Min.   : 50.03   Min.   :44.64  
 1st Qu.: 67.39   1st Qu.:48.90  
 Median : 82.45   Median :50.14  
 Mean   : 82.04   Mean   :50.14  
 3rd Qu.: 95.97   3rd Qu.:51.35  
 Max.   :112.45   Max.   :56.83  
# Preview the first six rows to confirm the import looks as expected
head(sharks) 
# A tibble: 6 × 10
  ID    sex    blotch   BPM weight length   air water  meta depth
  <chr> <chr>   <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl> <dbl> <dbl>
1 SH001 Female   37.2   148   74.7   187.  37.7  23.4  64.1  53.2
2 SH002 Female   34.5   158   73.4   189.  35.7  21.4  73.7  49.6
3 SH003 Female   36.3   125   71.8   284.  34.8  20.1  54.4  49.4
4 SH004 Male     35.3   161  105.    171.  36.2  21.6  86.3  50.3
5 SH005 Female   37.4   138   67.1   264.  33.6  21.8 108.   49.0
6 SH006 Male     33.5   126  110.    270.  36.4  20.9 109.   46.8
# Inspect the structure of the dataset: dimensions, column names, storage
# types and the leading values of each column
str(sharks) 
tibble [500 × 10] (S3: tbl_df/tbl/data.frame)
 $ ID    : chr [1:500] "SH001" "SH002" "SH003" "SH004" ...
 $ sex   : chr [1:500] "Female" "Female" "Female" "Male" ...
 $ blotch: num [1:500] 37.2 34.5 36.3 35.3 37.4 ...
 $ BPM   : num [1:500] 148 158 125 161 138 126 166 135 132 127 ...
 $ weight: num [1:500] 74.7 73.4 71.8 104.6 67.1 ...
 $ length: num [1:500] 187 189 284 171 264 ...
 $ air   : num [1:500] 37.7 35.7 34.8 36.2 33.6 ...
 $ water : num [1:500] 23.4 21.4 20.1 21.6 21.8 ...
 $ meta  : num [1:500] 64.1 73.7 54.4 86.3 108 ...
 $ depth : num [1:500] 53.2 49.6 49.4 50.3 49 ...
# Confirm the class of the imported object
class(sharks) 
[1] "tbl_df"     "tbl"        "data.frame"
# Confirm `air` is numeric, as required for a Pearson correlation
class(sharks$air) 
[1] "numeric"
# Confirm `water` is numeric, as required for a Pearson correlation
class(sharks$water) 
[1] "numeric"
# List the column names available in the dataset
names(sharks) 
 [1] "ID"     "sex"    "blotch" "BPM"    "weight" "length" "air"    "water" 
 [9] "meta"   "depth" 
# Inspect the first six values of 'air'
head(sharks$air) 
[1] 37.73957 35.68413 34.79854 36.15973 33.61477 36.38343
# Inspect the first six values of 'water'
head(sharks$water) 
[1] 23.37377 21.42088 20.05114 21.64319 21.76143 20.85200
# Check column names to ensure they are correctly referenced
# (for this data frame the result matches names(sharks))
colnames(sharks) 
 [1] "ID"     "sex"    "blotch" "BPM"    "weight" "length" "air"    "water" 
 [9] "meta"   "depth" 
# Calculate Pearson's correlation between air and water; cor.test() reports
# the coefficient together with its t statistic, p-value and 95% confidence
# interval
cor.test(sharks$air, sharks$water) 

    Pearson's product-moment correlation

data:  sharks$air and sharks$water
t = -1.2346, df = 498, p-value = 0.2176
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.14224207  0.03260803
sample estimates:
        cor 
-0.05524051 
## Scatter plot of water against air with a dashed straight-line fit
ggplot(data = sharks, mapping = aes(x = air, y = water)) +
  geom_point(colour = "blue", size = 3, shape = 16, alpha = 0.6) +
  geom_smooth(
    method = "lm",
    se = FALSE,
    colour = "red",
    linetype = "dashed"
  ) +
  labs(
    y = "Water Temperature (°C)",
    x = "Air Temperature (°C)",
    title = "Relationship Between Air and Water Temperatures"
  )
`geom_smooth()` using formula = 'y ~ x'

Question 2: Does multiple capture have an effect on blotching time?

# Run Welch's two-sample t-test comparing the means of the blotch1 and blotch2
# columns (t.test() defaults to unequal variances and unpaired samples).
# NOTE(review): both columns come from the same rows of `sharksub`; if each
# row is one shark measured on two captures, a paired test (paired = TRUE)
# would match the design — confirm against the data description.
t.test(sharksub$blotch1, sharksub$blotch2)

    Welch Two Sample t-test

data:  sharksub$blotch1 and sharksub$blotch2
t = -4.1143, df = 97.658, p-value = 8.113e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -1.3782038 -0.4812731
sample estimates:
mean of x mean of y 
 35.03042  35.96016 
# Load necessary library
library(tidyr)  # For reshaping the data into long format

# Re-import the main dataset (same workbook path as the initial import)
sharks <- read_excel("~/Library/CloudStorage/OneDrive-NottinghamTrentUniversity/Data Analysis/Summative/sharks.xlsx")

# Convert data from wide to long format for 'ggplot2' compatibility.
# pivot_longer() is the current tidyr verb; gather() is superseded. The two
# measurement columns are stacked into `time`, with the source column name
# recorded in `blotch_type`.
sharksub_long <- sharksub %>%
  pivot_longer(
    cols = c(blotch1, blotch2),
    names_to = "blotch_type",
    values_to = "time"
  )

# Side-by-side boxplots comparing the blotch1 and blotch2 measurements
ggplot(
  data = sharksub_long,
  mapping = aes(x = blotch_type, y = time, fill = blotch_type)
) +
  geom_boxplot() +
  scale_fill_manual(values = c("lightblue", "lightgreen")) +
  labs(
    y = "Time (seconds)",
    x = "Blotch Time",
    title = "Comparison of Blotching Times: Blotch1 vs Blotch2"
  ) +
  theme_minimal()

## Create density plot (alternative way to visualise data)
# Uses `sharksub_long`, the long-format table already built for the boxplot,
# instead of reshaping the same data a second time.
ggplot(sharksub_long, aes(x = time, fill = blotch_type, color = blotch_type)) +
  # Semi-transparent fill keeps both overlapping curves visible. Line
  # thickness is set with `linewidth`; `size` for lines is deprecated since
  # ggplot2 3.4.0.
  geom_density(alpha = 0.4, linewidth = 1.2) +
  # Matching light fill / dark outline colour pairs for the two groups
  scale_fill_manual(values = c("lightblue", "lightgreen")) +
  scale_color_manual(values = c("darkblue", "darkgreen")) +
  labs(
    title = "Density Plot of Blotch Times",
    x = "Time (seconds)",
    y = "Density"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    legend.position = "top"  # Position the legend at the top
  )

Question 3: Is it possible to predict blotching time?

# Load necessary libraries
library(readxl)
library(ggplot2)

# Load the data from an Excel file. `file_path` was previously defined but
# never used; it is now passed to read_excel() so the comment is accurate.
file_path <- "~/Library/CloudStorage/OneDrive-NottinghamTrentUniversity/Data Analysis/Summative/sharks.xlsx"
sharks <- read_excel(file_path)

# Compute the Pearson correlation coefficient between 'blotch' and 'depth'
cor.test(sharks$blotch, sharks$depth)

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$depth
t = 22.772, df = 498, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6683963 0.7546509
sample estimates:
      cor 
0.7142247 
# Perform linear regression: fits `depth` as the response with `blotch` as the
# single predictor.
# NOTE(review): this section asks whether blotching time can be predicted,
# which would put `blotch` on the left of the formula (blotch ~ depth).
# Confirm the intended direction before interpreting the coefficients.
model <- lm(depth ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = depth ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.3570 -0.9453 -0.0124  0.9863  4.7997 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 14.63435    1.56040   9.379   <2e-16 ***
blotch       1.01079    0.04439  22.772   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.415 on 498 degrees of freedom
Multiple R-squared:  0.5101,    Adjusted R-squared:  0.5091 
F-statistic: 518.6 on 1 and 498 DF,  p-value: < 2.2e-16
# Plotting the data and regression line (depth against blotching time)
ggplot(sharks, aes(x = blotch, y = depth)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and depth",
    x = "Blotch (seconds)",
    y = "Depth (metres)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Compute the Pearson correlation coefficient between 'blotch' and 'weight',
# with its t statistic, p-value and 95% confidence interval
cor.test(sharks$blotch, sharks$weight) 

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$weight
t = 0.20613, df = 498, p-value = 0.8368
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.07851766  0.09684867
sample estimates:
        cor 
0.009236525 
# Perform linear regression: fits `weight` as the response with `blotch` as
# the single predictor. Assigning to `model` replaces any earlier fit stored
# under that name.
# NOTE(review): to predict blotching time the formula would normally be
# blotch ~ weight — confirm the intended direction.
model <- lm(weight ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = weight ~ blotch, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-22.9687 -12.2943  -0.1632  12.3893  22.9622 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  84.8823    14.8545   5.714  1.9e-08 ***
blotch        0.0871     0.4225   0.206    0.837    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 13.47 on 498 degrees of freedom
Multiple R-squared:  8.531e-05, Adjusted R-squared:  -0.001923 
F-statistic: 0.04249 on 1 and 498 DF,  p-value: 0.8368
# Plotting the data and regression line (weight against blotching time)
ggplot(sharks, aes(x = blotch, y = weight)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and weight",
    x = "Blotch (seconds)",
    y = "Weight (kg)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Compute the Pearson correlation coefficient between 'blotch' and 'air',
# with its t statistic, p-value and 95% confidence interval
cor.test(sharks$blotch, sharks$air) 

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$air
t = -0.84005, df = 498, p-value = 0.4013
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.12489535  0.05023956
sample estimates:
        cor 
-0.03761675 
# Perform linear regression: fits `air` as the response with `blotch` as the
# single predictor. Assigning to `model` replaces any earlier fit stored under
# that name.
# NOTE(review): to predict blotching time the formula would normally be
# blotch ~ air — confirm the intended direction.
model <- lm(air ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = air ~ blotch, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.54412 -1.10096 -0.09279  1.17905  2.51344 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 36.85684    1.57452   23.41   <2e-16 ***
blotch      -0.03762    0.04479   -0.84    0.401    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.428 on 498 degrees of freedom
Multiple R-squared:  0.001415,  Adjusted R-squared:  -0.0005902 
F-statistic: 0.7057 on 1 and 498 DF,  p-value: 0.4013
# Plotting the data and regression line (air against blotching time)
ggplot(sharks, aes(x = blotch, y = air)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and air",
    x = "Blotch (seconds)",
    y = "Air (°C)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Compute the Pearson correlation coefficient between 'blotch' and 'meta'.
# NOTE(review): the plot for this variable labels it both "cortisol" and
# "Corticosterone" — confirm what `meta` measures.
cor.test(sharks$blotch, sharks$meta) 

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$meta
t = -0.21232, df = 498, p-value = 0.8319
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.09712341  0.07824201
sample estimates:
         cor 
-0.009513855 
# Perform linear regression: fits `meta` as the response with `blotch` as the
# single predictor. Assigning to `model` replaces any earlier fit stored under
# that name.
# NOTE(review): to predict blotching time the formula would normally be
# blotch ~ meta — confirm the intended direction.
model <- lm(meta ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = meta ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-31.856 -14.556   0.426  13.857  30.687 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  86.1257    19.2463   4.475 9.48e-06 ***
blotch       -0.1162     0.5475  -0.212    0.832    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 17.46 on 498 degrees of freedom
Multiple R-squared:  9.051e-05, Adjusted R-squared:  -0.001917 
F-statistic: 0.04508 on 1 and 498 DF,  p-value: 0.8319
# Plotting the data and regression line (meta against blotching time)
# NOTE(review): the title says "cortisol" while the y-axis label says
# "Corticosterone" — confirm which hormone `meta` records and align the two.
ggplot(sharks, aes(x = blotch, y = meta)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and cortisol",
    x = "Blotch (seconds)",
    y = "Corticosterone (mcg/dl)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Compute the Pearson correlation coefficient between 'blotch' and 'length',
# with its t statistic, p-value and 95% confidence interval
cor.test(sharks$blotch, sharks$length) 

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$length
t = -0.36562, df = 498, p-value = 0.7148
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.1039230  0.0714115
sample estimates:
        cor 
-0.01638167 
# Perform linear regression: fits `length` as the response with `blotch` as
# the single predictor. Assigning to `model` replaces any earlier fit stored
# under that name.
# NOTE(review): to predict blotching time the formula would normally be
# blotch ~ length — confirm the intended direction.
model <- lm(length ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = length ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-83.303 -38.860  -0.192  40.659  80.596 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 229.8342    51.4343   4.468 9.76e-06 ***
blotch       -0.5349     1.4631  -0.366    0.715    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 46.65 on 498 degrees of freedom
Multiple R-squared:  0.0002684, Adjusted R-squared:  -0.001739 
F-statistic: 0.1337 on 1 and 498 DF,  p-value: 0.7148
# Plotting the data and regression line (length against blotching time)
# NOTE(review): the y-axis label gives metres, but the dataset summary shows
# `length` ranging from about 128 to 291 — confirm the unit.
ggplot(sharks, aes(x = blotch, y = length)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and length",
    x = "Blotch (seconds)",
    y = "length (m)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Compute the Pearson correlation coefficient between 'blotch' and 'water',
# with its t statistic, p-value and 95% confidence interval
cor.test(sharks$blotch, sharks$water) 

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$water
t = -1.1542, df = 498, p-value = 0.249
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.13871605  0.03620077
sample estimates:
        cor 
-0.05165379 
# Perform linear regression: fits `water` as the response with `blotch` as the
# single predictor. Assigning to `model` replaces any earlier fit stored under
# that name.
# NOTE(review): to predict blotching time the formula would normally be
# blotch ~ water — confirm the intended direction.
model <- lm(water ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = water ~ blotch, data = sharks)

Residuals:
     Min       1Q   Median       3Q      Max 
-3.07227 -1.43903  0.07593  1.34741  3.04345 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 25.14433    1.84152  13.654   <2e-16 ***
blotch      -0.06046    0.05238  -1.154    0.249    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.67 on 498 degrees of freedom
Multiple R-squared:  0.002668,  Adjusted R-squared:  0.0006654 
F-statistic: 1.332 on 1 and 498 DF,  p-value: 0.249
# Plotting the data and regression line (water against blotching time)
ggplot(sharks, aes(x = blotch, y = water)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and water",
    x = "Blotch (seconds)",
    y = "Water (°C)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Compute the Pearson correlation coefficient between 'blotch' and 'BPM',
# with its t statistic, p-value and 95% confidence interval
cor.test(sharks$blotch, sharks$BPM) 

    Pearson's product-moment correlation

data:  sharks$blotch and sharks$BPM
t = -0.65406, df = 498, p-value = 0.5134
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.11668743  0.05854438
sample estimates:
        cor 
-0.02929661 
# Perform linear regression: fits `BPM` as the response with `blotch` as the
# single predictor. Assigning to `model` replaces any earlier fit stored under
# that name.
# NOTE(review): to predict blotching time the formula would normally be
# blotch ~ BPM — confirm the intended direction.
model <- lm(BPM ~ blotch, data = sharks) 

# Output the regression summary 
summary(model) 

Call:
lm(formula = BPM ~ blotch, data = sharks)

Residuals:
    Min      1Q  Median      3Q     Max 
-23.029 -13.030   0.441  11.674  24.796 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 151.9574    15.6007   9.740   <2e-16 ***
blotch       -0.2903     0.4438  -0.654    0.513    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.15 on 498 degrees of freedom
Multiple R-squared:  0.0008583, Adjusted R-squared:  -0.001148 
F-statistic: 0.4278 on 1 and 498 DF,  p-value: 0.5134
# Plotting the data and regression line (BPM against blotching time)
ggplot(sharks, aes(x = blotch, y = BPM)) +
  # Point size is controlled by `size`; geom_point() has no `linewidth`
  # parameter, so the previous `linewidth = 3` was ignored with a warning.
  geom_point(color = "#1f78b4", size = 3, alpha = 0.7) +
  # Straight-line fit with its confidence band
  geom_smooth(method = "lm", se = TRUE, color = "red", linewidth = 1) +
  labs(
    title = "Relationship between blotching and heart rate",
    x = "Blotch (seconds)",
    y = "Heart rate (BPM)"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12),
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # as of ggplot2 3.4.0.
    panel.grid.major = element_line(
      color = "grey", linetype = "dashed", linewidth = 0.5
    ),
    panel.grid.minor = element_blank(),
    legend.position = "none"
  )
`geom_smooth()` using formula = 'y ~ x'

# Perform an independent-samples (Welch) t-test of blotching time by sex.
# `sex` is categorical, so the formula interface splits `blotch` by group.
# The stray `sharks = df` argument was removed: t.test() has no `sharks`
# parameter and no `df` object is created in this script, so the argument
# carried no data and had no effect on the result.
t_test_result <- t.test(sharks$blotch ~ sharks$sex)
print(t_test_result)

    Welch Two Sample t-test

data:  sharks$blotch by sharks$sex
t = -3.0282, df = 494.67, p-value = 0.002589
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6322714 -0.1346620
sample estimates:
mean in group Female   mean in group Male 
            34.92294             35.30641 
## Create Boxplot

# Distribution of blotching time for each sex, one filled box per group
ggplot(data = sharks, mapping = aes(x = sex, y = blotch)) +
  geom_boxplot(mapping = aes(fill = sex), alpha = 0.6) +
  theme_minimal(base_size = 14) +  # Larger base font than the default
  theme(
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5)
  ) +
  labs(
    y = "Blotch Time (seconds)",
    x = "Sex",
    title = "Box Plot of Blotch Time by Sex"
  )