Rows: 500 Columns: 10
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ID, sex
dbl (8): blotch, BPM, weight, length, air, water, meta, depth
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 50 Columns: 4
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (2): ID, sex
dbl (2): blotch1, blotch2
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open `sharksub` in the data viewer, then attach the tidyverse.
# The two calls must be separate statements; fused on one line they do not parse.
View(sharksub)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ purrr 1.0.2
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
Attaching package: 'kableExtra'
The following object is masked from 'package:dplyr':
group_rows
library(broom)
Blotch histogram made for question 2
library(gridExtra)
Warning: package 'gridExtra' was built under R version 4.4.2
Attaching package: 'gridExtra'
The following object is masked from 'package:dplyr':
combine
# Reshape the data into long format
sharksub_long <- sharksub %>%
  pivot_longer(
    cols = c(blotch1, blotch2),
    names_to = "Capture",
    values_to = "BlotchTime"
  )

# Create histogram for blotch1
blotch1_hist <- ggplot(data = sharksub, aes(x = blotch1)) +
  geom_histogram(bins = 30, color = "black", fill = "gold") +
  theme_bw() +
  labs(
    x = "Time taken for blotching to cover 30% of ventral surface (s)",
    y = "Frequency",
    title = "Distribution of Blotching Time After First Capture"
  )

# Create histogram for blotch2
blotch2_hist <- ggplot(data = sharksub, aes(x = blotch2)) +
  geom_histogram(bins = 30, color = "black", fill = "navy") +
  theme_bw() +
  labs(
    x = "Time taken for blotching to cover 30% of ventral surface (s)",
    y = "Frequency",
    title = "Distribution of Blotching Time After Second Capture"
  )

# Arrange the histograms vertically
grid.arrange(blotch1_hist, blotch2_hist, ncol = 1)
Q1. Is there a correlation between the variables air and water?
Histogram air normality check
#| label: Air Distribution Check
# Histogram of air temperature on a dark theme, used as a visual normality check.
ggplot(data = sharks, aes(x = air)) +
  geom_histogram(bins = 30, col = "navy", fill = "gold") +
  theme_dark() +
  theme(
    plot.background = element_rect(fill = "black"), # Background outside the plot
    panel.background = element_rect(fill = "black"), # Background inside the plot
    panel.grid.major = element_line(color = "gray"), # Major grid lines
    panel.grid.minor = element_line(color = "gray30"), # Minor grid lines
    axis.text = element_text(color = "white"), # Axis text color
    axis.title = element_text(color = "white"), # Axis title color
    plot.title = element_text(color = "white", size = 14, face = "bold") # Title color
  ) +
  # No colour aesthetic is mapped in aes(), so this scale does not alter the plot.
  scale_color_brewer() +
  labs(
    x = "Air (Celsius)",
    y = "Frequency",
    title = "Distribution of Ambient Air Temperatures"
  )
Histogram for water temperature
#| label: Water Distribution Check
# Histogram of surface water temperature on a dark theme (visual normality check).
ggplot(data = sharks, aes(x = water)) +
  geom_histogram(bins = 30, col = "gold", fill = "navy") +
  theme_dark() +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray"),
    panel.grid.minor = element_line(color = "gray30"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", size = 14, face = "bold")
  ) +
  # No colour aesthetic is mapped in aes(), so this scale does not alter the plot.
  scale_color_brewer() +
  labs(
    x = "Water temperature at surface at time of processing (Celsius)",
    y = "Frequency",
    title = "Data Distribution of Surface Water Temperature"
  )
Creating QQ plots for air and water
QQ plot for air temperature
#| label: QQplot Air
# Base-graphics settings: black background with white axes, labels and title.
par(bg = "black", col.axis = "white", col.lab = "white", col.main = "white")

# Normal Q-Q plot of air temperature with a reference line.
qqnorm(
  sharks$air,
  pch = 1, col = "white", frame = FALSE,
  main = "QQ Plot of air Data",
  xlab = "Theoretical Quantiles", ylab = "Sample Quantiles"
)
qqline(sharks$air, col = "gold", lwd = 2)
QQ plot for water temperature
# Base-graphics settings: black background with white axes, labels and title.
par(bg = "black", col.axis = "white", col.lab = "white", col.main = "white")

# Normal Q-Q plot of water temperature with a reference line.
qqnorm(
  sharks$water,
  pch = 1, col = "white", frame = FALSE,
  main = "QQ Plot of Water Data",
  xlab = "Theoretical Quantiles", ylab = "Sample Quantiles"
)
qqline(sharks$water, col = "lightblue", lwd = 2)
# Shapiro-Wilk normality test for air temperature
shapiro.test(x = sharks$air)
Shapiro-Wilk normality test
data: sharks$air
W = 0.95885, p-value = 1.338e-10
# Shapiro-Wilk normality test for water temperature
shapiro.test(x = sharks$water)
Shapiro-Wilk normality test
data: sharks$water
W = 0.96035, p-value = 2.371e-10
Spearman's rank correlation rho
data: sharks$air and sharks$water
S = 22007692, p-value = 0.2082
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.05637344
#Value of Spearman’s correlation coefficient, rho = -0.056, p = 0.21
#null hypothesis H0: There is no correlation between air and water temperatures (rho = 0).
#alternative hypothesis H1: There is a correlation between air and water temperatures (rho is not equal to 0).
# Pearson product-moment correlation between air and water temperature
cor.test(sharks$air, sharks$water, method = "pearson")
Pearson's product-moment correlation
data: sharks$air and sharks$water
t = -1.2346, df = 498, p-value = 0.2176
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.14224207 0.03260803
sample estimates:
cor
-0.05524051
#Value of Pearson’s correlation coefficient, r = -0.055, p = 0.22
Q2. Does multiple capture have an effect on blotching time?
# Inspect the sharksub data in the viewer (tibble's view(), attached via tidyverse).
tibble::view(sharksub)
Blotch1 histogram
#| label: Blotch1 Distribution Check
# Histogram of blotching time for the first capture (visual normality check).
ggplot(data = sharksub, aes(x = blotch1)) +
  geom_histogram(bins = 30, col = "navy", fill = "gold") +
  theme_dark() +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray"),
    panel.grid.minor = element_line(color = "gray30"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", size = 14, face = "bold")
  ) +
  # No colour aesthetic is mapped in aes(), so this scale does not alter the plot.
  scale_color_brewer() +
  labs(
    x = "Time for blotching to cover 30% of ventral surface (s)",
    y = "Frequency",
    title = "Data Distribution of blotching time for first capture"
  )
QQplot blotch1
#| label: QQplot Blotch1
# Base-graphics settings: black background with white axes, labels and title.
par(bg = "black", col.axis = "white", col.lab = "white", col.main = "white")

# Normal Q-Q plot of blotch1 with a reference line.
qqnorm(
  sharksub$blotch1,
  pch = 1, col = "white", frame = FALSE,
  main = "QQ Plot of Blotch1 Data",
  xlab = "Theoretical Quantiles", ylab = "Sample Quantiles"
)
qqline(sharksub$blotch1, col = "gold", lwd = 2)
Blotch1 shapiro-wilk
#| label: ShapiroWilk-blotch1_normality_check
#| warning: false
#| echo: true
#| output: true
# Testing normality of blotch1 data using the Shapiro-Wilk test.
shapiro.test(sharksub$blotch1)
Shapiro-Wilk normality test
data: sharksub$blotch1
W = 0.97958, p-value = 0.5345
Histogram for blotch2
#| label: Blotch2 Distribution Check
# Histogram of blotching time for the second capture (visual normality check).
ggplot(data = sharksub, aes(x = blotch2)) +
  geom_histogram(bins = 30, col = "gold", fill = "navy") +
  theme_dark() +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray"),
    panel.grid.minor = element_line(color = "gray30"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", size = 14, face = "bold")
  ) +
  # No colour aesthetic is mapped in aes(), so this scale does not alter the plot.
  scale_color_brewer() +
  labs(
    x = "Time for blotching to cover 30% of ventral surface (s)",
    y = "Frequency",
    title = "Data Distribution of Blotching Time for Second Capture"
  )
#| label: QQplot Blotch2
# Base-graphics settings: black background with white axes, labels and title.
par(bg = "black", col.axis = "white", col.lab = "white", col.main = "white")

# Normal Q-Q plot of blotch2 with a reference line.
qqnorm(
  sharksub$blotch2,
  pch = 1, col = "white", frame = FALSE,
  main = "QQ Plot of Blotch2 Data",
  xlab = "Theoretical Quantiles", ylab = "Sample Quantiles"
)
# NOTE(review): a navy line on a black background has low contrast; the other
# Q-Q plots in this report use "gold" / "lightblue" — confirm this is intended.
qqline(sharksub$blotch2, col = "Navy", lwd = 2)
Shapiro-Wilk normality test
data: sharksub$blotch2
W = 0.97936, p-value = 0.5255
Histograms reveal roughly normal distribution for both blotch1 and blotch2
#For both blotch1 and blotch2, the Shapiro-Wilk test produces p-values > 0.05.
#null hypothesis H0: There is no difference in mean blotching time between blotch1 and blotch2.
#alternative hypothesis H1: There is a difference in mean blotching time between blotch1 and blotch2.
# Extract the two blotching-time columns as vectors.
# Each assignment needs its own line: when fused, the second assignment sits
# inside the first line's trailing comment and capture2 is never created.
capture1 <- sharksub$blotch1 # assigning blotch1 to capture1
capture2 <- sharksub$blotch2 # assigning blotch2 to capture2
Paired t-test
data: capture1 and capture2
t = -17.39, df = 49, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
-1.037176 -0.822301
sample estimates:
mean difference
-0.9297384
# Reshape the data into long format
sharksub_long <- sharksub %>%
  pivot_longer(
    cols = c(blotch1, blotch2),
    names_to = "Capture",
    values_to = "BlotchTime"
  )

# Create the histogram: both captures overlaid on one set of axes
combined_hist <- ggplot(
  data = sharksub_long,
  aes(x = BlotchTime, fill = Capture)
) +
  geom_histogram(
    position = "identity", alpha = 0.85, bins = 30, color = "black"
  ) +
  theme_dark() +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    panel.grid.major = element_line(color = "gray"),
    panel.grid.minor = element_line(color = "gray30"),
    axis.text = element_text(color = "white"),
    axis.title = element_text(color = "white"),
    plot.title = element_text(color = "white", size = 14, face = "bold")
  ) +
  # Choose a color palette
  scale_fill_manual(values = c("blotch1" = "navy", "blotch2" = "gold")) +
  labs(
    x = "Time for blotching to cover 30% of ventral surface (s)",
    y = "Frequency",
    title = "Distribution of Blotching Time After Capture"
  ) +
  theme(legend.title = element_blank()) # Remove legend title

# Display the plot
combined_hist
#The output of the paired t-test gives a p-value < 2.2e-16, which is considerably less than the significance level alpha = 0.05.
Q3. Is it possible to predict blotching time?
multiple regression for blotching time prediction
# Multiple linear regression of blotching time on the seven numeric predictors.
mult_reg_model <- lm(
  blotch ~ BPM + weight + length + air + water + meta + depth,
  data = sharks
)

# Print the model summary (coefficients, residual SE, R-squared, F-statistic).
summary(mult_reg_model)
Call:
lm(formula = blotch ~ BPM + weight + length + air + water + meta +
depth, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.83745 -0.66117 -0.00702 0.60110 2.74108
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.1405851 1.8958668 5.876 7.74e-09 ***
BPM -0.0019723 0.0031890 -0.618 0.537
weight 0.0016283 0.0033511 0.486 0.627
length 0.0012295 0.0009710 1.266 0.206
air -0.0281474 0.0318707 -0.883 0.378
water -0.0188934 0.0270782 -0.698 0.486
meta -0.0009712 0.0025951 -0.374 0.708
depth 0.5061285 0.0223191 22.677 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.002 on 492 degrees of freedom
Multiple R-squared: 0.514, Adjusted R-squared: 0.507
F-statistic: 74.32 on 7 and 492 DF, p-value: < 2.2e-16
# Full-precision coefficient table of the multiple regression.
# Use the exact component name: `$coefficient` only works through `$` partial
# matching of `coefficients`.
summary(mult_reg_model)$coefficients
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.1405850738 1.8958667617 5.8762490 7.742498e-09
BPM -0.0019723462 0.0031890181 -0.6184807 5.365447e-01
weight 0.0016283130 0.0033510746 0.4859077 6.272489e-01
length 0.0012295257 0.0009710195 1.2662214 2.060330e-01
air -0.0281474117 0.0318706960 -0.8831753 3.775729e-01
water -0.0188934313 0.0270781617 -0.6977369 4.856714e-01
meta -0.0009712356 0.0025951073 -0.3742564 7.083748e-01
depth 0.5061284856 0.0223190976 22.6769242 1.816298e-78
# Simple linear regression of blotching time on depth alone.
lin_reg_model <- lm(blotch ~ depth, data = sharks)

# Print the model summary.
summary(lin_reg_model)
Call:
lm(formula = blotch ~ depth, data = sharks)
Residuals:
Min 1Q Median 3Q Max
-2.81869 -0.65427 -0.01035 0.58825 2.83116
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 9.82178 1.11207 8.832 <2e-16 ***
depth 0.50467 0.02216 22.772 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1 on 498 degrees of freedom
Multiple R-squared: 0.5101, Adjusted R-squared: 0.5091
F-statistic: 518.6 on 1 and 498 DF, p-value: < 2.2e-16