A little background: This assignment is being submitted as part of the ‘Research Methods and Data Analysis’ module for the MSc in Endangered Species Recovery and Conservation at Nottingham Trent University. The data set provided concerns the phenomenon of ‘blotching’ and whether it is a stress response caused by the capture and tagging of the Caribbean reef shark (Carcharhinus perezi).
Question One: Is there a correlation between the variables of air temperature and water temperature?
Answer:
# Question One: air vs water temperature.
# The "Spearman" sheet is loaded once and reused by both histograms and the
# correlation test below; its Air and Water columns are the two variables.
water_air_path <- file.path(
  "C:/Users/alice/Downloads/Temp R File",
  "CodeR - Water Air .xlsx"
)
CodeR_Water_Air_ <- read_excel(water_air_path, sheet = "Spearman")

# Distribution of air temperature (30 bins, bin edges aligned to 0).
ggplot(CodeR_Water_Air_, aes(x = Air)) +
  geom_histogram(
    bins = 30,
    fill = "ivory3",
    color = "seashell3",
    alpha = 0.7,
    boundary = 0
  ) +
  labs(
    title = "Histogram of Air Temperature",
    x = "Air Temperature (°C)",
    y = "Frequency"
  ) +
  theme_minimal()

# Distribution of water temperature, drawn with the same binning settings.
ggplot(CodeR_Water_Air_, aes(x = Water)) +
  geom_histogram(
    bins = 30,
    fill = "lightblue",
    color = "skyblue",
    alpha = 0.7,
    boundary = 0
  ) +
  labs(
    title = "Histogram of Water Temperature",
    x = "Water Temperature (°C)",
    y = "Frequency"
  ) +
  theme_minimal()

# Spearman's rank correlation between the Air and Water columns.
cor.test(CodeR_Water_Air_$Air, CodeR_Water_Air_$Water, method = "spearman")
Spearman's rank correlation rho
data: CodeR_Water_Air_$Air and CodeR_Water_Air_$Water
S = 22007692, p-value = 0.2082
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.05637344
# Scatter plot of water temperature against air temperature.
# The overlaid line is a straight-line fit (method = "lm") drawn without a
# confidence band (se = FALSE).
ggplot(data = CodeR_Water_Air_, mapping = aes(x = Air, y = Water)) +
  geom_point(size = 2, color = "skyblue1") +
  geom_smooth(method = "lm", se = FALSE, color = "slategray3") +
  theme_minimal() +
  labs(
    title = "Spearman Rank Correlation: Air vs Water Temperature",
    x = "Air Temperature (°C)",
    y = "Water Temperature (°C)"
  )
Question Two: Does being captured on a second occasion have an impact on the time it takes for blotching to occur?
Answer:
# Question Two data: load the recapture workbook (first sheet by default)
# and list its column names.
R_Code_Recapture_Time <- read_excel(
  "C:/Users/alice/Downloads/Temp R File/R Code - Recapture Time.xlsx"
)
names(R_Code_Recapture_Time)
[1] "Initial Capture" "Re-Capture"
# Pull each capture column out as a plain vector.
initial_capture_data <- R_Code_Recapture_Time[["Initial Capture"]]
recapture_data <- R_Code_Recapture_Time[["Re-Capture"]]

# Shapiro-Wilk normality test on each column; results are printed further on.
shapiro_initial_capture <- shapiro.test(initial_capture_data)
shapiro_recapture <- shapiro.test(recapture_data)

# Heading for the first test's printed result.
cat("Shapiro-Wilk Test for Initial Capture Data:\n")
Shapiro-Wilk Test for Initial Capture Data:
# Show the stored Shapiro-Wilk result for the initial-capture column.
print(shapiro_initial_capture)
Shapiro-Wilk normality test
data: initial_capture_data
W = 0.97958, p-value = 0.5345
# Heading for the re-capture test's printed result.
cat("Shapiro-Wilk Test for Re-Capture Data:\n")
Shapiro-Wilk Test for Re-Capture Data:
# Show the stored Shapiro-Wilk result for the re-capture column.
print(shapiro_recapture)
Shapiro-Wilk normality test
data: recapture_data
W = 0.97936, p-value = 0.5255
# Histogram of the initial-capture values (bin width 0.2), used as a visual
# check of the distribution alongside the Shapiro-Wilk result.
ggplot(data.frame(initial_capture_data), aes(x = initial_capture_data)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "thistle",
    color = "thistle4",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram showing how quickly blotching occurred during initial capture",
    x = "Capture Time",
    y = "Frequency"
  ) +
  theme_minimal()
# Histogram of the re-capture values (bin width 0.2), used as a visual check
# of the distribution alongside the Shapiro-Wilk result.
ggplot(data.frame(recapture_data), aes(x = recapture_data)) +
  geom_histogram(
    binwidth = 0.2,
    fill = "seagreen3",
    color = "mediumseagreen",
    alpha = 0.7
  ) +
  labs(
    title = "Histogram showing how quickly blotching occurred during re-capture",
    x = "Capture Time",
    y = "Frequency"
  ) +
  theme_minimal()
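Using the above results (Shapiro-Wilk p > 0.05 for both samples), I can see no evidence that the data depart from a normal distribution, so a parametric test is appropriate and I will therefore run a Welch two-sample t-test.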
# Load the recapture workbook so this chunk can run on its own.
R_Code_Recapture_Time <- read_excel(
  "C:/Users/alice/Downloads/Temp R File/R Code - Recapture Time.xlsx"
)

# Two-sample t-test comparing the two capture columns. With no var.equal
# argument, t.test() does not assume equal variances, i.e. it runs the
# Welch test.
# NOTE(review): if each row holds the same shark on both occasions, a paired
# test (paired = TRUE) may be the better fit - confirm against study design.
t.test(
  R_Code_Recapture_Time$`Initial Capture`,
  R_Code_Recapture_Time$`Re-Capture`
)
Welch Two Sample t-test
data: R_Code_Recapture_Time$`Initial Capture` and R_Code_Recapture_Time$`Re-Capture`
t = -4.1143, df = 97.658, p-value = 8.113e-05
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-1.3782038 -0.4812731
sample estimates:
mean of x mean of y
35.03042 35.96016
# Reshape the two capture columns into long form: one row per measurement,
# with the source column name in `capture_time` and the value in `time`.
data_long <- pivot_longer(
  R_Code_Recapture_Time,
  cols = c(`Initial Capture`, `Re-Capture`),
  names_to = "capture_time",
  values_to = "time"
)

# Side-by-side boxplots of the two capture occasions; the legend is kept but
# its title is removed.
ggplot(data = data_long, mapping = aes(x = capture_time, y = time, fill = capture_time)) +
  geom_boxplot(color = "peachpuff4") +
  scale_fill_manual(values = c("peachpuff2", "navajowhite3")) +
  labs(
    title = "Boxplot Comparison of Initial Capture and Re-Capture Times",
    x = "Capture Time",
    y = "Time"
  ) +
  theme_minimal() +
  guides(fill = guide_legend(title = NULL))
# Scatter plot of blotch time against depth, with a straight-line fit
# (method = "lm") drawn without a confidence band.
ggplot(data = R_Code_CSV_Shark_Data_Set, mapping = aes(x = depth, y = blotch.time)) +
  geom_point(size = 3, color = "thistle3") +
  geom_smooth(method = "lm", se = FALSE, color = "ivory4") +
  theme_minimal() +
  labs(
    title = "Linear Regression of Blotch Time vs Depth",
    x = "Depth",
    y = "Blotch Time"
  )
`geom_smooth()` using formula = 'y ~ x'
# Boxplots of blotch time for each sex; both the box fill and the outline
# colour are mapped to sex and given manual palettes.
ggplot(
  data = R_Code_CSV_Shark_Data_Set,
  mapping = aes(x = sex, y = blotch.time, fill = sex, color = sex)
) +
  geom_boxplot() +
  scale_color_manual(values = c("lightpink1", "skyblue3")) +
  scale_fill_manual(values = c("pink", "lightskyblue")) +
  theme_minimal() +
  labs(
    title = "Box Plot showing Blotch Time by Sex",
    x = "Sex",
    y = "Blotch Time"
  )