##title: "Equine Research Formative"
##author: "Laura Marks"
##date: "2024-10-16"
# Set the working directory to the module folder so the relative data paths
# below resolve.
# NOTE(review): this absolute path is specific to one machine; anyone else
# running the script must change it to their own copy of the folder.
setwd("/Users/Laura/Documents/Equine Masters/Data Analysis & Research Methods/")

# Read the two tab-separated data files; the first row holds the column names.
equine <- read.table(
  "Equine Summative NEW/equine.txt",
  header = TRUE,
  sep = "\t"
)
eqsub <- read.table(
  "Equine Summative NEW/eqsub.txt",
  header = TRUE,
  sep = "\t"
)
Equine Research Formative
Data Analysis
1. Is there a difference in compliance time between mares and geldings?
Hypothesis
It is hypothesised there will be no significant difference in average compliance time based on the sex of the animal (mares and geldings)
Results
# Check the normality assumption on the residuals of a fitted model rather
# than on the raw data.
# Fit a linear model of compliance time (comp) on sex
model <- lm(comp ~ sex, data = equine)
# Extract residuals.
# The variable keeps the name `residuals` so that the Shapiro-Wilk output is
# labelled "data: residuals"; calling residuals(model) still works because R
# skips non-function objects when it looks up a function name.
residuals <- residuals(model)
# Visual tests
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")
# Formal test
shapiro.test(residuals)
Shapiro-Wilk normality test
data: residuals
W = 0.99738, p-value = 0.6242
# P > 0.05 therefore the residuals are normally distributed
# The residuals passed the normality check, so compare mean compliance time
# between the sexes with an independent two-sample t-test.
# t.test() is called without var.equal, so it runs the Welch version, which
# does not assume equal variances in the two groups.
t_test_result <- t.test(comp ~ sex, data = equine)
# Print the result
print(t_test_result)
Welch Two Sample t-test
data: comp by sex
t = -2.8315, df = 494.83, p-value = 0.004821
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.6105344 -0.1103336
sample estimates:
mean in group Female mean in group Male
34.94452 35.30496
# P < 0.05 therefore, there is a significant difference in compliance time between mares and geldings. This result rejects the hypothesis.
# ggplot2 supplies the plotting functions used below
library(ggplot2)
# Box plot of compliance time split by sex, one fill colour per sex
ggplot(data = equine, mapping = aes(x = sex, y = comp, fill = sex)) +
  # Boxes drawn with a black outline
  geom_boxplot(color = "black") +
  # Overlay each group's mean as a red diamond (shape 18)
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 18,
    size = 3,
    color = "red"
  ) +
  # Custom fill colour for each sex
  scale_fill_manual(values = c("Female" = "lightpink", "Male" = "skyblue")) +
  # Minimal theme
  theme_minimal() +
  # Axis labels and title
  labs(
    title = "Compliance Time by Gender",
    x = "Sex",
    y = "Compliance Time (s)"
  )
2. Are there significant correlations between compliance time and other variables (IRT, cortisol level and heart rate)
Is there a significant correlation between Thermographic Eye Temperature (Celsius) and Compliance Time (seconds)?
Hypothesis
It is hypothesised there will be no correlation between thermographic eye temperature and compliance time.
Results
# Perform the Shapiro-Wilk normality test on thermographic eye temperature (irt)
shapiro.test(equine$irt)
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Perform the Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# P < 0.05 for irt, therefore irt is not normally distributed (comp is, P > 0.05).
# Because one of the two variables is not normal, use Spearman's rank correlation.
cor.test(equine$irt, equine$comp, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$comp
S = 21203514, p-value = 0.5028
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.03008441
# P > 0.05 therefore there is not a significant correlation between compliance time and eye temperature. This result accepts the hypothesis.
# Scatter plot of compliance time against thermographic eye temperature,
# with a fitted straight line
library(ggplot2)
ggplot(equine, aes(x = irt, y = comp)) +
  # Add points
  geom_point() +
  # Add linear regression line (no confidence band)
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  # Title and axis labels
  labs(
    title = "Scatterplot of Compliance Time vs. Thermographic Eye Temperature (IRT)",
    x = "IRT (Celsius)",
    y = "Compliance Time (seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'
Is there a significant correlation between Blood Cortisol Levels (mcg/dL) and Compliance Time (seconds)?
Hypothesis
It is hypothesised there will be a correlation. When blood cortisol increases, the compliance time will increase.
Results
# Perform the Shapiro-Wilk normality test on blood cortisol (cortisol)
shapiro.test(equine$cortisol)
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Perform the Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# P < 0.05 for cortisol, therefore cortisol is not normally distributed (comp is, P > 0.05).
# Because one of the two variables is not normal, use Spearman's rank correlation.
cor.test(equine$cortisol, equine$comp, method = "spearman")
Spearman's rank correlation rho
data: equine$cortisol and equine$comp
S = 21166080, p-value = 0.529
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.02826584
# P > 0.05 therefore there is not a significant correlation between compliance time and blood cortisol levels. This result rejects the hypothesis.
# ggplot2 supplies the plotting functions used below
library(ggplot2)
# Compliance time plotted against blood cortisol
ggplot(data = equine, mapping = aes(x = cortisol, y = comp)) +
  # Title and axis labels
  labs(
    y = "Compliance Time (seconds)",
    x = "Cortisol (mcg/dL)",
    title = "Scatterplot of Compliance Time vs. Cortisol"
  ) +
  # One point per observation
  geom_point() +
  # Straight-line (lm) fit drawn in blue, without a confidence band
  geom_smooth(color = "blue", se = FALSE, method = "lm")
`geom_smooth()` using formula = 'y ~ x'
Is there a significant correlation between Heart Rate (bpm) and Compliance Time (seconds)?
Hypothesis
It is hypothesised there will be a correlation. When heart rate increases, the compliance time will increase.
Results
# Testing each variable for normality
# Perform the Shapiro-Wilk normality test on heart rate (BPM)
shapiro.test(equine$BPM)
Shapiro-Wilk normality test
data: equine$BPM
W = 0.99739, p-value = 0.6264
# Perform the Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# P > 0.05 for both BPM and comp, therefore both variables are normally distributed
# Testing significance of Pearson correlation (no method is given, and the
# output below reports Pearson's product-moment correlation)
cor.test(equine$BPM, equine$comp)
Pearson's product-moment correlation
data: equine$BPM and equine$comp
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6689964 0.7552703
sample estimates:
cor
0.7148428
# P < 0.05 therefore there is a significant correlation between compliance time and heart rate. This result accepts the hypothesis.
# ggplot2 supplies the plotting functions used below
library(ggplot2)
# Compliance time plotted against heart rate
ggplot(data = equine, mapping = aes(x = BPM, y = comp)) +
  # Title and axis labels
  labs(
    y = "Compliance Time (seconds)",
    x = "Heart Rate (BPM)",
    title = "Scatterplot of Compliance Time vs. Heart Rate "
  ) +
  # One point per observation
  geom_point() +
  # Straight-line (lm) fit drawn in blue, without a confidence band
  geom_smooth(color = "blue", se = FALSE, method = "lm")
`geom_smooth()` using formula = 'y ~ x'
3. Is there a correlation between IRT and Cortisol?
Hypothesis
It is hypothesised there will be a correlation between IRT and cortisol. When IRT increases, cortisol will also increase.
Results
# Perform the Shapiro-Wilk normality test on thermographic eye temperature (irt)
shapiro.test(equine$irt)
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Perform the Shapiro-Wilk normality test on blood cortisol (cortisol)
shapiro.test(equine$cortisol)
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
# P < 0.05 for both irt and cortisol, therefore neither variable is normally distributed.
# Use Spearman's rank correlation, which does not assume normality.
cor.test(equine$irt, equine$cortisol, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
# P < 0.05 therefore there is a significant correlation between eye temperature and blood cortisol levels. This result accepts the hypothesis.
# ggplot2 supplies the plotting functions used below
library(ggplot2)
# Blood cortisol plotted against thermographic eye temperature
ggplot(data = equine, mapping = aes(x = irt, y = cortisol)) +
  # Minimal theme
  theme_minimal() +
  # Title and axis labels
  labs(
    y = "Blood Cortisol Levels (mcg/dl)",
    x = "Thermographic Eye Temperature (Celsius)",
    title = "Correlation between IRT and Cortisol Levels"
  ) +
  # One point per observation
  geom_point() +
  # Straight-line (lm) fit drawn in blue, without a confidence band
  geom_smooth(color = "blue", se = FALSE, method = "lm")
`geom_smooth()` using formula = 'y ~ x'
4. Does the application of the calming spray have an effect on compliance time?
Hypothesis
It is hypothesised there will be a difference. When the calming spray is applied, compliance time will decrease.
Results
# Check the normality assumption on the residuals of a fitted model rather
# than on the raw data.
# NOTE(review): the test that follows is a paired comparison of comp and
# comp2; the usual normality check for that is on the paired differences
# (comp - comp2). Confirm that the residuals of this regression are the
# intended quantity.
# Fit a linear model of comp on comp2
model <- lm(comp ~ comp2, data = eqsub)
# Extract residuals.
# The variable keeps the name `residuals` so that the Shapiro-Wilk output is
# labelled "data: residuals"; calling residuals(model) still works because R
# skips non-function objects when it looks up a function name.
residuals <- residuals(model)
# Visual tests
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")
# Formal test
shapiro.test(residuals)
Shapiro-Wilk normality test
data: residuals
W = 0.48102, p-value < 2.2e-16
# P < 0.05 therefore the residuals are not normally distributed
# Perform the Wilcoxon signed-rank test.
# paired = TRUE treats comp and comp2 on the same row of eqsub as a matched pair.
wilcox_test <- wilcox.test(eqsub$comp, eqsub$comp2, paired = TRUE)
# Print the results
print(wilcox_test)
Wilcoxon signed rank test with continuity correction
data: eqsub$comp and eqsub$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# P < 0.05 therefore there is a significant difference between compliance time before the calming spray and after the application of the calming spray. This result accepts the hypothesis.
# Load necessary libraries
library(ggplot2)
library(tidyr)
# (knit output) Warning: package 'tidyr' was built under R version 4.4.1
# Reshape the observed eqsub measurements to long format: one row per
# measurement, with "Variable" naming the source column (comp or comp2) and
# "Value" holding the compliance time. The plot uses the recorded data so
# that the figure matches the Wilcoxon test run on eqsub.
df_long <- pivot_longer(
  eqsub,
  cols = c(comp, comp2),
  names_to = "Variable",
  values_to = "Value"
)
# Create the box plot
ggplot(df_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot() +
  # Title and axis labels
  labs(
    title = "The Difference in Compliance Time When Applying a Calming Spray",
    x = "Treatment",
    y = "Compliance Time"
  ) +
  theme_minimal() +
  # Fill colour for each variable; the legend is titled "Treatment"
  scale_fill_manual(
    values = c("comp" = "skyblue", "comp2" = "lightpink"),
    name = "Treatment"
  )
5. Is it possible to predict compliance time?
Hypothesis
The compliance time can be predicted, as time will decrease with the application of the calming spray.
# Running linear regression: model comp2 as a linear function of comp
model <- lm(comp2 ~ comp, data = eqsub)
# Summary of the model (coefficients, residual spread, R-squared, F-test)
summary(model)
Call:
lm(formula = comp2 ~ comp, data = eqsub)
Residuals:
Min 1Q Median 3Q Max
-1.01780 -0.02830 0.03085 0.06742 2.60626
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.571268 0.184969 24.71 <2e-16 ***
comp 0.722302 0.005262 137.28 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.1678 on 496 degrees of freedom
Multiple R-squared: 0.9744, Adjusted R-squared: 0.9743
F-statistic: 1.884e+04 on 1 and 496 DF, p-value: < 2.2e-16
# Load necessary libraries
library(ggplot2)
# Create the linear model.
# The plot below does not read `model`; geom_smooth(method = "lm") fits its
# own line of y on x (comp2 on comp) from the same data.
model <- lm(comp2 ~ comp, data = eqsub)
# Create a scatter plot with the regression line
ggplot(eqsub, aes(x = comp, y = comp2)) +
  # Scatter plot of points, semi-transparent so overlapping points stay visible
  geom_point(color = "black", alpha = 0.5) +
  # Add regression line (no confidence band)
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  # Title and axis labels
  labs(
    title = "Linear Regression of Compliance Time",
    x = "Comp",
    y = "Comp2"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'