# Load the two tab-delimited data sets used throughout the analysis.
# eqsub  - supplies comp and comp2 for the calming-spray comparison.
# equine - main data set (comp, sex, irt, cortisol, BPM, weight).
eqsub <- read.table(
  "~/Equine Summative NEW/eqsub.txt",
  header = TRUE, sep = "\t"
)
equine <- read.table(
  "~/Equine Summative NEW/Equine.txt",
  header = TRUE, sep = "\t"
)
Equine Research Formative
Data Analysis
1. Is there a difference in compliance time between mares and geldings?
Hypothesis
It is hypothesised that there will be no significant difference in average compliance time based on the sex of the animal (mares and geldings).
Results
# Normality is checked on the residuals of the model rather than on the raw
# compliance times.
# Fit a linear model of compliance time (comp) on sex
model <- lm(comp ~ sex, data = equine)

# Extract residuals.
# The object name `residuals` is kept so the `data:` label printed by
# shapiro.test() is unchanged; R still resolves the call residuals(model)
# to the function because non-function objects are skipped in call position.
residuals <- residuals(model)

# Visual tests: histogram and normal Q-Q plot with a reference line
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")

# Formal test
shapiro.test(residuals)
Shapiro-Wilk normality test
data: residuals
W = 0.99738, p-value = 0.6242
# P > 0.05, therefore the residuals are consistent with a normal distribution
# The residuals are consistent with normality, so compare mean compliance
# time (comp) between the sexes with an independent two-sample t-test.
# t.test() defaults to var.equal = FALSE, so this is the Welch version.
t_test_result <- t.test(comp ~ sex, data = equine)

# Print the result
print(t_test_result)
Welch Two Sample t-test
data: comp by sex
t = -2.8315, df = 494.83, p-value = 0.004821
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.6105344 -0.1103336
sample estimates:
mean in group Female mean in group Male
34.94452 35.30496
# P < 0.05 therefore, there is a significant difference in compliance time between mares and geldings. This result rejects the hypothesis.
library(ggplot2)

# Box plot of compliance time by sex with the group means overlaid.
# The plot is defined once, with straight quotes and one layer per line, so
# the inline comments cannot swallow the rest of the expression.
ggplot(equine, aes(x = sex, y = comp, fill = sex)) +
  geom_boxplot(color = "black") + # Keep the outline color black
  stat_summary(
    fun = mean, geom = "point", size = 3, color = "red", shape = 18
  ) + # Add means
  labs(x = "Sex", y = "Compliance Time", title = "Compliance Time by Gender") +
  theme_minimal() +
  # Custom colors for each sex
  scale_fill_manual(values = c("Male" = "skyblue", "Female" = "lightpink"))
2. Are there significant correlations between compliance time and other variables (IRT, cortisol level and heart rate)
Is there a significant correlation between Thermographic Eye Temperature (Celsius) and Compliance Time (seconds)?
Hypothesis
It is hypothesised that there will be a significant correlation between thermographic eye temperature and compliance time.
Results
# Perform the Shapiro-Wilk normality test on thermographic eye temperature
# (irt); a p-value below 0.05 indicates a departure from normality
shapiro.test(equine$irt)
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# irt has P < 0.05, so it is not normally distributed (comp has P > 0.05);
# with one non-normal variable the rank-based Spearman correlation is used
cor.test(equine$irt, equine$comp, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$comp
S = 21203514, p-value = 0.5028
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.03008441
# P > 0.05, therefore there is not a significant correlation between compliance time and eye temperature. This result rejects the hypothesis.
# Scatter plot of compliance time against eye temperature with a fitted
# linear regression line
library(ggplot2)
ggplot(equine, aes(x = irt, y = comp)) +
  geom_point() + # Add points
  # Add linear regression line (se = FALSE hides the confidence band)
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Scatterplot of Compliance Time vs. Thermographic Eye Temperature (IRT)",
    x = "IRT (Celsius)", # spelling corrected from "Celcius"
    y = "Compliance Time (seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'
Is there a significant correlation between Blood Cortisol Levels (mcg/dL) and Compliance Time (seconds)?
Hypothesis
It is hypothesised that there will be a positive correlation: when blood cortisol increases, the compliance time will increase.
Results
# Perform the Shapiro-Wilk normality test on blood cortisol (cortisol);
# a p-value below 0.05 indicates a departure from normality
shapiro.test(equine$cortisol)
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# cortisol has P < 0.05, so it is not normally distributed (comp has
# P > 0.05); with one non-normal variable the rank-based Spearman
# correlation is used
cor.test(equine$cortisol, equine$comp, method = "spearman")
Spearman's rank correlation rho
data: equine$cortisol and equine$comp
S = 21166080, p-value = 0.529
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.02826584
# P > 0.05, therefore there is not a significant correlation between compliance time and blood cortisol levels. This result rejects the hypothesis.
# Plot compliance time against cortisol and overlay a straight-line fit
library(ggplot2)
ggplot(data = equine, mapping = aes(x = cortisol, y = comp)) +
  labs(
    x = "Cortisol (mcg/dL)",
    y = "Compliance Time (seconds)",
    title = "Scatterplot of Compliance Time vs. Cortisol"
  ) +
  geom_point() + # raw observations
  # least-squares line, drawn without a confidence band
  geom_smooth(colour = "blue", se = FALSE, method = "lm")
`geom_smooth()` using formula = 'y ~ x'
Is there a significant correlation between Heart Rate (bpm) and Compliance Time (seconds)?
Hypothesis
It is hypothesised that there will be a positive correlation: when heart rate increases, the compliance time will increase.
Results
# Testing each variable for normality
# Perform the Shapiro-Wilk normality test on heart rate (BPM), the variable
# that is correlated with compliance time in this section.
# NOTE(review): this call previously tested equine$weight, which is not used
# in this section; the console output recorded after it was produced for
# weight and must be regenerated.
shapiro.test(equine$BPM)
Shapiro-Wilk normality test
data: equine$weight
W = 0.94685, p-value = 2.229e-12
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# comp has P > 0.05, so compliance time is consistent with normality.
# NOTE(review): the Pearson test assumes both variables are approximately
# normal - confirm that a Shapiro-Wilk result for BPM supports this;
# otherwise use method = "spearman" as in the other sections.
# Testing significance of Pearson correlation
cor.test(equine$BPM, equine$comp)
Pearson's product-moment correlation
data: equine$BPM and equine$comp
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6689964 0.7552703
sample estimates:
cor
0.7148428
# P < 0.05 therefore there is a significant correlation between compliance time and heart rate. This result accepts the hypothesis.
# Plot compliance time against heart rate and overlay a straight-line fit
library(ggplot2)
ggplot(data = equine, mapping = aes(x = BPM, y = comp)) +
  labs(
    x = "Heart Rate (BPM)",
    y = "Compliance Time (seconds)",
    title = "Scatterplot of Compliance Time vs. Heart Rate "
  ) +
  geom_point() + # raw observations
  # least-squares line, drawn without a confidence band
  geom_smooth(colour = "blue", se = FALSE, method = "lm")
`geom_smooth()` using formula = 'y ~ x'
3. Is there a correlation between IRT and Cortisol?
Hypothesis
It is hypothesised there will be a correlation between IRT and cortisol. When IRT increases, cortisol will also increase.
Results
# Perform the Shapiro-Wilk normality test on thermographic eye temperature
# (irt); a p-value below 0.05 indicates a departure from normality
shapiro.test(equine$irt)
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on blood cortisol (cortisol)
shapiro.test(equine$cortisol)
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Both irt and cortisol have P < 0.05, so neither is normally distributed;
# the rank-based Spearman correlation is used
cor.test(equine$irt, equine$cortisol, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
# P < 0.05, therefore there is a significant correlation between eye temperature and blood cortisol levels. This result accepts the hypothesis.
# Perform a paired t-test
# NOTE(review): this compares the mean of cortisol (mcg/dl) with the mean of
# irt (Celsius); the two are on different scales, so the test does not
# address the correlation question - confirm whether it is intended.
result <- t.test(equine$cortisol, equine$irt, paired = TRUE)

# Print the result
print(result)
Paired t-test
data: equine$cortisol and equine$irt
t = 59.819, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
44.93358 47.98550
sample estimates:
mean difference
46.45954
# Calculate correlation between IRT and cortisol.
# cor() defaults to method = "pearson", so this coefficient is Pearson's r
# and is not expected to equal a Spearman rho for the same variables.
# use = "complete.obs" drops rows with a missing value in either variable.
correlation_result <- cor(equine$irt, equine$cortisol, use = "complete.obs")
correlation_result
[1] 0.1286011
# Scatter plot of cortisol against eye temperature with a straight-line fit
ggplot(data = equine, mapping = aes(x = irt, y = cortisol)) +
  theme_minimal() +
  labs(
    x = "Thermographic Eye Temperature (Celsius)",
    y = "Blood Cortisol Levels (mcg/dl)",
    title = "Correlation between IRT and Cortisol Levels"
  ) +
  geom_point() + # raw observations
  # least-squares line, drawn without a confidence band
  geom_smooth(colour = "blue", se = FALSE, method = "lm")
`geom_smooth()` using formula = 'y ~ x'
4. Does the application of the calming spray have an effect on compliance time?
Hypothesis
# It is hypothesised there will be a difference. When the calming spray is applied, compliance time will decrease.
Results
# Normality is checked on the residuals of the model.
# Fitting a linear model of comp on comp2, the two compliance-time
# measurements compared in the calming-spray analysis.
# NOTE(review): for a paired comparison the usual check is normality of the
# paired differences (comp - comp2) - confirm this model is the intended one.
model <- lm(comp ~ comp2, data = eqsub)

# Extract residuals.
# The object name `residuals` is kept so the `data:` label printed by
# shapiro.test() is unchanged; R still resolves the call residuals(model)
# to the function because non-function objects are skipped in call position.
residuals <- residuals(model)

# Visual tests: histogram and normal Q-Q plot with a reference line
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")

# Formal test
shapiro.test(residuals)
Shapiro-Wilk normality test
data: residuals
W = 0.48102, p-value < 2.2e-16
# P < 0.05 therefore the residuals are not normally distributed
# Perform the Wilcoxon signed-rank test, the non-parametric test for paired
# measurements, on the two compliance times (comp and comp2)
wilcox_test <- wilcox.test(eqsub$comp, eqsub$comp2, paired = TRUE)

# Print the results
print(wilcox_test)
Wilcoxon signed rank test with continuity correction
data: eqsub$comp and eqsub$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# P < 0.05 therefore there is a significant difference between compliance time before the calming spray and after the application of the calming spray. This result accepts the hypothesis.