# Read the two tab-delimited data files (first row holds the column names).
# eqsub supplies the comp/comp2 columns used in the paired analyses below;
# equine supplies the columns used everywhere else.
eqsub <- read.table('~/Equine Summative NEW/Copy of eqsub.txt', header = TRUE, sep = "\t")
equine <- read.table('~/Equine Summative NEW/Equine.txt', header = TRUE, sep = "\t")
Research Formative
Data
1. Is there a difference in compliance time between mares and geldings?
Hypothesis
# There will be no difference in average compliance time based on the sex of the animal (geldings and mares)
Results
# Normality is assessed on the model residuals rather than on the raw data
# Fitting a linear model
model <- lm(comp ~ sex, data = equine)

# Extract residuals
# (the variable shares its name with the residuals() function; R still
# resolves residuals(...) calls to the function)
residuals <- residuals(model)

# Visual tests
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")

# Formal test
shapiro.test(residuals)
Shapiro-Wilk normality test
data: residuals
W = 0.99738, p-value = 0.6242
# P > 0.05 therefore the residuals are normally distributed
# Testing via independent two-sample t-test, based on the normality testing
# Perform independent t-test comparing 'comp' by 'sex'
# (no var.equal argument is given, so R runs the Welch unequal-variance form)
t_test_result <- t.test(comp ~ sex, data = equine)

# Print the result
print(t_test_result)
Welch Two Sample t-test
data: comp by sex
t = -2.8315, df = 494.83, p-value = 0.004821
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
-0.6105344 -0.1103336
sample estimates:
mean in group Female mean in group Male
34.94452 35.30496
# P < 0.05 therefore there is a significant difference in compliance time between mares and geldings. This result rejects the hypothesis
Graph (Box Plot)
# Box plot of compliance time for each sex (needs ggplot2)
library(ggplot2)

ggplot(data = equine, mapping = aes(x = sex, y = comp, fill = sex)) +
  # Boxes drawn with a black outline
  geom_boxplot(color = "black") +
  # Overlay each group's mean as a red point
  stat_summary(geom = "point", fun = mean, shape = 18, size = 3, color = "red") +
  # Fixed fill colour for each sex
  scale_fill_manual(values = c("Male" = "skyblue", "Female" = "lightpink")) +
  # Minimal theme, then axis labels and title
  theme_minimal() +
  labs(title = "Compliance Time by Gender", x = "Sex", y = "Compliance Time")
2. Regression analysis – strongest correlation between compliance time and other variables
Is there a correlation between Heart Rate (bpm) and Compliance Time (seconds)?
Hypothesis
# It is hypothesised there will be a correlation. When heart rate increases, the compliance time will increase.
Results
# Perform the Shapiro-Wilk normality test on heart rate (BPM)
shapiro.test(equine$BPM)
Shapiro-Wilk normality test
data: equine$BPM
W = 0.99739, p-value = 0.6264
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# P > 0.05 therefore the data is normally distributed
# Testing significance of the Pearson correlation between BPM and comp
# (no method argument is given, so cor.test() uses Pearson)
cor.test(equine$BPM, equine$comp)
Pearson's product-moment correlation
data: equine$BPM and equine$comp
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6689964 0.7552703
sample estimates:
cor
0.7148428
# P < 0.05 therefore there is a significant correlation between compliance time and heart rate. This result accepts the hypothesis.
Graph (Scatterplot)
# Scatterplot of compliance time against heart rate (needs ggplot2)
library(ggplot2)

ggplot(data = equine, mapping = aes(x = BPM, y = comp)) +
  # Raw observations
  geom_point() +
  # Straight-line (lm) fit, drawn without a confidence band
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  # Title and axis labels
  labs(
    x = "BPM (min)",
    y = "Compliance Time (seconds)",
    title = "Scatterplot of Compliance Time vs. BPM"
  )
`geom_smooth()` using formula = 'y ~ x'
Is there a correlation between Blood Cortisol Levels (mcg/dl) and Compliance Time (seconds)
Hypothesis
# It is hypothesised there will be a correlation. When blood cortisol increases, the compliance time will increase.
Results
# Perform the Shapiro-Wilk normality test on blood cortisol
shapiro.test(equine$cortisol)
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# P < 0.05 for cortisol, so cortisol is not normally distributed (comp is);
# a Spearman rank correlation is therefore used instead of Pearson
cor.test(equine$cortisol, equine$comp, method = "spearman")
Spearman's rank correlation rho
data: equine$cortisol and equine$comp
S = 21166080, p-value = 0.529
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.02826584
# P > 0.05 therefore there is not a significant correlation between compliance time and blood cortisol levels. This result rejects the hypothesis.
Graph (Scatterplot)
# Scatterplot of compliance time against blood cortisol (needs ggplot2)
library(ggplot2)

ggplot(data = equine, mapping = aes(x = cortisol, y = comp)) +
  # Raw observations
  geom_point() +
  # Straight-line (lm) fit, drawn without a confidence band
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  # Title and axis labels
  labs(
    x = "Cortisol (mcg/dL)",
    y = "Compliance Time (seconds)",
    title = "Scatterplot of Compliance Time vs. Cortisol"
  )
`geom_smooth()` using formula = 'y ~ x'
Is there a correlation between Thermographic Eye Temperature (Celsius) and Compliance Time (seconds)
Hypothesis
# It is hypothesised there will be no correlation.
Results
# Perform the Shapiro-Wilk normality test on thermographic eye temperature (irt)
shapiro.test(equine$irt)
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp)
Shapiro-Wilk normality test
data: equine$comp
W = 0.99692, p-value = 0.4695
# P < 0.05 for irt, so irt is not normally distributed (comp is);
# a Spearman rank correlation is therefore used instead of Pearson
cor.test(equine$irt, equine$comp, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$comp
S = 21203514, p-value = 0.5028
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.03008441
# P > 0.05 therefore there is not a significant correlation between compliance time and eye temperature. This result accepts the hypothesis.
Graph (Scatterplot)
# Scatterplot of compliance time against thermographic eye temperature
# Call necessary library
library(ggplot2)

# Call Variables
ggplot(equine, aes(x = irt, y = comp)) +
  # Add points
  geom_point() +
  # Add linear regression line (no confidence band)
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  # Label axes; the x label spelling is corrected to "Celsius" to match the
  # other labels and headings in this report
  labs(title = "Scatterplot of Compliance Time vs. Thermographic Eye Temperature (IRT)",
       x = "IRT (Celsius)",
       y = "Compliance Time (seconds)")
`geom_smooth()` using formula = 'y ~ x'
3. Is there a correlation between irt and cortisol?
Hypothesis
# It is hypothesised there will be a correlation. When irt increases, cortisol will also increase.
Results
# Perform the Shapiro-Wilk normality test on thermographic eye temperature (irt)
shapiro.test(equine$irt)
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on blood cortisol
shapiro.test(equine$cortisol)
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
# P < 0.05 for both irt and cortisol, so neither is normally distributed;
# a Spearman rank correlation is therefore used instead of Pearson
cor.test(equine$irt, equine$cortisol, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
# P < 0.05 therefore there is a significant correlation between eye temperature and blood cortisol levels. This result accepts the hypothesis.
Graph (Scatterplot)
# Scatterplot of blood cortisol against eye temperature (needs ggplot2)
library(ggplot2)

ggplot(data = equine, mapping = aes(x = irt, y = cortisol)) +
  # Raw observations
  geom_point() +
  # Straight-line (lm) fit, drawn without a confidence band
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  # Minimal theme, then title and axis labels
  theme_minimal() +
  labs(
    x = "Thermographic Eye Temperature (Celsius)",
    y = "Blood Cortisol Levels (mcg/dl)",
    title = "Correlation between IRT and Cortisol Levels"
  )
`geom_smooth()` using formula = 'y ~ x'
4. Does the application of the calming spray have an effect on compliance time, if so what is it?
Hypothesis
# It is hypothesised there will be a difference. When the calming spray is applied, compliance time will decrease.
Results
# Normality is assessed on the model residuals rather than on the raw data
# NOTE(review): for a paired before/after comparison the usual check is
# normality of the paired differences (eqsub$comp - eqsub$comp2) - confirm
# that regressing comp on comp2 is the intended approach.
# Fitting a linear model
model <- lm(comp ~ comp2, data = eqsub)

# Extract residuals
# (the variable shares its name with the residuals() function; R still
# resolves residuals(...) calls to the function)
residuals <- residuals(model)

# Visual tests
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")

# Formal test
shapiro.test(residuals)
Shapiro-Wilk normality test
data: residuals
W = 0.48102, p-value < 2.2e-16
# P < 0.05 therefore the residuals are not normally distributed
# Perform the Wilcoxon signed-rank test (paired = TRUE, so comp and comp2
# are compared row by row)
wilcox_test <- wilcox.test(eqsub$comp, eqsub$comp2, paired = TRUE)

# Print the results
print(wilcox_test)
Wilcoxon signed rank test with continuity correction
data: eqsub$comp and eqsub$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# P < 0.05 therefore there is a significant difference between compliance time before the calming spray and after the application of the calming spray. This result accepts the hypothesis.
Graph (Box Plot)
# Sample Data
# NOTE(review): these five value pairs are hard-coded placeholders, not the
# measurements in eqsub. If the figure is meant to show the study data,
# build df from eqsub[, c("comp", "comp2")] instead - confirm intent.
df <- data.frame(
  comp = c(34.9, 35.1, 36.0, 34.5, 35.2),
  comp2 = c(33.8, 36.1, 34.7, 35.5, 36.3)
)

# Load necessary libraries
library(ggplot2)
library(tidyr)

# Reshape data to long format: one row per measurement, with the source
# column name in Variable and the reading in Value
df_long <- df %>%
  pivot_longer(cols = c(comp, comp2), names_to = "Variable", values_to = "Value")

# Create the box plot
ggplot(df_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot() +
  # Labeling axes and title
  labs(title = "The Difference in Compliance Time When Applying a Calming Spray",
       x = "Treatment",
       y = "Compliance Time") +
  theme_minimal() +
  # Fixed fill colour for each column, with the legend titled "Treatment"
  scale_fill_manual(values = c("comp" = "skyblue", "comp2" = "lightpink"),
                    name = "Treatment")
5. Is it possible to predict compliance time?
Hypothesis
# The compliance time can be predicted, as time will decrease with the application of the calming spray.
Results
# Running linear regression of comp2 on comp
model <- lm(comp2 ~ comp, data = eqsub)

# Summary of the model (coefficients, residual spread, R-squared, F-test)
summary(model)
Call:
lm(formula = comp2 ~ comp, data = eqsub)
Residuals:
Min 1Q Median 3Q Max
-1.01780 -0.02830 0.03085 0.06742 2.60626
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.571268 0.184969 24.71 <2e-16 ***
comp 0.722302 0.005262 137.28 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.1678 on 496 degrees of freedom
Multiple R-squared: 0.9744, Adjusted R-squared: 0.9743
F-statistic: 1.884e+04 on 1 and 496 DF, p-value: < 2.2e-16
Graph (Scatterplot)
# Load necessary libraries
library(ggplot2)

# Create the linear model
# (the plot below does not read `model`; geom_smooth(method = 'lm') fits
# its own line from the plotted data)
model <- lm(comp2 ~ comp, data = eqsub)

# Create a scatter plot with the regression line
ggplot(eqsub, aes(x = comp, y = comp2)) +
  # Scatter plot of points (semi-transparent so overlapping points show)
  geom_point(color = 'black', alpha = 0.5) +
  # Add regression line (no confidence band)
  geom_smooth(method = 'lm', color = 'blue', se = FALSE) +
  # Labeling axes
  labs(title = 'Linear Regression of Compliance Time',
       x = 'Comp',
       y = 'Comp2') +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'