Research Formative

Author

Caitlin Torr, Laura Marks, Tom Horn, Hannah Hockram, Ciarra Rae Mooney and Paris Fenna Owen Soest

Data

# Read the two tab-separated study datasets; the first row of each file
# supplies the column names
eqsub <- read.table(
  file = "~/Equine Summative NEW/Copy of eqsub.txt",
  header = TRUE,
  sep = "\t"
)
equine <- read.table(
  file = "~/Equine Summative NEW/Equine.txt",
  header = TRUE,
  sep = "\t"
)

1. Is there a difference in compliance time between mares and geldings?

Hypothesis
# There will be no difference in average compliance time based on the sex of the animal (geldings and mares)
Results
# Normality is assessed on the residuals of a linear model of compliance time by sex

# Fit a linear model of compliance time (comp) by sex so that normality can be
# checked on the model residuals
model <- lm (comp ~ sex, data = equine)

# Extract the residuals from the fitted model
# (the variable shares its name with the residuals() function; the call still
# resolves to the function because R skips non-function objects when looking up a call)
residuals <- residuals(model)

# Visual checks: a histogram of the residuals, then a normal Q-Q plot with a
# red reference line
hist(residuals, main = "Histogram of Residuals")

qqnorm(residuals)
qqline(residuals, col = "red")

# Formal check: Shapiro-Wilk normality test on the residuals
shapiro.test(residuals)

    Shapiro-Wilk normality test

data:  residuals
W = 0.99738, p-value = 0.6242
# P > 0.05 therefore the residuals are normally distributed
# Testing via Independent Two-Sample t-test based on normality testing

# Perform an independent two-sample t-test comparing 'comp' between the levels of 'sex'
# (t.test() defaults to var.equal = FALSE, i.e. the Welch test shown in the output)
t_test_result <- t.test(comp ~ sex, data = equine)
 
# Print the stored test result
print(t_test_result)

    Welch Two Sample t-test

data:  comp by sex
t = -2.8315, df = 494.83, p-value = 0.004821
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6105344 -0.1103336
sample estimates:
mean in group Female   mean in group Male 
            34.94452             35.30496 
# P < 0.05 therefore there is a significant difference in compliance time between mares and geldings. This result rejects the hypothesis 
Graph (Box Plot)
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Box plot of compliance time for each sex: black box outlines, a red diamond
# marking each group mean, and a custom fill colour per sex
ggplot(data = equine, mapping = aes(x = sex, y = comp, fill = sex)) +
  geom_boxplot(color = "black") +
  stat_summary(geom = "point", fun = mean, shape = 18, size = 3, color = "red") +
  scale_fill_manual(values = c("Male" = "skyblue", "Female" = "lightpink")) +
  labs(
    title = "Compliance Time by Gender",
    x = "Sex",
    y = "Compliance Time"
  ) +
  theme_minimal()

2. Regression analysis – strongest correlation between compliance time and other variables

Is there a correlation between Heart Rate (bpm) and Compliance Time (seconds)?

Hypothesis
#  It is hypothesised there will be a correlation. When heart rate increases, the compliance time will increase.
Results
# Shapiro-Wilk normality test on the BPM column of equine
shapiro.test(equine$BPM)

    Shapiro-Wilk normality test

data:  equine$BPM
W = 0.99739, p-value = 0.6264
# Shapiro-Wilk normality test on the comp (compliance time) column of equine
shapiro.test(equine$comp)

    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695
# P > 0.05 therefore the data is normally distributed
# Pearson product-moment correlation test between BPM and compliance time
cor.test(equine$BPM, equine$comp, method = "pearson")

    Pearson's product-moment correlation

data:  equine$BPM and equine$comp
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6689964 0.7552703
sample estimates:
      cor 
0.7148428 
# P < 0.05 therefore there is a significant correlation between compliance time and heart rate. This result accepts the hypothesis. 
Graph (Scatterplot)
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Scatterplot of compliance time against heart rate with a fitted straight line.
# The x-axis label previously read "BPM (min)", which is not a unit of heart
# rate; it now names the variable and its unit (beats per minute).
ggplot(equine, aes(x = BPM, y = comp)) +
  # One point per row of equine
  geom_point() +
  # Least-squares regression line without a confidence band
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  # Title and axis labels
  labs(title = "Scatterplot of Compliance Time vs. BPM",
       x = "Heart Rate (bpm)",
       y = "Compliance Time (seconds)")
`geom_smooth()` using formula = 'y ~ x'

Is there a correlation between Blood Cortisol Levels (mcg/dl) and Compliance Time (seconds)

Hypothesis
# It is hypothesised there will be a correlation. When blood cortisol increases, the compliance time will increase. 
Results
# Shapiro-Wilk normality test on the cortisol column of equine
shapiro.test(equine$cortisol)

    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Shapiro-Wilk normality test on the comp (compliance time) column of equine
shapiro.test(equine$comp)

    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695
# P < 0.05 for cortisol, therefore cortisol is not normally distributed (compliance time is, P > 0.05), so a non-parametric Spearman correlation is used
# Spearman rank correlation test between cortisol and compliance time
cor.test(equine$cortisol, equine$comp, method = "spearman")

    Spearman's rank correlation rho

data:  equine$cortisol and equine$comp
S = 21166080, p-value = 0.529
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.02826584 
# P > 0.05 therefore there is not a significant correlation between compliance time and blood cortisol levels. This result rejects the hypothesis. 
Graph (Scatterplot)
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Scatterplot of compliance time against cortisol: raw points overlaid with a
# blue least-squares line (no confidence band)
ggplot(data = equine, mapping = aes(x = cortisol, y = comp)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  labs(
    title = "Scatterplot of Compliance Time vs. Cortisol",
    x = "Cortisol (mcg/dL)",
    y = "Compliance Time (seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'

Is there a correlation between Thermographic Eye Temperature (Celsius) and Compliance Time (seconds)

Hypothesis
# It is hypothesised there will be no correlation. 
Results
# Shapiro-Wilk normality test on the irt column of equine
shapiro.test(equine$irt)

    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on the comp (compliance time) column of equine
shapiro.test(equine$comp)

    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695
# P < 0.05 for irt, therefore eye temperature is not normally distributed (compliance time is, P > 0.05), so a non-parametric Spearman correlation is used

# Spearman rank correlation test between irt and compliance time
cor.test(equine$irt, equine$comp, method = "spearman")

    Spearman's rank correlation rho

data:  equine$irt and equine$comp
S = 21203514, p-value = 0.5028
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.03008441 
# P > 0.05 therefore there is not a significant correlation between compliance time and eye temperature. This result accepts the hypothesis.
Graph (Scatterplot)
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Scatterplot of compliance time against thermographic eye temperature with a
# fitted straight line. The x-axis label previously misspelled "Celsius".
ggplot(equine, aes(x = irt, y = comp)) +
  # One point per row of equine
  geom_point() +
  # Least-squares regression line without a confidence band
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  # Title and axis labels
  labs(title = "Scatterplot of Compliance Time vs. Thermographic Eye Temperature (IRT)",
       x = "IRT (Celsius)",
       y = "Compliance Time (seconds)")
`geom_smooth()` using formula = 'y ~ x'

3. Is there a correlation between irt and cortisol?

Hypothesis
# It is hypothesised there will be a correlation. When irt increases, cortisol will also increase.
Results
# Shapiro-Wilk normality test on the irt column of equine
shapiro.test(equine$irt)

    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on the cortisol column of equine
shapiro.test(equine$cortisol)

    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10
# P < 0.05 therefore the data is not normally distributed 

# Spearman rank correlation test between irt and cortisol
cor.test(equine$irt, equine$cortisol, method = "spearman")

    Spearman's rank correlation rho

data:  equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346 
# P < 0.05 therefore there is a significant correlation between eye temperature and blood cortisol levels. This result accepts the hypothesis.
Graph (Scatterplot)
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Scatterplot of cortisol against eye temperature: raw points first, then a
# blue least-squares line (no confidence band) drawn on top
ggplot(data = equine, mapping = aes(x = irt, y = cortisol)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  # Title and axis labels
  labs(
    title = "Correlation between IRT and Cortisol Levels",
    x = "Thermographic Eye Temperature (Celsius)",
    y = "Blood Cortisol Levels (mcg/dl)"
  ) +
  # Minimal theme
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

4. Does the application of the calming spray have an effect on compliance time, if so what is it?

Hypothesis
# It is hypothesised there will be a difference. When the calming spray is applied, compliance time will decrease.
Results
# Normality is assessed on the residuals of a linear model relating the two compliance time measurements

# Fit a linear model of comp on comp2 using the eqsub data
# NOTE(review): the test applied afterwards is a paired comparison, whose
# normality assumption concerns the paired differences (comp - comp2) rather
# than the residuals of a regression of one measurement on the other - confirm
# that this is the intended check
model <- lm (comp ~ comp2, data = eqsub)

# Extract the residuals from the fitted model
residuals <- residuals(model)

# Visual checks: a histogram of the residuals, then a normal Q-Q plot with a
# red reference line
hist(residuals, main = "Histogram of Residuals")

qqnorm(residuals)
qqline(residuals, col = "red")

# Formal check: Shapiro-Wilk normality test on the residuals
shapiro.test(residuals)

    Shapiro-Wilk normality test

data:  residuals
W = 0.48102, p-value < 2.2e-16
# P < 0.05 therefore the residuals are not normally distributed

# Perform the Wilcoxon signed-rank test: paired = TRUE compares comp with comp2
# within each row of eqsub
wilcox_test <- wilcox.test(eqsub$comp, eqsub$comp2, paired = TRUE)

# Print the stored test result
print(wilcox_test)

    Wilcoxon signed rank test with continuity correction

data:  eqsub$comp and eqsub$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# P < 0.05 therefore there is a significant difference between compliance time before the calming spray and after the application of the calming spray. This result accepts the hypothesis.
Graph (Box Plot)
# Load necessary libraries
library(ggplot2)
library(tidyr)

# Plot the measured compliance times from eqsub. The previous version built the
# plot from five hard-coded sample values, so the figure did not show the data
# that the Wilcoxon test above was run on.
df <- eqsub[, c("comp", "comp2")]

# Reshape to long format: one row per measurement, with the source column name
# in "Variable" and the measured time in "Value"
df_long <- pivot_longer(
  df,
  cols = c(comp, comp2),
  names_to = "Variable",
  values_to = "Value"
)

# Box plot of the two compliance time measurements side by side
ggplot(df_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot() +
  # Title and axis labels
  labs(title = "The Difference in Compliance Time When Applying a Calming Spray",
       x = "Treatment",
       y = "Compliance Time") +
  # Minimal theme and a fixed fill colour for each measurement
  theme_minimal() +
  scale_fill_manual(values = c("comp" = "skyblue", "comp2" = "lightpink"),
                    name = "Treatment")

5. Is it possible to predict compliance time?

Hypothesis
# The compliance time can be predicted, as time will decrease with the application of the calming spray.
Results
# Fit a simple linear regression predicting comp2 from comp using the eqsub data
model <- lm(comp2 ~ comp, data = eqsub)

# Print the coefficient table, residual summary and R-squared of the fitted model
summary(model)

Call:
lm(formula = comp2 ~ comp, data = eqsub)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.01780 -0.02830  0.03085  0.06742  2.60626 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 4.571268   0.184969   24.71   <2e-16 ***
comp        0.722302   0.005262  137.28   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1678 on 496 degrees of freedom
Multiple R-squared:  0.9744,    Adjusted R-squared:  0.9743 
F-statistic: 1.884e+04 on 1 and 496 DF,  p-value: < 2.2e-16
Graph (Scatterplot)
# ggplot2 provides the plotting functions used below
library(ggplot2)

# Refit the regression of comp2 on comp
model <- lm(formula = comp2 ~ comp, data = eqsub)

# Scatterplot of comp2 against comp: semi-transparent black points with a blue
# least-squares line (no confidence band) on top
ggplot(data = eqsub, mapping = aes(x = comp, y = comp2)) +
  geom_point(alpha = 0.5, color = "black") +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Linear Regression of Compliance Time",
    x = "Comp",
    y = "Comp2"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'