# Point the session at the module folder so the data paths below can be
# written relative to it.
# NOTE(review): this absolute path is machine-specific; the script only runs
# unchanged on a machine with the same folder layout.
setwd(dir = "/Users/Laura/Documents/Equine Masters/Data Analysis & Research Methods/")

# Both data sets are tab-delimited text files whose first row holds the
# column names.
equine <- read.table(
  file = "Equine_Summative_NEW/equine.txt",
  sep = "\t",
  header = TRUE
)

eqsub <- read.table(
  file = "Equine_Summative_NEW/eqsub.txt",
  sep = "\t",
  header = TRUE
)

ARES40011 - Research Methods and Data Analysis

N1022545

Data Analysis Results

1. What is the effect of gender on compliance time?

Hypothesis

Male and female horses will show no difference in compliance time, with neither gender showing quicker responsiveness during training tasks.

Results

Residuals are used in the Shapiro-Wilk test to assess normality after fitting a regression model. They represent the unexplained variation in the data. If the residuals are normally distributed (p-value > 0.05), it suggests the model is appropriate. If not (p-value < 0.05), it indicates potential issues with the model, affecting the validity of conclusions about the effect of gender on compliance time.

# Check the normality assumption on the residuals of a linear model of
# compliance time by sex.
# Fit the linear model
model <- lm(formula = comp ~ sex, data = equine)
# Pull the residuals out of the fitted model
residuals <- resid(model)
# Graphical checks: a histogram, then a normal Q-Q plot with a red reference
# line
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")

# Formal check: Shapiro-Wilk test on the same residuals
shapiro.test(residuals)


    Shapiro-Wilk normality test

data:  residuals
W = 0.99738, p-value = 0.6242

# P > 0.05 therefore the residuals are normally distributed

# The residuals passed the normality check, so compare mean compliance time
# between the sexes with an independent two-sample t-test.
# var.equal = FALSE is the t.test() default: it requests the Welch test, which
# does not assume equal variances in the two groups.
t_test_result <- t.test(
  comp ~ sex,
  data = equine,
  var.equal = FALSE
)

# Show the test output
print(t_test_result)


    Welch Two Sample t-test

data:  comp by sex
t = -2.8315, df = 494.83, p-value = 0.004821
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6105344 -0.1103336
sample estimates:
mean in group Female   mean in group Male 
            34.94452             35.30496

P < 0.05; therefore, there is a significant difference in compliance time between mares and geldings. This result rejects the hypothesis, as mares had faster compliance. We can visualize the difference between the two genders using a boxplot.

# ggplot2 supplies the plotting functions used below
library(ggplot2)

# Box plot of compliance time for each sex:
#   - boxes are filled by sex (pink for Female, blue for Male), black outlines
#   - the red diamond (shape 18) marks each group's mean
ggplot(data = equine, mapping = aes(x = sex, y = comp, fill = sex)) +
  geom_boxplot(colour = "black") +
  stat_summary(
    fun = mean,
    geom = "point",
    shape = 18,
    size = 3,
    colour = "red"
  ) +
  scale_fill_manual(values = c("Male" = "skyblue", "Female" = "lightpink")) +
  labs(
    title = "Compliance Time by Gender",
    x = "Sex",
    y = "Compliance Time (s)"
  ) +
  theme_minimal()

2. How does the application of calming spray influence compliance time?

Hypothesis

The use of a calming spray is believed to reduce horse compliance time since the spray is thought to lessen stress and induce calmness. It is anticipated that the horses will react faster following the administration of the calming spray.

Results

To test this hypothesis, we first need to verify if the compliance times (before and after the application of the calming spray) are normally distributed, as this will determine the type of statistical test to use. A Shapiro-Wilk test should be used to assess the normality of the data for compliance time before and after the calming spray.

# Shapiro-Wilk normality test on the pre-spray compliance times
# (column comp of equine).
# NOTE(review): a paired t-test assumes the paired differences are normally
# distributed, so the differences (comp - comp2) may also need testing.

shapiro.test(equine$comp)


    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695

# Same test on the post-spray compliance times (column comp2 of eqsub)
shapiro.test(eqsub$comp2)


    Shapiro-Wilk normality test

data:  eqsub$comp2
W = 0.9943, p-value = 0.05974

# P > 0.05 for compliance time before and after spray, indicating that the data are normally distributed.

Since the data follows a normal distribution, we can proceed with a paired t-test to compare the means of compliance times before and after the calming spray.

The paired t-test is used here because we are comparing the same set of horses (pre and post spray compliance times).

# Paired t-test of compliance time before (equine$comp) versus after
# (eqsub$comp2) the calming spray.
# NOTE(review): the two vectors come from different data frames, so the pairing
# relies on both holding the same horses in the same row order - confirm.
t_test_result <- t.test(
  x = equine$comp,
  y = eqsub$comp2,
  paired = TRUE
)

# Auto-print the test output
t_test_result


    Paired t-test

data:  equine$comp and eqsub$comp2
t = 268.24, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 5.144711 5.220632
sample estimates:
mean difference 
       5.182672

The application of the calming spray led to a significant reduction in compliance time. This supports the hypothesis that the calming spray helps reduce stress, promoting quicker and more efficient responses from the horses. The p-value < 0.05 allows us to reject the null hypothesis (which would state that there is no difference between the compliance times before and after the spray) and conclude that the calming spray had a significant positive effect on reducing compliance time.

We can visualize the difference in compliance times using a boxplot, which illustrates the distribution of compliance times before and after the spray. The plot shows a noticeable decrease in compliance time after the calming spray.

library(ggplot2)

# Stack the pre- and post-spray compliance times into one long data frame
# (one row per measurement) so both groups can be drawn on a single plot.
data <- data.frame(
  Compliance_Time = c(equine$comp, eqsub$comp2),
  # Size each label run from the length of its own source vector, so the
  # labels always line up with the stacked values even if the two vectors
  # differ in length.
  Group = rep(
    c("Before Spray", "After Spray"),
    times = c(length(equine$comp), length(eqsub$comp2))
  )
)

# Fix the level order so "Before Spray" is drawn to the left of "After Spray"
# (the default alphabetical order would put "After Spray" first).
data$Group <- factor(data$Group, levels = c("Before Spray", "After Spray"))

# Side-by-side box plots of compliance time for the two groups
ggplot(data, aes(x = Group, y = Compliance_Time, fill = Group)) +
  geom_boxplot() +
  labs(
    title = "Compliance Times Before and After the Calming Spray",
    x = "Group",
    y = "Compliance Time (seconds)"
  ) +
  theme_minimal()

3. Is there a correlation between the variables IRT and cortisol?

Hypothesis

This study postulates a correlation between changes in thermographic eye temperature (IRT) and variations in physiological stress, as measured by cortisol levels. Variations in IRT may represent physiological and behavioural reactions to stress, as cortisol is a major stress hormone. In order to comprehend stress-induced behavioural changes better, the study aims to investigate the connection between cortisol levels and IRT.

Results

To test this hypothesis, we first need to verify if the variables IRT and cortisol are normally distributed, as this will determine the type of statistical test to use. A Shapiro-Wilk test should be used to assess the normality of the data.

# The Shapiro-Wilk test assesses whether IRT and cortisol each follow a normal
# distribution.
# If the p-value falls below 0.05, we reject the null hypothesis that the data
# are normally distributed.

# Shapiro-Wilk normality test on thermographic eye temperature (irt)

shapiro.test(equine$irt)


    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10

# Shapiro-Wilk normality test on cortisol
shapiro.test(equine$cortisol)


    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10

# Based on the findings:
# - Both tests give p < 0.05, indicating that neither variable is normally distributed.
# - Because the variables are not normally distributed, a non-parametric test is required.
# We therefore use Spearman's rank correlation, which does not assume that the data are normally distributed.

To analyse the link between IRT and cortisol, a Spearman’s rank correlation will be utilised, as the data was not normally distributed according to the Shapiro-Wilk test. Spearman’s correlation is a non-parametric method for assessing the strength and direction of a monotonic relationship between two variables.

# Spearman's rank correlation between eye temperature (irt) and cortisol.
# The non-parametric method is chosen because both variables failed the
# Shapiro-Wilk normality test.
cor.test(equine$irt, equine$cortisol, method = "spearman")


    Spearman's rank correlation rho

data:  equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346

# In correlation analysis, the correlation coefficient (rho) is the crucial value that indicates the strength and direction of the association between two continuous variables. Spearman's rho measures the monotonic (not necessarily linear) relationship between two variables.

Spearman’s rho (ρ = 0.13, p = 0.003) shows a very weak but statistically significant positive monotonic relationship between thermographic eye temperature and cortisol. This result supports the hypothesis that the two measures are correlated, although the association is weak. We can visualize the correlation between the variables IRT and cortisol using a scatterplot.

library(ggplot2)

# Scatter plot of cortisol against eye temperature, overlaid in blue with the
# ellipse drawn by stat_ellipse() at level = 0.95
ggplot(data = equine, mapping = aes(x = irt, y = cortisol)) +
  geom_point() +
  stat_ellipse(colour = "blue", level = 0.95) +
  labs(
    x = "Thermographic Eye Temperature (°C)",
    y = "Cortisol (mcg/dL)",
    title = "Thermographic Eye Temperature vs Cortisol"
  )

4. What physiological variables contribute to predicting compliance time and how accurately can compliance time be forecasted using different predictive models?

Hypothesis

It is essential to comprehend how physiological variables impact behaviour in order to enhance training techniques and equine management. A horse’s stress levels, physical state, and general well-being can be inferred from variables including heart rate, cortisol levels, and infrared temperature (IRT). These elements could have a big impact on how quickly a horse obeys commands or training. This raises the following query: What physiological variables (heart rate (bpm), cortisol and IRT) affect compliance time in horses? It is hypothesised that compliance time can be predicted by physiological characteristics.

Results

In order to choose the appropriate statistical test for testing this hypothesis, we must first confirm the distribution of all the variables and compliance time to ascertain normality. The data’s normality should be evaluated using the Shapiro-Wilk test.

# Shapiro-Wilk normality tests on each predictor and on the outcome,
# starting with thermographic eye temperature (irt)

shapiro.test(equine$irt)


    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10

# Shapiro-Wilk normality test on heart rate (BPM)
shapiro.test(equine$BPM)


    Shapiro-Wilk normality test

data:  equine$BPM
W = 0.99739, p-value = 0.6264

# Shapiro-Wilk normality test on cortisol
shapiro.test(equine$cortisol)


    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10

# Shapiro-Wilk normality test on compliance time (comp), the outcome variable
shapiro.test(equine$comp)


    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695

Since IRT and cortisol are not normally distributed, a Spearman’s rank correlation should be used. For heart rate and compliance time, which are normally distributed, a Pearson’s product-moment correlation should be used.

# Spearman's rank correlation between eye temperature (irt) and compliance time
cor.test(equine$irt, equine$comp, method = "spearman")


    Spearman's rank correlation rho

data:  equine$irt and equine$comp
S = 21203514, p-value = 0.5028
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.03008441

# Spearman's rank correlation between cortisol and compliance time
cor.test(equine$cortisol, equine$comp, method = "spearman")


    Spearman's rank correlation rho

data:  equine$cortisol and equine$comp
S = 21166080, p-value = 0.529
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.02826584

# Pearson correlation coefficient between heart rate (BPM) and compliance time.
# NOTE(review): cor() returns only the coefficient r; cor.test() would be
# needed to obtain a p-value for it.
with(equine, cor(BPM, comp, method = "pearson"))

[1] 0.7148428

There is no significant correlation (P > 0.05) between IRT and compliance time, or between cortisol and compliance time. However, there is a strong positive correlation between heart rate and compliance time (r = 0.715). This finding lends credence to the hypothesis, suggesting that heart rate can predict compliance time.

To predict compliance time, a multiple linear regression model was created using IRT, heart rate and cortisol as predictors.

# Multiple linear regression: compliance time modelled on eye temperature,
# heart rate and cortisol together
linear_model <- lm(
  formula = comp ~ irt + BPM + cortisol,
  data = equine
)

# Coefficient table, residual summary and overall fit statistics
summary(linear_model)


Call:
lm(formula = comp ~ irt + BPM + cortisol, data = equine)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.84567 -0.66433  0.00228  0.58716  2.79692 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.964e+01  3.530e+00 -11.228   <2e-16 ***
irt         -2.988e-02  3.176e-02  -0.941    0.347    
BPM          5.055e-01  2.223e-02  22.738   <2e-16 ***
cortisol    -8.752e-04  2.596e-03  -0.337    0.736    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.002 on 494 degrees of freedom
Multiple R-squared:  0.5121,    Adjusted R-squared:  0.5091 
F-statistic: 172.8 on 3 and 494 DF,  p-value: < 2.2e-16

Heart rate (BPM) is the strongest predictor of compliance time and is key in the linear regression model. IRT and cortisol had no discernible predictive value, whereas heart rate is a good indicator of compliance time in horses. Just over half of the variance in compliance time can be explained by the model, indicating that compliance time may also be impacted by additional factors not covered by the model.

Scatter plots with regression lines show the relationships between compliance time and each physiological variable (IRT, heart rate, cortisol). The strongest relationship is observed between heart rate and compliance time, as indicated by the slope of the regression line in the plot.

# Compliance time against eye temperature, with a fitted straight line
# (method = "lm") drawn in red
ggplot(data = equine, mapping = aes(x = irt, y = comp)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    x = "Thermographic Eye Temperature (°C)",
    y = "Compliance Time (seconds)",
    title = "Compliance Time vs Thermographic Eye Temperature (IRT)"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

# Compliance time against cortisol, with a fitted straight line
# (method = "lm") drawn in red
ggplot(data = equine, mapping = aes(x = cortisol, y = comp)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    x = "Cortisol (mcg/dL)",
    y = "Compliance Time (seconds)",
    title = "Compliance Time vs Cortisol"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

# Compliance time against heart rate, with a fitted straight line
# (method = "lm") drawn in red
ggplot(data = equine, mapping = aes(x = BPM, y = comp)) +
  geom_point() +
  geom_smooth(method = "lm", colour = "red") +
  labs(
    x = "Heart Rate (BPM)",
    y = "Compliance Time (seconds)",
    title = "Compliance Time vs Heart Rate"
  ) +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'