# Setup ----
# Directory holding the coursework data. It is kept in a variable and combined
# with file.path() instead of being passed to setwd(), so the script no longer
# changes the session's working directory as a side effect. Point project_dir
# at your own copy of the project to run the analysis on another machine.
project_dir <- "/Users/Laura/Documents/Equine Masters/Data Analysis & Research Methods"
data_dir <- file.path(project_dir, "Equine_Summative_NEW")

# Both files are tab-separated with a header row.
equine <- read.table(file.path(data_dir, "equine.txt"), header = TRUE, sep = "\t")

eqsub <- read.table(file.path(data_dir, "eqsub.txt"), header = TRUE, sep = "\t")

Equine Research Formative

Data Analysis

A number of variables are investigated in order to determine how they affect the behaviour and performance of horses. Gender is one such element that could influence how quickly horses comply with assignments or training. Determining whether compliance time varies between male and female horses may offer important information for enhancing training methods and maximising performance. This raises the question: How does a horse’s gender affect compliance time?

1. What is the effect of gender on compliance time?


Hypothesis

Male and female horses will show no difference in compliance time, with neither gender responding more quickly during training tasks.

Results

Residuals are used in the Shapiro-Wilk test to assess normality after fitting a regression model. They represent the unexplained variation in the data. If the residuals are normally distributed (p-value > 0.05), it suggests the model is appropriate. If not (p-value < 0.05), it indicates potential issues with the model, affecting the validity of conclusions about the effect of gender on compliance time.

# Normality check on the residuals of the sex model ----
# Linear model with compliance time as the response and sex as the predictor
model <- lm(comp ~ sex, data = equine)
# Residuals of the fitted model
residuals <- resid(model)

# Graphical checks: a histogram, then a normal Q-Q plot with a red
# reference line
hist(residuals, main = "Histogram of Residuals")
qqnorm(residuals)
qqline(residuals, col = "red")

# Formal check: Shapiro-Wilk test on the residuals
shapiro.test(residuals)

    Shapiro-Wilk normality test

data:  residuals
W = 0.99738, p-value = 0.6242
# Shapiro-Wilk p > 0.05, so the residuals are treated as normally distributed
# and a parametric comparison of the two sexes is used.
# Independent two-sample t-test of 'comp' split by 'sex'. Equal variances are
# not assumed (R's default), i.e. this is the Welch form of the test.
t_test_result <- t.test(comp ~ sex, data = equine, var.equal = FALSE)

# Display the test summary
print(t_test_result)

    Welch Two Sample t-test

data:  comp by sex
t = -2.8315, df = 494.83, p-value = 0.004821
alternative hypothesis: true difference in means between group Female and group Male is not equal to 0
95 percent confidence interval:
 -0.6105344 -0.1103336
sample estimates:
mean in group Female   mean in group Male 
            34.94452             35.30496 
# ggplot2 supplies the plotting functions used below
library(ggplot2)

# Box plot of compliance time for each sex
ggplot(data = equine, mapping = aes(x = sex, y = comp, fill = sex)) +
  # Boxes keep a black outline; the fill colour is mapped to 'sex'
  geom_boxplot(colour = "black") +
  # Overlay each group's mean as a red diamond (shape 18)
  stat_summary(geom = "point", fun = mean, shape = 18, size = 3, colour = "red") +
  # Custom fill colour for each sex
  scale_fill_manual(values = c("Male" = "skyblue", "Female" = "lightpink")) +
  theme_minimal() +
  # Axis labels and title
  labs(title = "Compliance Time by Gender", x = "Sex", y = "Compliance Time (s)")

# P < 0.05 therefore, there is a significant difference in compliance time between mares and geldings. This result rejects the hypothesis as mares had faster compliance. 

It is essential to comprehend how physiological factors impact behaviour in order to enhance training techniques and equine management. A horse’s stress levels, physical state, and general well-being can be inferred from variables including heart rate, cortisol levels, and infrared temperature (IRT). These elements could have a big impact on how quickly a horse obeys commands or training. This raises the following query: How do physiological factors (heart rate, cortisol level, and IRT) affect compliance time in horses?

2. What is the effect of physiological variables (IRT, cortisol level and heart rate) on compliance time?


What is the relationship between Thermographic Eye Temperature (°C) and Compliance Time (seconds)?

Hypothesis

A higher Thermographic Eye Temperature (°C) will be positively correlated with longer compliance times in horses, indicating that increased eye temperature, potentially a sign of stress or discomfort, may delay a horse’s ability to comply with tasks.

Results
# Shapiro-Wilk normality test on thermographic eye temperature (irt); the
# result decides whether Pearson or Spearman correlation is used below.

shapiro.test(equine$irt) 

    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp) 

    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695
# irt departs from normality (p < 0.05) although comp does not (p > 0.05).
# Because one of the two variables is not normally distributed, Spearman's
# rank correlation is used instead of Pearson's.

cor.test(equine$irt, equine$comp, method = "spearman") 

    Spearman's rank correlation rho

data:  equine$irt and equine$comp
S = 21203514, p-value = 0.5028
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.03008441 
# Reading the correlation coefficient ----
# cor.test() above reports Spearman's rho, which gives the direction and
# strength of the monotonic relationship between two variables (Pearson's r
# would describe the linear relationship). The closer the absolute value is
# to 1, the stronger the relationship:
#
#   |rho| < 0.1        : very weak / negligible
#   0.1 <= |rho| < 0.3 : weak
#   0.3 <= |rho| < 0.5 : moderate
#   0.5 <= |rho| < 0.7 : strong
#   0.7 <= |rho| < 1.0 : very strong
library(ggplot2)

# Scatter plot of compliance time against thermographic eye temperature
ggplot(data = equine, mapping = aes(x = irt, y = comp)) +
  # One point per row of the data
  geom_point() +
  # 95% data ellipse, drawn in blue
  stat_ellipse(colour = "blue", level = 0.95) +
  labs(
    x = "Thermographic Eye Temperature (°C)",
    y = "Compliance Time (seconds)",
    title = "Compliance Time vs. Thermographic Eye Temperature"
  )

# rho = -0.03 with p = 0.50: a very weak negative monotonic relationship that
# is not statistically significant. The hypothesis is therefore not supported:
# there is no evidence that a higher eye temperature delays a horse's ability
# to comply with tasks.

What is the relationship between Blood Cortisol Levels (mcg/dL) and Compliance Time (seconds)?

Hypothesis

Higher blood cortisol levels (mcg/dL) will be positively correlated with longer compliance times in horses, suggesting that increased cortisol, a marker of stress, may lead to slower responses and delays in completing training tasks.

Results
# Shapiro-Wilk normality test on blood cortisol (cortisol); the result
# decides whether Pearson or Spearman correlation is used below.

shapiro.test(equine$cortisol) 

    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp) 

    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695
# cortisol departs from normality (p < 0.05) although comp does not, so
# Spearman's rank correlation is used instead of Pearson's.
cor.test(equine$cortisol, equine$comp, method = "spearman") 

    Spearman's rank correlation rho

data:  equine$cortisol and equine$comp
S = 21166080, p-value = 0.529
alternative hypothesis: true rho is not equal to 0
sample estimates:
        rho 
-0.02826584 

# Reading the correlation coefficient ----
# cor.test() above reports Spearman's rho, which gives the direction and
# strength of the monotonic relationship between two variables (Pearson's r
# would describe the linear relationship). The closer the absolute value is
# to 1, the stronger the relationship:
#
#   |rho| < 0.1        : very weak / negligible
#   0.1 <= |rho| < 0.3 : weak
#   0.3 <= |rho| < 0.5 : moderate
#   0.5 <= |rho| < 0.7 : strong
#   0.7 <= |rho| < 1.0 : very strong
library(ggplot2)

# Scatter plot of compliance time against blood cortisol.
# The x aesthetic is 'cortisol': the earlier version mapped 'irt' here, so the
# figure showed eye temperature under a "Blood Cortisol" title and axis label.
ggplot(equine, aes(x = cortisol, y = comp)) +
  # One point per row of the data
  geom_point() +
  # 95% data ellipse, drawn in blue
  stat_ellipse(level = 0.95, color = "blue") +
  labs(
    title = "Compliance Time vs. Blood Cortisol",
    x = "Blood Cortisol (mcg/dL)",
    y = "Compliance Time (seconds)"
  )

# rho = -0.03 with p = 0.53: a very weak negative monotonic relationship that
# is not statistically significant. The hypothesis is therefore not supported:
# there is no evidence that increased cortisol leads to slower responses or
# delays in completing training tasks.

What is the relationship between Heart Rate (bpm) and Compliance Time (seconds)?

Hypothesis

A higher heart rate (bpm) will be positively correlated with longer compliance times in horses, indicating that increased heart rate, likely associated with stress or physical exertion, may result in slower responses and delays in complying with tasks.

Results
# Test each variable for normality before choosing the correlation method

# Shapiro-Wilk normality test on heart rate (BPM)

shapiro.test(equine$BPM) 

    Shapiro-Wilk normality test

data:  equine$BPM
W = 0.99739, p-value = 0.6264
# Shapiro-Wilk normality test on compliance time (comp)
shapiro.test(equine$comp) 

    Shapiro-Wilk normality test

data:  equine$comp
W = 0.99692, p-value = 0.4695
# Both p-values are > 0.05, so BPM and comp are treated as normally
# distributed and Pearson's correlation (the cor.test() default) is used.
# cor.test() also reports whether the correlation differs significantly from 0.
cor.test(equine$BPM, equine$comp) 

    Pearson's product-moment correlation

data:  equine$BPM and equine$comp
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6689964 0.7552703
sample estimates:
      cor 
0.7148428 
# Reading the correlation coefficient ----
# Pearson's r, reported by cor.test() above, gives the direction and strength
# of the linear relationship between two continuous variables. The closer the
# absolute value is to 1, the stronger the relationship:
#
#   0.1 <= |r| < 0.3 : weak
#   0.3 <= |r| < 0.5 : moderate
#   0.5 <= |r| < 0.7 : strong
#   0.7 <= |r| < 1.0 : very strong
library(ggplot2)

# Scatter plot of compliance time against heart rate
ggplot(data = equine, mapping = aes(x = BPM, y = comp)) +
  # One point per row of the data
  geom_point() +
  # 95% data ellipse, drawn in blue
  stat_ellipse(colour = "blue", level = 0.95) +
  labs(
    x = "Heart Rate (BPM)",
    y = "Compliance Time (seconds)",
    title = "Compliance Time vs. Heart Rate "
  )

# r = 0.71 with p < 0.001: a very strong positive linear relationship between
# compliance time and heart rate (0.7 <= |r| on the scale above). This
# supports the hypothesis: a higher heart rate is associated with slower
# responses and delays in complying with tasks.

3. What is the correlation between Infrared Thermography (IRT) measurements and Cortisol levels?


Hypothesis

There will be a positive correlation between Infrared Thermography (IRT) measurements and cortisol levels, suggesting that higher infrared temperature readings, which may indicate stress or discomfort, will be associated with elevated cortisol levels in horses.

Results
# Shapiro-Wilk normality test on thermographic eye temperature (irt)
shapiro.test(equine$irt) 

    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk normality test on blood cortisol (cortisol)
shapiro.test(equine$cortisol) 

    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10
# Both p-values are < 0.05, so neither irt nor cortisol is normally
# distributed and Spearman's rank correlation is used.
cor.test(equine$irt, equine$cortisol, method = "spearman") 

    Spearman's rank correlation rho

data:  equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346 
# Reading the correlation coefficient ----
# cor.test() above reports Spearman's rho, which gives the direction and
# strength of the monotonic relationship between two variables (Pearson's r
# would describe the linear relationship). The closer the absolute value is
# to 1, the stronger the relationship:
#
#   0.1 <= |rho| < 0.3 : weak
#   0.3 <= |rho| < 0.5 : moderate
#   0.5 <= |rho| < 0.7 : strong
#   0.7 <= |rho| < 1.0 : very strong
library(ggplot2)

# Scatter plot of blood cortisol against thermographic eye temperature
ggplot(data = equine, mapping = aes(x = irt, y = cortisol)) +
  # One point per row of the data
  geom_point() +
  # 95% data ellipse, drawn in blue
  stat_ellipse(colour = "blue", level = 0.95) +
  theme_minimal() +
  # Axis labels and title
  labs(
    x = "Thermographic Eye Temperature (°C)",
    y = "Blood Cortisol Levels (mcg/dl)",
    title = "Correlation between IRT and Cortisol Levels"
  )

# rho = 0.13 with p = 0.003: a weak but statistically significant positive
# monotonic relationship between thermographic eye temperature and cortisol
# levels. This supports the hypothesis: higher infrared temperature readings
# are associated with elevated cortisol levels in horses.

Numerous techniques are investigated to enhance a horse’s reactivity and lessen stress while doing activities. Applying calming sprays, which are intended to reduce anxiety and encourage relaxation, is one such technique. Approaches to horse management may be improved if it is known whether the use of calming sprays influences how quickly a horse complies during training. This raises the question: What is the effect of calming spray on horses’ compliance time?

4. How does the application of calming spray influence compliance time?


Hypothesis

The application of calming spray will result in a reduction in compliance time, as the spray is expected to lower stress levels and promote relaxation, enabling horses to respond more quickly and efficiently during training tasks.

Results

# Reported conclusion: P < 0.05, i.e. a significant difference in compliance
# time before and after the calming spray was applied, which supports the
# hypothesis.
# NOTE(review): no test producing this p-value appears in this part of the
# script, and the data frame below holds five hard-coded value pairs rather
# than values read from a data file -- confirm where the result came from.

# Compliance times for the two conditions
# (presumably comp = before the spray, comp2 = after -- TODO confirm)
df <- data.frame(comp = c(34.9, 35.1, 36.0, 34.5, 35.2),
                 comp2 = c(33.8, 36.1, 34.7, 35.5, 36.3))

# Packages used for plotting (ggplot2) and reshaping (tidyr)
library(ggplot2)
library(tidyr)
Warning: package 'tidyr' was built under R version 4.4.1
# Long format: one row per measurement, with the source column name stored in
# 'Variable' and the measurement in 'Value'
df_long <- pivot_longer(
  df,
  cols = c(comp, comp2),
  names_to = "Variable",
  values_to = "Value"
)

# Box plot comparing the two columns side by side
ggplot(data = df_long, mapping = aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot() +
  # Fill colour per column; the legend is titled "Treatment"
  scale_fill_manual(
    name = "Treatment",
    values = c("comp" = "skyblue", "comp2" = "lightpink")
  ) +
  theme_minimal() +
  # Axis labels and title
  labs(
    title = "The Difference in Compliance Time When Applying a Calming Spray",
    x = "Treatment",
    y = "Compliance Time (s)"
  )

Horses’ compliance time can be predicted by taking into account a number of elements that may affect their behaviour, such as ambient circumstances, gender and physiological characteristics including heart rate, cortisol levels, and infrared thermographic temperature. Furthermore, a variety of prediction models, including statistical techniques or machine learning algorithms, can be used to anticipate compliance time with differing degrees of accuracy. Gaining knowledge about the most important variables and how well these models predict compliance time will help with better management and training of horses. This raises the following query: What characteristics or factors influence compliance time prediction, and how precise is it possible to estimate compliance time using various predictive models?

5. What factors or variables contribute to predicting compliance time, and how accurately can compliance time be forecasted using different predictive models?


Hypothesis

The compliance time can be predicted, as compliance time will decrease with the application of the calming spray.

# Predictive models for compliance time ----
# BPM, cortisol and irt are continuous. Passing them to kruskal.test() as the
# grouping variable treats every distinct value as its own group, which is why
# each of those tests returned chi-squared = 497 on 497 degrees of freedom
# (p = 0.49): the result carries no information about prediction. Linear
# regression is used instead, with sex entering as a categorical predictor.
# NOTE: re-run this section to regenerate the console output; the earlier
# Kruskal-Wallis output has been removed because it no longer matches the code.

# Single-predictor model: heart rate only
model_bpm <- lm(comp ~ BPM, data = equine)
summary(model_bpm)

# Reduced model: heart rate and sex, the two variables that showed a
# significant association with compliance time earlier in this analysis
model_reduced <- lm(comp ~ BPM + sex, data = equine)
summary(model_reduced)

# Full model: all candidate predictors
model_full <- lm(comp ~ BPM + sex + cortisol + irt, data = equine)
summary(model_full)

# Model comparison. summary() above gives each model's (adjusted) R-squared
# and residual standard error as measures of predictive accuracy. AIC
# penalises extra terms (lower is better), and anova() tests whether each
# added set of terms significantly reduces the residual sum of squares.
AIC(model_bpm, model_reduced, model_full)
anova(model_bpm, model_reduced, model_full)