Research Summative NEW

Loading Data

eqsub <-read.table('~/Equine Summative NEW/eqsub1.txt', header = TRUE, sep = "\t")
equine <-read.table('~/Equine Summative NEW/Equine.txt', header = TRUE, sep = "\t")

1. Is there a Correlation Between IRT and Blood Cortisol Levels?

Testing for Normality

# Based on a normality testing, this question requires each variable to be tested separately 
# Perform Kolmogorov-Smirnov test of IRT 
ks.test(equine$irt, "pnorm", mean = mean(equine$irt), sd = sd(equine$irt))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$irt
D = 0.062317, p-value = 0.0418
alternative hypothesis: two-sided
# Perform Kolmogorov-Smirnov test of Cortisol 
ks.test(equine$cortisol, "pnorm", mean = mean(equine$cortisol), sd = sd(equine$cortisol))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$cortisol
D = 0.061555, p-value = 0.04593
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed 

Results

cor.test(equine$irt, equine$cortisol, method = "spearman")

    Spearman's rank correlation rho

data:  equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346 
# P < 0.05 and r < 0.20 therefore there is a significant weak correlation between IRT and blood cortisol levels. 

Scatter Plot

library(ggplot2)

# Call Variables 
X <- equine$irt
Y <- equine$cortisol

# Plot
ggplot(equine, aes(x = irt, y = cortisol)) +
  geom_point(color = "black", size = 1) +

# Labeling Axis' and Titles
  labs(title = "Correlation between IRT and Cortisol Levels",
       x = "Thermographic Eye Temperature (°C)",
       y = "Blood Cortisol Levels (mcg/dl)")

2. Does Calming Spray have an effect on compliance time, if so what is it?

Testing for Normality

# Based on a normality testing, this question requires the residuals to be tested 

# Call Variables 
X <- eqsub$comp
Y <- eqsub$comp2

# Fit linear model
model <- lm(X ~ Y)

# Extract residuals
residuals <- residuals(model)

# Perform Kolmogorov-Smirnov test
ks.test(residuals, "pnorm", mean = mean(residuals), sd = sd(residuals))
Warning in ks.test.default(residuals, "pnorm", mean = mean(residuals), sd =
sd(residuals)): ties should not be present for the one-sample
Kolmogorov-Smirnov test

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  residuals
D = 0.21835, p-value < 2.2e-16
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed

Results

# Non-normal distribution -> Wilcoxon Signed-Rank Test
  wilcox_test <- wilcox.test(X, Y, paired = TRUE)
  print(wilcox_test)

    Wilcoxon signed rank test with continuity correction

data:  X and Y
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# P < 0.05 therefore there is a significant difference 

Box Plot

# Sample Data 
df <- data.frame(
  comp = eqsub$comp,  # Compliance time before spray
  comp2 = eqsub$comp2 # Compliance time after spray
)

# Load necessary libraries
library(ggplot2)
library(tidyr)

# Reshape data to long format
df_long <- df %>%
  pivot_longer(cols = c(comp, comp2), names_to = "Variable", values_to = "Value")

# Create the box plot
ggplot(df_long, aes(x = Variable, y = Value, fill = Variable)) +
  geom_boxplot() +
  
# Labeling Axis and Titles
  labs(title = "The Difference in Compliance Time When Applying a Calming Spray",
       x = "Treatment",
       y = "Compliance Time (s)") +
  
# Add Colour to Variables
  theme_minimal() +
  scale_fill_manual(values = c("comp" = "skyblue", "comp2" = "lightpink"), name = "Treatment")

3. Is it possible to predict compliance time?

Testing normality

# Based on a normality testing, this question requires each variable to be tested separately 
# Perform Kolmogorov-Smirnov test of weight
ks.test(equine$weight, "pnorm", mean = mean(equine$weight), sd = sd(equine$weight))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$weight
D = 0.085047, p-value = 0.001487
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of IRT 
ks.test(equine$irt, "pnorm", mean = mean(equine$irt), sd = sd(equine$irt))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$irt
D = 0.062317, p-value = 0.0418
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of Cortisol 
ks.test(equine$cortisol, "pnorm", mean = mean(equine$cortisol), sd = sd(equine$cortisol))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$cortisol
D = 0.061555, p-value = 0.04593
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of Air
ks.test(equine$air, "pnorm", mean = mean(equine$air), sd = sd(equine$air))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$air
D = 0.068116, p-value = 0.01968
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of BPM
ks.test(equine$BPM, "pnorm", mean = mean(equine$BPM), sd = sd(equine$BPM))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$BPM
D = 0.031659, p-value = 0.7004
alternative hypothesis: two-sided
# P > 0.05 therefore the data is normally distributed
# Perform Kolmogorov-Smirnov test of compliacne time 
ks.test(equine$comp, "pnorm", mean = mean(equine$comp), sd = sd(equine$comp))

    Asymptotic one-sample Kolmogorov-Smirnov test

data:  equine$comp
D = 0.03136, p-value = 0.7115
alternative hypothesis: two-sided
# P > 0.05 therefore the data is normally distributed

Results

# Fit a linear regression model
model <- lm(comp ~ weight + irt + air + cortisol + BPM, data = equine)

# View model summary
summary(model)

Call:
lm(formula = comp ~ weight + irt + air + cortisol + BPM, data = equine)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.88796 -0.65339  0.00537  0.58312  2.79258 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.910e+01  3.641e+00 -10.739   <2e-16 ***
weight       1.325e-03  3.361e-03   0.394    0.694    
irt         -3.078e-02  3.191e-02  -0.964    0.335    
air         -2.256e-02  2.708e-02  -0.833    0.405    
cortisol    -8.361e-04  2.602e-03  -0.321    0.748    
BPM          5.048e-01  2.228e-02  22.661   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.003 on 492 degrees of freedom
Multiple R-squared:  0.5129,    Adjusted R-squared:  0.5079 
F-statistic: 103.6 on 5 and 492 DF,  p-value: < 2.2e-16
# Test all variables via Anova despite not all variables being normally distributed as parametric testing (Anova) is suitable due to the large sample size. 
anova(model)
Analysis of Variance Table

Response: comp
           Df Sum Sq Mean Sq  F value  Pr(>F)    
weight      1   0.09    0.09   0.0856 0.76999    
irt         1   1.39    1.39   1.3760 0.24135    
air         1   3.05    3.05   3.0276 0.08248 .  
cortisol    1   0.01    0.01   0.0140 0.90599    
BPM         1 516.94  516.94 513.5131 < 2e-16 ***
Residuals 492 495.29    1.01                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation Between comp and BPM
# P < 0.05 for BPM and cortisol therefore the data is normally distributed. 

Results

# Pearson correlation test 
cor.test(equine$comp, equine$BPM, method = c ("pearson"))

    Pearson's product-moment correlation

data:  equine$comp and equine$BPM
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6689964 0.7552703
sample estimates:
      cor 
0.7148428 
# P < 2.2e-16 and r= 0.71 therefore there is a significantly strong correlation.
data <- equine

# Fit a linear regression model
model <- lm(comp ~ BPM, data = data)

# View model summary
summary(model)

Call:
lm(formula = comp ~ BPM, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81905 -0.65308 -0.01023  0.59085  2.83181 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -40.79081    3.33482  -12.23   <2e-16 ***
BPM           0.50564    0.02221   22.77   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared:  0.511, Adjusted R-squared:   0.51 
F-statistic: 518.3 on 1 and 496 DF,  p-value: < 2.2e-16
# P < 2.2e-16 and R2= 0.51 therefore the model explained 51% of the variance, with BPM significantly contributing to the prediction

Scatter Plot

BPM
# Load ggplot2 library
library(ggplot2)

# Create scatter plot with regression line
ggplot(data, aes(x = comp, y = BPM)) +
# Add Scatter points
  geom_point(color = "black", size = 1) +  
# Add Regression line
  geom_smooth(method = "lm", col = "blue") + 
# Add title and Label Axis 
  labs(title = "Scatterplot with Linear Regression Line",
       x = "Compliance time (s)",
       y = "Heart Rate (BPM)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'