EQUINE ANALYSIS KAMRAN BIN LATEEF(N1340143)

LOADING DATA

Load equine.csv

equine_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\equine.csv")

Load eqsub.csv

eqsub_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\eqsub.csv")

Display the first few rows of both datasets

head(equine_data)

     ID    sex     comp    weight      irt      air  cortisol      BPM
1 Eq001 Female 37.17081  74.69050 37.73957 23.37377  64.11183 153.2263
2 Eq002 Female 34.54973  73.41627 35.68413 21.42088  73.68676 149.6390
3 Eq003 Female 36.32861  71.80837 34.79854 20.05114  54.43466 149.4406
4 Eq004   Male 35.33881 104.62985 36.15973 21.64319  86.32615 150.2971
5 Eq005 Female 37.39799  67.13098 33.61477 21.76143 107.97796 149.0318
6 Eq006   Male 33.54668 110.49396 36.38343 20.85200 108.86475 146.8415

head(eqsub_data)

     ID    sex  comp comp2
1 EQ001 Female 37.17 31.50
2 EQ002 Female 34.55 29.63
3 EQ003 Female 36.33 30.80
4 EQ004   Male 35.34 30.14
5 EQ005 Female 37.40 31.66
6 EQ006   Male 33.55 28.57

Check the structure of the datasets

str(equine_data)

'data.frame':   498 obs. of  8 variables:
 $ ID      : chr  "Eq001" "Eq002" "Eq003" "Eq004" ...
 $ sex     : chr  "Female" "Female" "Female" "Male" ...
 $ comp    : num  37.2 34.5 36.3 35.3 37.4 ...
 $ weight  : num  74.7 73.4 71.8 104.6 67.1 ...
 $ irt     : num  37.7 35.7 34.8 36.2 33.6 ...
 $ air     : num  23.4 21.4 20.1 21.6 21.8 ...
 $ cortisol: num  64.1 73.7 54.4 86.3 108 ...
 $ BPM     : num  153 150 149 150 149 ...

str(eqsub_data)

'data.frame':   498 obs. of  4 variables:
 $ ID   : chr  "EQ001" "EQ002" "EQ003" "EQ004" ...
 $ sex  : chr  "Female" "Female" "Female" "Male" ...
 $ comp : num  37.2 34.5 36.3 35.3 37.4 ...
 $ comp2: num  31.5 29.6 30.8 30.1 31.7 ...

Get a summary of the datasets

summary(equine_data)

      ID                sex                 comp           weight      
 Length:498         Length:498         Min.   :30.78   Min.   : 65.10  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.: 75.67  
 Mode  :character   Mode  :character   Median :35.04   Median : 87.82  
                                       Mean   :35.12   Mean   : 87.93  
                                       3rd Qu.:36.05   3rd Qu.:100.34  
                                       Max.   :40.08   Max.   :110.94  
      irt             air           cortisol           BPM       
 Min.   :33.00   Min.   :20.01   Min.   : 50.03   Min.   :144.6  
 1st Qu.:34.42   1st Qu.:21.54   1st Qu.: 67.27   1st Qu.:148.9  
 Median :35.43   Median :23.11   Median : 82.43   Median :150.1  
 Mean   :35.54   Mean   :23.02   Mean   : 82.00   Mean   :150.1  
 3rd Qu.:36.71   3rd Qu.:24.35   3rd Qu.: 95.96   3rd Qu.:151.3  
 Max.   :38.00   Max.   :25.99   Max.   :112.45   Max.   :156.8

summary(eqsub_data)

      ID                sex                 comp           comp2      
 Length:498         Length:498         Min.   :30.78   Min.   :27.37  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.:29.23  
 Mode  :character   Mode  :character   Median :35.04   Median :29.99  
                                       Mean   :35.12   Mean   :29.94  
                                       3rd Qu.:36.05   3rd Qu.:30.65  
                                       Max.   :40.08   Max.   :32.81

###Histogram for Compliance Time (comp)

# Histogram for compliance time
hist(equine_data$comp, 
     main = "Distribution of Compliance Time", 
     xlab = "Compliance Time (Seconds)", 
     col = "lightblue", 
     border = "black")

###Boxplot for Cortisol Levels by Sex

# Boxplot for cortisol levels by sex
boxplot(cortisol ~ sex, 
        data = equine_data, 
        main = "Cortisol Levels by Sex", 
        xlab = "Sex", 
        ylab = "Cortisol (mcg/dl)", 
        col = c("pink", "lightblue"))

###Scatter plot for irt vs cortisol

# Scatterplot for irt vs cortisol
plot(equine_data$irt, equine_data$cortisol, 
     main = "Scatterplot: IRT vs Cortisol", 
     xlab = "IRT (°C)", 
     ylab = "Cortisol (mcg/dl)", 
     col = "blue", 
     pch = 19)

###Scatter plot for weight vs BPM

# Scatterplot for weight vs BPM
plot(equine_data$weight, equine_data$BPM, 
     main = "Scatterplot: Weight vs BPM", 
     xlab = "Weight (Kg)", 
     ylab = "BPM (Beats per Minute)", 
     col = "green", 
     pch = 19)

###Scatter plot for Compliance Time Before and After Spray

# Scatter plot for compliance time before and after spray
plot(eqsub_data$comp, eqsub_data$comp2, 
     main = "Scatterplot: Compliance Time Before vs After Spray",
     xlab = "Compliance Time Before Spray (Seconds)",
     ylab = "Compliance Time After Spray (Seconds)",
     col = "blue", 
     pch = 19)
# Add a diagonal line for reference
abline(a = 0, b = 1, col = "red", lty = 2)

Q1 Is there a correlation between the variables irt and cortisol?

Scatter plot for IRT vs Cortisol

plot(equine_data$irt, equine_data$cortisol,
     main = "Scatterplot: IRT vs Cortisol",
     xlab = "IRT (°C)",
     ylab = "Cortisol (mcg/dl)",
     col = "blue", pch = 16)

Shapiro-Wilk test for normality

shapiro.test(equine_data$irt)


    Shapiro-Wilk normality test

data:  equine_data$irt
W = 0.95889, p-value = 1.447e-10

shapiro.test(equine_data$cortisol)


    Shapiro-Wilk normality test

data:  equine_data$cortisol
W = 0.96362, p-value = 9.245e-10

Spearman correlation

cor.test(equine_data$irt, equine_data$cortisol, method = "spearman")


    Spearman's rank correlation rho

data:  equine_data$irt and equine_data$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346

Scatterplot for IRT vs Cortisol using ggplot2

library(ggplot2)

Warning: package 'ggplot2' was built under R version 4.4.2

ggplot(equine_data, aes(x = irt, y = cortisol)) +
  geom_point(color = "blue", size = 2) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(title = "Scatterplot: IRT vs Cortisol",
       x = "IRT (°C)",
       y = "Cortisol (mcg/dl)") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

Q2 Does the application of the calming spray have an effect on compliance time?

Calculate the differences

diff_comp <- eqsub_data$comp2 - eqsub_data$comp

Shapiro-Wilk test for normality of differences

shapiro.test(diff_comp)


    Shapiro-Wilk normality test

data:  diff_comp
W = 0.84738, p-value < 2.2e-16

Perform Wilcoxon Signed-Rank Test

wilcox.test(eqsub_data$comp, eqsub_data$comp2, paired = TRUE, alternative = "two.sided")


    Wilcoxon signed rank test with continuity correction

data:  eqsub_data$comp and eqsub_data$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0

###MEDIAN

median_before <- median(eqsub_data$comp)
median_after <- median(eqsub_data$comp2)
median_before

[1] 35.04

median_after

[1] 29.99

###SCATTER PLOT RESULT

library(ggplot2)

# Scatter plot for compliance time before and after spray
ggplot(eqsub_data, aes(x = comp, y = comp2)) +
  geom_point(color = "blue") +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  labs(title = "Compliance Time: Before vs After Spray",
       x = "Compliance Time Before Spray (Seconds)",
       y = "Compliance Time After Spray (Seconds)") +
  theme_minimal()

Q3 Is it possible to predict compliance time?

Scatter plot for weight vs compliance time

library(ggplot2)

ggplot(equine_data, aes(x = weight, y = comp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  labs(title = "Compliance Time vs Weight",
       x = "Weight (Kg)",
       y = "Compliance Time (Seconds)") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

Scatter plot with regression line

library(ggplot2)

ggplot(equine_data, aes(x = BPM, y = comp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(title = "Compliance Time vs BPM",
       x = "BPM (Beats Per Minute)",
       y = "Compliance Time (Seconds)") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

Scatter plot with regression line

library(ggplot2)

ggplot(equine_data, aes(x = irt, y = comp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(title = "Compliance Time vs IRT",
       x = "IRT (°C)",
       y = "Compliance Time (Seconds)") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

Scatter plot with regression line

library(ggplot2)

ggplot(equine_data, aes(x = air, y = comp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(title = "Compliance Time vs Air Temperature",
       x = "Air Temperature (°C)",
       y = "Compliance Time (Seconds)") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

Scatter plot with regression line

library(ggplot2)

ggplot(equine_data, aes(x = cortisol, y = comp)) +
  geom_point(color = "blue") +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(title = "Compliance Time vs Cortisol Levels",
       x = "Cortisol Levels (mcg/dl)",
       y = "Compliance Time (Seconds)") +
  theme_minimal()

`geom_smooth()` using formula = 'y ~ x'

Fit a linear regression model

lm_bpm <- lm(comp ~ BPM, data = equine_data)

Display the summary of the regression

summary(lm_bpm)


Call:
lm(formula = comp ~ BPM, data = equine_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81905 -0.65308 -0.01023  0.59085  2.83181 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -40.79081    3.33482  -12.23   <2e-16 ***
BPM           0.50564    0.02221   22.77   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared:  0.511, Adjusted R-squared:   0.51 
F-statistic: 518.3 on 1 and 496 DF,  p-value: < 2.2e-16

shapiro.test(residuals(lm_bpm))


    Shapiro-Wilk normality test

data:  residuals(lm_bpm)
W = 0.99741, p-value = 0.6327

Residuals vs Fitted Values Plot

plot(fitted(lm_bpm), residuals(lm_bpm),
     main = "Residuals vs Fitted Values",
     xlab = "Fitted Values",
     ylab = "Residuals",
     col = "blue",
     pch = 19)
abline(h = 0, col = "red", lty = 2)

# Install and load the 'lmtest' package if not already installed
if (!require("lmtest")) install.packages("lmtest")

Loading required package: lmtest

Loading required package: zoo


Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

library(lmtest)

# Perform Durbin-Watson test
dw_test <- dwtest(lm_bpm)
print(dw_test)


    Durbin-Watson test

data:  lm_bpm
DW = 2.0934, p-value = 0.8519
alternative hypothesis: true autocorrelation is greater than 0

# Calculate RMSE
rmse <- sqrt(mean(residuals(lm_bpm)^2))
print(paste("RMSE:", rmse))

[1] "RMSE: 0.999194284706538"

# Calculate MAE
mae <- mean(abs(residuals(lm_bpm)))
print(paste("MAE:", mae))

[1] "MAE: 0.787982898953818"

Scatterplot of Actual vs Predicted Values

plot(equine_data$comp, fitted(lm_bpm),
     main = "Actual vs Predicted Compliance Time",
     xlab = "Actual Compliance Time (Seconds)",
     ylab = "Predicted Compliance Time (Seconds)",
     col = "blue", pch = 19)
abline(a = 0, b = 1, col = "red", lty = 2) # Diagonal line for reference