NEW MAIN ASSESSMENT

Load equine.csv

equine_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\equine.csv")

Load eqsub.csv

eqsub_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\eqsub.csv")

Display the first few rows of both datasets

head(equine_data)
     ID    sex     comp    weight      irt      air  cortisol      BPM
1 Eq001 Female 37.17081  74.69050 37.73957 23.37377  64.11183 153.2263
2 Eq002 Female 34.54973  73.41627 35.68413 21.42088  73.68676 149.6390
3 Eq003 Female 36.32861  71.80837 34.79854 20.05114  54.43466 149.4406
4 Eq004   Male 35.33881 104.62985 36.15973 21.64319  86.32615 150.2971
5 Eq005 Female 37.39799  67.13098 33.61477 21.76143 107.97796 149.0318
6 Eq006   Male 33.54668 110.49396 36.38343 20.85200 108.86475 146.8415
head(eqsub_data)
     ID    sex  comp comp2
1 EQ001 Female 37.17 31.50
2 EQ002 Female 34.55 29.63
3 EQ003 Female 36.33 30.80
4 EQ004   Male 35.34 30.14
5 EQ005 Female 37.40 31.66
6 EQ006   Male 33.55 28.57

Check the structure of the datasets

str(equine_data)
'data.frame':   498 obs. of  8 variables:
 $ ID      : chr  "Eq001" "Eq002" "Eq003" "Eq004" ...
 $ sex     : chr  "Female" "Female" "Female" "Male" ...
 $ comp    : num  37.2 34.5 36.3 35.3 37.4 ...
 $ weight  : num  74.7 73.4 71.8 104.6 67.1 ...
 $ irt     : num  37.7 35.7 34.8 36.2 33.6 ...
 $ air     : num  23.4 21.4 20.1 21.6 21.8 ...
 $ cortisol: num  64.1 73.7 54.4 86.3 108 ...
 $ BPM     : num  153 150 149 150 149 ...
str(eqsub_data)
'data.frame':   498 obs. of  4 variables:
 $ ID   : chr  "EQ001" "EQ002" "EQ003" "EQ004" ...
 $ sex  : chr  "Female" "Female" "Female" "Male" ...
 $ comp : num  37.2 34.5 36.3 35.3 37.4 ...
 $ comp2: num  31.5 29.6 30.8 30.1 31.7 ...

Get a summary of the datasets

summary(equine_data)
      ID                sex                 comp           weight      
 Length:498         Length:498         Min.   :30.78   Min.   : 65.10  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.: 75.67  
 Mode  :character   Mode  :character   Median :35.04   Median : 87.82  
                                       Mean   :35.12   Mean   : 87.93  
                                       3rd Qu.:36.05   3rd Qu.:100.34  
                                       Max.   :40.08   Max.   :110.94  
      irt             air           cortisol           BPM       
 Min.   :33.00   Min.   :20.01   Min.   : 50.03   Min.   :144.6  
 1st Qu.:34.42   1st Qu.:21.54   1st Qu.: 67.27   1st Qu.:148.9  
 Median :35.43   Median :23.11   Median : 82.43   Median :150.1  
 Mean   :35.54   Mean   :23.02   Mean   : 82.00   Mean   :150.1  
 3rd Qu.:36.71   3rd Qu.:24.35   3rd Qu.: 95.96   3rd Qu.:151.3  
 Max.   :38.00   Max.   :25.99   Max.   :112.45   Max.   :156.8  
summary(eqsub_data)
      ID                sex                 comp           comp2      
 Length:498         Length:498         Min.   :30.78   Min.   :27.37  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.:29.23  
 Mode  :character   Mode  :character   Median :35.04   Median :29.99  
                                       Mean   :35.12   Mean   :29.94  
                                       3rd Qu.:36.05   3rd Qu.:30.65  
                                       Max.   :40.08   Max.   :32.81  

###Histogram for Compliance Time (comp)

# Histogram for compliance time
hist(equine_data$comp, 
     main = "Distribution of Compliance Time", 
     xlab = "Compliance Time (Seconds)", 
     col = "lightblue", 
     border = "black")

###Boxplot for Cortisol Levels by Sex

# Boxplot for cortisol levels by sex
boxplot(cortisol ~ sex, 
        data = equine_data, 
        main = "Cortisol Levels by Sex", 
        xlab = "Sex", 
        ylab = "Cortisol (mcg/dl)", 
        col = c("pink", "lightblue"))

###Scatter plot for irt vs cortisol

# Scatterplot for irt vs cortisol
plot(equine_data$irt, equine_data$cortisol, 
     main = "Scatterplot: IRT vs Cortisol", 
     xlab = "IRT (°C)", 
     ylab = "Cortisol (mcg/dl)", 
     col = "blue", 
     pch = 19)

###Scatter plot for weight vs BPM

# Scatterplot for weight vs BPM
plot(equine_data$weight, equine_data$BPM, 
     main = "Scatterplot: Weight vs BPM", 
     xlab = "Weight (Kg)", 
     ylab = "BPM (Beats per Minute)", 
     col = "green", 
     pch = 19)

###Scatter plot for Compliance Time Before and After Spray

# Scatter plot for compliance time before and after spray
plot(eqsub_data$comp, eqsub_data$comp2, 
     main = "Scatterplot: Compliance Time Before vs After Spray",
     xlab = "Compliance Time Before Spray (Seconds)",
     ylab = "Compliance Time After Spray (Seconds)",
     col = "blue", 
     pch = 19)
# Add a diagonal line for reference
abline(a = 0, b = 1, col = "red", lty = 2)

Scatter plot for IRT vs Cortisol

plot(equine_data$irt, equine_data$cortisol,
     main = "Scatterplot: IRT vs Cortisol",
     xlab = "IRT (°C)",
     ylab = "Cortisol (mcg/dl)",
     col = "blue", pch = 16)

Shapiro-Wilk test for normality

shapiro.test(equine_data$irt)

    Shapiro-Wilk normality test

data:  equine_data$irt
W = 0.95889, p-value = 1.447e-10
shapiro.test(equine_data$cortisol)

    Shapiro-Wilk normality test

data:  equine_data$cortisol
W = 0.96362, p-value = 9.245e-10

Spearman correlation

cor.test(equine_data$irt, equine_data$cortisol, method = "spearman")

    Spearman's rank correlation rho

data:  equine_data$irt and equine_data$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346 

Scatterplot for IRT vs Cortisol using ggplot2

library(ggplot2)
Warning: package 'ggplot2' was built under R version 4.4.2
ggplot(equine_data, aes(x = irt, y = cortisol)) +
  geom_point(color = "blue", size = 2) +
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(title = "Scatterplot: IRT vs Cortisol",
       x = "IRT (°C)",
       y = "Cortisol (mcg/dl)") +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

Calculate the differences

diff_comp <- eqsub_data$comp2 - eqsub_data$comp

Shapiro-Wilk test for normality of differences

shapiro.test(diff_comp)

    Shapiro-Wilk normality test

data:  diff_comp
W = 0.84738, p-value < 2.2e-16

Perform Wilcoxon Signed-Rank Test

wilcox.test(eqsub_data$comp, eqsub_data$comp2, paired = TRUE, alternative = "two.sided")

    Wilcoxon signed rank test with continuity correction

data:  eqsub_data$comp and eqsub_data$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0

###MEDIAN

median_before <- median(eqsub_data$comp)
median_after <- median(eqsub_data$comp2)
median_before
[1] 35.04
median_after
[1] 29.99

###BOXPLOT RESULT

library(ggplot2)

# Scatter plot for compliance time before and after spray
ggplot(eqsub_data, aes(x = comp, y = comp2)) +
  geom_point(color = "blue") +
  #geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
  labs(title = "Compliance Time: Before vs After Spray",
       x = "Compliance Time Before Spray (Seconds)",
       y = "Compliance Time After Spray (Seconds)") +
  theme_minimal()

Scatterplots for compliance time vs independent variables

pairs(equine_data[, c("comp", "weight", "cortisol", "BPM", "irt", "air")], 
      main = "Scatterplots of Compliance Time with Other Variables")

Correlation matrix

cor(equine_data[, c("comp", "weight", "cortisol", "BPM", "irt", "air")])
                 comp       weight     cortisol          BPM          irt
comp      1.000000000  0.009205350 -0.009849812  0.714842827 -0.037368697
weight    0.009205350  1.000000000  0.020371404 -0.003608681 -0.056144259
cortisol -0.009849812  0.020371404  1.000000000  0.006528990  0.128601080
BPM       0.714842827 -0.003608681  0.006528990  1.000000000 -0.008652045
irt      -0.037368697 -0.056144259  0.128601080 -0.008652045  1.000000000
air      -0.051747939  0.083481211  0.023078942 -0.039173303 -0.057830148
                 air
comp     -0.05174794
weight    0.08348121
cortisol  0.02307894
BPM      -0.03917330
irt      -0.05783015
air       1.00000000

Linear regression model

model <- lm(comp ~ weight + BPM + cortisol + irt + air, data = equine_data)

Model summary

summary(model)

Call:
lm(formula = comp ~ weight + BPM + cortisol + irt + air, data = equine_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.88796 -0.65339  0.00537  0.58312  2.79258 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.910e+01  3.641e+00 -10.739   <2e-16 ***
weight       1.325e-03  3.361e-03   0.394    0.694    
BPM          5.048e-01  2.228e-02  22.661   <2e-16 ***
cortisol    -8.361e-04  2.602e-03  -0.321    0.748    
irt         -3.078e-02  3.191e-02  -0.964    0.335    
air         -2.256e-02  2.708e-02  -0.833    0.405    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.003 on 492 degrees of freedom
Multiple R-squared:  0.5129,    Adjusted R-squared:  0.5079 
F-statistic: 103.6 on 5 and 492 DF,  p-value: < 2.2e-16

Stepwise regression for optimal variable selection

stepwise_model <- step(model, direction = "both")
Start:  AIC=9.28
comp ~ weight + BPM + cortisol + irt + air

           Df Sum of Sq     RSS    AIC
- cortisol  1      0.10  495.39   7.38
- weight    1      0.16  495.44   7.44
- air       1      0.70  495.99   7.98
- irt       1      0.94  496.22   8.22
<none>                   495.29   9.28
- BPM       1    516.94 1012.23 363.24

Step:  AIC=7.38
comp ~ weight + BPM + irt + air

           Df Sum of Sq     RSS    AIC
- weight    1      0.15  495.54   5.54
- air       1      0.71  496.11   6.10
- irt       1      1.04  496.43   6.43
<none>                   495.39   7.38
+ cortisol  1      0.10  495.29   9.28
- BPM       1    516.85 1012.25 361.24

Step:  AIC=5.54
comp ~ BPM + irt + air

           Df Sum of Sq     RSS    AIC
- air       1      0.67  496.21   4.21
- irt       1      1.08  496.62   4.62
<none>                   495.54   5.54
+ weight    1      0.15  495.39   7.38
+ cortisol  1      0.10  495.44   7.44
- BPM       1    516.84 1012.38 359.31

Step:  AIC=4.21
comp ~ BPM + irt

           Df Sum of Sq     RSS    AIC
- irt       1      0.99  497.20   3.20
<none>                   496.21   4.21
+ air       1      0.67  495.54   5.54
+ cortisol  1      0.11  496.09   6.09
+ weight    1      0.10  496.11   6.10
- BPM       1    519.14 1015.35 358.77

Step:  AIC=3.2
comp ~ BPM

           Df Sum of Sq     RSS    AIC
<none>                   497.20   3.20
+ irt       1      0.99  496.21   4.21
+ air       1      0.57  496.62   4.62
+ cortisol  1      0.21  496.98   4.98
+ weight    1      0.14  497.06   5.06
- BPM       1    519.57 1016.77 357.46

Summary of the stepwise regression model

summary(stepwise_model)

Call:
lm(formula = comp ~ BPM, data = equine_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81905 -0.65308 -0.01023  0.59085  2.83181 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -40.79081    3.33482  -12.23   <2e-16 ***
BPM           0.50564    0.02221   22.77   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared:  0.511, Adjusted R-squared:   0.51 
F-statistic: 518.3 on 1 and 496 DF,  p-value: < 2.2e-16

Scatterplot with regression line

plot(equine_data$BPM, equine_data$comp, 
     main = "Compliance Time vs BPM",
     xlab = "BPM (Beats Per Minute)", 
     ylab = "Compliance Time (Seconds)", 
     pch = 19, col = "blue")

Add regression line