Summative Assessment

Author

Inusa Yawuza Musa (N1349229)

LOADING PACKAGES

library(tidyverse)
library(ggplot2)
library(ggpubr)

LOADING EQUINE DATA

# Import the EQUINE measurements from the CSV export into a data frame
data <- read.csv(
  file = "C:\\Users\\user\\OneDrive\\Desktop\\equine.csv"
)

# Preview the leading rows as a quick sanity check on the import
head(x = data)



# Import the EQSUB measurements from the CSV export into a data frame
data2 <- read.csv("C:\\Users\\user\\OneDrive\\Desktop\\eqsub.csv")

# Preview the leading rows of EQSUB. This must inspect `data2`; previewing
# `data` here would only re-display the EQUINE table loaded earlier and
# leave the EQSUB import unverified.
head(data2)

EXPLORATORY DATA ANALYSIS

# Compact overview of the EQUINE data frame: size, column names and types
str(object = data)
'data.frame':   498 obs. of  8 variables:
 $ ID      : chr  "Eq001" "Eq002" "Eq003" "Eq004" ...
 $ sex     : chr  "Female" "Female" "Female" "Male" ...
 $ comp    : num  37.2 34.5 36.3 35.3 37.4 ...
 $ weight  : num  74.7 73.4 71.8 104.6 67.1 ...
 $ irt     : num  37.7 35.7 34.8 36.2 33.6 ...
 $ air     : num  23.4 21.4 20.1 21.6 21.8 ...
 $ cortisol: num  64.1 73.7 54.4 86.3 108 ...
 $ BPM     : num  153 150 149 150 149 ...
# Per-column descriptive statistics for the EQUINE data frame
summary(object = data)
      ID                sex                 comp           weight      
 Length:498         Length:498         Min.   :30.78   Min.   : 65.10  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.: 75.67  
 Mode  :character   Mode  :character   Median :35.04   Median : 87.82  
                                       Mean   :35.12   Mean   : 87.93  
                                       3rd Qu.:36.05   3rd Qu.:100.34  
                                       Max.   :40.08   Max.   :110.94  
      irt             air           cortisol           BPM       
 Min.   :33.00   Min.   :20.01   Min.   : 50.03   Min.   :144.6  
 1st Qu.:34.42   1st Qu.:21.54   1st Qu.: 67.27   1st Qu.:148.9  
 Median :35.43   Median :23.11   Median : 82.43   Median :150.1  
 Mean   :35.54   Mean   :23.02   Mean   : 82.00   Mean   :150.1  
 3rd Qu.:36.71   3rd Qu.:24.35   3rd Qu.: 95.96   3rd Qu.:151.3  
 Max.   :38.00   Max.   :25.99   Max.   :112.45   Max.   :156.8  
# Compact overview of the EQSUB data frame: size, column names and types
str(object = data2)
'data.frame':   498 obs. of  4 variables:
 $ ID   : chr  "EQ001" "EQ002" "EQ003" "EQ004" ...
 $ sex  : chr  "Female" "Female" "Female" "Male" ...
 $ comp : num  37.2 34.5 36.3 35.3 37.4 ...
 $ comp2: num  31.5 29.6 30.8 30.1 31.7 ...
# Per-column descriptive statistics for the EQSUB data frame
summary(object = data2)
      ID                sex                 comp           comp2      
 Length:498         Length:498         Min.   :30.78   Min.   :27.37  
 Class :character   Class :character   1st Qu.:34.16   1st Qu.:29.23  
 Mode  :character   Mode  :character   Median :35.04   Median :29.99  
                                       Mean   :35.12   Mean   :29.94  
                                       3rd Qu.:36.05   3rd Qu.:30.65  
                                       Max.   :40.08   Max.   :32.81  
# Count the NA entries in every column of the EQUINE data frame
colSums(x = is.na(data))
      ID      sex     comp   weight      irt      air cortisol      BPM 
       0        0        0        0        0        0        0        0 

DATA VISUALIZATION

# Distribution of thermographic eye temperature (irt)
hist(
  data$irt,
  main = "Histogram of Thermographic eye temperature",
  xlab = "thermographic eye temperature (C)",
  col = "yellow"
)

# Distribution of weight
hist(
  data$weight,
  main = "Histogram of Weight",
  xlab = "Weight (Kg)",
  col = "green"
)

# Distribution of heart rate (BPM)
hist(
  data$BPM,
  main = "Histogram of Heart rate",
  xlab = "Heart rate (BPM)",
  col = "skyblue"
)

# Distribution of cortisol
hist(
  data$cortisol,
  main = "Histogram of Cortisol Levels",
  xlab = "Cortisol (mcg/dl)",
  col = "orange"
)

# Distribution of comp (EQUINE dataset)
hist(
  data$comp,
  main = "Histogram of comp",
  xlab = "Comp (s)",
  col = "violet"
)

# Distribution of comp2 (only present in the EQSUB dataset)
hist(
  data2$comp2,
  main = "Histogram of comp2",
  xlab = "Comp2 (s)",
  col = "red"
)

CORTISOL LEVELS AMONG SEXES

# Side-by-side boxplots comparing the cortisol distribution of each sex
ggplot(data = data, mapping = aes(x = sex, y = cortisol, fill = sex)) +
  geom_boxplot() +
  scale_fill_manual(values = c("lightblue", "pink")) +
  labs(
    title = "Cortisol Levels by Sex",
    x = "Sex",
    y = "Cortisol (mcg/dl)"
  ) +
  theme_minimal()

# Fit a one-way ANOVA with cortisol as the response and sex as the factor
anova_result <- aov(cortisol ~ sex, data = data)

# Print the ANOVA table (F statistic and p-value for the sex term)
summary(anova_result)
             Df Sum Sq Mean Sq F value Pr(>F)
sex           1     47   47.17   0.155  0.694
Residuals   496 151425  305.29               

THERMOGRAPHIC EYE TEMPERATURE (irt) AMONG SEXES

# Side-by-side boxplots comparing the irt distribution of each sex
ggplot(data = data, mapping = aes(x = sex, y = irt, fill = sex)) +
  geom_boxplot() +
  scale_fill_manual(values = c("lightblue", "pink")) +
  labs(
    title = " Thermographic eye temperature by Sex",
    x = "Sex",
    y = "irt"
  ) +
  theme_minimal()

# Fit a one-way ANOVA with irt as the response and sex as the factor
# (this overwrites the `anova_result` object from the cortisol analysis)
anova_result <- aov(irt ~ sex, data = data)

# Print the ANOVA table (F statistic and p-value for the sex term)
summary(anova_result)
             Df Sum Sq Mean Sq F value Pr(>F)
sex           1      1  0.9801   0.481  0.488
Residuals   496   1011  2.0386               

THERMOGRAPHIC EYE TEMPERATURE VS CORTISOL LEVEL

# Scatterplot with irt on the x-axis and cortisol on the y-axis
ggplot(data = data, mapping = aes(x = irt, y = cortisol)) +
  geom_point(color = "red", size = 1.5) +
  # Optional linear fit, currently disabled:
  # geom_smooth(method = "lm", color = "black", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Scatterplot of IRT vs. Cortisol Levels",
    x = "Thermographic Eye Temperature",
    y = "Cortisol Levels (mcg/dl)"
  )

# Pearson correlation test between irt and cortisol
cor.test(data$irt, data$cortisol, method = "pearson")

    Pearson's product-moment correlation

data:  data$irt and data$cortisol
t = 2.8881, df = 496, p-value = 0.004046
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.04119998 0.21404903
sample estimates:
      cor 
0.1286011 

EFFECT OF CALMING SPRAY ON COMPLIANCE TIME

# Scatterplot of compliance time before the spray (comp, x-axis) against
# compliance time after the spray (comp2, y-axis)
ggplot(data = data2, mapping = aes(x = comp, y = comp2)) +
  geom_point(color = "red", size = 1.5) +
  # Optional linear fit, currently disabled:
  # geom_smooth(method = "lm", color = "black", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Scatterplot of Compliance Time (comp vs. comp2)",
    x = "Compliance Time (Before Spray) [Seconds]",
    y = "Compliance Time (After Spray) [Seconds]"
  )

# Paired t-test of compliance time before (comp) vs. after (comp2) the spray.
# Each row of `data2` holds both measurements, so the observations are paired.
# This is the test that addresses the *effect* of the spray: it checks whether
# the mean within-row difference is zero. A correlation alone cannot show
# that, because two columns can be almost perfectly correlated while their
# means differ substantially.
t.test(data2$comp, data2$comp2, paired = TRUE)

# Pearson correlation between comp and comp2: measures how closely the
# before and after times track each other, not whether they differ.
cor.test(data2$comp, data2$comp2)

    Pearson's product-moment correlation

data:  data2$comp and data2$comp2
t = 137.28, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9846266 0.9891675
sample estimates:
     cor 
0.987094 

PREDICTION OF COMPLIANCE TIME

# Multiple linear regression: comp as the response, with cortisol, irt, BPM,
# air and weight entered together as predictors
model <- lm(
  formula = comp ~ cortisol + irt + BPM + air + weight,
  data = data
)

# Coefficient table, residual summary and overall fit statistics
summary(model)

Call:
lm(formula = comp ~ cortisol + irt + BPM + air + weight, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.88796 -0.65339  0.00537  0.58312  2.79258 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.910e+01  3.641e+00 -10.739   <2e-16 ***
cortisol    -8.361e-04  2.602e-03  -0.321    0.748    
irt         -3.078e-02  3.191e-02  -0.964    0.335    
BPM          5.048e-01  2.228e-02  22.661   <2e-16 ***
air         -2.256e-02  2.708e-02  -0.833    0.405    
weight       1.325e-03  3.361e-03   0.394    0.694    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.003 on 492 degrees of freedom
Multiple R-squared:  0.5129,    Adjusted R-squared:  0.5079 
F-statistic: 103.6 on 5 and 492 DF,  p-value: < 2.2e-16

RELATIONSHIP BETWEEN HEART RATE (BPM) AND COMPLIANCE TIME

# Simple linear regression: comp as the response, BPM as the only predictor
lm_bpm_comp <- lm(
  formula = comp ~ BPM,
  data = data
)

# Coefficient table, residual summary and overall fit statistics
summary(lm_bpm_comp)

Call:
lm(formula = comp ~ BPM, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.81905 -0.65308 -0.01023  0.59085  2.83181 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -40.79081    3.33482  -12.23   <2e-16 ***
BPM           0.50564    0.02221   22.77   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared:  0.511, Adjusted R-squared:   0.51 
F-statistic: 518.3 on 1 and 496 DF,  p-value: < 2.2e-16
# Scatterplot of comp against BPM with the fitted least-squares line on top
library(ggplot2)

ggplot(data = data, mapping = aes(x = BPM, y = comp)) +
  geom_point(color = "blue", size = 2) +
  # Linear fit only: se = FALSE means no confidence band is drawn
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Relationship Between Heart Rate (BPM) and Compliance Time",
    x = "Heart Rate (BPM)",
    y = "Compliance Time (Seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'