library(tidyverse)
library(ggplot2)
library(ggpubr)
Summative Assessment
LOADING PACKAGES
LOADING EQUINE DATA
# Load the EQUINE CSV file into `data`.
# The path is an absolute, machine-specific Windows path; the script only runs
# as-is where the file exists at this location.
data <- read.csv("C:\\Users\\user\\OneDrive\\Desktop\\equine.csv")
# Display the first few rows of the dataset to confirm it's loaded correctly
head(data)
# Load the EQSUB CSV file into `data2`.
# The path is an absolute, machine-specific Windows path; the script only runs
# as-is where the file exists at this location.
data2 <- read.csv("C:\\Users\\user\\OneDrive\\Desktop\\eqsub.csv")
# Display the first few rows of the EQSUB dataset (data2, not data) to confirm
# it's loaded correctly
head(data2)
EXPLORATORY DATA ANALYSIS
# Structure of the EQUINE dataset: number of observations and variables, plus
# the type and first few values of every column.
str(data)
'data.frame': 498 obs. of 8 variables:
$ ID : chr "Eq001" "Eq002" "Eq003" "Eq004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ comp : num 37.2 34.5 36.3 35.3 37.4 ...
$ weight : num 74.7 73.4 71.8 104.6 67.1 ...
$ irt : num 37.7 35.7 34.8 36.2 33.6 ...
$ air : num 23.4 21.4 20.1 21.6 21.8 ...
$ cortisol: num 64.1 73.7 54.4 86.3 108 ...
$ BPM : num 153 150 149 150 149 ...
# Summary of the EQUINE dataset: min / quartiles / median / mean / max for the
# numeric columns, and length / class / mode for the character columns.
summary(data)
ID sex comp weight
Length:498 Length:498 Min. :30.78 Min. : 65.10
Class :character Class :character 1st Qu.:34.16 1st Qu.: 75.67
Mode :character Mode :character Median :35.04 Median : 87.82
Mean :35.12 Mean : 87.93
3rd Qu.:36.05 3rd Qu.:100.34
Max. :40.08 Max. :110.94
irt air cortisol BPM
Min. :33.00 Min. :20.01 Min. : 50.03 Min. :144.6
1st Qu.:34.42 1st Qu.:21.54 1st Qu.: 67.27 1st Qu.:148.9
Median :35.43 Median :23.11 Median : 82.43 Median :150.1
Mean :35.54 Mean :23.02 Mean : 82.00 Mean :150.1
3rd Qu.:36.71 3rd Qu.:24.35 3rd Qu.: 95.96 3rd Qu.:151.3
Max. :38.00 Max. :25.99 Max. :112.45 Max. :156.8
# Structure of the EQSUB dataset: number of observations and variables, plus
# the type and first few values of every column.
str(data2)
'data.frame': 498 obs. of 4 variables:
$ ID : chr "EQ001" "EQ002" "EQ003" "EQ004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ comp : num 37.2 34.5 36.3 35.3 37.4 ...
$ comp2: num 31.5 29.6 30.8 30.1 31.7 ...
# Summary of the EQSUB dataset: min / quartiles / median / mean / max for the
# numeric columns, and length / class / mode for the character columns.
summary(data2)
ID sex comp comp2
Length:498 Length:498 Min. :30.78 Min. :27.37
Class :character Class :character 1st Qu.:34.16 1st Qu.:29.23
Mode :character Mode :character Median :35.04 Median :29.99
Mean :35.12 Mean :29.94
3rd Qu.:36.05 3rd Qu.:30.65
Max. :40.08 Max. :32.81
# Count the missing (NA) values in each column of the EQUINE dataset.
# NOTE(review): only `data` is checked here; `data2` (EQSUB) is not checked
# anywhere in this section - confirm whether it should be.
colSums(is.na(data))
ID sex comp weight irt air cortisol BPM
0 0 0 0 0 0 0 0
DATA VISUALIZATION
# One base-graphics histogram per measured variable. Title, x-axis label and
# bar colour are set explicitly for every plot.

# Thermographic eye temperature (irt)
hist(
  data[["irt"]],
  main = "Histogram of Thermographic eye temperature",
  xlab = "thermographic eye temperature (C)",
  col = "yellow"
)

# Weight
hist(
  data[["weight"]],
  main = "Histogram of Weight",
  xlab = "Weight (Kg)",
  col = "green"
)

# Heart rate (BPM)
hist(
  data[["BPM"]],
  main = "Histogram of Heart rate",
  xlab = "Heart rate (BPM)",
  col = "skyblue"
)

# Cortisol level
hist(
  data[["cortisol"]],
  main = "Histogram of Cortisol Levels",
  xlab = "Cortisol (mcg/dl)",
  col = "orange"
)

# comp, from the EQUINE dataset
hist(
  data[["comp"]],
  main = "Histogram of comp",
  xlab = "Comp (s)",
  col = "violet"
)

# comp2, from the EQSUB dataset
hist(
  data2[["comp2"]],
  main = "Histogram of comp2",
  xlab = "Comp2 (s)",
  col = "red"
)
CORTISOL LEVELS AMONG SEXES
# Cortisol by sex: one box per sex, filled from a manual two-colour palette
# (unnamed values are assigned to the fill groups in order).
ggplot(data = data, mapping = aes(sex, cortisol, fill = sex)) +
  geom_boxplot() +
  scale_fill_manual(values = c("lightblue", "pink")) +
  labs(
    title = "Cortisol Levels by Sex",
    x = "Sex",
    y = "Cortisol (mcg/dl)"
  ) +
  theme_minimal()
# Perform a one-way ANOVA of cortisol on sex and keep the fitted object
anova_result <- aov(cortisol ~ sex, data = data)
# Display the ANOVA table (Df, sums of squares, F value, p-value)
summary(anova_result)
Df Sum Sq Mean Sq F value Pr(>F)
sex 1 47 47.17 0.155 0.694
Residuals 496 151425 305.29
THERMOGRAPHIC EYE TEMPERATURE (irt) AMONG SEXES
# Thermographic eye temperature (irt) by sex: one box per sex, filled from a
# manual two-colour palette (unnamed values are assigned to the fill groups in
# order).
ggplot(data = data, mapping = aes(sex, irt, fill = sex)) +
  geom_boxplot() +
  scale_fill_manual(values = c("lightblue", "pink")) +
  labs(
    title = " Thermographic eye temperature by Sex",
    x = "Sex",
    y = "irt"
  ) +
  theme_minimal()
# Perform a one-way ANOVA of thermographic eye temperature (irt) on sex.
# This re-uses (overwrites) the name `anova_result`.
anova_result <- aov(irt ~ sex, data = data)
# Display the ANOVA table (Df, sums of squares, F value, p-value)
summary(anova_result)
Df Sum Sq Mean Sq F value Pr(>F)
sex 1 1 0.9801 0.481 0.488
Residuals 496 1011 2.0386
THERMOGRAPHIC EYE TEMPERATURE VS CORTISOL LEVEL
# Scatterplot of thermographic eye temperature (irt) against cortisol
ggplot(data, aes(x = irt, y = cortisol)) +
  geom_point(color = "red", size = 1.5) +
  # Optional linear regression line, currently disabled:
  # geom_smooth(method = "lm", color = "black", se = FALSE) +
  labs(
    title = "Scatterplot of IRT vs. Cortisol Levels",
    x = "Thermographic Eye Temperature",
    y = "Cortisol Levels (mcg/dl)"
  ) +
  theme_minimal()
# Test the correlation between thermographic eye temperature (irt) and
# cortisol. With the default arguments this is a two-sided Pearson
# product-moment correlation test.
cor.test(data$irt, data$cortisol)
Pearson's product-moment correlation
data: data$irt and data$cortisol
t = 2.8881, df = 496, p-value = 0.004046
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.04119998 0.21404903
sample estimates:
cor
0.1286011
EFFECT OF CALMING SPRAY ON COMPLIANCE TIME
# Scatterplot of compliance time before the spray (comp) against compliance
# time after the spray (comp2)
ggplot(data2, aes(x = comp, y = comp2)) +
  geom_point(color = "red", size = 1.5) +
  # Optional linear regression line, currently disabled:
  # geom_smooth(method = "lm", color = "black", se = FALSE) +
  labs(
    title = "Scatterplot of Compliance Time (comp vs. comp2)",
    x = "Compliance Time (Before Spray) [Seconds]",
    y = "Compliance Time (After Spray) [Seconds]"
  ) +
  theme_minimal()
# Test the correlation between compliance time before (comp) and after (comp2)
# the spray. With the default arguments this is a two-sided Pearson
# product-moment correlation test.
# NOTE(review): correlation measures how closely the two times track each
# other, not whether the spray shifted them; a paired comparison (e.g. a
# paired t-test) may be needed to assess the spray's effect - confirm.
cor.test(data2$comp, data2$comp2)
Pearson's product-moment correlation
data: data2$comp and data2$comp2
t = 137.28, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.9846266 0.9891675
sample estimates:
cor
0.987094
PREDICTION OF COMPLIANCE TIME
# Build the linear regression model: compliance time (comp) regressed on
# cortisol, irt, BPM, air and weight
model <- lm(comp ~ cortisol + irt + BPM + air + weight, data = data)
# Summary of the model (coefficients, residual standard error, R-squared)
summary(model)
Call:
lm(formula = comp ~ cortisol + irt + BPM + air + weight, data = data)
Residuals:
Min 1Q Median 3Q Max
-2.88796 -0.65339 0.00537 0.58312 2.79258
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.910e+01 3.641e+00 -10.739 <2e-16 ***
cortisol -8.361e-04 2.602e-03 -0.321 0.748
irt -3.078e-02 3.191e-02 -0.964 0.335
BPM 5.048e-01 2.228e-02 22.661 <2e-16 ***
air -2.256e-02 2.708e-02 -0.833 0.405
weight 1.325e-03 3.361e-03 0.394 0.694
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.003 on 492 degrees of freedom
Multiple R-squared: 0.5129, Adjusted R-squared: 0.5079
F-statistic: 103.6 on 5 and 492 DF, p-value: < 2.2e-16
RELATIONSHIP BETWEEN HEART RATE (BPM) AND COMPLIANCE TIME
# Fit a simple linear model of compliance time (comp) on heart rate (BPM)
lm_bpm_comp <- lm(comp ~ BPM, data = data)
# Summarize the linear model (coefficients, residual standard error, R-squared)
summary(lm_bpm_comp)
Call:
lm(formula = comp ~ BPM, data = data)
Residuals:
Min 1Q Median 3Q Max
-2.81905 -0.65308 -0.01023 0.59085 2.83181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -40.79081 3.33482 -12.23 <2e-16 ***
BPM 0.50564 0.02221 22.77 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared: 0.511, Adjusted R-squared: 0.51
F-statistic: 518.3 on 1 and 496 DF, p-value: < 2.2e-16
# Visualize the relationship with a scatterplot and a fitted regression line.
# ggplot2 is already attached at the top of the script, so it is not loaded
# again here.
ggplot(data, aes(x = BPM, y = comp)) +
  geom_point(color = "blue", size = 2) +
  # Least-squares line only: se = FALSE suppresses the confidence band
  geom_smooth(method = "lm", color = "red", se = FALSE) +
  labs(
    title = "Relationship Between Heart Rate (BPM) and Compliance Time",
    x = "Heart Rate (BPM)",
    y = "Compliance Time (Seconds)"
  ) +
  theme_minimal() # Simple and clean theme
`geom_smooth()` using formula = 'y ~ x'