equine_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\equine.csv")NEW MAIN ASSESSMENT
Load equine.csv
Load eqsub.csv
eqsub_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\eqsub.csv")Display the first few rows of both datasets
head(equine_data) ID sex comp weight irt air cortisol BPM
1 Eq001 Female 37.17081 74.69050 37.73957 23.37377 64.11183 153.2263
2 Eq002 Female 34.54973 73.41627 35.68413 21.42088 73.68676 149.6390
3 Eq003 Female 36.32861 71.80837 34.79854 20.05114 54.43466 149.4406
4 Eq004 Male 35.33881 104.62985 36.15973 21.64319 86.32615 150.2971
5 Eq005 Female 37.39799 67.13098 33.61477 21.76143 107.97796 149.0318
6 Eq006 Male 33.54668 110.49396 36.38343 20.85200 108.86475 146.8415
head(eqsub_data) ID sex comp comp2
1 EQ001 Female 37.17 31.50
2 EQ002 Female 34.55 29.63
3 EQ003 Female 36.33 30.80
4 EQ004 Male 35.34 30.14
5 EQ005 Female 37.40 31.66
6 EQ006 Male 33.55 28.57
Check the structure of the datasets
str(equine_data)'data.frame': 498 obs. of 8 variables:
$ ID : chr "Eq001" "Eq002" "Eq003" "Eq004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ comp : num 37.2 34.5 36.3 35.3 37.4 ...
$ weight : num 74.7 73.4 71.8 104.6 67.1 ...
$ irt : num 37.7 35.7 34.8 36.2 33.6 ...
$ air : num 23.4 21.4 20.1 21.6 21.8 ...
$ cortisol: num 64.1 73.7 54.4 86.3 108 ...
$ BPM : num 153 150 149 150 149 ...
str(eqsub_data)'data.frame': 498 obs. of 4 variables:
$ ID : chr "EQ001" "EQ002" "EQ003" "EQ004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ comp : num 37.2 34.5 36.3 35.3 37.4 ...
$ comp2: num 31.5 29.6 30.8 30.1 31.7 ...
Get a summary of the datasets
summary(equine_data) ID sex comp weight
Length:498 Length:498 Min. :30.78 Min. : 65.10
Class :character Class :character 1st Qu.:34.16 1st Qu.: 75.67
Mode :character Mode :character Median :35.04 Median : 87.82
Mean :35.12 Mean : 87.93
3rd Qu.:36.05 3rd Qu.:100.34
Max. :40.08 Max. :110.94
irt air cortisol BPM
Min. :33.00 Min. :20.01 Min. : 50.03 Min. :144.6
1st Qu.:34.42 1st Qu.:21.54 1st Qu.: 67.27 1st Qu.:148.9
Median :35.43 Median :23.11 Median : 82.43 Median :150.1
Mean :35.54 Mean :23.02 Mean : 82.00 Mean :150.1
3rd Qu.:36.71 3rd Qu.:24.35 3rd Qu.: 95.96 3rd Qu.:151.3
Max. :38.00 Max. :25.99 Max. :112.45 Max. :156.8
summary(eqsub_data) ID sex comp comp2
Length:498 Length:498 Min. :30.78 Min. :27.37
Class :character Class :character 1st Qu.:34.16 1st Qu.:29.23
Mode :character Mode :character Median :35.04 Median :29.99
Mean :35.12 Mean :29.94
3rd Qu.:36.05 3rd Qu.:30.65
Max. :40.08 Max. :32.81
###Histogram for Compliance Time (comp)
# Histogram for compliance time
hist(equine_data$comp,
main = "Distribution of Compliance Time",
xlab = "Compliance Time (Seconds)",
col = "lightblue",
border = "black")###Boxplot for Cortisol Levels by Sex
# Boxplot for cortisol levels by sex
boxplot(cortisol ~ sex,
data = equine_data,
main = "Cortisol Levels by Sex",
xlab = "Sex",
ylab = "Cortisol (mcg/dl)",
col = c("pink", "lightblue"))###Scatter plot for irt vs cortisol
# Scatterplot for irt vs cortisol
plot(equine_data$irt, equine_data$cortisol,
main = "Scatterplot: IRT vs Cortisol",
xlab = "IRT (°C)",
ylab = "Cortisol (mcg/dl)",
col = "blue",
pch = 19)###Scatter plot for weight vs BPM
# Scatterplot for weight vs BPM
plot(equine_data$weight, equine_data$BPM,
main = "Scatterplot: Weight vs BPM",
xlab = "Weight (Kg)",
ylab = "BPM (Beats per Minute)",
col = "green",
pch = 19)###Scatter plot for Compliance Time Before and After Spray
# Scatter plot for compliance time before and after spray
plot(eqsub_data$comp, eqsub_data$comp2,
main = "Scatterplot: Compliance Time Before vs After Spray",
xlab = "Compliance Time Before Spray (Seconds)",
ylab = "Compliance Time After Spray (Seconds)",
col = "blue",
pch = 19)
# Add a diagonal line for reference
abline(a = 0, b = 1, col = "red", lty = 2)Scatter plot for IRT vs Cortisol
plot(equine_data$irt, equine_data$cortisol,
main = "Scatterplot: IRT vs Cortisol",
xlab = "IRT (°C)",
ylab = "Cortisol (mcg/dl)",
col = "blue", pch = 16)Shapiro-Wilk test for normality
shapiro.test(equine_data$irt)
Shapiro-Wilk normality test
data: equine_data$irt
W = 0.95889, p-value = 1.447e-10
shapiro.test(equine_data$cortisol)
Shapiro-Wilk normality test
data: equine_data$cortisol
W = 0.96362, p-value = 9.245e-10
Spearman correlation
cor.test(equine_data$irt, equine_data$cortisol, method = "spearman")
Spearman's rank correlation rho
data: equine_data$irt and equine_data$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
Scatterplot for IRT vs Cortisol using ggplot2
library(ggplot2)Warning: package 'ggplot2' was built under R version 4.4.2
ggplot(equine_data, aes(x = irt, y = cortisol)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Scatterplot: IRT vs Cortisol",
x = "IRT (°C)",
y = "Cortisol (mcg/dl)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Calculate the differences
diff_comp <- eqsub_data$comp2 - eqsub_data$compShapiro-Wilk test for normality of differences
shapiro.test(diff_comp)
Shapiro-Wilk normality test
data: diff_comp
W = 0.84738, p-value < 2.2e-16
Perform Wilcoxon Signed-Rank Test
wilcox.test(eqsub_data$comp, eqsub_data$comp2, paired = TRUE, alternative = "two.sided")
Wilcoxon signed rank test with continuity correction
data: eqsub_data$comp and eqsub_data$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
###MEDIAN
median_before <- median(eqsub_data$comp)
median_after <- median(eqsub_data$comp2)
median_before[1] 35.04
median_after[1] 29.99
###BOXPLOT RESULT
library(ggplot2)
# Scatter plot for compliance time before and after spray
ggplot(eqsub_data, aes(x = comp, y = comp2)) +
geom_point(color = "blue") +
#geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
labs(title = "Compliance Time: Before vs After Spray",
x = "Compliance Time Before Spray (Seconds)",
y = "Compliance Time After Spray (Seconds)") +
theme_minimal()Scatterplots for compliance time vs independent variables
pairs(equine_data[, c("comp", "weight", "cortisol", "BPM", "irt", "air")],
main = "Scatterplots of Compliance Time with Other Variables")Correlation matrix
cor(equine_data[, c("comp", "weight", "cortisol", "BPM", "irt", "air")]) comp weight cortisol BPM irt
comp 1.000000000 0.009205350 -0.009849812 0.714842827 -0.037368697
weight 0.009205350 1.000000000 0.020371404 -0.003608681 -0.056144259
cortisol -0.009849812 0.020371404 1.000000000 0.006528990 0.128601080
BPM 0.714842827 -0.003608681 0.006528990 1.000000000 -0.008652045
irt -0.037368697 -0.056144259 0.128601080 -0.008652045 1.000000000
air -0.051747939 0.083481211 0.023078942 -0.039173303 -0.057830148
air
comp -0.05174794
weight 0.08348121
cortisol 0.02307894
BPM -0.03917330
irt -0.05783015
air 1.00000000
Linear regression model
model <- lm(comp ~ weight + BPM + cortisol + irt + air, data = equine_data)Model summary
summary(model)
Call:
lm(formula = comp ~ weight + BPM + cortisol + irt + air, data = equine_data)
Residuals:
Min 1Q Median 3Q Max
-2.88796 -0.65339 0.00537 0.58312 2.79258
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.910e+01 3.641e+00 -10.739 <2e-16 ***
weight 1.325e-03 3.361e-03 0.394 0.694
BPM 5.048e-01 2.228e-02 22.661 <2e-16 ***
cortisol -8.361e-04 2.602e-03 -0.321 0.748
irt -3.078e-02 3.191e-02 -0.964 0.335
air -2.256e-02 2.708e-02 -0.833 0.405
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.003 on 492 degrees of freedom
Multiple R-squared: 0.5129, Adjusted R-squared: 0.5079
F-statistic: 103.6 on 5 and 492 DF, p-value: < 2.2e-16
Stepwise regression for optimal variable selection
stepwise_model <- step(model, direction = "both")Start: AIC=9.28
comp ~ weight + BPM + cortisol + irt + air
Df Sum of Sq RSS AIC
- cortisol 1 0.10 495.39 7.38
- weight 1 0.16 495.44 7.44
- air 1 0.70 495.99 7.98
- irt 1 0.94 496.22 8.22
<none> 495.29 9.28
- BPM 1 516.94 1012.23 363.24
Step: AIC=7.38
comp ~ weight + BPM + irt + air
Df Sum of Sq RSS AIC
- weight 1 0.15 495.54 5.54
- air 1 0.71 496.11 6.10
- irt 1 1.04 496.43 6.43
<none> 495.39 7.38
+ cortisol 1 0.10 495.29 9.28
- BPM 1 516.85 1012.25 361.24
Step: AIC=5.54
comp ~ BPM + irt + air
Df Sum of Sq RSS AIC
- air 1 0.67 496.21 4.21
- irt 1 1.08 496.62 4.62
<none> 495.54 5.54
+ weight 1 0.15 495.39 7.38
+ cortisol 1 0.10 495.44 7.44
- BPM 1 516.84 1012.38 359.31
Step: AIC=4.21
comp ~ BPM + irt
Df Sum of Sq RSS AIC
- irt 1 0.99 497.20 3.20
<none> 496.21 4.21
+ air 1 0.67 495.54 5.54
+ cortisol 1 0.11 496.09 6.09
+ weight 1 0.10 496.11 6.10
- BPM 1 519.14 1015.35 358.77
Step: AIC=3.2
comp ~ BPM
Df Sum of Sq RSS AIC
<none> 497.20 3.20
+ irt 1 0.99 496.21 4.21
+ air 1 0.57 496.62 4.62
+ cortisol 1 0.21 496.98 4.98
+ weight 1 0.14 497.06 5.06
- BPM 1 519.57 1016.77 357.46
Summary of the stepwise regression model
summary(stepwise_model)
Call:
lm(formula = comp ~ BPM, data = equine_data)
Residuals:
Min 1Q Median 3Q Max
-2.81905 -0.65308 -0.01023 0.59085 2.83181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -40.79081 3.33482 -12.23 <2e-16 ***
BPM 0.50564 0.02221 22.77 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared: 0.511, Adjusted R-squared: 0.51
F-statistic: 518.3 on 1 and 496 DF, p-value: < 2.2e-16
Scatterplot with regression line
plot(equine_data$BPM, equine_data$comp,
main = "Compliance Time vs BPM",
xlab = "BPM (Beats Per Minute)",
ylab = "Compliance Time (Seconds)",
pch = 19, col = "blue")