equine_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\equine.csv")EQUINE ANALYSIS KAMRAN BIN LATEEF(N1340143)
LOADING DATA
Load equine.csv
Load eqsub.csv
eqsub_data <- read.csv("D:\\LECTURES\\Research Method\\MAIN ASSESSMENT\\eqsub.csv")Display the first few rows of both datasets
head(equine_data) ID sex comp weight irt air cortisol BPM
1 Eq001 Female 37.17081 74.69050 37.73957 23.37377 64.11183 153.2263
2 Eq002 Female 34.54973 73.41627 35.68413 21.42088 73.68676 149.6390
3 Eq003 Female 36.32861 71.80837 34.79854 20.05114 54.43466 149.4406
4 Eq004 Male 35.33881 104.62985 36.15973 21.64319 86.32615 150.2971
5 Eq005 Female 37.39799 67.13098 33.61477 21.76143 107.97796 149.0318
6 Eq006 Male 33.54668 110.49396 36.38343 20.85200 108.86475 146.8415
head(eqsub_data) ID sex comp comp2
1 EQ001 Female 37.17 31.50
2 EQ002 Female 34.55 29.63
3 EQ003 Female 36.33 30.80
4 EQ004 Male 35.34 30.14
5 EQ005 Female 37.40 31.66
6 EQ006 Male 33.55 28.57
Check the structure of the datasets
str(equine_data)'data.frame': 498 obs. of 8 variables:
$ ID : chr "Eq001" "Eq002" "Eq003" "Eq004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ comp : num 37.2 34.5 36.3 35.3 37.4 ...
$ weight : num 74.7 73.4 71.8 104.6 67.1 ...
$ irt : num 37.7 35.7 34.8 36.2 33.6 ...
$ air : num 23.4 21.4 20.1 21.6 21.8 ...
$ cortisol: num 64.1 73.7 54.4 86.3 108 ...
$ BPM : num 153 150 149 150 149 ...
str(eqsub_data)'data.frame': 498 obs. of 4 variables:
$ ID : chr "EQ001" "EQ002" "EQ003" "EQ004" ...
$ sex : chr "Female" "Female" "Female" "Male" ...
$ comp : num 37.2 34.5 36.3 35.3 37.4 ...
$ comp2: num 31.5 29.6 30.8 30.1 31.7 ...
Get a summary of the datasets
summary(equine_data) ID sex comp weight
Length:498 Length:498 Min. :30.78 Min. : 65.10
Class :character Class :character 1st Qu.:34.16 1st Qu.: 75.67
Mode :character Mode :character Median :35.04 Median : 87.82
Mean :35.12 Mean : 87.93
3rd Qu.:36.05 3rd Qu.:100.34
Max. :40.08 Max. :110.94
irt air cortisol BPM
Min. :33.00 Min. :20.01 Min. : 50.03 Min. :144.6
1st Qu.:34.42 1st Qu.:21.54 1st Qu.: 67.27 1st Qu.:148.9
Median :35.43 Median :23.11 Median : 82.43 Median :150.1
Mean :35.54 Mean :23.02 Mean : 82.00 Mean :150.1
3rd Qu.:36.71 3rd Qu.:24.35 3rd Qu.: 95.96 3rd Qu.:151.3
Max. :38.00 Max. :25.99 Max. :112.45 Max. :156.8
summary(eqsub_data) ID sex comp comp2
Length:498 Length:498 Min. :30.78 Min. :27.37
Class :character Class :character 1st Qu.:34.16 1st Qu.:29.23
Mode :character Mode :character Median :35.04 Median :29.99
Mean :35.12 Mean :29.94
3rd Qu.:36.05 3rd Qu.:30.65
Max. :40.08 Max. :32.81
###Histogram for Compliance Time (comp)
# Histogram for compliance time
hist(equine_data$comp,
main = "Distribution of Compliance Time",
xlab = "Compliance Time (Seconds)",
col = "lightblue",
border = "black")###Boxplot for Cortisol Levels by Sex
# Boxplot for cortisol levels by sex
boxplot(cortisol ~ sex,
data = equine_data,
main = "Cortisol Levels by Sex",
xlab = "Sex",
ylab = "Cortisol (mcg/dl)",
col = c("pink", "lightblue"))###Scatter plot for irt vs cortisol
# Scatterplot for irt vs cortisol
plot(equine_data$irt, equine_data$cortisol,
main = "Scatterplot: IRT vs Cortisol",
xlab = "IRT (°C)",
ylab = "Cortisol (mcg/dl)",
col = "blue",
pch = 19)###Scatter plot for weight vs BPM
# Scatterplot for weight vs BPM
plot(equine_data$weight, equine_data$BPM,
main = "Scatterplot: Weight vs BPM",
xlab = "Weight (Kg)",
ylab = "BPM (Beats per Minute)",
col = "green",
pch = 19)###Scatter plot for Compliance Time Before and After Spray
# Scatter plot for compliance time before and after spray
plot(eqsub_data$comp, eqsub_data$comp2,
main = "Scatterplot: Compliance Time Before vs After Spray",
xlab = "Compliance Time Before Spray (Seconds)",
ylab = "Compliance Time After Spray (Seconds)",
col = "blue",
pch = 19)
# Add a diagonal line for reference
abline(a = 0, b = 1, col = "red", lty = 2)Q1 Is there a correlation between the variables irt and cortisol?
Scatter plot for IRT vs Cortisol
plot(equine_data$irt, equine_data$cortisol,
main = "Scatterplot: IRT vs Cortisol",
xlab = "IRT (°C)",
ylab = "Cortisol (mcg/dl)",
col = "blue", pch = 16)Shapiro-Wilk test for normality
shapiro.test(equine_data$irt)
Shapiro-Wilk normality test
data: equine_data$irt
W = 0.95889, p-value = 1.447e-10
shapiro.test(equine_data$cortisol)
Shapiro-Wilk normality test
data: equine_data$cortisol
W = 0.96362, p-value = 9.245e-10
Spearman correlation
cor.test(equine_data$irt, equine_data$cortisol, method = "spearman")
Spearman's rank correlation rho
data: equine_data$irt and equine_data$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
Scatterplot for IRT vs Cortisol using ggplot2
library(ggplot2)Warning: package 'ggplot2' was built under R version 4.4.2
ggplot(equine_data, aes(x = irt, y = cortisol)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Scatterplot: IRT vs Cortisol",
x = "IRT (°C)",
y = "Cortisol (mcg/dl)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Q2 Does the application of the calming spray have an effect on compliance time?
Calculate the differences
diff_comp <- eqsub_data$comp2 - eqsub_data$compShapiro-Wilk test for normality of differences
shapiro.test(diff_comp)
Shapiro-Wilk normality test
data: diff_comp
W = 0.84738, p-value < 2.2e-16
Perform Wilcoxon Signed-Rank Test
wilcox.test(eqsub_data$comp, eqsub_data$comp2, paired = TRUE, alternative = "two.sided")
Wilcoxon signed rank test with continuity correction
data: eqsub_data$comp and eqsub_data$comp2
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
###MEDIAN
median_before <- median(eqsub_data$comp)
median_after <- median(eqsub_data$comp2)
median_before[1] 35.04
median_after[1] 29.99
###SCATTER PLOT RESULT
library(ggplot2)
# Scatter plot for compliance time before and after spray
ggplot(eqsub_data, aes(x = comp, y = comp2)) +
geom_point(color = "blue") +
geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "red") +
labs(title = "Compliance Time: Before vs After Spray",
x = "Compliance Time Before Spray (Seconds)",
y = "Compliance Time After Spray (Seconds)") +
theme_minimal()Q3 Is it possible to predict compliance time?
Scatter plot for weight vs compliance time
library(ggplot2)
ggplot(equine_data, aes(x = weight, y = comp)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = "Compliance Time vs Weight",
x = "Weight (Kg)",
y = "Compliance Time (Seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Scatter plot with regression line
library(ggplot2)
ggplot(equine_data, aes(x = BPM, y = comp)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Compliance Time vs BPM",
x = "BPM (Beats Per Minute)",
y = "Compliance Time (Seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Scatter plot with regression line
library(ggplot2)
ggplot(equine_data, aes(x = irt, y = comp)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Compliance Time vs IRT",
x = "IRT (°C)",
y = "Compliance Time (Seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Scatter plot with regression line
library(ggplot2)
ggplot(equine_data, aes(x = air, y = comp)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Compliance Time vs Air Temperature",
x = "Air Temperature (°C)",
y = "Compliance Time (Seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Scatter plot with regression line
library(ggplot2)
ggplot(equine_data, aes(x = cortisol, y = comp)) +
geom_point(color = "blue") +
geom_smooth(method = "lm", color = "red", se = FALSE) +
labs(title = "Compliance Time vs Cortisol Levels",
x = "Cortisol Levels (mcg/dl)",
y = "Compliance Time (Seconds)") +
theme_minimal()`geom_smooth()` using formula = 'y ~ x'
Fit a linear regression model
lm_bpm <- lm(comp ~ BPM, data = equine_data)Display the summary of the regression
summary(lm_bpm)
Call:
lm(formula = comp ~ BPM, data = equine_data)
Residuals:
Min 1Q Median 3Q Max
-2.81905 -0.65308 -0.01023 0.59085 2.83181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -40.79081 3.33482 -12.23 <2e-16 ***
BPM 0.50564 0.02221 22.77 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared: 0.511, Adjusted R-squared: 0.51
F-statistic: 518.3 on 1 and 496 DF, p-value: < 2.2e-16
shapiro.test(residuals(lm_bpm))
Shapiro-Wilk normality test
data: residuals(lm_bpm)
W = 0.99741, p-value = 0.6327
Residuals vs Fitted Values Plot
plot(fitted(lm_bpm), residuals(lm_bpm),
main = "Residuals vs Fitted Values",
xlab = "Fitted Values",
ylab = "Residuals",
col = "blue",
pch = 19)
abline(h = 0, col = "red", lty = 2)# Install and load the 'lmtest' package if not already installed
if (!require("lmtest")) install.packages("lmtest")Loading required package: lmtest
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
library(lmtest)
# Perform Durbin-Watson test
dw_test <- dwtest(lm_bpm)
print(dw_test)
Durbin-Watson test
data: lm_bpm
DW = 2.0934, p-value = 0.8519
alternative hypothesis: true autocorrelation is greater than 0
# Calculate RMSE
rmse <- sqrt(mean(residuals(lm_bpm)^2))
print(paste("RMSE:", rmse))[1] "RMSE: 0.999194284706538"
# Calculate MAE
mae <- mean(abs(residuals(lm_bpm)))
print(paste("MAE:", mae))[1] "MAE: 0.787982898953818"
Scatterplot of Actual vs Predicted Values
plot(equine_data$comp, fitted(lm_bpm),
main = "Actual vs Predicted Compliance Time",
xlab = "Actual Compliance Time (Seconds)",
ylab = "Predicted Compliance Time (Seconds)",
col = "blue", pch = 19)
abline(a = 0, b = 1, col = "red", lty = 2) # Diagonal line for reference