<-read.table('~/Equine Summative NEW/eqsub1.txt', header = TRUE, sep = "\t")
eqsub <-read.table('~/Equine Summative NEW/Equine.txt', header = TRUE, sep = "\t") equine
Research Summative NEW
Loading Data
1. Is there a Correlation Between IRT and Blood Cortisol Levels?
Testing for Normality
# Based on a normality testing, this question requires each variable to be tested separately
# Perform Kolmogorov-Smirnov test of IRT
ks.test(equine$irt, "pnorm", mean = mean(equine$irt), sd = sd(equine$irt))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$irt
D = 0.062317, p-value = 0.0418
alternative hypothesis: two-sided
# Perform Kolmogorov-Smirnov test of Cortisol
ks.test(equine$cortisol, "pnorm", mean = mean(equine$cortisol), sd = sd(equine$cortisol))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$cortisol
D = 0.061555, p-value = 0.04593
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
Results
cor.test(equine$irt, equine$cortisol, method = "spearman")
Spearman's rank correlation rho
data: equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
# P < 0.05 and r < 0.20 therefore there is a significant weak correlation between IRT and blood cortisol levels.
Scatter Plot
library(ggplot2)
# Call Variables
<- equine$irt
X <- equine$cortisol
Y
# Plot
ggplot(equine, aes(x = irt, y = cortisol)) +
geom_point(color = "black", size = 1) +
# Labeling Axis' and Titles
labs(title = "Correlation between IRT and Cortisol Levels",
x = "Thermographic Eye Temperature (°C)",
y = "Blood Cortisol Levels (mcg/dl)")
2. Does Calming Spray have an effect on compliance time, if so what is it?
Testing for Normality
# Based on a normality testing, this question requires the residuals to be tested
# Call Variables
<- eqsub$comp
X <- eqsub$comp2
Y
# Fit linear model
<- lm(X ~ Y)
model
# Extract residuals
<- residuals(model)
residuals
# Perform Kolmogorov-Smirnov test
ks.test(residuals, "pnorm", mean = mean(residuals), sd = sd(residuals))
Warning in ks.test.default(residuals, "pnorm", mean = mean(residuals), sd =
sd(residuals)): ties should not be present for the one-sample
Kolmogorov-Smirnov test
Asymptotic one-sample Kolmogorov-Smirnov test
data: residuals
D = 0.21835, p-value < 2.2e-16
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
Results
# Non-normal distribution -> Wilcoxon Signed-Rank Test
<- wilcox.test(X, Y, paired = TRUE)
wilcox_test print(wilcox_test)
Wilcoxon signed rank test with continuity correction
data: X and Y
V = 124251, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
# P < 0.05 therefore there is a significant difference
Box Plot
# Sample Data
<- data.frame(
df comp = eqsub$comp, # Compliance time before spray
comp2 = eqsub$comp2 # Compliance time after spray
)
# Load necessary libraries
library(ggplot2)
library(tidyr)
# Reshape data to long format
<- df %>%
df_long pivot_longer(cols = c(comp, comp2), names_to = "Variable", values_to = "Value")
# Create the box plot
ggplot(df_long, aes(x = Variable, y = Value, fill = Variable)) +
geom_boxplot() +
# Labeling Axis and Titles
labs(title = "The Difference in Compliance Time When Applying a Calming Spray",
x = "Treatment",
y = "Compliance Time (s)") +
# Add Colour to Variables
theme_minimal() +
scale_fill_manual(values = c("comp" = "skyblue", "comp2" = "lightpink"), name = "Treatment")
3. Is it possible to predict compliance time?
Testing normality
# Based on a normality testing, this question requires each variable to be tested separately
# Perform Kolmogorov-Smirnov test of weight
ks.test(equine$weight, "pnorm", mean = mean(equine$weight), sd = sd(equine$weight))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$weight
D = 0.085047, p-value = 0.001487
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of IRT
ks.test(equine$irt, "pnorm", mean = mean(equine$irt), sd = sd(equine$irt))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$irt
D = 0.062317, p-value = 0.0418
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of Cortisol
ks.test(equine$cortisol, "pnorm", mean = mean(equine$cortisol), sd = sd(equine$cortisol))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$cortisol
D = 0.061555, p-value = 0.04593
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of Air
ks.test(equine$air, "pnorm", mean = mean(equine$air), sd = sd(equine$air))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$air
D = 0.068116, p-value = 0.01968
alternative hypothesis: two-sided
# P < 0.05 therefore the data is not normally distributed
# Perform Kolmogorov-Smirnov test of BPM
ks.test(equine$BPM, "pnorm", mean = mean(equine$BPM), sd = sd(equine$BPM))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$BPM
D = 0.031659, p-value = 0.7004
alternative hypothesis: two-sided
# P > 0.05 therefore the data is normally distributed
# Perform Kolmogorov-Smirnov test of compliacne time
ks.test(equine$comp, "pnorm", mean = mean(equine$comp), sd = sd(equine$comp))
Asymptotic one-sample Kolmogorov-Smirnov test
data: equine$comp
D = 0.03136, p-value = 0.7115
alternative hypothesis: two-sided
# P > 0.05 therefore the data is normally distributed
Results
# Fit a linear regression model
<- lm(comp ~ weight + irt + air + cortisol + BPM, data = equine)
model
# View model summary
summary(model)
Call:
lm(formula = comp ~ weight + irt + air + cortisol + BPM, data = equine)
Residuals:
Min 1Q Median 3Q Max
-2.88796 -0.65339 0.00537 0.58312 2.79258
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.910e+01 3.641e+00 -10.739 <2e-16 ***
weight 1.325e-03 3.361e-03 0.394 0.694
irt -3.078e-02 3.191e-02 -0.964 0.335
air -2.256e-02 2.708e-02 -0.833 0.405
cortisol -8.361e-04 2.602e-03 -0.321 0.748
BPM 5.048e-01 2.228e-02 22.661 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.003 on 492 degrees of freedom
Multiple R-squared: 0.5129, Adjusted R-squared: 0.5079
F-statistic: 103.6 on 5 and 492 DF, p-value: < 2.2e-16
# Test all variables via Anova despite not all variables being normally distributed as parametric testing (Anova) is suitable due to the large sample size.
anova(model)
Analysis of Variance Table
Response: comp
Df Sum Sq Mean Sq F value Pr(>F)
weight 1 0.09 0.09 0.0856 0.76999
irt 1 1.39 1.39 1.3760 0.24135
air 1 3.05 3.05 3.0276 0.08248 .
cortisol 1 0.01 0.01 0.0140 0.90599
BPM 1 516.94 516.94 513.5131 < 2e-16 ***
Residuals 492 495.29 1.01
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Correlation Between comp and BPM
# P < 0.05 for BPM and cortisol therefore the data is normally distributed.
Results
# Pearson correlation test
cor.test(equine$comp, equine$BPM, method = c ("pearson"))
Pearson's product-moment correlation
data: equine$comp and equine$BPM
t = 22.767, df = 496, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.6689964 0.7552703
sample estimates:
cor
0.7148428
# P < 2.2e-16 and r= 0.71 therefore there is a significantly strong correlation.
<- equine
data
# Fit a linear regression model
<- lm(comp ~ BPM, data = data)
model
# View model summary
summary(model)
Call:
lm(formula = comp ~ BPM, data = data)
Residuals:
Min 1Q Median 3Q Max
-2.81905 -0.65308 -0.01023 0.59085 2.83181
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -40.79081 3.33482 -12.23 <2e-16 ***
BPM 0.50564 0.02221 22.77 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.001 on 496 degrees of freedom
Multiple R-squared: 0.511, Adjusted R-squared: 0.51
F-statistic: 518.3 on 1 and 496 DF, p-value: < 2.2e-16
# P < 2.2e-16 and R2= 0.51 therefore the model explained 51% of the variance, with BPM significantly contributing to the prediction
Scatter Plot
BPM
# Load ggplot2 library
library(ggplot2)
# Create scatter plot with regression line
ggplot(data, aes(x = comp, y = BPM)) +
# Add Scatter points
geom_point(color = "black", size = 1) +
# Add Regression line
geom_smooth(method = "lm", col = "blue") +
# Add title and Label Axis
labs(title = "Scatterplot with Linear Regression Line",
x = "Compliance time (s)",
y = "Heart Rate (BPM)") +
theme_minimal()
`geom_smooth()` using formula = 'y ~ x'