── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("readr")library("knitr")library("ggplot2")library(dplyr)# Set working directorysetwd("/Users/parisfenna/Downloads")# Load datasetsequine <-read_excel("equine.xlsx")eqsub <-read_excel("eqsub.xlsx")
Data Summaries
Summary of Data
# Create a summary table for numeric columns in the baseline test datasetequine_summary <- equine %>%select(where(is.numeric)) %>%# Select only numeric columnssummarise(across(everything(),list(Mean =~mean(.x, na.rm =TRUE),Median =~median(.x, na.rm =TRUE),Min =~min(.x, na.rm =TRUE),Max =~max(.x, na.rm =TRUE) ) ) )# Transform the output into a clean, readable formatequine_summary <- equine_summary %>%pivot_longer(cols =everything(),names_to =c("Variable", "Statistic"),names_sep ="_",values_to ="Value" ) %>%pivot_wider(names_from ="Statistic",values_from ="Value" )# Display the final summary tableequine_summary
# A tibble: 6 × 5
Variable Mean Median Min Max
<chr> <dbl> <dbl> <dbl> <dbl>
1 comp 35.1 35.0 30.8 40.1
2 weight 87.9 87.8 65.1 111.
3 irt 35.5 35.4 33.0 38.0
4 air 23.0 23.1 20.0 26.0
5 cortisol 82.0 82.4 50.0 112.
6 BPM 150. 150. 145. 157.
#Display table equine_summary %>%kable(col.names =c("Variable", "Mean", "Median", "Min", "Max"),caption ="Summary of Variables of Baseline Trial" )
Summary of Variables of Baseline Trial
Variable
Mean
Median
Min
Max
comp
35.12474
35.04061
30.77585
40.08356
weight
87.92709
87.81711
65.10202
110.94466
irt
35.53856
35.43304
33.00454
37.99978
air
23.01925
23.11390
20.00503
25.98523
cortisol
81.99811
82.42856
50.02600
112.44503
BPM
150.13649
150.13792
144.64474
156.82916
# Create a summary table for numeric columns in the 'eqsub' dataseteqsub_summary <- eqsub %>%select(where(is.numeric)) %>%# Select only numeric columnssummarise(across(everything(),list(Mean =~mean(.x, na.rm =TRUE),Median =~median(.x, na.rm =TRUE),Min =~min(.x, na.rm =TRUE),Max =~max(.x, na.rm =TRUE) ) ) )# Transform the output into a clean, readable formateqsub_summary <- eqsub_summary %>%pivot_longer(cols =everything(),names_to =c("Variable", "Statistic"),names_sep ="_",values_to ="Value" ) %>%pivot_wider(names_from ="Statistic",values_from ="Value" )# Display the summary tableeqsub_summary
# A tibble: 2 × 5
Variable Mean Median Min Max
<chr> <dbl> <dbl> <dbl> <dbl>
1 comp 35.1 35.0 30.8 40.1
2 comp2 29.9 30.0 27.4 32.8
# Display table for eqsub dataseteqsub_summary %>%kable(col.names =c("Variable", "Mean", "Median", "Min", "Max"),caption ="Summary of Variables of Calming Spray Trial" )
Summary of Variables of Calming Spray Trial
Variable
Mean
Median
Min
Max
comp
35.12474
35.04061
30.77585
40.08356
comp2
29.94209
29.98684
27.36538
32.81070
Sex Distribution
# Create table for sex distributiontable(equine$sex)
Female Male
249 249
Standard Deviation of Weight
# Calculate standard deviation of weightsd(equine$weight)
[1] 13.45918
IRT vs Cortisol Analysis
Check for Normality
# Shapiro-Wilk test for IRTshapiro_irt <-shapiro.test(equine$irt)shapiro_irt
Shapiro-Wilk normality test
data: equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk test for Cortisolshapiro_cortisol <-shapiro.test(equine$cortisol)shapiro_cortisol
Shapiro-Wilk normality test
data: equine$cortisol
W = 0.96362, p-value = 9.245e-10
Spearman's rank correlation rho
data: equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
0.1314346
Scatter Plot
# Scatter plot of IRT vs Cortisolggplot(equine, aes(x = irt, y = cortisol)) +geom_point(alpha =0.7) +geom_smooth(method ="lm", color ="blue", fill ="lightblue", se =TRUE) +theme_minimal() +labs(title ="Relationship Between IRT and Cortisol",x ="IRT (°C)",y ="Cortisol (ng/mL)" )
`geom_smooth()` using formula = 'y ~ x'
Effect of Spray on Compliance
Data Summaries
# Summary statistics for comp and comp2summary(eqsub$comp)
Min. 1st Qu. Median Mean 3rd Qu. Max.
30.78 34.16 35.04 35.12 36.05 40.08
summary(eqsub$comp2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
27.37 29.23 29.99 29.94 30.65 32.81
Calculating Means and SD Compliance Time
# Mean and standard deviation for compliance time before the spraymean(eqsub$comp)
[1] 35.12474
sd(eqsub$comp)
[1] 1.430316
# Mean and standard deviation for compliance time after the spraymean(eqsub$comp2)
[1] 29.94209
sd(eqsub$comp2)
[1] 1.046679
Normality Test
# Calculate differences and test normalitydifferences <- eqsub$comp - eqsub$comp2shapiro_result <-shapiro.test(differences)shapiro_result
Shapiro-Wilk normality test
data: differences
W = 0.84738, p-value < 2.2e-16
Paired t-test
data: eqsub$comp and eqsub$comp2
t = 268.31, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
5.144700 5.220602
sample estimates:
mean difference
5.182651
Boxplot
# Create long-format data frameCondition <-rep(c("Before Spray", "After Spray"), each =nrow(eqsub))ComplianceTime <-c(eqsub$comp, eqsub$comp2)eqsub_long <-data.frame(Condition, ComplianceTime)# Re-order Condition levelseqsub_long$Condition <-factor(eqsub_long$Condition, levels =c("After Spray", "Before Spray"))# Create boxplotggplot(eqsub_long, aes(x = Condition, y = ComplianceTime, fill = Condition)) +geom_boxplot(alpha =0.7) +theme_minimal() +scale_fill_manual(values =c("Before Spray"="skyblue", "After Spray"="blue")) +labs(title ="Effect of Calming Spray on Compliance Times",x ="Condition",y ="Compliance Time (seconds)",fill ="Condition" )
Predicting Compliance Time
Linear Regression
# Fit linear regression modelmodel <-lm(comp ~ irt + BPM + cortisol, data = equine)summary(model)
Call:
lm(formula = comp ~ irt + BPM + cortisol, data = equine)
Residuals:
Min 1Q Median 3Q Max
-2.84567 -0.66433 0.00228 0.58716 2.79692
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.964e+01 3.530e+00 -11.228 <2e-16 ***
irt -2.988e-02 3.176e-02 -0.941 0.347
BPM 5.055e-01 2.223e-02 22.738 <2e-16 ***
cortisol -8.752e-04 2.596e-03 -0.337 0.736
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.002 on 494 degrees of freedom
Multiple R-squared: 0.5121, Adjusted R-squared: 0.5091
F-statistic: 172.8 on 3 and 494 DF, p-value: < 2.2e-16
Scatter Plot of Compliance Time vs BPM
# Scatter plot of Compliance Time vs BPMggplot(equine, aes(x = BPM, y = comp)) +geom_point(alpha =0.7) +geom_smooth(method ="lm", color ="blue", se =TRUE) +theme_minimal() +labs(title ="Scatter Plot of Compliance Time vs BPM",x ="BPM (Heart Rate)",y ="Compliance Time (seconds)" )
`geom_smooth()` using formula = 'y ~ x'
Boxplot by BPM Bins
# Create BPM binsequine$BPM_bin <-cut(equine$BPM, breaks =seq(140, 160, by =5), include.lowest =TRUE)# Boxplot for BPM rangesggplot(equine, aes(x = BPM_bin, y = comp)) +geom_boxplot(fill ="skyblue", alpha =0.7) +theme_minimal() +labs(title ="Compliance Time Across BPM Ranges",x ="BPM Range (Heart Rate)",y ="Compliance Time (seconds)" )