R-Script Equine Compliance Time

Author

N1298801

Initial File and Library Load

# Load required libraries
library("readxl")
library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("readr")
library("knitr")
library("ggplot2")
library(dplyr)

# Set working directory
setwd("/Users/parisfenna/Downloads")

# Load datasets
equine <- read_excel("equine.xlsx")
eqsub <- read_excel("eqsub.xlsx")

Data Summaries

Summary of Data

# Create a summary table for numeric columns in the baseline test dataset
equine_summary <- equine %>%
  select(where(is.numeric)) %>% # Select only numeric columns
  summarise(
    across(
      everything(),
      list(
        Mean = ~mean(.x, na.rm = TRUE),
        Median = ~median(.x, na.rm = TRUE),
        Min = ~min(.x, na.rm = TRUE),
        Max = ~max(.x, na.rm = TRUE)
      )
    )
  )

# Transform the output into a clean, readable format
equine_summary <- equine_summary %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", "Statistic"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = "Statistic",
    values_from = "Value"
  )

# Display the final summary table
equine_summary
# A tibble: 6 × 5
  Variable  Mean Median   Min   Max
  <chr>    <dbl>  <dbl> <dbl> <dbl>
1 comp      35.1   35.0  30.8  40.1
2 weight    87.9   87.8  65.1 111. 
3 irt       35.5   35.4  33.0  38.0
4 air       23.0   23.1  20.0  26.0
5 cortisol  82.0   82.4  50.0 112. 
6 BPM      150.   150.  145.  157. 
#Display table 
equine_summary %>%
  kable(
    col.names = c("Variable", "Mean", "Median", "Min", "Max"),
    caption = "Summary of Variables of Baseline Trial"
  )
Summary of Variables of Baseline Trial
Variable Mean Median Min Max
comp 35.12474 35.04061 30.77585 40.08356
weight 87.92709 87.81711 65.10202 110.94466
irt 35.53856 35.43304 33.00454 37.99978
air 23.01925 23.11390 20.00503 25.98523
cortisol 81.99811 82.42856 50.02600 112.44503
BPM 150.13649 150.13792 144.64474 156.82916
# Create a summary table for numeric columns in the 'eqsub' dataset
eqsub_summary <- eqsub %>%
  select(where(is.numeric)) %>% # Select only numeric columns
  summarise(
    across(
      everything(),
      list(
        Mean = ~mean(.x, na.rm = TRUE),
        Median = ~median(.x, na.rm = TRUE),
        Min = ~min(.x, na.rm = TRUE),
        Max = ~max(.x, na.rm = TRUE)
      )
    )
  )

# Transform the output into a clean, readable format
eqsub_summary <- eqsub_summary %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", "Statistic"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = "Statistic",
    values_from = "Value"
  )

# Display the summary table
eqsub_summary
# A tibble: 2 × 5
  Variable  Mean Median   Min   Max
  <chr>    <dbl>  <dbl> <dbl> <dbl>
1 comp      35.1   35.0  30.8  40.1
2 comp2     29.9   30.0  27.4  32.8
# Display table for eqsub dataset
eqsub_summary %>%
  kable(
    col.names = c("Variable", "Mean", "Median", "Min", "Max"),
    caption = "Summary of Variables of Calming Spray Trial"
  )
Summary of Variables of Calming Spray Trial
Variable Mean Median Min Max
comp 35.12474 35.04061 30.77585 40.08356
comp2 29.94209 29.98684 27.36538 32.81070

Sex Distribution

# Create table for sex distribution
table(equine$sex)

Female   Male 
   249    249 

Standard Deviation of Weight

# Calculate standard deviation of weight
sd(equine$weight)
[1] 13.45918

IRT vs Cortisol Analysis

Check for Normality

# Shapiro-Wilk test for IRT
shapiro_irt <- shapiro.test(equine$irt)
shapiro_irt

    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10
# Shapiro-Wilk test for Cortisol
shapiro_cortisol <- shapiro.test(equine$cortisol)
shapiro_cortisol

    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10

Spearman’s Rank Correlation

# Spearman's rank correlation
correlation_result <- cor.test(equine$irt, equine$cortisol, method = "spearman")
correlation_result

    Spearman's rank correlation rho

data:  equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346 

Scatter Plot

# Scatter plot of IRT vs Cortisol
ggplot(equine, aes(x = irt, y = cortisol)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "blue", fill = "lightblue", se = TRUE) +
  theme_minimal() +
  labs(
    title = "Relationship Between IRT and Cortisol",
    x = "IRT (°C)",
    y = "Cortisol (ng/mL)"
  )
`geom_smooth()` using formula = 'y ~ x'

Effect of Spray on Compliance

Data Summaries

# Summary statistics for comp and comp2
summary(eqsub$comp)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  30.78   34.16   35.04   35.12   36.05   40.08 
summary(eqsub$comp2)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  27.37   29.23   29.99   29.94   30.65   32.81 

Calculating Means and SD Compliance Time

# Mean and standard deviation for compliance time before the spray
mean(eqsub$comp)
[1] 35.12474
sd(eqsub$comp)
[1] 1.430316
# Mean and standard deviation for compliance time after the spray
mean(eqsub$comp2)
[1] 29.94209
sd(eqsub$comp2)
[1] 1.046679

Normality Test

# Calculate differences and test normality
differences <- eqsub$comp - eqsub$comp2
shapiro_result <- shapiro.test(differences)
shapiro_result

    Shapiro-Wilk normality test

data:  differences
W = 0.84738, p-value < 2.2e-16

Paired t-Test

# Perform Paired t-Test
t_test_result <- t.test(eqsub$comp, eqsub$comp2, paired = TRUE)
t_test_result

    Paired t-test

data:  eqsub$comp and eqsub$comp2
t = 268.31, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 5.144700 5.220602
sample estimates:
mean difference 
       5.182651 

Boxplot

# Create long-format data frame
Condition <- rep(c("Before Spray", "After Spray"), each = nrow(eqsub))
ComplianceTime <- c(eqsub$comp, eqsub$comp2)
eqsub_long <- data.frame(Condition, ComplianceTime)

# Re-order Condition levels
eqsub_long$Condition <- factor(eqsub_long$Condition, levels = c("After Spray", "Before Spray"))

# Create boxplot
ggplot(eqsub_long, aes(x = Condition, y = ComplianceTime, fill = Condition)) +
  geom_boxplot(alpha = 0.7) +
  theme_minimal() +
  scale_fill_manual(values = c("Before Spray" = "skyblue", "After Spray" = "blue")) +
  labs(
    title = "Effect of Calming Spray on Compliance Times",
    x = "Condition",
    y = "Compliance Time (seconds)",
    fill = "Condition"
  )

Predicting Compliance Time

Linear Regression

# Fit linear regression model
model <- lm(comp ~ irt + BPM + cortisol, data = equine)
summary(model)

Call:
lm(formula = comp ~ irt + BPM + cortisol, data = equine)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.84567 -0.66433  0.00228  0.58716  2.79692 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.964e+01  3.530e+00 -11.228   <2e-16 ***
irt         -2.988e-02  3.176e-02  -0.941    0.347    
BPM          5.055e-01  2.223e-02  22.738   <2e-16 ***
cortisol    -8.752e-04  2.596e-03  -0.337    0.736    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.002 on 494 degrees of freedom
Multiple R-squared:  0.5121,    Adjusted R-squared:  0.5091 
F-statistic: 172.8 on 3 and 494 DF,  p-value: < 2.2e-16

Scatter Plot of Compliance Time vs BPM

# Scatter plot of Compliance Time vs BPM
ggplot(equine, aes(x = BPM, y = comp)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "blue", se = TRUE) +
  theme_minimal() +
  labs(
    title = "Scatter Plot of Compliance Time vs BPM",
    x = "BPM (Heart Rate)",
    y = "Compliance Time (seconds)"
  )
`geom_smooth()` using formula = 'y ~ x'

Boxplot by BPM Bins

# Create BPM bins
equine$BPM_bin <- cut(equine$BPM, breaks = seq(140, 160, by = 5), include.lowest = TRUE)

# Boxplot for BPM ranges
ggplot(equine, aes(x = BPM_bin, y = comp)) +
  geom_boxplot(fill = "skyblue", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Compliance Time Across BPM Ranges",
    x = "BPM Range (Heart Rate)",
    y = "Compliance Time (seconds)"
  )