R-Script Equine Compliance Time

Author

N1298801

Initial File and Library Load

# Load required libraries
library("readxl")
library("tidyverse")

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library("readr")
library("knitr")
library("ggplot2")
library(dplyr)

# Set working directory
setwd("/Users/parisfenna/Downloads")

# Load datasets
equine <- read_excel("equine.xlsx")
eqsub <- read_excel("eqsub.xlsx")

Data Summaries

Summary of Data

# Create a summary table for numeric columns in the baseline test dataset
equine_summary <- equine %>%
  select(where(is.numeric)) %>% # Select only numeric columns
  summarise(
    across(
      everything(),
      list(
        Mean = ~mean(.x, na.rm = TRUE),
        Median = ~median(.x, na.rm = TRUE),
        Min = ~min(.x, na.rm = TRUE),
        Max = ~max(.x, na.rm = TRUE)
      )
    )
  )

# Transform the output into a clean, readable format
equine_summary <- equine_summary %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", "Statistic"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = "Statistic",
    values_from = "Value"
  )

# Display the final summary table
equine_summary

# A tibble: 6 × 5
  Variable  Mean Median   Min   Max
  <chr>    <dbl>  <dbl> <dbl> <dbl>
1 comp      35.1   35.0  30.8  40.1
2 weight    87.9   87.8  65.1 111. 
3 irt       35.5   35.4  33.0  38.0
4 air       23.0   23.1  20.0  26.0
5 cortisol  82.0   82.4  50.0 112. 
6 BPM      150.   150.  145.  157.

#Display table 
equine_summary %>%
  kable(
    col.names = c("Variable", "Mean", "Median", "Min", "Max"),
    caption = "Summary of Variables of Baseline Trial"
  )

Summary of Variables of Baseline Trial
Variable	Mean	Median	Min	Max
comp	35.12474	35.04061	30.77585	40.08356
weight	87.92709	87.81711	65.10202	110.94466
irt	35.53856	35.43304	33.00454	37.99978
air	23.01925	23.11390	20.00503	25.98523
cortisol	81.99811	82.42856	50.02600	112.44503
BPM	150.13649	150.13792	144.64474	156.82916

# Create a summary table for numeric columns in the 'eqsub' dataset
eqsub_summary <- eqsub %>%
  select(where(is.numeric)) %>% # Select only numeric columns
  summarise(
    across(
      everything(),
      list(
        Mean = ~mean(.x, na.rm = TRUE),
        Median = ~median(.x, na.rm = TRUE),
        Min = ~min(.x, na.rm = TRUE),
        Max = ~max(.x, na.rm = TRUE)
      )
    )
  )

# Transform the output into a clean, readable format
eqsub_summary <- eqsub_summary %>%
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", "Statistic"),
    names_sep = "_",
    values_to = "Value"
  ) %>%
  pivot_wider(
    names_from = "Statistic",
    values_from = "Value"
  )

# Display the summary table
eqsub_summary

# A tibble: 2 × 5
  Variable  Mean Median   Min   Max
  <chr>    <dbl>  <dbl> <dbl> <dbl>
1 comp      35.1   35.0  30.8  40.1
2 comp2     29.9   30.0  27.4  32.8

# Display table for eqsub dataset
eqsub_summary %>%
  kable(
    col.names = c("Variable", "Mean", "Median", "Min", "Max"),
    caption = "Summary of Variables of Calming Spray Trial"
  )

Summary of Variables of Calming Spray Trial
Variable	Mean	Median	Min	Max
comp	35.12474	35.04061	30.77585	40.08356
comp2	29.94209	29.98684	27.36538	32.81070

Sex Distribution

# Create table for sex distribution
table(equine$sex)


Female   Male 
   249    249

Standard Deviation of Weight

# Calculate standard deviation of weight
sd(equine$weight)

[1] 13.45918

IRT vs Cortisol Analysis

Check for Normality

# Shapiro-Wilk test for IRT
shapiro_irt <- shapiro.test(equine$irt)
shapiro_irt


    Shapiro-Wilk normality test

data:  equine$irt
W = 0.95889, p-value = 1.447e-10

# Shapiro-Wilk test for Cortisol
shapiro_cortisol <- shapiro.test(equine$cortisol)
shapiro_cortisol


    Shapiro-Wilk normality test

data:  equine$cortisol
W = 0.96362, p-value = 9.245e-10

Spearman’s Rank Correlation

# Spearman's rank correlation
correlation_result <- cor.test(equine$irt, equine$cortisol, method = "spearman")
correlation_result


    Spearman's rank correlation rho

data:  equine$irt and equine$cortisol
S = 17878766, p-value = 0.00332
alternative hypothesis: true rho is not equal to 0
sample estimates:
      rho 
0.1314346

Scatter Plot

# Scatter plot of IRT vs Cortisol
ggplot(equine, aes(x = irt, y = cortisol)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "blue", fill = "lightblue", se = TRUE) +
  theme_minimal() +
  labs(
    title = "Relationship Between IRT and Cortisol",
    x = "IRT (°C)",
    y = "Cortisol (ng/mL)"
  )

`geom_smooth()` using formula = 'y ~ x'

Effect of Spray on Compliance

Data Summaries

# Summary statistics for comp and comp2
summary(eqsub$comp)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  30.78   34.16   35.04   35.12   36.05   40.08

summary(eqsub$comp2)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  27.37   29.23   29.99   29.94   30.65   32.81

Calculating Means and SD Compliance Time

# Mean and standard deviation for compliance time before the spray
mean(eqsub$comp)

[1] 35.12474

sd(eqsub$comp)

[1] 1.430316

# Mean and standard deviation for compliance time after the spray
mean(eqsub$comp2)

[1] 29.94209

sd(eqsub$comp2)

[1] 1.046679

Normality Test

# Calculate differences and test normality
differences <- eqsub$comp - eqsub$comp2
shapiro_result <- shapiro.test(differences)
shapiro_result


    Shapiro-Wilk normality test

data:  differences
W = 0.84738, p-value < 2.2e-16

Paired t-Test

# Perform Paired t-Test
t_test_result <- t.test(eqsub$comp, eqsub$comp2, paired = TRUE)
t_test_result


    Paired t-test

data:  eqsub$comp and eqsub$comp2
t = 268.31, df = 497, p-value < 2.2e-16
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 5.144700 5.220602
sample estimates:
mean difference 
       5.182651

Boxplot

# Create long-format data frame
Condition <- rep(c("Before Spray", "After Spray"), each = nrow(eqsub))
ComplianceTime <- c(eqsub$comp, eqsub$comp2)
eqsub_long <- data.frame(Condition, ComplianceTime)

# Re-order Condition levels
eqsub_long$Condition <- factor(eqsub_long$Condition, levels = c("After Spray", "Before Spray"))

# Create boxplot
ggplot(eqsub_long, aes(x = Condition, y = ComplianceTime, fill = Condition)) +
  geom_boxplot(alpha = 0.7) +
  theme_minimal() +
  scale_fill_manual(values = c("Before Spray" = "skyblue", "After Spray" = "blue")) +
  labs(
    title = "Effect of Calming Spray on Compliance Times",
    x = "Condition",
    y = "Compliance Time (seconds)",
    fill = "Condition"
  )

Predicting Compliance Time

Linear Regression

# Fit linear regression model
model <- lm(comp ~ irt + BPM + cortisol, data = equine)
summary(model)


Call:
lm(formula = comp ~ irt + BPM + cortisol, data = equine)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.84567 -0.66433  0.00228  0.58716  2.79692 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -3.964e+01  3.530e+00 -11.228   <2e-16 ***
irt         -2.988e-02  3.176e-02  -0.941    0.347    
BPM          5.055e-01  2.223e-02  22.738   <2e-16 ***
cortisol    -8.752e-04  2.596e-03  -0.337    0.736    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.002 on 494 degrees of freedom
Multiple R-squared:  0.5121,    Adjusted R-squared:  0.5091 
F-statistic: 172.8 on 3 and 494 DF,  p-value: < 2.2e-16

Scatter Plot of Compliance Time vs BPM

# Scatter plot of Compliance Time vs BPM
ggplot(equine, aes(x = BPM, y = comp)) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "lm", color = "blue", se = TRUE) +
  theme_minimal() +
  labs(
    title = "Scatter Plot of Compliance Time vs BPM",
    x = "BPM (Heart Rate)",
    y = "Compliance Time (seconds)"
  )

`geom_smooth()` using formula = 'y ~ x'

Boxplot by BPM Bins

# Create BPM bins
equine$BPM_bin <- cut(equine$BPM, breaks = seq(140, 160, by = 5), include.lowest = TRUE)

# Boxplot for BPM ranges
ggplot(equine, aes(x = BPM_bin, y = comp)) +
  geom_boxplot(fill = "skyblue", alpha = 0.7) +
  theme_minimal() +
  labs(
    title = "Compliance Time Across BPM Ranges",
    x = "BPM Range (Heart Rate)",
    y = "Compliance Time (seconds)"
  )