# Load necessary libraries for data manipulation, ART ANOVA, mixed models, and post-hoc tests
library(dplyr)      # For data manipulation
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ARTool)     # For Aligned Rank Transform (ART) ANOVA
library(lme4)       # For fitting linear mixed-effects models
## Loading required package: Matrix
library(multcomp)   # For post-hoc comparisons
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
library(emmeans)    # For estimated marginal means
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
# Set seed for reproducibility (ensures that the random data generated is the same each time)
set.seed(123)
# Create random data with 2900 observations
data <- data.frame(
  Subject = as.factor(rep(1:58, each = 50)),  # 58 subjects, each with approximately 50 measurements (2900 / 58 ≈ 50)
  Exercise = as.factor(sample(c("Low", "Moderate", "High"), 2900, replace = TRUE)),  # Randomly assign exercise levels
  Diet = as.factor(sample(c("A", "B"), 2900, replace = TRUE)),  # Randomly assign diet types
  BloodPressureReduction = rnorm(2900, mean = 10, sd = 5) # Simulate blood pressure reduction
)

# Display the first few rows of the dataset
head(data)
##   Subject Exercise Diet BloodPressureReduction
## 1       1     High    A               3.320685
## 2       1     High    A               3.317583
## 3       1     High    B               5.801795
## 4       1 Moderate    B              12.031272
## 5       1     High    B              10.337532
## 6       1 Moderate    A               7.608363

# Convert the Exercise and Diet columns to factors (ensures that they are treated as categorical variables)
data$Exercise <- as.factor(data$Exercise)
data$Diet <- as.factor(data$Diet)

# Set sum contrasts for the factors (this coding scheme is used for the ART ANOVA)
contrasts(data$Exercise) <- "contr.sum"  # Apply sum contrasts to the Exercise factor
contrasts(data$Diet) <- "contr.sum"      # Apply sum contrasts to the Diet factor

# Verify that contrasts have been set correctly for each factor
contrasts(data$Exercise)  # Show the contrast matrix for Exercise
##          [,1] [,2]
## High        1    0
## Low         0    1
## Moderate   -1   -1
contrasts(data$Diet)      # Show the contrast matrix for Diet
##   [,1]
## A    1
## B   -1

# Apply ART ANOVA to the data
art_model <- art(BloodPressureReduction ~ Exercise * Diet + (1|Subject), data = data)

anova(art_model )
## Analysis of Variance of Aligned Rank Transformed Data
## 
## Table Type: Analysis of Deviance Table (Type III Wald F tests with Kenward-Roger df) 
## Model: Mixed Effects (lmer)
## Response: art(BloodPressureReduction)
## 
##                       F Df Df.res  Pr(>F)  
## 1 Exercise      2.19528  2 2890.8 0.11151  
## 2 Diet          1.50975  1 2891.7 0.21928  
## 3 Exercise:Diet 0.44628  2 2893.8 0.64005  
## ---
## Signif. codes:   0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# For demonstration purposes, simulate the creation of ART-transformed columns
# This is a placeholder to illustrate how transformed data would be used
# In practice, these columns would be created by the ART process itself. # Example of transforming data
data$ART_BloodPressureReduction_for_Exercise <- scale(data$BloodPressureReduction)  
data$ART_BloodPressureReduction_for_Diet <- scale(data$BloodPressureReduction)
data$ART_BloodPressureReduction_for_Exercise_Diet <- scale(data$BloodPressureReduction)

# For demonstration purposes, simulate the creation of ART-transformed columns
# This is a placeholder to illustrate how transformed data would be used
# In practice, these columns would be created by the ART process itself
data$ART_BloodPressureReduction_for_Exercise <- scale(data$BloodPressureReduction)  # Example of transforming data
data$ART_BloodPressureReduction_for_Diet <- scale(data$BloodPressureReduction)
data$ART_BloodPressureReduction_for_Exercise_Diet <- scale(data$BloodPressureReduction)

# Fit a linear mixed model using the ART-transformed data
# This model analyzes the effects of Exercise and Diet on Blood Pressure Reduction, accounting for random effects due to Subject
m5 <- lmer(ART_BloodPressureReduction_for_Exercise_Diet ~ Exercise * Diet + (1|Subject), data = data)

# Output a summary of the linear mixed model
# Provides estimates of fixed effects, random effects, and overall model fit statistics
summary(m5)
## Linear mixed model fit by REML ['lmerMod']
## Formula: ART_BloodPressureReduction_for_Exercise_Diet ~ Exercise * Diet +  
##     (1 | Subject)
##    Data: data
## 
## REML criterion at convergence: 8256.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7750 -0.6702 -0.0094  0.6859  3.6145 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Subject  (Intercept) 0.001478 0.03844 
##  Residual             0.997975 0.99899 
## Number of obs: 2900, groups:  Subject, 58
## 
## Fixed effects:
##                   Estimate Std. Error t value
## (Intercept)      0.0003088  0.0192459   0.016
## Exercise1        0.0447608  0.0260351   1.719
## Exercise2       -0.0438248  0.0263979  -1.660
## Diet1            0.0240362  0.0185875   1.293
## Exercise1:Diet1  0.0049646  0.0260386   0.191
## Exercise2:Diet1  0.0201130  0.0263912   0.762
## 
## Correlation of Fixed Effects:
##             (Intr) Exrcs1 Exrcs2 Diet1  Ex1:D1
## Exercise1   -0.025                            
## Exercise2    0.011 -0.492                     
## Diet1        0.036 -0.011  0.016              
## Exercs1:Dt1 -0.010  0.031 -0.022 -0.026       
## Exercs2:Dt1  0.015 -0.023  0.048  0.012 -0.492

# Perform post-hoc pairwise comparisons for the interaction between Exercise and Diet
# This is useful if the interaction term is significant and you want to compare specific levels
posthoc <- glht(m5, emm(pairwise ~ Exercise * Diet), test = adjusted(type = "holm"))
## Note: df set to 2891
# Output the summary of the post-hoc tests
# Shows which specific pairs of factor levels differ from each other, adjusted for multiple comparisons
summary(posthoc)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Fit: lmer(formula = ART_BloodPressureReduction_for_Exercise_Diet ~ 
##     Exercise * Diet + (1 | Subject), data = data)
## 
## Linear Hypotheses:
##                                Estimate Std. Error t value Pr(>|t|)
## High A - Low A == 0           0.0734371  0.0653690   1.123    0.872
## High A - Moderate A == 0      0.0757390  0.0649513   1.166    0.853
## High A - High B == 0          0.0580016  0.0631792   0.918    0.942
## High A - Low B == 0           0.1617356  0.0634493   2.549    0.111
## High A - Moderate B == 0      0.0736563  0.0639869   1.151    0.860
## Low A - Moderate A == 0       0.0023019  0.0663960   0.035    1.000
## Low A - High B == 0          -0.0154355  0.0646616  -0.239    1.000
## Low A - Low B == 0            0.0882985  0.0649185   1.360    0.751
## Low A - Moderate B == 0       0.0002192  0.0654580   0.003    1.000
## Moderate A - High B == 0     -0.0177373  0.0642265  -0.276    1.000
## Moderate A - Low B == 0       0.0859966  0.0645031   1.333    0.766
## Moderate A - Moderate B == 0 -0.0020826  0.0650359  -0.032    1.000
## High B - Low B == 0           0.1037340  0.0627068   1.654    0.562
## High B - Moderate B == 0      0.0156547  0.0632533   0.247    1.000
## Low B - Moderate B == 0      -0.0880793  0.0635314  -1.386    0.735
## (Adjusted p values reported -- single-step method)