# Load necessary libraries
library(dplyr)

# Set seed for reproducibility
set.seed(123)

# Number of subjects
n_subjects <- 30

#' Generate a synthetic muscle-fiber / running-performance data set
#'
#' Builds three per-subject tables (fiber type distribution, 3000 m time
#' trials at three time points, and delta efficiency) and joins them on
#' `Subject_ID`.
#'
#' @param n_subjects Number of subjects to simulate.
#' @return A data frame in long format with three rows per subject (one per
#'   `Time_Point`: "Pre", "Middle", "Post") and the columns `Subject_ID`,
#'   `Total_Fibers`, `Fiber_Type_1_Percentage`, `Fiber_Type_2_Percentage`,
#'   `Fiber_Type_1_Count`, `Fiber_Type_2_Count`, `Time_Point`,
#'   `Running_Time` and `Delta_Efficiency`. The subject-level columns are
#'   repeated on each of a subject's three rows.
simulate_data <- function(n_subjects) {
  # seq_len() rather than 1:n so that zero subjects yields an empty ID vector
  # instead of c(1, 0).
  subject_ids <- seq_len(n_subjects)

  # Data 1: Fiber type distributions
  fiber_data <- data.frame(
    Subject_ID = subject_ids,
    Total_Fibers = sample(200:2000, n_subjects, replace = TRUE),
    # Beta distribution for a heavier Type 1 split
    Fiber_Type_1_Percentage = round(rbeta(n_subjects, 4, 3) * 100, 1)
  ) %>%
    mutate(
      # The two percentages sum to 100 and the two counts sum to Total_Fibers.
      Fiber_Type_2_Percentage = 100 - Fiber_Type_1_Percentage,
      Fiber_Type_1_Count = round(Total_Fibers * Fiber_Type_1_Percentage / 100),
      Fiber_Type_2_Count = Total_Fibers - Fiber_Type_1_Count
    )

  # Data 2: 3000m running time trials (pre, middle, post)
  trial_data <- data.frame(
    Subject_ID = rep(subject_ids, each = 3),
    Time_Point = rep(c("Pre", "Middle", "Post"), times = n_subjects)
  ) %>%
    # Grouping makes each rnorm(1, ...) below draw once per subject.
    group_by(Subject_ID) %>%
    mutate(
      # One baseline time per subject, recycled over the subject's three rows.
      Pre_Time = round(rnorm(1, mean = 650, sd = 50), 0),
      Running_Time = case_when(
        Time_Point == "Pre" ~ Pre_Time,
        # Middle/Post are centred on 90% / 80% of the baseline; max(..., 500)
        # keeps a simulated time from dropping below 500.
        Time_Point == "Middle" ~
          max(round(rnorm(1, mean = Pre_Time * 0.9, sd = 30), 0), 500),
        Time_Point == "Post" ~
          max(round(rnorm(1, mean = Pre_Time * 0.8, sd = 30), 0), 500)
      )
    ) %>%
    ungroup() %>%
    # Pre_Time was only a helper; the "Pre" row already carries its value.
    select(-Pre_Time)

  # Data 3: Delta efficiency
  delta_efficiency_data <- data.frame(
    Subject_ID = subject_ids,
    # Random values in a reasonable range
    Delta_Efficiency = round(runif(n_subjects, min = 15, max = 30), 1)
  )

  # Merge all data into one data frame
  fiber_data %>%
    left_join(trial_data, by = "Subject_ID") %>%
    left_join(delta_efficiency_data, by = "Subject_ID")
}

# Generate the simulated data
simulated_data <- simulate_data(n_subjects)
Table
Characteristic
Overall N = 90¹
Middle N = 30¹
Post N = 30¹
Pre N = 30¹
Fiber Type 1 (%)
59 (17)
59 (18)
59 (18)
59 (18)
Fiber Type 2 (%)
41 (17)
41 (18)
41 (18)
41 (18)
Running Time (seconds)
589 (65)
587 (44)
528 (33)
652 (46)
Delta Efficiency (%)
21.9 (5.0)
21.9 (5.0)
21.9 (5.0)
21.9 (5.0)
¹ Mean (SD)
The data
The data
Improvement over time
Checking assumptions
Distribution
Levene’s test
Correlations
# Correlation 1: % Fiber Type I vs. total percentage improvement,
# using the "Post" rows only (one row per subject).
# NOTE(review): the original label read "Running Time (Post)", but the code
# correlates against Percentage_Improvement_Total. That column is presumably
# added to simulated_data in a step not shown here -- confirm.
corr1_data <- simulated_data %>%
  filter(Time_Point == "Post") %>%
  select(Fiber_Type_1_Percentage, Percentage_Improvement_Total)

# Using the package "stats" function: cor
corr1 <- cor(
  corr1_data$Fiber_Type_1_Percentage,
  corr1_data$Percentage_Improvement_Total
)
Correlations
Correlations
Correlations
Correlations
Regressions
$Y = \beta_0 + \beta_1(\text{FT I}) + \epsilon$
# Simple linear regression: Improvement explained by Fiber_Type_1 alone.
# NOTE(review): `merged_data` and the columns Improvement / Fiber_Type_1 are
# presumably prepared in a step not shown here -- confirm before running.
lm_model <- lm(
  formula = Improvement ~ Fiber_Type_1,
  data = merged_data
)
Regressions
$Y = \beta_0 + \beta_1(\text{FT I}) + \beta_2(\text{Sex}) + \epsilon$
# Additive model: Improvement explained by Fiber_Type_1 and Sex (main effects
# only, no interaction).
# NOTE(review): `merged_data` and the columns used here are presumably
# prepared in a step not shown here -- confirm before running.
lm2 <- lm(
  formula = Improvement ~ Fiber_Type_1 + Sex,
  data = merged_data
)
Regressions
$Y = \beta_0 + \beta_1(\text{FT I}) + \beta_2(\text{Sex}) + \beta_3(\text{FT I} \times \text{Sex}) + \epsilon$
# Interaction model: `Fiber_Type_1 * Sex` expands to both main effects plus
# the Fiber_Type_1:Sex interaction term.
# NOTE(review): `merged_data` and the columns used here are presumably
# prepared in a step not shown here -- confirm before running.
lm3 <- lm(
  formula = Improvement ~ Fiber_Type_1 * Sex,
  data = merged_data
)