# Packages ----
library(dplyr)

# Fix the RNG state so every run produces the same synthetic data set.
set.seed(123)

# Cohort size used for the simulation below.
n_subjects <- 30

# Build one synthetic data set with three rows per subject (Pre, Middle, Post).
#
# Arguments:
#   n_subjects  Number of subjects to simulate.
#
# Returns:
#   A data frame with the columns Subject_ID, Total_Fibers,
#   Fiber_Type_1_Percentage, Fiber_Type_2_Percentage, Fiber_Type_1_Count,
#   Fiber_Type_2_Count, Time_Point, Running_Time and Delta_Efficiency.
#   Fiber and efficiency values are repeated on each of a subject's rows.
#
# The random draws happen in a fixed order (fibre totals, fibre split,
# running times, delta efficiency); keep that order to reproduce the same
# numbers for a given seed.
simulate_data <- function(n_subjects) {
  subject_ids <- 1:n_subjects

  # Part 1: fibre type distribution --------------------------------------
  total_fibers <- sample(200:2000, n_subjects, replace = TRUE)
  # Beta(4, 3) draws scaled to a percentage, which leans towards Type 1.
  type_1_pct <- round(rbeta(n_subjects, 4, 3) * 100, 1)
  type_1_count <- round(total_fibers * type_1_pct / 100)

  fibers <- data.frame(
    Subject_ID = subject_ids,
    Total_Fibers = total_fibers,
    Fiber_Type_1_Percentage = type_1_pct,
    Fiber_Type_2_Percentage = 100 - type_1_pct,
    Fiber_Type_1_Count = type_1_count,
    Fiber_Type_2_Count = total_fibers - type_1_count
  )

  # Part 2: 3000m running time trials (pre, middle, post) -----------------
  # One follow-up time: a single normal draw (sd 30) around `centre`, rounded
  # to a whole number and never allowed below 500. `centre` is constant within
  # a subject, so the single draw uses its first (identical) element.
  draw_follow_up <- function(centre) {
    max(round(rnorm(1, mean = centre, sd = 30), 0), 500)
  }

  trial_grid <- data.frame(
    Subject_ID = rep(subject_ids, each = 3),
    Time_Point = rep(c("Pre", "Middle", "Post"), times = n_subjects)
  )

  trials <- trial_grid %>%
    group_by(Subject_ID) %>%
    mutate(
      # One baseline per subject, recycled across that subject's three rows.
      Pre_Time = round(rnorm(1, mean = 650, sd = 50), 0),
      # Middle and Post are centred on 90% and 80% of the baseline.
      Running_Time = case_when(
        Time_Point == "Pre" ~ Pre_Time,
        Time_Point == "Middle" ~ draw_follow_up(Pre_Time * 0.9),
        Time_Point == "Post" ~ draw_follow_up(Pre_Time * 0.8)
      )
    ) %>%
    ungroup() %>%
    select(-Pre_Time)

  # Part 3: delta efficiency ----------------------------------------------
  # Uniform draws between 15 and 30, rounded to one decimal.
  efficiency <- data.frame(
    Subject_ID = subject_ids,
    Delta_Efficiency = round(runif(n_subjects, min = 15, max = 30), 1)
  )

  # Combine the three parts on Subject_ID; the fibre table is the left side,
  # so each subject's fibre values repeat on all three of their trial rows.
  fibers %>%
    left_join(trials, by = "Subject_ID") %>%
    left_join(efficiency, by = "Subject_ID")
}

# Generate the simulated data
simulated_data <- simulate_data(n_subjects)
Table
Characteristic
Overall N = 90¹
Middle N = 30¹
Post N = 30¹
Pre N = 30¹
Fiber Type 1 (%)
59 (17)
59 (18)
59 (18)
59 (18)
Fiber Type 2 (%)
41 (17)
41 (18)
41 (18)
41 (18)
Running Time (seconds)
589 (65)
587 (44)
528 (33)
652 (46)
Delta Efficiency (%)
21.9 (5.0)
21.9 (5.0)
21.9 (5.0)
21.9 (5.0)
¹ Mean (SD)
The data
The data
Improvement over time
Correlations
# Correlation 1: % Fiber Type I vs total percentage improvement, computed on
# the "Post" rows only (one row per subject).
# NOTE(review): Percentage_Improvement_Total is not created by simulate_data()
# in the visible source - confirm it is added to simulated_data upstream.
corr1_data <- select(
  filter(simulated_data, Time_Point == "Post"),
  Fiber_Type_1_Percentage,
  Percentage_Improvement_Total
)

# Correlation coefficient from stats::cor with its default settings.
corr1 <- stats::cor(
  corr1_data[["Fiber_Type_1_Percentage"]],
  corr1_data[["Percentage_Improvement_Total"]]
)
Correlations
Correlations
Correlations
Correlations
Regressions
Linear regression
# Simple linear regression: Improvement as the response, Fiber_Type_1 as the
# single predictor, fitted on merged_data.
# NOTE(review): merged_data (and its Improvement / Fiber_Type_1 columns) is not
# defined at top level in the visible source - confirm it exists before this runs.
lm_model <-lm(Improvement ~ Fiber_Type_1, data = merged_data)
Regressions
Linear regression
# Linear regression with two predictors: Improvement as the response,
# Fiber_Type_1 and Sex as additive terms, fitted on merged_data.
# NOTE(review): merged_data and a Sex column are not defined at top level in the
# visible source - confirm they exist before this runs.
lm2 <-lm(Improvement ~ Fiber_Type_1 + Sex, data = merged_data)
Multiple stuff
Linear regression for Delta and FT1
Call:
lm(formula = Delta_Efficiency ~ Fiber_Type_1_Percentage, data = simulated_data)
Residuals:
Min 1Q Median 3Q Max
-6.876 -5.048 -0.531 3.839 8.504
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 19.10148 1.86817 10.225 <2e-16 ***
Fiber_Type_1_Percentage 0.04771 0.03023 1.578 0.118
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.942 on 88 degrees of freedom
Multiple R-squared: 0.02753, Adjusted R-squared: 0.01648
F-statistic: 2.492 on 1 and 88 DF, p-value: 0.118
More stuff
Generalized Linear model
Call:
glm(formula = Delta_Efficiency ~ Fiber_Type_1_Percentage, family = gaussian(),
data = simulated_data)
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 19.10148 1.86817 10.225 <2e-16 ***
Fiber_Type_1_Percentage 0.04771 0.03023 1.578 0.118
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for gaussian family taken to be 24.42615)
Null deviance: 2210.4 on 89 degrees of freedom
Residual deviance: 2149.5 on 88 degrees of freedom
AIC: 547
Number of Fisher Scoring iterations: 2
More stuff
Plotting % improvement over time
Improvement over time
Considerations and limits
Single-Biopsy
Potentially introducing uncertainty
Binary
Extreme VS non-extreme
Fitness level
Starting performance bias
Subjects who start out with worse performance have a better chance of achieving a larger absolute improvement; therefore, I need to express all performance measures in relative terms.