library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
library(dplyr)

# Load dataset
Probiotic_df <- read.csv("probiotic_data.csv")

# Remove patients who did not complete the study
Probiotic_df_clean <- Probiotic_df %>% na.omit()

# Split groups
placebo_df <- Probiotic_df_clean %>% filter(Group == "Placebo")
probiotic_df <- Probiotic_df_clean %>% filter(Group == "Probiotics")

# Compute summary statistics function
compute_summary <- function(df, group_name) {
  tibble(
    Group = group_name,
    Duration = paste0(round(mean(df$Duration, na.rm = TRUE), 2), " ± ", round(sd(df$Duration, na.rm = TRUE), 2)),
    Male = paste0(round(mean(df$Sex == "Male", na.rm = TRUE) * 100, 2), "%"),
    Chinese = paste0(round(mean(df$Race == "Chinese", na.rm = TRUE) * 100, 2), "%"),
    Malay = paste0(round(mean(df$Race == "Malay", na.rm = TRUE) * 100, 2), "%"),
    Sedentary = paste0(round(mean(df$Sedentary == "Sedentary (physical exercise less than 4 hour per week)", na.rm = TRUE) * 100, 2), "%"),
    Levodopa = paste0(round(mean(df$Levodopa == "yes", na.rm = TRUE) * 100, 2), "%"),
    Dopamine = paste0(round(mean(df$Dopamine == "yes", na.rm = TRUE) * 100, 2), "%"),
    LowFiber = paste0(round(mean(df$Fibre == "low (less than 3 serving perday)", na.rm = TRUE) * 100, 2), "%"),
    PD_stage = paste0(round(mean(df$PD_stage_HY == "<=3", na.rm = TRUE) * 100, 2), "%")
  )
}

# Compute summaries
placebo_summary <- compute_summary(placebo_df, "Placebo")
probiotic_summary <- compute_summary(probiotic_df, "Probiotics")

# Create final table
final_table <- tibble(
  Characteristic = c("Duration", "Male", "Chinese", "Malay", "Sedentary", "Levodopa", "Dopamine", "Low Fiber", "PD Stage <=3"),
  Placebo = unlist(placebo_summary[2:10]),
  Probiotics = unlist(probiotic_summary[2:10])
)

# Print the final table
print(final_table)
## # A tibble: 9 × 3
##   Characteristic Placebo     Probiotics
##   <chr>          <chr>       <chr>     
## 1 Duration       8.35 ± 6.38 7.5 ± 4.88
## 2 Male           61.54%      59.09%    
## 3 Chinese        53.85%      36.36%    
## 4 Malay          42.31%      63.64%    
## 5 Sedentary      50%         50%       
## 6 Levodopa       88.46%      90.91%    
## 7 Dopamine       57.69%      68.18%    
## 8 Low Fiber      73.08%      86.36%    
## 9 PD Stage <=3   65.38%      63.64%
# Function to compute p-values
compute_p_values <- function(var) {
  if (is.numeric(Probiotic_df_clean[[var]])) {
    # Use t-test for continuous variables
    t.test(Probiotic_df_clean[[var]] ~ Probiotic_df_clean$Group)$p.value
  } else {
    # Use chi-square test for categorical variables
    contingency_table <- table(Probiotic_df_clean[[var]], Probiotic_df_clean$Group)
    chisq.test(contingency_table)$p.value
  }
}

# Compute p-values for each characteristic
p_values <- tibble(
  Characteristic = c("Duration", "Sex", "Race", "Sedentary", "Levodopa", "Dopamine", "Fibre", "PD_stage_HY"),
  P_Value = sapply(c("Duration", "Sex", "Race", "Sedentary", "Levodopa", "Dopamine", "Fibre", "PD_stage_HY"), compute_p_values)
)
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
# Print the p-values
print(p_values)
## # A tibble: 8 × 2
##   Characteristic P_Value
##   <chr>            <dbl>
## 1 Duration         0.606
## 2 Sex              1    
## 3 Race             0.262
## 4 Sedentary        1    
## 5 Levodopa         1    
## 6 Dopamine         0.654
## 7 Fibre            0.440
## 8 PD_stage_HY      1.00
# Check normality of gut transit time change using Shapiro-Wilk test
shapiro_test <- shapiro.test(Probiotic_df_clean$WGTT_DIFF)
print(shapiro_test)
## 
##  Shapiro-Wilk normality test
## 
## data:  Probiotic_df_clean$WGTT_DIFF
## W = 0.8753, p-value = 0.0001099
# Perform Mann-Whitney U test for difference between groups
mw_test_independent <- wilcox.test(WGTT_DIFF ~ Group, data = Probiotic_df_clean)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
print(mw_test_independent)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  WGTT_DIFF by Group
## W = 186, p-value = 0.03341
## alternative hypothesis: true location shift is not equal to 0
# Perform Wilcoxon Signed-Rank tests within each group
wilcox_paired_placebo <- wilcox.test(placebo_df$WGTT_PRE, placebo_df$WGTT_POST, paired = TRUE)
## Warning in wilcox.test.default(placebo_df$WGTT_PRE, placebo_df$WGTT_POST, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(placebo_df$WGTT_PRE, placebo_df$WGTT_POST, :
## cannot compute exact p-value with zeroes
wilcox_paired_probiotic <- wilcox.test(probiotic_df$WGTT_PRE, probiotic_df$WGTT_POST, paired = TRUE)
## Warning in wilcox.test.default(probiotic_df$WGTT_PRE, probiotic_df$WGTT_POST, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(probiotic_df$WGTT_PRE, probiotic_df$WGTT_POST, :
## cannot compute exact p-value with zeroes
print(wilcox_paired_placebo)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  placebo_df$WGTT_PRE and placebo_df$WGTT_POST
## V = 61, p-value = 0.09118
## alternative hypothesis: true location shift is not equal to 0
print(wilcox_paired_probiotic)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  probiotic_df$WGTT_PRE and probiotic_df$WGTT_POST
## V = 145, p-value = 0.001282
## alternative hypothesis: true location shift is not equal to 0