library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
library(dplyr)
# Load dataset
Probiotic_df <- read.csv("probiotic_data.csv")
# Remove patients who did not complete the study
Probiotic_df_clean <- Probiotic_df %>% na.omit()
# Split groups
placebo_df <- Probiotic_df_clean %>% filter(Group == "Placebo")
probiotic_df <- Probiotic_df_clean %>% filter(Group == "Probiotics")
# Compute summary statistics function
compute_summary <- function(df, group_name) {
tibble(
Group = group_name,
Duration = paste0(round(mean(df$Duration, na.rm = TRUE), 2), " ± ", round(sd(df$Duration, na.rm = TRUE), 2)),
Male = paste0(round(mean(df$Sex == "Male", na.rm = TRUE) * 100, 2), "%"),
Chinese = paste0(round(mean(df$Race == "Chinese", na.rm = TRUE) * 100, 2), "%"),
Malay = paste0(round(mean(df$Race == "Malay", na.rm = TRUE) * 100, 2), "%"),
Sedentary = paste0(round(mean(df$Sedentary == "Sedentary (physical exercise less than 4 hour per week)", na.rm = TRUE) * 100, 2), "%"),
Levodopa = paste0(round(mean(df$Levodopa == "yes", na.rm = TRUE) * 100, 2), "%"),
Dopamine = paste0(round(mean(df$Dopamine == "yes", na.rm = TRUE) * 100, 2), "%"),
LowFiber = paste0(round(mean(df$Fibre == "low (less than 3 serving perday)", na.rm = TRUE) * 100, 2), "%"),
PD_stage = paste0(round(mean(df$PD_stage_HY == "<=3", na.rm = TRUE) * 100, 2), "%")
)
}
# Compute summaries
placebo_summary <- compute_summary(placebo_df, "Placebo")
probiotic_summary <- compute_summary(probiotic_df, "Probiotics")
# Create final table
final_table <- tibble(
Characteristic = c("Duration", "Male", "Chinese", "Malay", "Sedentary", "Levodopa", "Dopamine", "Low Fiber", "PD Stage <=3"),
Placebo = unlist(placebo_summary[2:10]),
Probiotics = unlist(probiotic_summary[2:10])
)
# Print the final table
print(final_table)
## # A tibble: 9 × 3
## Characteristic Placebo Probiotics
## <chr> <chr> <chr>
## 1 Duration 8.35 ± 6.38 7.5 ± 4.88
## 2 Male 61.54% 59.09%
## 3 Chinese 53.85% 36.36%
## 4 Malay 42.31% 63.64%
## 5 Sedentary 50% 50%
## 6 Levodopa 88.46% 90.91%
## 7 Dopamine 57.69% 68.18%
## 8 Low Fiber 73.08% 86.36%
## 9 PD Stage <=3 65.38% 63.64%
# Function to compute p-values
compute_p_values <- function(var) {
if (is.numeric(Probiotic_df_clean[[var]])) {
# Use t-test for continuous variables
t.test(Probiotic_df_clean[[var]] ~ Probiotic_df_clean$Group)$p.value
} else {
# Use chi-square test for categorical variables
contingency_table <- table(Probiotic_df_clean[[var]], Probiotic_df_clean$Group)
chisq.test(contingency_table)$p.value
}
}
# Compute p-values for each characteristic
p_values <- tibble(
Characteristic = c("Duration", "Sex", "Race", "Sedentary", "Levodopa", "Dopamine", "Fibre", "PD_stage_HY"),
P_Value = sapply(c("Duration", "Sex", "Race", "Sedentary", "Levodopa", "Dopamine", "Fibre", "PD_stage_HY"), compute_p_values)
)
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
## Warning in chisq.test(contingency_table): Chi-squared approximation may be
## incorrect
# Print the p-values
print(p_values)
## # A tibble: 8 × 2
## Characteristic P_Value
## <chr> <dbl>
## 1 Duration 0.606
## 2 Sex 1
## 3 Race 0.262
## 4 Sedentary 1
## 5 Levodopa 1
## 6 Dopamine 0.654
## 7 Fibre 0.440
## 8 PD_stage_HY 1.00
# Check normality of gut transit time change using Shapiro-Wilk test
shapiro_test <- shapiro.test(Probiotic_df_clean$WGTT_DIFF)
print(shapiro_test)
##
## Shapiro-Wilk normality test
##
## data: Probiotic_df_clean$WGTT_DIFF
## W = 0.8753, p-value = 0.0001099
# Perform Mann-Whitney U test for difference between groups
mw_test_independent <- wilcox.test(WGTT_DIFF ~ Group, data = Probiotic_df_clean)
## Warning in wilcox.test.default(x = DATA[[1L]], y = DATA[[2L]], ...): cannot
## compute exact p-value with ties
print(mw_test_independent)
##
## Wilcoxon rank sum test with continuity correction
##
## data: WGTT_DIFF by Group
## W = 186, p-value = 0.03341
## alternative hypothesis: true location shift is not equal to 0
# Perform Wilcoxon Signed-Rank tests within each group
wilcox_paired_placebo <- wilcox.test(placebo_df$WGTT_PRE, placebo_df$WGTT_POST, paired = TRUE)
## Warning in wilcox.test.default(placebo_df$WGTT_PRE, placebo_df$WGTT_POST, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(placebo_df$WGTT_PRE, placebo_df$WGTT_POST, :
## cannot compute exact p-value with zeroes
wilcox_paired_probiotic <- wilcox.test(probiotic_df$WGTT_PRE, probiotic_df$WGTT_POST, paired = TRUE)
## Warning in wilcox.test.default(probiotic_df$WGTT_PRE, probiotic_df$WGTT_POST, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(probiotic_df$WGTT_PRE, probiotic_df$WGTT_POST, :
## cannot compute exact p-value with zeroes
print(wilcox_paired_placebo)
##
## Wilcoxon signed rank test with continuity correction
##
## data: placebo_df$WGTT_PRE and placebo_df$WGTT_POST
## V = 61, p-value = 0.09118
## alternative hypothesis: true location shift is not equal to 0
print(wilcox_paired_probiotic)
##
## Wilcoxon signed rank test with continuity correction
##
## data: probiotic_df$WGTT_PRE and probiotic_df$WGTT_POST
## V = 145, p-value = 0.001282
## alternative hypothesis: true location shift is not equal to 0