# Load necessary packages
library(tidyverse)  # dplyr, tidyr, ggplot2, etc.
library(readr)      # For fast CSV reading
library(mice)       # For multiple imputation if needed
library(naniar)     # For visualizing missing data
library(stringr)
library(psych)
library(car)
library(ordinal)    # For cumulative link mixed models
library(DescTools)  # For Bowker's test if needed
library(knitr)      # For pretty tables in the final document
# Specify the file path (update if necessary)
file.path <- "/Users/armansaeedi/Desktop/ArmanSaeedi/Med School/Phase V/Post Match/VCU/Coelho/CI Activation Study/Updated DAta/Dataset_3_3_Wide.csv"
data_wide <- read_csv(file.path)

#rename all columns to lower
data_wide <- data_wide %>%
  rename_all(tolower)

#remove rows with no value in record id
#rename record id to record_id
data_wide <- data_wide %>%
  filter(!is.na(`record id`)) %>%
  rename(record_id = `record id`)
data_wide <- data_wide %>%
  # Extract numeric hearing loss duration values (up to three decimals) and set 0 values to NA
  mutate(`hearing loss duration (years)` = as.numeric(str_extract(`hearing loss duration`, "\\d+\\.?\\d{0,3}")),
         `hearing loss duration (years)` = na_if(`hearing loss duration (years)`, 0)) %>%
  
  # Create categorical variable based on the numeric value; assign NA if the value is missing
  mutate(`hearing loss duration category` = case_when(
    is.na(`hearing loss duration (years)`) ~ NA_character_,
    `hearing loss duration (years)` <= 5 ~ "0 to 5 years",
    `hearing loss duration (years)` > 5 & `hearing loss duration (years)` <= 10 ~ "> 5 to 10 years",
    `hearing loss duration (years)` > 10 & `hearing loss duration (years)` <= 15 ~ "> 10 to 15 years",
    `hearing loss duration (years)` > 15 & `hearing loss duration (years)` <= 20 ~ "> 15 to 20 years",
    `hearing loss duration (years)` > 20 ~ "> 20 years",
    TRUE ~ NA_character_
  )) %>%
  
  # Reorder columns to bring the new hearing loss columns to the front
  select(`record_id`, `hearing loss duration (years)`, `hearing loss duration category`, everything()) %>%
  
  # Rename the new columns for clarity
  rename(hld_years = `hearing loss duration (years)`,
         hld_category = `hearing loss duration category`) %>%
  
  # Remove the original 'hearing loss duration' column
  select(-`hearing loss duration`)
# Impute missing hearing loss duration using linear regression with age as predictor
data_wide <- data_wide %>%
  mutate(hld_years = ifelse(is.na(hld_years),
                            predict(lm(hld_years ~ age, data = .), .),
                            hld_years))
# Re-categorize hld_years
data_wide <- data_wide %>%
  mutate(hld_category = case_when(
    is.na(hld_years) ~ NA_character_,
    hld_years <= 5 ~ "0 to 5 years",
    hld_years > 5 & hld_years <= 10 ~ "> 5 to 10 years",
    hld_years > 10 & hld_years <= 15 ~ "> 10 to 15 years",
    hld_years > 15 & hld_years <= 20 ~ "> 15 to 20 years",
    hld_years > 20 ~ "> 20 years",
    TRUE ~ NA_character_
  ))
# Impute missing total using regression with age and hld_years
data_wide <- data_wide %>%
  mutate(total = ifelse(is.na(total),
                        predict(lm(total ~ age + hld_years, data = .), .),
                        total))
# Impute missing CIQOL-10 values
data_wide <- data_wide %>%
  mutate(score_conversion = ifelse(is.na(score_conversion), 43.62, score_conversion),
         se_converted = ifelse(is.na(se_converted), 3.12, se_converted))
# Rename "brand of ci" to ci_brand for consistency
data_wide <- data_wide %>%
  rename(ci_brand = `brand of ci`)

# Subset relevant columns for descriptives
descriptives_df <- data_wide %>% 
  select(age, sex, bmi, hld_years, hld_category, ci_brand, processor, score_conversion, se_converted)

# Continuous descriptive statistics using psych::describe
continuous_vars <- describe(descriptives_df %>% select(age, bmi, hld_years, score_conversion, se_converted))
print("Continuous Variables:")
## [1] "Continuous Variables:"
print(continuous_vars)
##                  vars  n  mean    sd median trimmed   mad   min   max range
## age                 1 52 65.60 15.96  64.03   66.32 13.92 31.80 95.06 63.26
## bmi                 2 52 28.19  5.52  27.65   28.02  5.51 17.33 42.38 25.05
## hld_years           3 52 16.67 15.60  13.50   13.86 11.12  0.25 65.00 64.75
## score_conversion    4 52 39.23  8.67  39.56   39.67  8.12 18.53 56.45 37.92
## se_converted        5 52  3.29  0.34   3.18    3.20  0.09  3.11  4.77  1.66
##                   skew kurtosis   se
## age              -0.25    -0.72 2.21
## bmi               0.35    -0.25 0.77
## hld_years         1.56     2.04 2.16
## score_conversion -0.37    -0.34 1.20
## se_converted      2.75     7.26 0.05
# Calculate IQR for hld_years
hld_iqr <- descriptives_df %>%
  summarise(median = median(hld_years, na.rm = TRUE),
            q1 = quantile(hld_years, 0.25, na.rm = TRUE),
            q3 = quantile(hld_years, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
print(hld_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1   13.5     5    20    15
# Categorical variables
print("Categorical Variables:")
## [1] "Categorical Variables:"
print("Sex Distribution:")
## [1] "Sex Distribution:"
sex_table <- table(descriptives_df$sex)
print(prop.table(sex_table))
## 
##         F         M 
## 0.5384615 0.4615385
print("Brand of CI Distribution:")
## [1] "Brand of CI Distribution:"
brand_distribution <- descriptives_df %>% 
  count(ci_brand) %>% 
  mutate(percentage = n / sum(n) * 100)
print(brand_distribution)
## # A tibble: 3 × 3
##   ci_brand     n percentage
##   <chr>    <int>      <dbl>
## 1 AB          17      32.7 
## 2 CA          32      61.5 
## 3 MedEl        3       5.77
print("Processor Distribution:")
## [1] "Processor Distribution:"
processor_distribution <- descriptives_df %>% 
  count(processor) %>% 
  mutate(percentage = n / sum(n) * 100)
print(processor_distribution)
## # A tibble: 6 × 3
##   processor             n percentage
##   <chr>             <int>      <dbl>
## 1 Kanso 2               1       1.92
## 2 N8                   16      30.8 
## 3 N8/K2                17      32.7 
## 4 Naida M90            15      28.8 
## 5 Sonnet 2              2       3.85
## 6 Sonnet 2, Rondo 3     1       1.92
print("Hearing Loss Duration Category:")
## [1] "Hearing Loss Duration Category:"
hld_distribution <- descriptives_df %>%
  count(hld_category) %>%
  mutate(percentage = n / sum(n) * 100)
print(hld_distribution)
## # A tibble: 5 × 3
##   hld_category         n percentage
##   <chr>            <int>      <dbl>
## 1 0 to 5 years        14      26.9 
## 2 > 10 to 15 years     4       7.69
## 3 > 15 to 20 years    13      25   
## 4 > 20 years          11      21.2 
## 5 > 5 to 10 years     10      19.2
#check normality visually with histograms and Q-Q plots:
continuous_vars <- c("age", "bmi", "hld_years", "score_conversion", "se_converted")

# Histograms and Q-Q Plots
par(mfrow=c(2,2))
for(var in continuous_vars){
    hist(data_wide[[var]], main=paste("Histogram of", var), xlab=var, col='lightblue')
    qqPlot(data_wide[[var]], main=paste("Q-Q plot of", var))
}

# Shapiro-Wilk tests for normality
normality_tests <- lapply(continuous_vars, function(v) {
    shapiro.test(data_wide[[v]])
})

names(normality_tests) <- continuous_vars
normality_tests
## $age
## 
##  Shapiro-Wilk normality test
## 
## data:  data_wide[[v]]
## W = 0.96612, p-value = 0.1441
## 
## 
## $bmi
## 
##  Shapiro-Wilk normality test
## 
## data:  data_wide[[v]]
## W = 0.98314, p-value = 0.6665
## 
## 
## $hld_years
## 
##  Shapiro-Wilk normality test
## 
## data:  data_wide[[v]]
## W = 0.81611, p-value = 1.451e-06
## 
## 
## $score_conversion
## 
##  Shapiro-Wilk normality test
## 
## data:  data_wide[[v]]
## W = 0.97816, p-value = 0.4508
## 
## 
## $se_converted
## 
##  Shapiro-Wilk normality test
## 
## data:  data_wide[[v]]
## W = 0.55167, p-value = 2.312e-11
# Log transform hld_years due to skew
data_wide$log_hld_years <- log(data_wide$hld_years + 1)  # Add 1 to avoid log(0)

# Re-check normality
hist(data_wide$log_hld_years, main="Histogram of log-transformed hld_years", col='lightblue')
qqPlot(data_wide$log_hld_years, main="QQ plot of log-transformed hld_years")

## [1]  9 21
shapiro.test(data_wide$log_hld_years)
## 
##  Shapiro-Wilk normality test
## 
## data:  data_wide$log_hld_years
## W = 0.97506, p-value = 0.3421
#append visit_1_ to overall_1_10, pain_1_10, dizziness_1_10, energy_1_10
data_wide <- data_wide %>%
  rename(visit_1_overall_1_10 = overall_1_10,
         visit_1_pain_1_10 = pain_1_10,
         visit_1_dizziness_1_10 = dizziness_1_10,
         visit_1_energy_1_10 = energy_1_10)
descriptives_VAS <- data_wide %>%
  select(visit_1_overall_1_10, visit_1_pain_1_10, visit_1_dizziness_1_10, visit_1_energy_1_10, visit_2_overall_1_10, visit_2_pain_1_10, visit_2_dizziness_1_10, visit_2_energy_1_10, visit_3_overall_1_10, visit_3_pain_1_10, visit_3_dizziness_1_10, visit_3_energy_1_10, visit_4_overall_1_10, visit_4_pain_1_10, visit_4_dizziness_1_10, visit_4_energy_1_10)

#store IQR variables in a list
iqr_vars <- c("visit_1_overall_iqr", "visit_2_overall_iqr", "visit_3_overall_iqr", "visit_4_overall_iqr", "visit_1_pain_iqr", "visit_2_pain_iqr", "visit_3_pain_iqr", "visit_4_pain_iqr", "visit_1_dizziness_iqr", "visit_2_dizziness_iqr", "visit_3_dizziness_iqr", "visit_4_dizziness_iqr", "visit_1_energy_iqr", "visit_2_energy_iqr", "visit_3_energy_iqr", "visit_4_energy_iqr")

#calculate median and interquartile range for overall, dizziness, pain, and energy at each visit
visit_1_overall_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_1_overall_1_10, na.rm = TRUE),
            q1 = quantile(visit_1_overall_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_1_overall_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_2_overall_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_2_overall_1_10, na.rm = TRUE),
            q1 = quantile(visit_2_overall_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_2_overall_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_3_overall_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_3_overall_1_10, na.rm = TRUE),
            q1 = quantile(visit_3_overall_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_3_overall_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_4_overall_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_4_overall_1_10, na.rm = TRUE),
            q1 = quantile(visit_4_overall_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_4_overall_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_1_pain_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_1_pain_1_10, na.rm = TRUE),
            q1 = quantile(visit_1_pain_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_1_pain_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_2_pain_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_2_pain_1_10, na.rm = TRUE),
            q1 = quantile(visit_2_pain_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_2_pain_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_3_pain_iqr <- descriptives_VAS %>%
  summarise(median = median(visit_3_pain_1_10, na.rm = TRUE),
            q1 = quantile(visit_3_pain_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_3_pain_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_4_pain_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_4_pain_1_10, na.rm = TRUE),
            q1 = quantile(visit_4_pain_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_4_pain_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_1_dizziness_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_1_dizziness_1_10, na.rm = TRUE),
            q1 = quantile(visit_1_dizziness_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_1_dizziness_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_2_dizziness_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_2_dizziness_1_10, na.rm = TRUE),
            q1 = quantile(visit_2_dizziness_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_2_dizziness_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_3_dizziness_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_3_dizziness_1_10, na.rm = TRUE),
            q1 = quantile(visit_3_dizziness_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_3_dizziness_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_4_dizziness_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_4_dizziness_1_10, na.rm = TRUE),
            q1 = quantile(visit_4_dizziness_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_4_dizziness_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_1_energy_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_1_energy_1_10, na.rm = TRUE),
            q1 = quantile(visit_1_energy_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_1_energy_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_2_energy_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_2_energy_1_10, na.rm = TRUE),
            q1 = quantile(visit_2_energy_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_2_energy_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_3_energy_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_3_energy_1_10, na.rm = TRUE),
            q1 = quantile(visit_3_energy_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_3_energy_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)
visit_4_energy_iqr <- descriptives_VAS %>% 
  summarise(median = median(visit_4_energy_1_10, na.rm = TRUE),
            q1 = quantile(visit_4_energy_1_10, 0.25, na.rm = TRUE),
            q3 = quantile(visit_4_energy_1_10, 0.75, na.rm = TRUE),
            iqr = q3 - q1)


#print all IQR variables
print(visit_1_overall_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      9     7    10     3
print(visit_2_overall_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1    5.5     3     7     4
print(visit_3_overall_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      8     7     9     2
print(visit_4_overall_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      9     7    10     3
print(visit_1_pain_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      0     0  1.25  1.25
print(visit_2_pain_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      5     4     7     3
print(visit_3_pain_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      0     0     2     2
print(visit_4_pain_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      0     0     0     0
print(visit_1_dizziness_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      0     0  1.25  1.25
print(visit_2_dizziness_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1    1.5     0     5     5
print(visit_3_dizziness_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      2     0     5     5
print(visit_4_dizziness_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      0     0     1     1
print(visit_1_energy_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      8  5.75  8.25   2.5
print(visit_2_energy_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      3     2  5.25  3.25
print(visit_3_energy_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      7     5     8     3
print(visit_4_energy_iqr)
## # A tibble: 1 × 4
##   median    q1    q3   iqr
##    <dbl> <dbl> <dbl> <dbl>
## 1      8     6   8.5   2.5
#Missingness check
cat("=== Missingness Check ===\n")
## === Missingness Check ===
missingness <- data_wide %>%
  summarise(across(starts_with("visit_"), ~sum(is.na(.x)))) %>%
  select(contains("activation_pref_ordinal"))
print(missingness)
## # A tibble: 1 × 4
##   visit_1_activation_pref_ordinal visit_2_activation_pr…¹ visit_3_activation_p…²
##                             <int>                   <int>                  <int>
## 1                               1                      20                     19
## # ℹ abbreviated names: ¹​visit_2_activation_pref_ordinal,
## #   ²​visit_3_activation_pref_ordinal
## # ℹ 1 more variable: visit_4_activation_pref_ordinal <int>
# Calculate and print complete-case counts for activation preference comparisons
for(v in 1:4){
  count_complete <- data_wide %>%
    filter(!is.na(visit_1_activation_pref_ordinal),
           !is.na(.data[[paste0("visit_", v, "_activation_pref_ordinal")]])) %>%
    nrow()
  cat("Complete-case count for Visit", v, "comparison:", count_complete, "\n")
}
## Complete-case count for Visit 1 comparison: 51 
## Complete-case count for Visit 2 comparison: 32 
## Complete-case count for Visit 3 comparison: 33 
## Complete-case count for Visit 4 comparison: 45
#########################################
# Section 2: Correlation Analyses for Continuous Variables
#########################################
# Calculate Spearman correlations for baseline and VAS variables at each visit
baseline_vars <- c("age", "bmi", "hld_years", "log_hld_years", "score_conversion")
vas_vars <- c("overall_1_10", "pain_1_10", "dizziness_1_10", "energy_1_10")
visits <- 1:4
all_correlations <- data.frame()
for(v in visits){
  act_var <- paste0("visit_", v, "_activation_pref_ordinal")
  df_v <- data_wide %>% filter(!is.na(.data[[act_var]]))
  
  for(var in baseline_vars){
    if(sum(!is.na(df_v[[var]])) > 5){
      test <- cor.test(df_v[[var]], df_v[[act_var]], method = "spearman")
      all_correlations <- bind_rows(all_correlations, data.frame(
        visit = v, variable = var, rho = round(test$estimate, 3), p.value = round(test$p.value, 3)
      ))
    }
  }
  for(var in vas_vars){
    vas_var <- paste0("visit_", v, "_", var)
    if(sum(!is.na(df_v[[vas_var]])) > 5){
      test <- cor.test(df_v[[vas_var]], df_v[[act_var]], method = "spearman")
      all_correlations <- bind_rows(all_correlations, data.frame(
        visit = v, variable = vas_var, rho = round(test$estimate, 3), p.value = round(test$p.value, 3)
      ))
    }
  }
}
kable(all_correlations, caption = "Table 2. Spearman Correlations for Baseline and VAS Variables")
Table 2. Spearman Correlations for Baseline and VAS Variables
visit variable rho p.value
rho…1 1 age -0.146 0.306
rho…2 1 bmi 0.068 0.636
rho…3 1 hld_years -0.053 0.713
rho…4 1 log_hld_years -0.053 0.713
rho…5 1 score_conversion -0.081 0.573
rho…6 1 visit_1_overall_1_10 -0.074 0.606
rho…7 1 visit_1_pain_1_10 -0.050 0.729
rho…8 1 visit_1_dizziness_1_10 0.004 0.975
rho…9 1 visit_1_energy_1_10 0.070 0.625
rho…10 2 age 0.058 0.752
rho…11 2 bmi -0.068 0.710
rho…12 2 hld_years -0.272 0.132
rho…13 2 log_hld_years -0.272 0.132
rho…14 2 score_conversion 0.228 0.209
rho…15 2 visit_2_overall_1_10 -0.099 0.588
rho…16 2 visit_2_pain_1_10 -0.166 0.363
rho…17 2 visit_2_dizziness_1_10 0.378 0.033
rho…18 2 visit_2_energy_1_10 -0.024 0.897
rho…19 3 age -0.006 0.973
rho…20 3 bmi -0.341 0.052
rho…21 3 hld_years -0.160 0.375
rho…22 3 log_hld_years -0.160 0.375
rho…23 3 score_conversion 0.065 0.721
rho…24 3 visit_3_overall_1_10 -0.310 0.079
rho…25 3 visit_3_pain_1_10 -0.281 0.114
rho…26 3 visit_3_dizziness_1_10 0.020 0.913
rho…27 3 visit_3_energy_1_10 -0.112 0.536
rho…28 4 age -0.216 0.149
rho…29 4 bmi -0.147 0.329
rho…30 4 hld_years 0.172 0.253
rho…31 4 log_hld_years 0.172 0.253
rho…32 4 score_conversion -0.061 0.685
rho…33 4 visit_4_overall_1_10 0.033 0.826
rho…34 4 visit_4_pain_1_10 0.115 0.448
rho…35 4 visit_4_dizziness_1_10 -0.043 0.775
rho…36 4 visit_4_energy_1_10 0.002 0.989
#########################################
# Section 3: Dichotomous Variable Analysis for sex
#########################################
# Perform Wilcoxon tests for sex at each visit
sex_results <- data.frame()
for(v in visits){
  act_var <- paste0("visit_", v, "_activation_pref_ordinal")
  df_v <- data_wide %>% filter(!is.na(.data[[act_var]]))
  test <- wilcox.test(df_v[[act_var]] ~ df_v$sex)
  sex_results <- bind_rows(sex_results, data.frame(
    Visit = v, W_Statistic = round(test$statistic, 3), p_value = round(test$p.value, 3)
  ))
}
kable(sex_results, caption = "Table 3. Sex Comparisons (Wilcoxon Rank-Sum Test) by Visit")
Table 3. Sex Comparisons (Wilcoxon Rank-Sum Test) by Visit
Visit W_Statistic p_value
W…1 1 381.0 0.276
W…2 2 119.5 1.000
W…3 3 178.5 0.090
W…4 4 253.5 0.847
#########################################
# Section 4: Categorical Variable Comparisons (ci_brand, processor)
#########################################
# Categorical comparisons for ci_brand and processor
catcomp <- data.frame()
cat_vars <- c("ci_brand", "processor")
for(v in visits){
  act_var <- paste0("visit_", v, "_activation_pref_ordinal")
  df_v <- data_wide %>% filter(!is.na(.data[[act_var]]))
  for(cat_var in cat_vars){
    test <- kruskal.test(df_v[[act_var]] ~ as.factor(df_v[[cat_var]]))
    catcomp <- bind_rows(catcomp, data.frame(
      Visit = v, Variable = cat_var, Chi_Sq = round(test$statistic, 3), p_value = round(test$p.value, 3)
    ))
  }
}
kable(catcomp, caption = "Table 4. Categorical Comparisons for CI Brand and Processor by Visit")
Table 4. Categorical Comparisons for CI Brand and Processor by Visit
Visit Variable Chi_Sq p_value
Kruskal-Wallis chi-squared…1 1 ci_brand 1.325 0.516
Kruskal-Wallis chi-squared…2 1 processor 8.927 0.112
Kruskal-Wallis chi-squared…3 2 ci_brand 0.769 0.681
Kruskal-Wallis chi-squared…4 2 processor 2.928 0.570
Kruskal-Wallis chi-squared…5 3 ci_brand 3.668 0.160
Kruskal-Wallis chi-squared…6 3 processor 4.430 0.351
Kruskal-Wallis chi-squared…7 4 ci_brand 4.292 0.117
Kruskal-Wallis chi-squared…8 4 processor 5.920 0.314
#########################################
# Section 5: Wilcoxon Signed-Rank Tests (Visit 1 vs. Visits 2, 3, and 4)
#########################################
# Wilcoxon signed-rank tests for change in activation preference (Visit 1 vs. Visits 2, 3, 4)
wilcox_results <- data.frame()
for(v in 2:4){
  df_v <- data_wide %>% filter(!is.na(visit_1_activation_pref_ordinal),
                                !is.na(.data[[paste0("visit_", v, "_activation_pref_ordinal")]]))
  # Calculate differences and exclude tied pairs (where difference == 0)
  diff_vals <- df_v$visit_1_activation_pref_ordinal - df_v[[paste0("visit_", v, "_activation_pref_ordinal")]]
  nonzero_df <- df_v[diff_vals != 0, ]
  
  # Get sample size for non-tied pairs
  n <- nrow(nonzero_df)
  
  # Run the paired Wilcoxon test on non-tied data
  test <- wilcox.test(nonzero_df$visit_1_activation_pref_ordinal,
                      nonzero_df[[paste0("visit_", v, "_activation_pref_ordinal")]], paired = TRUE)
  wilcox_results <- bind_rows(wilcox_results, data.frame(
    Comparison = paste("Visit 1 vs Visit", v),
    n = n,
    V_Statistic = round(test$statistic, 3),
    p_value = round(test$p.value, 3)
  ))
}
kable(wilcox_results, caption = "Table 5. Wilcoxon Signed-Rank Test Results for Change in Activation Preference")
Table 5. Wilcoxon Signed-Rank Test Results for Change in Activation Preference
Comparison n V_Statistic p_value
V…1 Visit 1 vs Visit 2 21 97.5 0.538
V…2 Visit 1 vs Visit 3 21 77.0 0.182
V…3 Visit 1 vs Visit 4 32 130.0 0.012
#########################################
# Section 6: Subgroup Analysis for Significant Change (Example: Visit 4)
#########################################
# Subgroup analysis for Visit 4 (Changed vs. No Change)
df_v4 <- data_wide %>% 
  filter(!is.na(visit_1_activation_pref_ordinal), !is.na(visit_4_activation_pref_ordinal)) %>%
  mutate(
    diff = visit_4_activation_pref_ordinal - visit_1_activation_pref_ordinal,
    change_binary = ifelse(diff != 0, "changed", "no_change"),
    change_direction = case_when(
      diff > 0 ~ "later",
      diff < 0 ~ "earlier",
      diff == 0 ~ "no_change"
    )
  )
cat("Complete-case count for Visit 4 comparison:", nrow(df_v4), "\n")
## Complete-case count for Visit 4 comparison: 45
kable(as.data.frame(table(df_v4$change_binary)), caption = "Table 6. Frequency of Change (Changed vs. No Change) at Visit 4")
Table 6. Frequency of Change (Changed vs. No Change) at Visit 4
Var1 Freq
changed 32
no_change 13
kable(as.data.frame(table(df_v4$change_direction)), caption = "Table 7. Frequency of Change Direction at Visit 4")
Table 7. Frequency of Change Direction at Visit 4
Var1 Freq
earlier 10
later 22
no_change 13
# Subgroup comparisons: Continuous variables
subgroup_cont <- data.frame(
  Variable = c("Age", "BMI", "hld_years", "score_conversion"),
  Test = c("t-test", "t-test", "Wilcoxon", "t-test")
)
subgroup_cont$Statistic <- c(
  round(t.test(age ~ change_binary, data = df_v4, var.equal = TRUE)$statistic, 3),
  round(t.test(bmi ~ change_binary, data = df_v4, var.equal = TRUE)$statistic, 3),
  round(wilcox.test(hld_years ~ change_binary, data = df_v4)$statistic, 3),
  round(t.test(score_conversion ~ change_binary, data = df_v4, var.equal = TRUE)$statistic, 3)
)
subgroup_cont$p_value <- c(
  round(t.test(age ~ change_binary, data = df_v4, var.equal = TRUE)$p.value, 3),
  round(t.test(bmi ~ change_binary, data = df_v4, var.equal = TRUE)$p.value, 3),
  round(wilcox.test(hld_years ~ change_binary, data = df_v4)$p.value, 3),
  round(t.test(score_conversion ~ change_binary, data = df_v4, var.equal = TRUE)$p.value, 3)
)
kable(subgroup_cont, caption = "Table 8. Subgroup Comparisons for Continuous Variables (Changed vs. No Change) at Visit 4")
Table 8. Subgroup Comparisons for Continuous Variables (Changed vs. No Change) at Visit 4
Variable Test Statistic p_value
Age t-test 0.346 0.731
BMI t-test 0.205 0.839
hld_years Wilcoxon 205.500 0.960
score_conversion t-test 0.596 0.554
# Subgroup comparison for sex
sex_subgroup <- data.frame(
  Variable = "sex",
  Test = "Fisher's Exact Test",
  p_value = round(fisher.test(table(df_v4$sex, df_v4$change_binary))$p.value, 3)
)
kable(sex_subgroup, caption = "Table 9. Subgroup Comparison for Sex at Visit 4")
Table 9. Subgroup Comparison for Sex at Visit 4
Variable Test p_value
sex Fisher’s Exact Test 1
# Subgroup comparison for CI brand
ci_brand_subgroup <- data.frame(
  Variable = "ci_brand",
  Test = "Fisher's Exact Test",
  p_value = round(fisher.test(table(df_v4$ci_brand, df_v4$change_binary))$p.value, 3)
)
kable(ci_brand_subgroup, caption = "Table 10. Subgroup Comparison for CI Brand at Visit 4")
Table 10. Subgroup Comparison for CI Brand at Visit 4
Variable Test p_value
ci_brand Fisher’s Exact Test 0.074
#subgroup comparisons for processor
processor_subgroup <- data.frame(
  Variable = "processor",
  Test = "Fisher's Exact Test",
  p_value = round(fisher.test(table(df_v4$processor, df_v4$change_binary))$p.value, 3)
)
kable(processor_subgroup, caption = "Table 11. Subgroup Comparison for Processor at Visit 4")
Table 11. Subgroup Comparison for Processor at Visit 4
Variable Test p_value
processor Fisher’s Exact Test 0.289
# Subgroup comparisons for VAS scores at Visit 4
subgroup_vas <- data.frame(
  Variable = c("visit_4_overall_1_10", "visit_4_pain_1_10", "visit_4_dizziness_1_10", "visit_4_energy_1_10"),
  Test = rep("Wilcoxon", 4)
)
subgroup_vas$Statistic <- c(
  round(wilcox.test(df_v4$visit_4_overall_1_10 ~ df_v4$change_binary)$statistic, 3),
  round(wilcox.test(df_v4$visit_4_pain_1_10 ~ df_v4$change_binary)$statistic, 3),
  round(wilcox.test(df_v4$visit_4_dizziness_1_10 ~ df_v4$change_binary)$statistic, 3),
  round(wilcox.test(df_v4$visit_4_energy_1_10 ~ df_v4$change_binary)$statistic, 3)
)
subgroup_vas$p_value <- c(
  round(wilcox.test(df_v4$visit_4_overall_1_10 ~ df_v4$change_binary)$p.value, 3),
  round(wilcox.test(df_v4$visit_4_pain_1_10 ~ df_v4$change_binary)$p.value, 3),
  round(wilcox.test(df_v4$visit_4_dizziness_1_10 ~ df_v4$change_binary)$p.value, 3),
  round(wilcox.test(df_v4$visit_4_energy_1_10 ~ df_v4$change_binary)$p.value, 3)
)
kable(subgroup_vas, caption = "Table 12. Subgroup Comparisons for VAS Scores at Visit 4 (Changed vs. No Change)")
Table 12. Subgroup Comparisons for VAS Scores at Visit 4 (Changed vs. No Change)
Variable Test Statistic p_value
visit_4_overall_1_10 Wilcoxon 259 0.193
visit_4_pain_1_10 Wilcoxon 166 0.119
visit_4_dizziness_1_10 Wilcoxon 192 0.636
visit_4_energy_1_10 Wilcoxon 218 0.809
#########################################
# Section 7: Directional Subgroup Analysis for Visit 4
# (Pairwise comparisons among "earlier", "later", and "no_change")
#########################################
# Directional Subgroup Analysis for Visit 4
cat("\n=== Directional Subgroup Analysis for Visit 4 ===\n")
## 
## === Directional Subgroup Analysis for Visit 4 ===
print(table(df_v4$change_direction))
## 
##   earlier     later no_change 
##        10        22        13
# Helper function for continuous pairwise comparisons:
pairwise_test_cont <- function(data, var, group1, group2, normal = FALSE) {
  subdata <- data %>% filter(change_direction %in% c(group1, group2))
  if(nrow(subdata) < 5) {
    cat("Not enough data for", var, "(", group1, "vs", group2, ")\n")
    return(NULL)
  }
  if(normal) {
    test <- t.test(as.formula(paste(var, "~ change_direction")), data = subdata)
    cat(var, "(", group1, "vs", group2, ") t-test p-value:", round(test$p.value, 3), "\n")
  } else {
    test <- wilcox.test(as.formula(paste(var, "~ change_direction")), data = subdata)
    cat(var, "(", group1, "vs", group2, ") Wilcoxon p-value:", round(test$p.value, 3), "\n")
  }
}

cat("\n--- Directional Analysis for Continuous Variables ---\n")
## 
## --- Directional Analysis for Continuous Variables ---
cont_vars <- c("age", "bmi", "hld_years", "score_conversion")
for(var in cont_vars) {
  cat("\nTesting", var, ":\n")
  pairwise_test_cont(df_v4, var, "earlier", "no_change", normal = (var %in% c("age", "bmi", "score_conversion")))
  pairwise_test_cont(df_v4, var, "later", "no_change", normal = (var %in% c("age", "bmi", "score_conversion")))
  pairwise_test_cont(df_v4, var, "earlier", "later", normal = (var %in% c("age", "bmi", "score_conversion")))
}
## 
## Testing age :
## age ( earlier vs no_change ) t-test p-value: 0.91 
## age ( later vs no_change ) t-test p-value: 0.708 
## age ( earlier vs later ) t-test p-value: 0.806 
## 
## Testing bmi :
## bmi ( earlier vs no_change ) t-test p-value: 0.503 
## bmi ( later vs no_change ) t-test p-value: 0.929 
## bmi ( earlier vs later ) t-test p-value: 0.366 
## 
## Testing hld_years :
## hld_years ( earlier vs no_change ) Wilcoxon p-value: 0.576
## hld_years ( later vs no_change ) Wilcoxon p-value: 0.824
## hld_years ( earlier vs later ) Wilcoxon p-value: 0.337 
## 
## Testing score_conversion :
## score_conversion ( earlier vs no_change ) t-test p-value: 0.715 
## score_conversion ( later vs no_change ) t-test p-value: 0.555 
## score_conversion ( earlier vs later ) t-test p-value: 0.918
cat("\n--- Directional Analysis for Categorical Variable: sex ---\n")
## 
## --- Directional Analysis for Categorical Variable: sex ---
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "no_change"))
cat("sex (earlier vs. no_change) p-value:", round(fisher.test(table(subdata$sex, subdata$change_direction))$p.value, 3), "\n")
## sex (earlier vs. no_change) p-value: 0.669
subdata <- df_v4 %>% filter(change_direction %in% c("later", "no_change"))
cat("sex (later vs. no_change) p-value:", round(fisher.test(table(subdata$sex, subdata$change_direction))$p.value, 3), "\n")
## sex (later vs. no_change) p-value: 0.733
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "later"))
cat("sex (earlier vs. later) p-value:", round(fisher.test(table(subdata$sex, subdata$change_direction))$p.value, 3), "\n")
## sex (earlier vs. later) p-value: 0.265
cat("\n--- Directional Analysis for Categorical Variable: CI brand ---\n")
## 
## --- Directional Analysis for Categorical Variable: CI brand ---
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "no_change"))
cat("ci_brand (earlier vs. no_change) p-value:", round(fisher.test(table(subdata$ci_brand, subdata$change_direction))$p.value, 3), "\n")
## ci_brand (earlier vs. no_change) p-value: 0.629
subdata <- df_v4 %>% filter(change_direction %in% c("later", "no_change"))
cat("ci_brand (later vs. no_change) p-value:", round(fisher.test(table(subdata$ci_brand, subdata$change_direction))$p.value, 3), "\n")
## ci_brand (later vs. no_change) p-value: 0.04
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "later"))
cat("ci_brand (earlier vs. later) p-value:", round(fisher.test(table(subdata$ci_brand, subdata$change_direction))$p.value, 3), "\n")
## ci_brand (earlier vs. later) p-value: 0.454
cat("\n--- Directional Analysis for Categorical Variable: CI processor ---\n")
## 
## --- Directional Analysis for Categorical Variable: CI processor ---
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "no_change"))
cat("processor (earlier vs. no_change) p-value:", round(fisher.test(table(subdata$processor, subdata$change_direction))$p.value, 3), "\n")
## processor (earlier vs. no_change) p-value: 0.446
subdata <- df_v4 %>% filter(change_direction %in% c("later", "no_change"))
cat("processor (later vs. no_change) p-value:", round(fisher.test(table(subdata$processor, subdata$change_direction))$p.value, 3), "\n")
## processor (later vs. no_change) p-value: 0.411
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "later"))


cat("\n--- Directional Analysis for VAS Scores (Visit 4) ---\n")
## 
## --- Directional Analysis for VAS Scores (Visit 4) ---
for(var in vas_vars){
  vas_var_name <- paste0("visit_4_", var)
  cat("\nTesting", vas_var_name, ":\n")
  pairwise_test_cont(df_v4, vas_var_name, "earlier", "no_change", normal = FALSE)
  pairwise_test_cont(df_v4, vas_var_name, "later", "no_change", normal = FALSE)
  pairwise_test_cont(df_v4, vas_var_name, "earlier", "later", normal = FALSE)
}
## 
## Testing visit_4_overall_1_10 :
## visit_4_overall_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.57
## visit_4_overall_1_10 ( later vs no_change ) Wilcoxon p-value: 0.15
## visit_4_overall_1_10 ( earlier vs later ) Wilcoxon p-value: 0.608 
## 
## Testing visit_4_pain_1_10 :
## visit_4_pain_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.229
## visit_4_pain_1_10 ( later vs no_change ) Wilcoxon p-value: 0.187
## visit_4_pain_1_10 ( earlier vs later ) Wilcoxon p-value: 0.777 
## 
## Testing visit_4_dizziness_1_10 :
## visit_4_dizziness_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.703
## visit_4_dizziness_1_10 ( later vs no_change ) Wilcoxon p-value: 0.678
## visit_4_dizziness_1_10 ( earlier vs later ) Wilcoxon p-value: 1 
## 
## Testing visit_4_energy_1_10 :
## visit_4_energy_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.75
## visit_4_energy_1_10 ( later vs no_change ) Wilcoxon p-value: 0.602
## visit_4_energy_1_10 ( earlier vs later ) Wilcoxon p-value: 0.471
# Plot transition heatmaps for each visit and save as PNG files
plot_change_matrix <- function(data, baseline_var, followup_var, visit_label) {
  transition_df <- data %>%
    filter(!is.na(.data[[baseline_var]]), !is.na(.data[[followup_var]])) %>%
    count(Preop = .data[[baseline_var]], Postop = .data[[followup_var]]) %>%
    group_by(Preop) %>%
    mutate(percentage = n / sum(n) * 100)
  
p <- ggplot(transition_df, aes(x = factor(Preop), y = factor(Postop), fill = n)) +
    geom_tile(color = "white") +
    # Custom blue gradient: light to medium/dark, so black text remains visible
    scale_fill_gradient(low = "#71c4d1", high = "#08306b") +
    geom_text(aes(label = paste0(n, " (", round(percentage, 1), "%)")), 
              color = "white", size = 5, face = "bold") +
    labs(
      title = paste("Activation Preference Transition (Visit 1 to", visit_label, ")"),
      x = "\nPreoperative Preference",
      y = paste("\nVisit", visit_label, "Preference\n"),
      fill = "Count"
    ) +
    theme_minimal(base_size = 12) +
    theme(
      # Bold and center the title
      plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
      # Bold the axis titles
      axis.title.x = element_text(face = "bold"),
      axis.title.y = element_text(face = "bold"),
      # Remove minor grid lines and vertical major lines for a cleaner look
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank()
    )
  return(p)
}


post_visits <- c(2, 3, 4)
for(v in post_visits){
  baseline_var <- "visit_1_activation_pref_ordinal"
  followup_var <- paste0("visit_", v, "_activation_pref_ordinal")
  p <- plot_change_matrix(data_wide, baseline_var, followup_var, visit_label = v)
  print(p)
  ggsave(paste0("transition_visit1_vs_visit", v, ".tiff"), p, width = 8, height = 6, dpi = 1200, bg = "white")
}

# Save the wide-format data for record keeping
write_csv(data_wide, "/Users/armansaeedi/Desktop/ArmanSaeedi/Med School/Phase V/Post Match/VCU/Coelho/CI Activation Study/Updated DAta/wide_3_6.csv")

# Open the long-format CSV (update file path if necessary)
file.path_long <- "/Users/armansaeedi/Desktop/ArmanSaeedi/Med School/Phase V/Post Match/VCU/Coelho/CI Activation Study/Updated DAta/long_3_6.csv"

# Import the CSV file into a data frame
data_long <- read_csv(file.path_long)

# Print column names for verification
print(colnames(data_long))
##  [1] "record_id"                       "visit"                          
##  [3] "hld_years"                       "log_hld_years"                  
##  [5] "hld_category"                    "age"                            
##  [7] "sex"                             "bmi"                            
##  [9] "ci_brand"                        "processor"                      
## [11] "total"                           "score_conversion"               
## [13] "se_converted"                    "overall_1_10"                   
## [15] "pain_1_10"                       "dizziness_1_10"                 
## [17] "energy_1_10"                     "visit_1_activation_pref"        
## [19] "visit_1_activation_pref_ordinal" "visit_1_hear_asap"              
## [21] "visit_1_wait_to_recover"         "visit_1_wait_to_adjust"         
## [23] "visit_1_logistical_travel"       "visit_1_other_yn"               
## [25] "free text"
# Remove the "visit_1_" prefix from all columns that start with it
data_long <- data_long %>%
  rename_with(~str_remove(., "visit_1_"), starts_with("visit_1_"))
# Create a clean factor variable for activation preference with desired ordering
data_long <- data_long %>%
  mutate(activation_pref = factor(activation_pref,
                                  levels = c("Immediately following surgery/the day of surgery",
                                             "Within 5 days after surgery, but not on the actual day of surgery",
                                             "1 week after surgery",
                                             "2 weeks after surgery",
                                             "3 weeks after surgery (standard activation time)",
                                             "More than 3 weeks after surgery"),
                                  labels = c("0 days", "1–5 days", "1 week", "2 weeks", "3 weeks", ">3 weeks")))

# Create the preference table by visit
pref_table <- data_long %>%
  group_by(visit, activation_pref) %>%
  summarise(n = n(), .groups = 'drop') %>%
  filter(!is.na(activation_pref)) %>%
  group_by(visit) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  pivot_wider(names_from = visit,
              values_from = c(n, percent),
              names_glue = "Visit_{visit}_{.value}") %>%
  arrange(factor(activation_pref, levels = c("0 days", "1–5 days", "1 week",
                                             "2 weeks", "3 weeks", ">3 weeks")))
# Print the resulting preference table
kable(pref_table, caption = "Table 13. Activation Preference Distribution by Visit")
Table 13. Activation Preference Distribution by Visit
activation_pref Visit_1_n Visit_2_n Visit_3_n Visit_4_n Visit_1_percent Visit_2_percent Visit_3_percent Visit_4_percent
0 days 13 4 4 4 25.5 12.5 12.1 8.7
1–5 days 5 3 2 2 9.8 9.4 6.1 4.3
1 week 9 7 5 10 17.6 21.9 15.2 21.7
2 weeks 9 11 12 8 17.6 34.4 36.4 17.4
3 weeks 12 6 8 15 23.5 18.8 24.2 32.6
>3 weeks 3 1 2 7 5.9 3.1 6.1 15.2
library(dplyr)
library(tidyr)
library(ggplot2)

pref_table_df <- as.data.frame(pref_table)

desired_order <- c("Preoperative\nPreference",
                   "Immediate Postoperative\nPreference",
                   "One Week Postoperative\nPreference",
                   "One Week Postactivation\nPreference")

pref_table_long <- pref_table_df %>%
  select(activation_pref, starts_with("Visit_")) %>%
  pivot_longer(
    cols = -activation_pref, 
    names_to = c("visit", ".value"),
    names_pattern = "Visit_(.*)_(.*)"
  ) %>%
  mutate(visit = case_when(
    visit == "1" ~ "Preoperative\nPreference",
    visit == "2" ~ "Immediate Postoperative\nPreference",
    visit == "3" ~ "One Week Postoperative\nPreference",      
    visit == "4" ~ "One Week Postactivation\nPreference",
    TRUE ~ visit
  )) %>%
  mutate(visit = factor(visit, levels = desired_order))

max_percent <- max(pref_table_long$percent, na.rm = TRUE)

ggplot(pref_table_long, aes(x = activation_pref, y = percent, color = visit, group = visit)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(
    x = "\nPreferred Activation Time",
    y = "Frequency (%)",
    title = "Preoperative Versus Postoperative Preferences"
  ) +
  scale_y_continuous(limits = c(0, 1.25 * max_percent)) +
  theme_minimal(base_size = 14) +
  theme(
    # Bold the title and center it
    plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
    # Bold the axis titles
    axis.title.x = element_text(face = "bold", size = 14),
    axis.title.y = element_text(face = "bold", size = 14),
    # Adjust legend
    legend.title = element_blank(),
    legend.position = "bottom",
    # Optionally reduce or remove some grid lines
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank()
  )
Preoperative Versus Postoperative Preferences

Preoperative Versus Postoperative Preferences

# save chart as tiff
ggsave("preop_vs_postop_preferences.tiff", width = 8, height = 6, dpi = 1200, bg = "white")
# Reshape data to long format for rationale responses
reasons_long <- data_long %>%
  select(record_id, visit, hear_asap, wait_to_recover, wait_to_adjust, logistical_travel, other_yn) %>%
  pivot_longer(
    cols = c(hear_asap, wait_to_recover, wait_to_adjust, logistical_travel, other_yn),
    names_to = "reason",
    values_to = "response"
  ) %>%
  filter(response == "Y")  # Assuming responses are coded as "Y" for yes

# Summarize counts and percentages by visit and rationale
reasons_table <- reasons_long %>%
  group_by(visit, reason) %>%
  summarise(n = n(), .groups = 'drop') %>%
  group_by(visit) %>%
  mutate(percent = round(100 * n / sum(n), 1)) %>%
  pivot_wider(
    names_from = visit,
    values_from = c(n, percent),
    names_glue = "Visit_{visit}_{.value}"
  ) %>%
  arrange(reason)
# Print the rationale table
kable(reasons_table, caption = "Table 14. Rationale Responses (Yes) by Visit")
Table 14. Rationale Responses (Yes) by Visit
reason Visit_1_n Visit_2_n Visit_3_n Visit_4_n Visit_1_percent Visit_2_percent Visit_3_percent Visit_4_percent
hear_asap 23 15 16 23 38.3 37.5 37.2 40.4
logistical_travel 1 NA NA NA 1.7 NA NA NA
other_yn 1 2 NA 2 1.7 5.0 NA 3.5
wait_to_adjust 12 3 8 11 20.0 7.5 18.6 19.3
wait_to_recover 23 20 19 21 38.3 50.0 44.2 36.8
#########################################
# Rationale Analysis: Change from Visit 1 to Visit 4
#########################################
library(dplyr)
library(tidyr)
library(knitr)

# Step 1: Create a data frame with change direction.
df_rationale <- data_wide %>%
  filter(!is.na(visit_1_activation_pref_ordinal), !is.na(visit_4_activation_pref_ordinal)) %>%
  mutate(
    diff = visit_4_activation_pref_ordinal - visit_1_activation_pref_ordinal,
    change_direction = case_when(
      diff > 0 ~ "later",
      diff < 0 ~ "earlier",
      diff == 0 ~ "no_change"
    )
  )

# Define rationale columns for Visit 1 and Visit 4.
rationale_vars_visit1 <- c("visit_1_hear_asap", "visit_1_wait_to_recover", 
                           "visit_1_wait_to_adjust", "visit_1_logistical_travel", 
                           "visit_1_other_yn")
rationale_vars_visit4 <- c("visit_4_hear_asap", "visit_4_wait_to_recover", 
                           "visit_4_wait_to_adjust", "visit_4_logistical_travel", 
                           "visit_4_other_yn")

# Step 2: Pivot rationale responses to long format separately.
rationale_visit1 <- df_rationale %>%
  select(record_id, change_direction, all_of(rationale_vars_visit1)) %>%
  pivot_longer(cols = all_of(rationale_vars_visit1),
               names_to = "rationale",
               values_to = "response_visit1") %>%
  mutate(rationale = sub("visit_1_", "", rationale))

rationale_visit4 <- df_rationale %>%
  select(record_id, change_direction, all_of(rationale_vars_visit4)) %>%
  pivot_longer(cols = all_of(rationale_vars_visit4),
               names_to = "rationale",
               values_to = "response_visit4") %>%
  mutate(rationale = sub("visit_4_", "", rationale))

# Step 3: Join the two long-format data frames.
rationale_change <- inner_join(rationale_visit1, rationale_visit4, 
                               by = c("record_id", "rationale", "change_direction"))

# Step 4: Create Table A: Overall rationale responses.
overall_rationale <- rationale_change %>%
  pivot_longer(cols = c(response_visit1, response_visit4),
               names_to = "visit",
               values_to = "response") %>%
  mutate(visit = ifelse(visit == "response_visit1", "Visit 1", "Visit 4")) %>%
  filter(response == "Y") %>%  # assuming "Y" denotes a yes response
  group_by(visit, rationale) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(visit) %>%
  mutate(percentage = round(100 * count / sum(count), 1)) %>%
  ungroup()
kable(overall_rationale, caption = "Table A. Overall Rationale Responses (Yes) at Visits 1 and 4")
Table A. Overall Rationale Responses (Yes) at Visits 1 and 4
visit rationale count percentage
Visit 1 hear_asap 19 35.8
Visit 1 logistical_travel 1 1.9
Visit 1 other_yn 1 1.9
Visit 1 wait_to_adjust 10 18.9
Visit 1 wait_to_recover 22 41.5
Visit 4 hear_asap 21 38.2
Visit 4 other_yn 2 3.6
Visit 4 wait_to_adjust 11 20.0
Visit 4 wait_to_recover 21 38.2
# Step 5: Create Table B: Rationale responses by change direction subgroup.
rationale_by_group <- rationale_change %>%
  pivot_longer(cols = c(response_visit1, response_visit4),
               names_to = "visit",
               values_to = "response") %>%
  mutate(visit = ifelse(visit == "response_visit1", "Visit 1", "Visit 4")) %>%
  filter(response == "Y") %>%
  group_by(change_direction, rationale, visit) %>%
  summarise(count = n(), .groups = "drop") %>%
  group_by(change_direction, rationale) %>%
  mutate(percentage = round(100 * count / sum(count), 1)) %>%
  ungroup()
kable(rationale_by_group, caption = "Table B. Rationale Responses (Yes) at Visits 1 and 4 by Change Direction Subgroup")
Table B. Rationale Responses (Yes) at Visits 1 and 4 by Change Direction Subgroup
change_direction rationale visit count percentage
earlier hear_asap Visit 4 7 100.0
earlier other_yn Visit 1 1 100.0
earlier wait_to_adjust Visit 1 2 66.7
earlier wait_to_adjust Visit 4 1 33.3
earlier wait_to_recover Visit 1 8 80.0
earlier wait_to_recover Visit 4 2 20.0
later hear_asap Visit 1 15 68.2
later hear_asap Visit 4 7 31.8
later logistical_travel Visit 1 1 100.0
later other_yn Visit 4 2 100.0
later wait_to_adjust Visit 1 4 40.0
later wait_to_adjust Visit 4 6 60.0
later wait_to_recover Visit 1 7 35.0
later wait_to_recover Visit 4 13 65.0
no_change hear_asap Visit 1 4 36.4
no_change hear_asap Visit 4 7 63.6
no_change wait_to_adjust Visit 1 4 50.0
no_change wait_to_adjust Visit 4 4 50.0
no_change wait_to_recover Visit 1 7 53.8
no_change wait_to_recover Visit 4 6 46.2
###########################################
# Updated Subgroup-Stratified McNemar’s Test
# (Skips any rationale with non–2×2 tables)
###########################################
subgroups <- unique(df_rationale$change_direction)
rationale_names <- c("hear_asap", "wait_to_recover", "wait_to_adjust",
                     "logistical_travel", "other_yn")

cat("\n=== McNemar's Test for Rationale Shifts by Subgroup (Visit 1 vs. Visit 4) ===\n")
## 
## === McNemar's Test for Rationale Shifts by Subgroup (Visit 1 vs. Visit 4) ===
for(grp in subgroups) {
  cat("\nSubgroup:", grp, "\n")
  df_grp <- df_rationale %>% filter(change_direction == grp)
  for(r in rationale_names) {
    v1_col <- paste0("visit_1_", r)
    v4_col <- paste0("visit_4_", r)
    if(!all(c(v1_col, v4_col) %in% names(df_grp))) {
      cat("Columns", v1_col, "or", v4_col, "not found\n")
      next
    }
    df_pair <- df_grp %>% filter(!is.na(.data[[v1_col]]), !is.na(.data[[v4_col]]))
    contingency <- table(df_pair[[v1_col]], df_pair[[v4_col]])
    if(!all(dim(contingency) == c(2,2))) {
      cat("Skipping rationale", r, "in subgroup", grp, "- table is not 2×2\n")
      next
    }
    test <- mcnemar.test(contingency)
    cat("\nRationale:", r, "in subgroup:", grp, "\n")
    print(contingency)
    cat("McNemar's test p-value:", round(test$p.value, 3), "\n")
  }
}
## 
## Subgroup: later 
## 
## Rationale: hear_asap in subgroup: later 
##    
##      N  Y
##   N  4  3
##   Y 11  4
## McNemar's test p-value: 0.061 
## 
## Rationale: wait_to_recover in subgroup: later 
##    
##     N Y
##   N 6 9
##   Y 3 4
## McNemar's test p-value: 0.149 
## 
## Rationale: wait_to_adjust in subgroup: later 
##    
##      N  Y
##   N 14  4
##   Y  2  2
## McNemar's test p-value: 0.683 
## Skipping rationale logistical_travel in subgroup later - table is not 2×2
## Skipping rationale other_yn in subgroup later - table is not 2×2
## 
## Subgroup: earlier 
## Skipping rationale hear_asap in subgroup earlier - table is not 2×2
## 
## Rationale: wait_to_recover in subgroup: earlier 
##    
##     N Y
##   N 2 0
##   Y 6 2
## McNemar's test p-value: 0.041 
## 
## Rationale: wait_to_adjust in subgroup: earlier 
##    
##     N Y
##   N 8 0
##   Y 1 1
## McNemar's test p-value: 1 
## Skipping rationale logistical_travel in subgroup earlier - table is not 2×2
## Skipping rationale other_yn in subgroup earlier - table is not 2×2
## 
## Subgroup: no_change 
## 
## Rationale: hear_asap in subgroup: no_change 
##    
##     N Y
##   N 5 4
##   Y 1 3
## McNemar's test p-value: 0.371 
## 
## Rationale: wait_to_recover in subgroup: no_change 
##    
##     N Y
##   N 4 2
##   Y 3 4
## McNemar's test p-value: 1 
## 
## Rationale: wait_to_adjust in subgroup: no_change 
##    
##     N Y
##   N 7 2
##   Y 2 2
## McNemar's test p-value: 1 
## Skipping rationale logistical_travel in subgroup no_change - table is not 2×2
## Skipping rationale other_yn in subgroup no_change - table is not 2×2
# Using the binary recoding to test direction via a one-sided binomial test
# Subset to patients with non-missing activation preference at Visits 1 and 4
subset_data <- data_wide %>%
  filter(!is.na(visit_1_activation_pref_ordinal),
         !is.na(visit_4_activation_pref_ordinal))
# Recode into binary categories
# "early" = levels 0, 1, 2; "standard" = levels 3, 4, 5.
subset_data <- subset_data %>%
  mutate(
    pref_bin_visit1 = ifelse(visit_1_activation_pref_ordinal <= 2, "early", "standard"),
    pref_bin_visit4 = ifelse(visit_4_activation_pref_ordinal <= 2, "early", "standard")
  )
# Create the 2x2 contingency table
pref_table_bin <- table(subset_data$pref_bin_visit1, subset_data$pref_bin_visit4)
print(pref_table_bin)
##           
##            early standard
##   early        9       15
##   standard     6       15
# Extract discordant pair counts from your 2x2 table (pref_table)
b <- pref_table_bin["early", "standard"]  # switched from ≤2 weeks to ≥3 weeks
c <- pref_table_bin["standard", "early"]   # switched from ≥3 weeks to ≤2 weeks

# Perform a one-sided binomial test on the discordant pairs
binom_test <- binom.test(b, b + c, p = 0.5, alternative = "greater")
print(binom_test)
## 
##  Exact binomial test
## 
## data:  b and b + c
## number of successes = 15, number of trials = 21, p-value = 0.03918
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
##  0.5126112 1.0000000
## sample estimates:
## probability of success 
##              0.7142857