# Load necessary packages
library(tidyverse) # dplyr, tidyr, ggplot2, etc.
library(readr) # For fast CSV reading
library(mice) # For multiple imputation if needed
library(naniar) # For visualizing missing data
library(stringr)
library(psych)
library(car)
library(ordinal) # For cumulative link mixed models
library(DescTools) # For Bowker's test if needed
library(knitr) # For pretty tables in the final document
# Specify the file path (update if necessary)
file.path <- "/Users/armansaeedi/Desktop/ArmanSaeedi/Med School/Phase V/Post Match/VCU/Coelho/CI Activation Study/Updated DAta/Dataset_3_3_Wide.csv"
data_wide <- read_csv(file.path)
#rename all columns to lower
data_wide <- data_wide %>%
rename_all(tolower)
#remove rows with no value in record id
#rename record id to record_id
data_wide <- data_wide %>%
filter(!is.na(`record id`)) %>%
rename(record_id = `record id`)
data_wide <- data_wide %>%
# Extract numeric hearing loss duration values (up to three decimals) and set 0 values to NA
mutate(`hearing loss duration (years)` = as.numeric(str_extract(`hearing loss duration`, "\\d+\\.?\\d{0,3}")),
`hearing loss duration (years)` = na_if(`hearing loss duration (years)`, 0)) %>%
# Create categorical variable based on the numeric value; assign NA if the value is missing
mutate(`hearing loss duration category` = case_when(
is.na(`hearing loss duration (years)`) ~ NA_character_,
`hearing loss duration (years)` <= 5 ~ "0 to 5 years",
`hearing loss duration (years)` > 5 & `hearing loss duration (years)` <= 10 ~ "> 5 to 10 years",
`hearing loss duration (years)` > 10 & `hearing loss duration (years)` <= 15 ~ "> 10 to 15 years",
`hearing loss duration (years)` > 15 & `hearing loss duration (years)` <= 20 ~ "> 15 to 20 years",
`hearing loss duration (years)` > 20 ~ "> 20 years",
TRUE ~ NA_character_
)) %>%
# Reorder columns to bring the new hearing loss columns to the front
select(`record_id`, `hearing loss duration (years)`, `hearing loss duration category`, everything()) %>%
# Rename the new columns for clarity
rename(hld_years = `hearing loss duration (years)`,
hld_category = `hearing loss duration category`) %>%
# Remove the original 'hearing loss duration' column
select(-`hearing loss duration`)
# Impute missing hearing loss duration using linear regression with age as predictor
data_wide <- data_wide %>%
mutate(hld_years = ifelse(is.na(hld_years),
predict(lm(hld_years ~ age, data = .), .),
hld_years))
# Re-categorize hld_years
data_wide <- data_wide %>%
mutate(hld_category = case_when(
is.na(hld_years) ~ NA_character_,
hld_years <= 5 ~ "0 to 5 years",
hld_years > 5 & hld_years <= 10 ~ "> 5 to 10 years",
hld_years > 10 & hld_years <= 15 ~ "> 10 to 15 years",
hld_years > 15 & hld_years <= 20 ~ "> 15 to 20 years",
hld_years > 20 ~ "> 20 years",
TRUE ~ NA_character_
))
# Impute missing total using regression with age and hld_years
data_wide <- data_wide %>%
mutate(total = ifelse(is.na(total),
predict(lm(total ~ age + hld_years, data = .), .),
total))
# Impute missing CIQOL-10 values
data_wide <- data_wide %>%
mutate(score_conversion = ifelse(is.na(score_conversion), 43.62, score_conversion),
se_converted = ifelse(is.na(se_converted), 3.12, se_converted))
# Rename "brand of ci" to ci_brand for consistency
data_wide <- data_wide %>%
rename(ci_brand = `brand of ci`)
# Subset relevant columns for descriptives
descriptives_df <- data_wide %>%
select(age, sex, bmi, hld_years, hld_category, ci_brand, processor, score_conversion, se_converted)
# Continuous descriptive statistics using psych::describe
continuous_vars <- describe(descriptives_df %>% select(age, bmi, hld_years, score_conversion, se_converted))
print("Continuous Variables:")
## [1] "Continuous Variables:"
print(continuous_vars)
## vars n mean sd median trimmed mad min max range
## age 1 52 65.60 15.96 64.03 66.32 13.92 31.80 95.06 63.26
## bmi 2 52 28.19 5.52 27.65 28.02 5.51 17.33 42.38 25.05
## hld_years 3 52 16.67 15.60 13.50 13.86 11.12 0.25 65.00 64.75
## score_conversion 4 52 39.23 8.67 39.56 39.67 8.12 18.53 56.45 37.92
## se_converted 5 52 3.29 0.34 3.18 3.20 0.09 3.11 4.77 1.66
## skew kurtosis se
## age -0.25 -0.72 2.21
## bmi 0.35 -0.25 0.77
## hld_years 1.56 2.04 2.16
## score_conversion -0.37 -0.34 1.20
## se_converted 2.75 7.26 0.05
# Calculate IQR for hld_years
hld_iqr <- descriptives_df %>%
summarise(median = median(hld_years, na.rm = TRUE),
q1 = quantile(hld_years, 0.25, na.rm = TRUE),
q3 = quantile(hld_years, 0.75, na.rm = TRUE),
iqr = q3 - q1)
print(hld_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 13.5 5 20 15
# Categorical variables
print("Categorical Variables:")
## [1] "Categorical Variables:"
print("Sex Distribution:")
## [1] "Sex Distribution:"
sex_table <- table(descriptives_df$sex)
print(prop.table(sex_table))
##
## F M
## 0.5384615 0.4615385
print("Brand of CI Distribution:")
## [1] "Brand of CI Distribution:"
brand_distribution <- descriptives_df %>%
count(ci_brand) %>%
mutate(percentage = n / sum(n) * 100)
print(brand_distribution)
## # A tibble: 3 × 3
## ci_brand n percentage
## <chr> <int> <dbl>
## 1 AB 17 32.7
## 2 CA 32 61.5
## 3 MedEl 3 5.77
print("Processor Distribution:")
## [1] "Processor Distribution:"
processor_distribution <- descriptives_df %>%
count(processor) %>%
mutate(percentage = n / sum(n) * 100)
print(processor_distribution)
## # A tibble: 6 × 3
## processor n percentage
## <chr> <int> <dbl>
## 1 Kanso 2 1 1.92
## 2 N8 16 30.8
## 3 N8/K2 17 32.7
## 4 Naida M90 15 28.8
## 5 Sonnet 2 2 3.85
## 6 Sonnet 2, Rondo 3 1 1.92
print("Hearing Loss Duration Category:")
## [1] "Hearing Loss Duration Category:"
hld_distribution <- descriptives_df %>%
count(hld_category) %>%
mutate(percentage = n / sum(n) * 100)
print(hld_distribution)
## # A tibble: 5 × 3
## hld_category n percentage
## <chr> <int> <dbl>
## 1 0 to 5 years 14 26.9
## 2 > 10 to 15 years 4 7.69
## 3 > 15 to 20 years 13 25
## 4 > 20 years 11 21.2
## 5 > 5 to 10 years 10 19.2
#check normality visually with histograms and Q-Q plots:
continuous_vars <- c("age", "bmi", "hld_years", "score_conversion", "se_converted")
# Histograms and Q-Q Plots
par(mfrow=c(2,2))
for(var in continuous_vars){
hist(data_wide[[var]], main=paste("Histogram of", var), xlab=var, col='lightblue')
qqPlot(data_wide[[var]], main=paste("Q-Q plot of", var))
}


# Shapiro-Wilk tests for normality
normality_tests <- lapply(continuous_vars, function(v) {
shapiro.test(data_wide[[v]])
})
names(normality_tests) <- continuous_vars
normality_tests
## $age
##
## Shapiro-Wilk normality test
##
## data: data_wide[[v]]
## W = 0.96612, p-value = 0.1441
##
##
## $bmi
##
## Shapiro-Wilk normality test
##
## data: data_wide[[v]]
## W = 0.98314, p-value = 0.6665
##
##
## $hld_years
##
## Shapiro-Wilk normality test
##
## data: data_wide[[v]]
## W = 0.81611, p-value = 1.451e-06
##
##
## $score_conversion
##
## Shapiro-Wilk normality test
##
## data: data_wide[[v]]
## W = 0.97816, p-value = 0.4508
##
##
## $se_converted
##
## Shapiro-Wilk normality test
##
## data: data_wide[[v]]
## W = 0.55167, p-value = 2.312e-11
# Log transform hld_years due to skew
data_wide$log_hld_years <- log(data_wide$hld_years + 1) # Add 1 to avoid log(0)
# Re-check normality
hist(data_wide$log_hld_years, main="Histogram of log-transformed hld_years", col='lightblue')
qqPlot(data_wide$log_hld_years, main="QQ plot of log-transformed hld_years")

## [1] 9 21
shapiro.test(data_wide$log_hld_years)
##
## Shapiro-Wilk normality test
##
## data: data_wide$log_hld_years
## W = 0.97506, p-value = 0.3421
#append visit_1_ to overall_1_10, pain_1_10, dizziness_1_10, energy_1_10
data_wide <- data_wide %>%
rename(visit_1_overall_1_10 = overall_1_10,
visit_1_pain_1_10 = pain_1_10,
visit_1_dizziness_1_10 = dizziness_1_10,
visit_1_energy_1_10 = energy_1_10)
descriptives_VAS <- data_wide %>%
select(visit_1_overall_1_10, visit_1_pain_1_10, visit_1_dizziness_1_10, visit_1_energy_1_10, visit_2_overall_1_10, visit_2_pain_1_10, visit_2_dizziness_1_10, visit_2_energy_1_10, visit_3_overall_1_10, visit_3_pain_1_10, visit_3_dizziness_1_10, visit_3_energy_1_10, visit_4_overall_1_10, visit_4_pain_1_10, visit_4_dizziness_1_10, visit_4_energy_1_10)
#store IQR variables in a list
iqr_vars <- c("visit_1_overall_iqr", "visit_2_overall_iqr", "visit_3_overall_iqr", "visit_4_overall_iqr", "visit_1_pain_iqr", "visit_2_pain_iqr", "visit_3_pain_iqr", "visit_4_pain_iqr", "visit_1_dizziness_iqr", "visit_2_dizziness_iqr", "visit_3_dizziness_iqr", "visit_4_dizziness_iqr", "visit_1_energy_iqr", "visit_2_energy_iqr", "visit_3_energy_iqr", "visit_4_energy_iqr")
#calculate median and interquartile range for overall, dizziness, pain, and energy at each visit
visit_1_overall_iqr <- descriptives_VAS %>%
summarise(median = median(visit_1_overall_1_10, na.rm = TRUE),
q1 = quantile(visit_1_overall_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_1_overall_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_2_overall_iqr <- descriptives_VAS %>%
summarise(median = median(visit_2_overall_1_10, na.rm = TRUE),
q1 = quantile(visit_2_overall_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_2_overall_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_3_overall_iqr <- descriptives_VAS %>%
summarise(median = median(visit_3_overall_1_10, na.rm = TRUE),
q1 = quantile(visit_3_overall_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_3_overall_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_4_overall_iqr <- descriptives_VAS %>%
summarise(median = median(visit_4_overall_1_10, na.rm = TRUE),
q1 = quantile(visit_4_overall_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_4_overall_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_1_pain_iqr <- descriptives_VAS %>%
summarise(median = median(visit_1_pain_1_10, na.rm = TRUE),
q1 = quantile(visit_1_pain_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_1_pain_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_2_pain_iqr <- descriptives_VAS %>%
summarise(median = median(visit_2_pain_1_10, na.rm = TRUE),
q1 = quantile(visit_2_pain_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_2_pain_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_3_pain_iqr <- descriptives_VAS %>%
summarise(median = median(visit_3_pain_1_10, na.rm = TRUE),
q1 = quantile(visit_3_pain_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_3_pain_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_4_pain_iqr <- descriptives_VAS %>%
summarise(median = median(visit_4_pain_1_10, na.rm = TRUE),
q1 = quantile(visit_4_pain_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_4_pain_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_1_dizziness_iqr <- descriptives_VAS %>%
summarise(median = median(visit_1_dizziness_1_10, na.rm = TRUE),
q1 = quantile(visit_1_dizziness_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_1_dizziness_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_2_dizziness_iqr <- descriptives_VAS %>%
summarise(median = median(visit_2_dizziness_1_10, na.rm = TRUE),
q1 = quantile(visit_2_dizziness_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_2_dizziness_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_3_dizziness_iqr <- descriptives_VAS %>%
summarise(median = median(visit_3_dizziness_1_10, na.rm = TRUE),
q1 = quantile(visit_3_dizziness_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_3_dizziness_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_4_dizziness_iqr <- descriptives_VAS %>%
summarise(median = median(visit_4_dizziness_1_10, na.rm = TRUE),
q1 = quantile(visit_4_dizziness_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_4_dizziness_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_1_energy_iqr <- descriptives_VAS %>%
summarise(median = median(visit_1_energy_1_10, na.rm = TRUE),
q1 = quantile(visit_1_energy_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_1_energy_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_2_energy_iqr <- descriptives_VAS %>%
summarise(median = median(visit_2_energy_1_10, na.rm = TRUE),
q1 = quantile(visit_2_energy_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_2_energy_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_3_energy_iqr <- descriptives_VAS %>%
summarise(median = median(visit_3_energy_1_10, na.rm = TRUE),
q1 = quantile(visit_3_energy_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_3_energy_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
visit_4_energy_iqr <- descriptives_VAS %>%
summarise(median = median(visit_4_energy_1_10, na.rm = TRUE),
q1 = quantile(visit_4_energy_1_10, 0.25, na.rm = TRUE),
q3 = quantile(visit_4_energy_1_10, 0.75, na.rm = TRUE),
iqr = q3 - q1)
#print all IQR variables
print(visit_1_overall_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 9 7 10 3
print(visit_2_overall_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 5.5 3 7 4
print(visit_3_overall_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 8 7 9 2
print(visit_4_overall_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 9 7 10 3
print(visit_1_pain_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 1.25 1.25
print(visit_2_pain_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 5 4 7 3
print(visit_3_pain_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 2 2
print(visit_4_pain_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 0 0
print(visit_1_dizziness_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 1.25 1.25
print(visit_2_dizziness_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 1.5 0 5 5
print(visit_3_dizziness_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 2 0 5 5
print(visit_4_dizziness_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 0 0 1 1
print(visit_1_energy_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 8 5.75 8.25 2.5
print(visit_2_energy_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 3 2 5.25 3.25
print(visit_3_energy_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 7 5 8 3
print(visit_4_energy_iqr)
## # A tibble: 1 × 4
## median q1 q3 iqr
## <dbl> <dbl> <dbl> <dbl>
## 1 8 6 8.5 2.5
#Missingness check
cat("=== Missingness Check ===\n")
## === Missingness Check ===
missingness <- data_wide %>%
summarise(across(starts_with("visit_"), ~sum(is.na(.x)))) %>%
select(contains("activation_pref_ordinal"))
print(missingness)
## # A tibble: 1 × 4
## visit_1_activation_pref_ordinal visit_2_activation_pr…¹ visit_3_activation_p…²
## <int> <int> <int>
## 1 1 20 19
## # ℹ abbreviated names: ¹visit_2_activation_pref_ordinal,
## # ²visit_3_activation_pref_ordinal
## # ℹ 1 more variable: visit_4_activation_pref_ordinal <int>
# Calculate and print complete-case counts for activation preference comparisons
for(v in 1:4){
count_complete <- data_wide %>%
filter(!is.na(visit_1_activation_pref_ordinal),
!is.na(.data[[paste0("visit_", v, "_activation_pref_ordinal")]])) %>%
nrow()
cat("Complete-case count for Visit", v, "comparison:", count_complete, "\n")
}
## Complete-case count for Visit 1 comparison: 51
## Complete-case count for Visit 2 comparison: 32
## Complete-case count for Visit 3 comparison: 33
## Complete-case count for Visit 4 comparison: 45
#########################################
# Section 2: Correlation Analyses for Continuous Variables
#########################################
# Calculate Spearman correlations for baseline and VAS variables at each visit
baseline_vars <- c("age", "bmi", "hld_years", "log_hld_years", "score_conversion")
vas_vars <- c("overall_1_10", "pain_1_10", "dizziness_1_10", "energy_1_10")
visits <- 1:4
all_correlations <- data.frame()
for(v in visits){
act_var <- paste0("visit_", v, "_activation_pref_ordinal")
df_v <- data_wide %>% filter(!is.na(.data[[act_var]]))
for(var in baseline_vars){
if(sum(!is.na(df_v[[var]])) > 5){
test <- cor.test(df_v[[var]], df_v[[act_var]], method = "spearman")
all_correlations <- bind_rows(all_correlations, data.frame(
visit = v, variable = var, rho = round(test$estimate, 3), p.value = round(test$p.value, 3)
))
}
}
for(var in vas_vars){
vas_var <- paste0("visit_", v, "_", var)
if(sum(!is.na(df_v[[vas_var]])) > 5){
test <- cor.test(df_v[[vas_var]], df_v[[act_var]], method = "spearman")
all_correlations <- bind_rows(all_correlations, data.frame(
visit = v, variable = vas_var, rho = round(test$estimate, 3), p.value = round(test$p.value, 3)
))
}
}
}
kable(all_correlations, caption = "Table 2. Spearman Correlations for Baseline and VAS Variables")
Table 2. Spearman Correlations for Baseline and VAS
Variables
| rho…1 |
1 |
age |
-0.146 |
0.306 |
| rho…2 |
1 |
bmi |
0.068 |
0.636 |
| rho…3 |
1 |
hld_years |
-0.053 |
0.713 |
| rho…4 |
1 |
log_hld_years |
-0.053 |
0.713 |
| rho…5 |
1 |
score_conversion |
-0.081 |
0.573 |
| rho…6 |
1 |
visit_1_overall_1_10 |
-0.074 |
0.606 |
| rho…7 |
1 |
visit_1_pain_1_10 |
-0.050 |
0.729 |
| rho…8 |
1 |
visit_1_dizziness_1_10 |
0.004 |
0.975 |
| rho…9 |
1 |
visit_1_energy_1_10 |
0.070 |
0.625 |
| rho…10 |
2 |
age |
0.058 |
0.752 |
| rho…11 |
2 |
bmi |
-0.068 |
0.710 |
| rho…12 |
2 |
hld_years |
-0.272 |
0.132 |
| rho…13 |
2 |
log_hld_years |
-0.272 |
0.132 |
| rho…14 |
2 |
score_conversion |
0.228 |
0.209 |
| rho…15 |
2 |
visit_2_overall_1_10 |
-0.099 |
0.588 |
| rho…16 |
2 |
visit_2_pain_1_10 |
-0.166 |
0.363 |
| rho…17 |
2 |
visit_2_dizziness_1_10 |
0.378 |
0.033 |
| rho…18 |
2 |
visit_2_energy_1_10 |
-0.024 |
0.897 |
| rho…19 |
3 |
age |
-0.006 |
0.973 |
| rho…20 |
3 |
bmi |
-0.341 |
0.052 |
| rho…21 |
3 |
hld_years |
-0.160 |
0.375 |
| rho…22 |
3 |
log_hld_years |
-0.160 |
0.375 |
| rho…23 |
3 |
score_conversion |
0.065 |
0.721 |
| rho…24 |
3 |
visit_3_overall_1_10 |
-0.310 |
0.079 |
| rho…25 |
3 |
visit_3_pain_1_10 |
-0.281 |
0.114 |
| rho…26 |
3 |
visit_3_dizziness_1_10 |
0.020 |
0.913 |
| rho…27 |
3 |
visit_3_energy_1_10 |
-0.112 |
0.536 |
| rho…28 |
4 |
age |
-0.216 |
0.149 |
| rho…29 |
4 |
bmi |
-0.147 |
0.329 |
| rho…30 |
4 |
hld_years |
0.172 |
0.253 |
| rho…31 |
4 |
log_hld_years |
0.172 |
0.253 |
| rho…32 |
4 |
score_conversion |
-0.061 |
0.685 |
| rho…33 |
4 |
visit_4_overall_1_10 |
0.033 |
0.826 |
| rho…34 |
4 |
visit_4_pain_1_10 |
0.115 |
0.448 |
| rho…35 |
4 |
visit_4_dizziness_1_10 |
-0.043 |
0.775 |
| rho…36 |
4 |
visit_4_energy_1_10 |
0.002 |
0.989 |
#########################################
# Section 3: Dichotomous Variable Analysis for sex
#########################################
# Perform Wilcoxon tests for sex at each visit
sex_results <- data.frame()
for(v in visits){
act_var <- paste0("visit_", v, "_activation_pref_ordinal")
df_v <- data_wide %>% filter(!is.na(.data[[act_var]]))
test <- wilcox.test(df_v[[act_var]] ~ df_v$sex)
sex_results <- bind_rows(sex_results, data.frame(
Visit = v, W_Statistic = round(test$statistic, 3), p_value = round(test$p.value, 3)
))
}
kable(sex_results, caption = "Table 3. Sex Comparisons (Wilcoxon Rank-Sum Test) by Visit")
Table 3. Sex Comparisons (Wilcoxon Rank-Sum Test) by
Visit
| W…1 |
1 |
381.0 |
0.276 |
| W…2 |
2 |
119.5 |
1.000 |
| W…3 |
3 |
178.5 |
0.090 |
| W…4 |
4 |
253.5 |
0.847 |
#########################################
# Section 4: Categorical Variable Comparisons (ci_brand, processor)
#########################################
# Categorical comparisons for ci_brand and processor
catcomp <- data.frame()
cat_vars <- c("ci_brand", "processor")
for(v in visits){
act_var <- paste0("visit_", v, "_activation_pref_ordinal")
df_v <- data_wide %>% filter(!is.na(.data[[act_var]]))
for(cat_var in cat_vars){
test <- kruskal.test(df_v[[act_var]] ~ as.factor(df_v[[cat_var]]))
catcomp <- bind_rows(catcomp, data.frame(
Visit = v, Variable = cat_var, Chi_Sq = round(test$statistic, 3), p_value = round(test$p.value, 3)
))
}
}
kable(catcomp, caption = "Table 4. Categorical Comparisons for CI Brand and Processor by Visit")
Table 4. Categorical Comparisons for CI Brand and Processor by
Visit
| Kruskal-Wallis chi-squared…1 |
1 |
ci_brand |
1.325 |
0.516 |
| Kruskal-Wallis chi-squared…2 |
1 |
processor |
8.927 |
0.112 |
| Kruskal-Wallis chi-squared…3 |
2 |
ci_brand |
0.769 |
0.681 |
| Kruskal-Wallis chi-squared…4 |
2 |
processor |
2.928 |
0.570 |
| Kruskal-Wallis chi-squared…5 |
3 |
ci_brand |
3.668 |
0.160 |
| Kruskal-Wallis chi-squared…6 |
3 |
processor |
4.430 |
0.351 |
| Kruskal-Wallis chi-squared…7 |
4 |
ci_brand |
4.292 |
0.117 |
| Kruskal-Wallis chi-squared…8 |
4 |
processor |
5.920 |
0.314 |
#########################################
# Section 5: Wilcoxon Signed-Rank Tests (Visit 1 vs. Visits 2, 3, and 4)
#########################################
# Wilcoxon signed-rank tests for change in activation preference (Visit 1 vs. Visits 2, 3, 4)
wilcox_results <- data.frame()
for(v in 2:4){
df_v <- data_wide %>% filter(!is.na(visit_1_activation_pref_ordinal),
!is.na(.data[[paste0("visit_", v, "_activation_pref_ordinal")]]))
# Calculate differences and exclude tied pairs (where difference == 0)
diff_vals <- df_v$visit_1_activation_pref_ordinal - df_v[[paste0("visit_", v, "_activation_pref_ordinal")]]
nonzero_df <- df_v[diff_vals != 0, ]
# Get sample size for non-tied pairs
n <- nrow(nonzero_df)
# Run the paired Wilcoxon test on non-tied data
test <- wilcox.test(nonzero_df$visit_1_activation_pref_ordinal,
nonzero_df[[paste0("visit_", v, "_activation_pref_ordinal")]], paired = TRUE)
wilcox_results <- bind_rows(wilcox_results, data.frame(
Comparison = paste("Visit 1 vs Visit", v),
n = n,
V_Statistic = round(test$statistic, 3),
p_value = round(test$p.value, 3)
))
}
kable(wilcox_results, caption = "Table 5. Wilcoxon Signed-Rank Test Results for Change in Activation Preference")
Table 5. Wilcoxon Signed-Rank Test Results for Change in
Activation Preference
| V…1 |
Visit 1 vs Visit 2 |
21 |
97.5 |
0.538 |
| V…2 |
Visit 1 vs Visit 3 |
21 |
77.0 |
0.182 |
| V…3 |
Visit 1 vs Visit 4 |
32 |
130.0 |
0.012 |
#########################################
# Section 6: Subgroup Analysis for Significant Change (Example: Visit 4)
#########################################
# Subgroup analysis for Visit 4 (Changed vs. No Change)
df_v4 <- data_wide %>%
filter(!is.na(visit_1_activation_pref_ordinal), !is.na(visit_4_activation_pref_ordinal)) %>%
mutate(
diff = visit_4_activation_pref_ordinal - visit_1_activation_pref_ordinal,
change_binary = ifelse(diff != 0, "changed", "no_change"),
change_direction = case_when(
diff > 0 ~ "later",
diff < 0 ~ "earlier",
diff == 0 ~ "no_change"
)
)
cat("Complete-case count for Visit 4 comparison:", nrow(df_v4), "\n")
## Complete-case count for Visit 4 comparison: 45
kable(as.data.frame(table(df_v4$change_binary)), caption = "Table 6. Frequency of Change (Changed vs. No Change) at Visit 4")
Table 6. Frequency of Change (Changed vs. No Change) at Visit
4
| changed |
32 |
| no_change |
13 |
kable(as.data.frame(table(df_v4$change_direction)), caption = "Table 7. Frequency of Change Direction at Visit 4")
Table 7. Frequency of Change Direction at Visit 4
| earlier |
10 |
| later |
22 |
| no_change |
13 |
# Subgroup comparisons: Continuous variables
subgroup_cont <- data.frame(
Variable = c("Age", "BMI", "hld_years", "score_conversion"),
Test = c("t-test", "t-test", "Wilcoxon", "t-test")
)
subgroup_cont$Statistic <- c(
round(t.test(age ~ change_binary, data = df_v4, var.equal = TRUE)$statistic, 3),
round(t.test(bmi ~ change_binary, data = df_v4, var.equal = TRUE)$statistic, 3),
round(wilcox.test(hld_years ~ change_binary, data = df_v4)$statistic, 3),
round(t.test(score_conversion ~ change_binary, data = df_v4, var.equal = TRUE)$statistic, 3)
)
subgroup_cont$p_value <- c(
round(t.test(age ~ change_binary, data = df_v4, var.equal = TRUE)$p.value, 3),
round(t.test(bmi ~ change_binary, data = df_v4, var.equal = TRUE)$p.value, 3),
round(wilcox.test(hld_years ~ change_binary, data = df_v4)$p.value, 3),
round(t.test(score_conversion ~ change_binary, data = df_v4, var.equal = TRUE)$p.value, 3)
)
kable(subgroup_cont, caption = "Table 8. Subgroup Comparisons for Continuous Variables (Changed vs. No Change) at Visit 4")
Table 8. Subgroup Comparisons for Continuous Variables (Changed
vs. No Change) at Visit 4
| Age |
t-test |
0.346 |
0.731 |
| BMI |
t-test |
0.205 |
0.839 |
| hld_years |
Wilcoxon |
205.500 |
0.960 |
| score_conversion |
t-test |
0.596 |
0.554 |
# Subgroup comparison for sex
sex_subgroup <- data.frame(
Variable = "sex",
Test = "Fisher's Exact Test",
p_value = round(fisher.test(table(df_v4$sex, df_v4$change_binary))$p.value, 3)
)
kable(sex_subgroup, caption = "Table 9. Subgroup Comparison for Sex at Visit 4")
Table 9. Subgroup Comparison for Sex at Visit 4
| sex |
Fisher’s Exact Test |
1 |
# Subgroup comparison for CI brand
ci_brand_subgroup <- data.frame(
Variable = "ci_brand",
Test = "Fisher's Exact Test",
p_value = round(fisher.test(table(df_v4$ci_brand, df_v4$change_binary))$p.value, 3)
)
kable(ci_brand_subgroup, caption = "Table 10. Subgroup Comparison for CI Brand at Visit 4")
Table 10. Subgroup Comparison for CI Brand at Visit 4
| ci_brand |
Fisher’s Exact Test |
0.074 |
#subgroup comparisons for processor
processor_subgroup <- data.frame(
Variable = "processor",
Test = "Fisher's Exact Test",
p_value = round(fisher.test(table(df_v4$processor, df_v4$change_binary))$p.value, 3)
)
kable(processor_subgroup, caption = "Table 11. Subgroup Comparison for Processor at Visit 4")
Table 11. Subgroup Comparison for Processor at Visit
4
| processor |
Fisher’s Exact Test |
0.289 |
# Subgroup comparisons for VAS scores at Visit 4
subgroup_vas <- data.frame(
Variable = c("visit_4_overall_1_10", "visit_4_pain_1_10", "visit_4_dizziness_1_10", "visit_4_energy_1_10"),
Test = rep("Wilcoxon", 4)
)
subgroup_vas$Statistic <- c(
round(wilcox.test(df_v4$visit_4_overall_1_10 ~ df_v4$change_binary)$statistic, 3),
round(wilcox.test(df_v4$visit_4_pain_1_10 ~ df_v4$change_binary)$statistic, 3),
round(wilcox.test(df_v4$visit_4_dizziness_1_10 ~ df_v4$change_binary)$statistic, 3),
round(wilcox.test(df_v4$visit_4_energy_1_10 ~ df_v4$change_binary)$statistic, 3)
)
subgroup_vas$p_value <- c(
round(wilcox.test(df_v4$visit_4_overall_1_10 ~ df_v4$change_binary)$p.value, 3),
round(wilcox.test(df_v4$visit_4_pain_1_10 ~ df_v4$change_binary)$p.value, 3),
round(wilcox.test(df_v4$visit_4_dizziness_1_10 ~ df_v4$change_binary)$p.value, 3),
round(wilcox.test(df_v4$visit_4_energy_1_10 ~ df_v4$change_binary)$p.value, 3)
)
kable(subgroup_vas, caption = "Table 12. Subgroup Comparisons for VAS Scores at Visit 4 (Changed vs. No Change)")
Table 12. Subgroup Comparisons for VAS Scores at Visit 4
(Changed vs. No Change)
| visit_4_overall_1_10 |
Wilcoxon |
259 |
0.193 |
| visit_4_pain_1_10 |
Wilcoxon |
166 |
0.119 |
| visit_4_dizziness_1_10 |
Wilcoxon |
192 |
0.636 |
| visit_4_energy_1_10 |
Wilcoxon |
218 |
0.809 |
#########################################
# Section 7: Directional Subgroup Analysis for Visit 4
# (Pairwise comparisons among "earlier", "later", and "no_change")
#########################################
# Directional Subgroup Analysis for Visit 4
cat("\n=== Directional Subgroup Analysis for Visit 4 ===\n")
##
## === Directional Subgroup Analysis for Visit 4 ===
print(table(df_v4$change_direction))
##
## earlier later no_change
## 10 22 13
# Helper function for continuous pairwise comparisons:
pairwise_test_cont <- function(data, var, group1, group2, normal = FALSE) {
subdata <- data %>% filter(change_direction %in% c(group1, group2))
if(nrow(subdata) < 5) {
cat("Not enough data for", var, "(", group1, "vs", group2, ")\n")
return(NULL)
}
if(normal) {
test <- t.test(as.formula(paste(var, "~ change_direction")), data = subdata)
cat(var, "(", group1, "vs", group2, ") t-test p-value:", round(test$p.value, 3), "\n")
} else {
test <- wilcox.test(as.formula(paste(var, "~ change_direction")), data = subdata)
cat(var, "(", group1, "vs", group2, ") Wilcoxon p-value:", round(test$p.value, 3), "\n")
}
}
cat("\n--- Directional Analysis for Continuous Variables ---\n")
##
## --- Directional Analysis for Continuous Variables ---
cont_vars <- c("age", "bmi", "hld_years", "score_conversion")
for(var in cont_vars) {
cat("\nTesting", var, ":\n")
pairwise_test_cont(df_v4, var, "earlier", "no_change", normal = (var %in% c("age", "bmi", "score_conversion")))
pairwise_test_cont(df_v4, var, "later", "no_change", normal = (var %in% c("age", "bmi", "score_conversion")))
pairwise_test_cont(df_v4, var, "earlier", "later", normal = (var %in% c("age", "bmi", "score_conversion")))
}
##
## Testing age :
## age ( earlier vs no_change ) t-test p-value: 0.91
## age ( later vs no_change ) t-test p-value: 0.708
## age ( earlier vs later ) t-test p-value: 0.806
##
## Testing bmi :
## bmi ( earlier vs no_change ) t-test p-value: 0.503
## bmi ( later vs no_change ) t-test p-value: 0.929
## bmi ( earlier vs later ) t-test p-value: 0.366
##
## Testing hld_years :
## hld_years ( earlier vs no_change ) Wilcoxon p-value: 0.576
## hld_years ( later vs no_change ) Wilcoxon p-value: 0.824
## hld_years ( earlier vs later ) Wilcoxon p-value: 0.337
##
## Testing score_conversion :
## score_conversion ( earlier vs no_change ) t-test p-value: 0.715
## score_conversion ( later vs no_change ) t-test p-value: 0.555
## score_conversion ( earlier vs later ) t-test p-value: 0.918
cat("\n--- Directional Analysis for Categorical Variable: sex ---\n")
##
## --- Directional Analysis for Categorical Variable: sex ---
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "no_change"))
cat("sex (earlier vs. no_change) p-value:", round(fisher.test(table(subdata$sex, subdata$change_direction))$p.value, 3), "\n")
## sex (earlier vs. no_change) p-value: 0.669
subdata <- df_v4 %>% filter(change_direction %in% c("later", "no_change"))
cat("sex (later vs. no_change) p-value:", round(fisher.test(table(subdata$sex, subdata$change_direction))$p.value, 3), "\n")
## sex (later vs. no_change) p-value: 0.733
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "later"))
cat("sex (earlier vs. later) p-value:", round(fisher.test(table(subdata$sex, subdata$change_direction))$p.value, 3), "\n")
## sex (earlier vs. later) p-value: 0.265
cat("\n--- Directional Analysis for Categorical Variable: CI brand ---\n")
##
## --- Directional Analysis for Categorical Variable: CI brand ---
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "no_change"))
cat("ci_brand (earlier vs. no_change) p-value:", round(fisher.test(table(subdata$ci_brand, subdata$change_direction))$p.value, 3), "\n")
## ci_brand (earlier vs. no_change) p-value: 0.629
subdata <- df_v4 %>% filter(change_direction %in% c("later", "no_change"))
cat("ci_brand (later vs. no_change) p-value:", round(fisher.test(table(subdata$ci_brand, subdata$change_direction))$p.value, 3), "\n")
## ci_brand (later vs. no_change) p-value: 0.04
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "later"))
cat("ci_brand (earlier vs. later) p-value:", round(fisher.test(table(subdata$ci_brand, subdata$change_direction))$p.value, 3), "\n")
## ci_brand (earlier vs. later) p-value: 0.454
cat("\n--- Directional Analysis for Categorical Variable: CI processor ---\n")
##
## --- Directional Analysis for Categorical Variable: CI processor ---
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "no_change"))
cat("processor (earlier vs. no_change) p-value:", round(fisher.test(table(subdata$processor, subdata$change_direction))$p.value, 3), "\n")
## processor (earlier vs. no_change) p-value: 0.446
subdata <- df_v4 %>% filter(change_direction %in% c("later", "no_change"))
cat("processor (later vs. no_change) p-value:", round(fisher.test(table(subdata$processor, subdata$change_direction))$p.value, 3), "\n")
## processor (later vs. no_change) p-value: 0.411
subdata <- df_v4 %>% filter(change_direction %in% c("earlier", "later"))
cat("\n--- Directional Analysis for VAS Scores (Visit 4) ---\n")
##
## --- Directional Analysis for VAS Scores (Visit 4) ---
for(var in vas_vars){
vas_var_name <- paste0("visit_4_", var)
cat("\nTesting", vas_var_name, ":\n")
pairwise_test_cont(df_v4, vas_var_name, "earlier", "no_change", normal = FALSE)
pairwise_test_cont(df_v4, vas_var_name, "later", "no_change", normal = FALSE)
pairwise_test_cont(df_v4, vas_var_name, "earlier", "later", normal = FALSE)
}
##
## Testing visit_4_overall_1_10 :
## visit_4_overall_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.57
## visit_4_overall_1_10 ( later vs no_change ) Wilcoxon p-value: 0.15
## visit_4_overall_1_10 ( earlier vs later ) Wilcoxon p-value: 0.608
##
## Testing visit_4_pain_1_10 :
## visit_4_pain_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.229
## visit_4_pain_1_10 ( later vs no_change ) Wilcoxon p-value: 0.187
## visit_4_pain_1_10 ( earlier vs later ) Wilcoxon p-value: 0.777
##
## Testing visit_4_dizziness_1_10 :
## visit_4_dizziness_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.703
## visit_4_dizziness_1_10 ( later vs no_change ) Wilcoxon p-value: 0.678
## visit_4_dizziness_1_10 ( earlier vs later ) Wilcoxon p-value: 1
##
## Testing visit_4_energy_1_10 :
## visit_4_energy_1_10 ( earlier vs no_change ) Wilcoxon p-value: 0.75
## visit_4_energy_1_10 ( later vs no_change ) Wilcoxon p-value: 0.602
## visit_4_energy_1_10 ( earlier vs later ) Wilcoxon p-value: 0.471
# Plot transition heatmaps for each visit and save as PNG files
plot_change_matrix <- function(data, baseline_var, followup_var, visit_label) {
transition_df <- data %>%
filter(!is.na(.data[[baseline_var]]), !is.na(.data[[followup_var]])) %>%
count(Preop = .data[[baseline_var]], Postop = .data[[followup_var]]) %>%
group_by(Preop) %>%
mutate(percentage = n / sum(n) * 100)
p <- ggplot(transition_df, aes(x = factor(Preop), y = factor(Postop), fill = n)) +
geom_tile(color = "white") +
# Custom blue gradient: light to medium/dark, so black text remains visible
scale_fill_gradient(low = "#71c4d1", high = "#08306b") +
geom_text(aes(label = paste0(n, " (", round(percentage, 1), "%)")),
color = "white", size = 5, face = "bold") +
labs(
title = paste("Activation Preference Transition (Visit 1 to", visit_label, ")"),
x = "\nPreoperative Preference",
y = paste("\nVisit", visit_label, "Preference\n"),
fill = "Count"
) +
theme_minimal(base_size = 12) +
theme(
# Bold and center the title
plot.title = element_text(face = "bold", hjust = 0.5, size = 14),
# Bold the axis titles
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
# Remove minor grid lines and vertical major lines for a cleaner look
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()
)
return(p)
}
post_visits <- c(2, 3, 4)
for(v in post_visits){
baseline_var <- "visit_1_activation_pref_ordinal"
followup_var <- paste0("visit_", v, "_activation_pref_ordinal")
p <- plot_change_matrix(data_wide, baseline_var, followup_var, visit_label = v)
print(p)
ggsave(paste0("transition_visit1_vs_visit", v, ".tiff"), p, width = 8, height = 6, dpi = 1200, bg = "white")
}



# Save the wide-format data for record keeping
write_csv(data_wide, "/Users/armansaeedi/Desktop/ArmanSaeedi/Med School/Phase V/Post Match/VCU/Coelho/CI Activation Study/Updated DAta/wide_3_6.csv")
# Open the long-format CSV (update file path if necessary)
file.path_long <- "/Users/armansaeedi/Desktop/ArmanSaeedi/Med School/Phase V/Post Match/VCU/Coelho/CI Activation Study/Updated DAta/long_3_6.csv"
# Import the CSV file into a data frame
data_long <- read_csv(file.path_long)
# Print column names for verification
print(colnames(data_long))
## [1] "record_id" "visit"
## [3] "hld_years" "log_hld_years"
## [5] "hld_category" "age"
## [7] "sex" "bmi"
## [9] "ci_brand" "processor"
## [11] "total" "score_conversion"
## [13] "se_converted" "overall_1_10"
## [15] "pain_1_10" "dizziness_1_10"
## [17] "energy_1_10" "visit_1_activation_pref"
## [19] "visit_1_activation_pref_ordinal" "visit_1_hear_asap"
## [21] "visit_1_wait_to_recover" "visit_1_wait_to_adjust"
## [23] "visit_1_logistical_travel" "visit_1_other_yn"
## [25] "free text"
# Remove the "visit_1_" prefix from all columns that start with it
data_long <- data_long %>%
rename_with(~str_remove(., "visit_1_"), starts_with("visit_1_"))
# Create a clean factor variable for activation preference with desired ordering
data_long <- data_long %>%
mutate(activation_pref = factor(activation_pref,
levels = c("Immediately following surgery/the day of surgery",
"Within 5 days after surgery, but not on the actual day of surgery",
"1 week after surgery",
"2 weeks after surgery",
"3 weeks after surgery (standard activation time)",
"More than 3 weeks after surgery"),
labels = c("0 days", "1–5 days", "1 week", "2 weeks", "3 weeks", ">3 weeks")))
# Create the preference table by visit
pref_table <- data_long %>%
group_by(visit, activation_pref) %>%
summarise(n = n(), .groups = 'drop') %>%
filter(!is.na(activation_pref)) %>%
group_by(visit) %>%
mutate(percent = round(100 * n / sum(n), 1)) %>%
pivot_wider(names_from = visit,
values_from = c(n, percent),
names_glue = "Visit_{visit}_{.value}") %>%
arrange(factor(activation_pref, levels = c("0 days", "1–5 days", "1 week",
"2 weeks", "3 weeks", ">3 weeks")))
# Print the resulting preference table
kable(pref_table, caption = "Table 13. Activation Preference Distribution by Visit")
Table 13. Activation Preference Distribution by Visit
| 0 days |
13 |
4 |
4 |
4 |
25.5 |
12.5 |
12.1 |
8.7 |
| 1–5 days |
5 |
3 |
2 |
2 |
9.8 |
9.4 |
6.1 |
4.3 |
| 1 week |
9 |
7 |
5 |
10 |
17.6 |
21.9 |
15.2 |
21.7 |
| 2 weeks |
9 |
11 |
12 |
8 |
17.6 |
34.4 |
36.4 |
17.4 |
| 3 weeks |
12 |
6 |
8 |
15 |
23.5 |
18.8 |
24.2 |
32.6 |
| >3 weeks |
3 |
1 |
2 |
7 |
5.9 |
3.1 |
6.1 |
15.2 |
library(dplyr)
library(tidyr)
library(ggplot2)
pref_table_df <- as.data.frame(pref_table)
desired_order <- c("Preoperative\nPreference",
"Immediate Postoperative\nPreference",
"One Week Postoperative\nPreference",
"One Week Postactivation\nPreference")
pref_table_long <- pref_table_df %>%
select(activation_pref, starts_with("Visit_")) %>%
pivot_longer(
cols = -activation_pref,
names_to = c("visit", ".value"),
names_pattern = "Visit_(.*)_(.*)"
) %>%
mutate(visit = case_when(
visit == "1" ~ "Preoperative\nPreference",
visit == "2" ~ "Immediate Postoperative\nPreference",
visit == "3" ~ "One Week Postoperative\nPreference",
visit == "4" ~ "One Week Postactivation\nPreference",
TRUE ~ visit
)) %>%
mutate(visit = factor(visit, levels = desired_order))
max_percent <- max(pref_table_long$percent, na.rm = TRUE)
ggplot(pref_table_long, aes(x = activation_pref, y = percent, color = visit, group = visit)) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(
x = "\nPreferred Activation Time",
y = "Frequency (%)",
title = "Preoperative Versus Postoperative Preferences"
) +
scale_y_continuous(limits = c(0, 1.25 * max_percent)) +
theme_minimal(base_size = 14) +
theme(
# Bold the title and center it
plot.title = element_text(face = "bold", hjust = 0.5, size = 16),
# Bold the axis titles
axis.title.x = element_text(face = "bold", size = 14),
axis.title.y = element_text(face = "bold", size = 14),
# Adjust legend
legend.title = element_blank(),
legend.position = "bottom",
# Optionally reduce or remove some grid lines
panel.grid.minor = element_blank(),
panel.grid.major.x = element_blank()
)
# save chart as tiff
ggsave("preop_vs_postop_preferences.tiff", width = 8, height = 6, dpi = 1200, bg = "white")
# Reshape data to long format for rationale responses
reasons_long <- data_long %>%
select(record_id, visit, hear_asap, wait_to_recover, wait_to_adjust, logistical_travel, other_yn) %>%
pivot_longer(
cols = c(hear_asap, wait_to_recover, wait_to_adjust, logistical_travel, other_yn),
names_to = "reason",
values_to = "response"
) %>%
filter(response == "Y") # Assuming responses are coded as "Y" for yes
# Summarize counts and percentages by visit and rationale
reasons_table <- reasons_long %>%
group_by(visit, reason) %>%
summarise(n = n(), .groups = 'drop') %>%
group_by(visit) %>%
mutate(percent = round(100 * n / sum(n), 1)) %>%
pivot_wider(
names_from = visit,
values_from = c(n, percent),
names_glue = "Visit_{visit}_{.value}"
) %>%
arrange(reason)
# Print the rationale table
kable(reasons_table, caption = "Table 14. Rationale Responses (Yes) by Visit")
Table 14. Rationale Responses (Yes) by Visit
| hear_asap |
23 |
15 |
16 |
23 |
38.3 |
37.5 |
37.2 |
40.4 |
| logistical_travel |
1 |
NA |
NA |
NA |
1.7 |
NA |
NA |
NA |
| other_yn |
1 |
2 |
NA |
2 |
1.7 |
5.0 |
NA |
3.5 |
| wait_to_adjust |
12 |
3 |
8 |
11 |
20.0 |
7.5 |
18.6 |
19.3 |
| wait_to_recover |
23 |
20 |
19 |
21 |
38.3 |
50.0 |
44.2 |
36.8 |
#########################################
# Rationale Analysis: Change from Visit 1 to Visit 4
#########################################
library(dplyr)
library(tidyr)
library(knitr)
# Step 1: Create a data frame with change direction.
df_rationale <- data_wide %>%
filter(!is.na(visit_1_activation_pref_ordinal), !is.na(visit_4_activation_pref_ordinal)) %>%
mutate(
diff = visit_4_activation_pref_ordinal - visit_1_activation_pref_ordinal,
change_direction = case_when(
diff > 0 ~ "later",
diff < 0 ~ "earlier",
diff == 0 ~ "no_change"
)
)
# Define rationale columns for Visit 1 and Visit 4.
rationale_vars_visit1 <- c("visit_1_hear_asap", "visit_1_wait_to_recover",
"visit_1_wait_to_adjust", "visit_1_logistical_travel",
"visit_1_other_yn")
rationale_vars_visit4 <- c("visit_4_hear_asap", "visit_4_wait_to_recover",
"visit_4_wait_to_adjust", "visit_4_logistical_travel",
"visit_4_other_yn")
# Step 2: Pivot rationale responses to long format separately.
rationale_visit1 <- df_rationale %>%
select(record_id, change_direction, all_of(rationale_vars_visit1)) %>%
pivot_longer(cols = all_of(rationale_vars_visit1),
names_to = "rationale",
values_to = "response_visit1") %>%
mutate(rationale = sub("visit_1_", "", rationale))
rationale_visit4 <- df_rationale %>%
select(record_id, change_direction, all_of(rationale_vars_visit4)) %>%
pivot_longer(cols = all_of(rationale_vars_visit4),
names_to = "rationale",
values_to = "response_visit4") %>%
mutate(rationale = sub("visit_4_", "", rationale))
# Step 3: Join the two long-format data frames.
rationale_change <- inner_join(rationale_visit1, rationale_visit4,
by = c("record_id", "rationale", "change_direction"))
# Step 4: Create Table A: Overall rationale responses.
overall_rationale <- rationale_change %>%
pivot_longer(cols = c(response_visit1, response_visit4),
names_to = "visit",
values_to = "response") %>%
mutate(visit = ifelse(visit == "response_visit1", "Visit 1", "Visit 4")) %>%
filter(response == "Y") %>% # assuming "Y" denotes a yes response
group_by(visit, rationale) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(visit) %>%
mutate(percentage = round(100 * count / sum(count), 1)) %>%
ungroup()
kable(overall_rationale, caption = "Table A. Overall Rationale Responses (Yes) at Visits 1 and 4")
Table A. Overall Rationale Responses (Yes) at Visits 1 and
4
| Visit 1 |
hear_asap |
19 |
35.8 |
| Visit 1 |
logistical_travel |
1 |
1.9 |
| Visit 1 |
other_yn |
1 |
1.9 |
| Visit 1 |
wait_to_adjust |
10 |
18.9 |
| Visit 1 |
wait_to_recover |
22 |
41.5 |
| Visit 4 |
hear_asap |
21 |
38.2 |
| Visit 4 |
other_yn |
2 |
3.6 |
| Visit 4 |
wait_to_adjust |
11 |
20.0 |
| Visit 4 |
wait_to_recover |
21 |
38.2 |
# Step 5: Create Table B: Rationale responses by change direction subgroup.
rationale_by_group <- rationale_change %>%
pivot_longer(cols = c(response_visit1, response_visit4),
names_to = "visit",
values_to = "response") %>%
mutate(visit = ifelse(visit == "response_visit1", "Visit 1", "Visit 4")) %>%
filter(response == "Y") %>%
group_by(change_direction, rationale, visit) %>%
summarise(count = n(), .groups = "drop") %>%
group_by(change_direction, rationale) %>%
mutate(percentage = round(100 * count / sum(count), 1)) %>%
ungroup()
kable(rationale_by_group, caption = "Table B. Rationale Responses (Yes) at Visits 1 and 4 by Change Direction Subgroup")
Table B. Rationale Responses (Yes) at Visits 1 and 4 by Change
Direction Subgroup
| earlier |
hear_asap |
Visit 4 |
7 |
100.0 |
| earlier |
other_yn |
Visit 1 |
1 |
100.0 |
| earlier |
wait_to_adjust |
Visit 1 |
2 |
66.7 |
| earlier |
wait_to_adjust |
Visit 4 |
1 |
33.3 |
| earlier |
wait_to_recover |
Visit 1 |
8 |
80.0 |
| earlier |
wait_to_recover |
Visit 4 |
2 |
20.0 |
| later |
hear_asap |
Visit 1 |
15 |
68.2 |
| later |
hear_asap |
Visit 4 |
7 |
31.8 |
| later |
logistical_travel |
Visit 1 |
1 |
100.0 |
| later |
other_yn |
Visit 4 |
2 |
100.0 |
| later |
wait_to_adjust |
Visit 1 |
4 |
40.0 |
| later |
wait_to_adjust |
Visit 4 |
6 |
60.0 |
| later |
wait_to_recover |
Visit 1 |
7 |
35.0 |
| later |
wait_to_recover |
Visit 4 |
13 |
65.0 |
| no_change |
hear_asap |
Visit 1 |
4 |
36.4 |
| no_change |
hear_asap |
Visit 4 |
7 |
63.6 |
| no_change |
wait_to_adjust |
Visit 1 |
4 |
50.0 |
| no_change |
wait_to_adjust |
Visit 4 |
4 |
50.0 |
| no_change |
wait_to_recover |
Visit 1 |
7 |
53.8 |
| no_change |
wait_to_recover |
Visit 4 |
6 |
46.2 |
###########################################
# Updated Subgroup-Stratified McNemar’s Test
# (Skips any rationale with non–2×2 tables)
###########################################
subgroups <- unique(df_rationale$change_direction)
rationale_names <- c("hear_asap", "wait_to_recover", "wait_to_adjust",
"logistical_travel", "other_yn")
cat("\n=== McNemar's Test for Rationale Shifts by Subgroup (Visit 1 vs. Visit 4) ===\n")
##
## === McNemar's Test for Rationale Shifts by Subgroup (Visit 1 vs. Visit 4) ===
for(grp in subgroups) {
cat("\nSubgroup:", grp, "\n")
df_grp <- df_rationale %>% filter(change_direction == grp)
for(r in rationale_names) {
v1_col <- paste0("visit_1_", r)
v4_col <- paste0("visit_4_", r)
if(!all(c(v1_col, v4_col) %in% names(df_grp))) {
cat("Columns", v1_col, "or", v4_col, "not found\n")
next
}
df_pair <- df_grp %>% filter(!is.na(.data[[v1_col]]), !is.na(.data[[v4_col]]))
contingency <- table(df_pair[[v1_col]], df_pair[[v4_col]])
if(!all(dim(contingency) == c(2,2))) {
cat("Skipping rationale", r, "in subgroup", grp, "- table is not 2×2\n")
next
}
test <- mcnemar.test(contingency)
cat("\nRationale:", r, "in subgroup:", grp, "\n")
print(contingency)
cat("McNemar's test p-value:", round(test$p.value, 3), "\n")
}
}
##
## Subgroup: later
##
## Rationale: hear_asap in subgroup: later
##
## N Y
## N 4 3
## Y 11 4
## McNemar's test p-value: 0.061
##
## Rationale: wait_to_recover in subgroup: later
##
## N Y
## N 6 9
## Y 3 4
## McNemar's test p-value: 0.149
##
## Rationale: wait_to_adjust in subgroup: later
##
## N Y
## N 14 4
## Y 2 2
## McNemar's test p-value: 0.683
## Skipping rationale logistical_travel in subgroup later - table is not 2×2
## Skipping rationale other_yn in subgroup later - table is not 2×2
##
## Subgroup: earlier
## Skipping rationale hear_asap in subgroup earlier - table is not 2×2
##
## Rationale: wait_to_recover in subgroup: earlier
##
## N Y
## N 2 0
## Y 6 2
## McNemar's test p-value: 0.041
##
## Rationale: wait_to_adjust in subgroup: earlier
##
## N Y
## N 8 0
## Y 1 1
## McNemar's test p-value: 1
## Skipping rationale logistical_travel in subgroup earlier - table is not 2×2
## Skipping rationale other_yn in subgroup earlier - table is not 2×2
##
## Subgroup: no_change
##
## Rationale: hear_asap in subgroup: no_change
##
## N Y
## N 5 4
## Y 1 3
## McNemar's test p-value: 0.371
##
## Rationale: wait_to_recover in subgroup: no_change
##
## N Y
## N 4 2
## Y 3 4
## McNemar's test p-value: 1
##
## Rationale: wait_to_adjust in subgroup: no_change
##
## N Y
## N 7 2
## Y 2 2
## McNemar's test p-value: 1
## Skipping rationale logistical_travel in subgroup no_change - table is not 2×2
## Skipping rationale other_yn in subgroup no_change - table is not 2×2
# Using the binary recoding to test direction via a one-sided binomial test
# Subset to patients with non-missing activation preference at Visits 1 and 4
subset_data <- data_wide %>%
filter(!is.na(visit_1_activation_pref_ordinal),
!is.na(visit_4_activation_pref_ordinal))
# Recode into binary categories
# "early" = levels 0, 1, 2; "standard" = levels 3, 4, 5.
subset_data <- subset_data %>%
mutate(
pref_bin_visit1 = ifelse(visit_1_activation_pref_ordinal <= 2, "early", "standard"),
pref_bin_visit4 = ifelse(visit_4_activation_pref_ordinal <= 2, "early", "standard")
)
# Create the 2x2 contingency table
pref_table_bin <- table(subset_data$pref_bin_visit1, subset_data$pref_bin_visit4)
print(pref_table_bin)
##
## early standard
## early 9 15
## standard 6 15
# Extract discordant pair counts from your 2x2 table (pref_table)
b <- pref_table_bin["early", "standard"] # switched from ≤2 weeks to ≥3 weeks
c <- pref_table_bin["standard", "early"] # switched from ≥3 weeks to ≤2 weeks
# Perform a one-sided binomial test on the discordant pairs
binom_test <- binom.test(b, b + c, p = 0.5, alternative = "greater")
print(binom_test)
##
## Exact binomial test
##
## data: b and b + c
## number of successes = 15, number of trials = 21, p-value = 0.03918
## alternative hypothesis: true probability of success is greater than 0.5
## 95 percent confidence interval:
## 0.5126112 1.0000000
## sample estimates:
## probability of success
## 0.7142857