#IMPORT DATA
og_data<- read_excel("OG_Data_KSA_Gender_CH.xlsx", 
    sheet = "DATA_CH")


#TOTAL COHORT describtive
#als erstes Summary of demographic data
demo_vars <- c(
  "Age_onset", "Disease_duration", "Birthyear", "Visit_Year", "Sex", "Height", "Age", "BMI",
  "AA1_Anxiety", "AA1_Depression", "AA1_Diabetes", "AA1_Gastrointestinal_disease", "AA1_Hyperlipidemia",
  "AA1_Hypertension", "AA1_Kidney_disease", "AA1_Kidney_disease_txt", "AA1_Liver_disease", "AA1_Liver_disease_txt",
  "AA1_Other_pain", "AA1_Other_pain_txt", "AA1_Other", "AA1_Other_txt1", "AA1_Other_txt2",
  "AA1_Respiratory_disease", "AA1_Respiratory_disease_txt", "AA1_Sleep_disorders", "AA1_Stroke",
  "AA1_Teeth_grinding", "AA1_Thyroid_diseases"
)

# Extrahieren
demo_data <- og_data[, demo_vars]

# schauen ob die Variablen, die korrekte Klassifikation haben (Continous, categrocal...)
str(demo_data)
## tibble [1,468 × 29] (S3: tbl_df/tbl/data.frame)
##  $ Age_onset                   : num [1:1468] 32 18 12 21 6 14 28 20 23 14 ...
##  $ Disease_duration            : num [1:1468] 11 30 22 24 25 54 30 29 2 21 ...
##  $ Birthyear                   : num [1:1468] 1973 1967 1982 1970 1985 ...
##  $ Visit_Year                  : num [1:1468] 2016 2015 2016 2015 2016 ...
##  $ Sex                         : num [1:1468] 2 1 2 2 2 2 2 2 2 2 ...
##  $ Height                      : num [1:1468] 172 180 175 158 167 172 159 169 175 168 ...
##  $ Age                         : num [1:1468] 43 48.5 33.2 45.7 30.3 67.6 57.5 48.7 24.3 34.4 ...
##  $ BMI                         : num [1:1468] 22.5 25.6 NA 22.9 25.1 ...
##  $ AA1_Anxiety                 : num [1:1468] 0 0 0 0 0 0 0 0 1 0 ...
##  $ AA1_Depression              : num [1:1468] 1 0 1 0 0 0 0 0 0 0 ...
##  $ AA1_Diabetes                : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Gastrointestinal_disease: num [1:1468] 0 0 0 0 0 0 0 0 1 0 ...
##  $ AA1_Hyperlipidemia          : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Hypertension            : num [1:1468] 0 0 1 0 0 0 0 0 0 0 ...
##  $ AA1_Kidney_disease          : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Kidney_disease_txt      : chr [1:1468] NA NA NA NA ...
##  $ AA1_Liver_disease           : num [1:1468] 0 0 0 0 0 0 0 0 0 1 ...
##  $ AA1_Liver_disease_txt       : chr [1:1468] NA NA NA NA ...
##  $ AA1_Other_pain              : num [1:1468] 0 0 1 0 0 0 0 0 1 1 ...
##  $ AA1_Other_pain_txt          : chr [1:1468] NA NA "Schmerzen in den Fingern und Handgelenken" NA ...
##  $ AA1_Other                   : num [1:1468] 0 1 1 0 0 1 0 0 1 1 ...
##  $ AA1_Other_txt1              : chr [1:1468] NA "Neurodermitis" "Hashimoto" NA ...
##  $ AA1_Other_txt2              : chr [1:1468] NA NA NA NA ...
##  $ AA1_Respiratory_disease     : num [1:1468] 0 1 0 0 0 0 0 0 0 0 ...
##  $ AA1_Respiratory_disease_txt : chr [1:1468] NA "Asthma" NA NA ...
##  $ AA1_Sleep_disorders         : num [1:1468] 0 1 0 0 0 0 1 0 0 0 ...
##  $ AA1_Stroke                  : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Teeth_grinding          : num [1:1468] 0 1 1 0 0 0 1 0 0 1 ...
##  $ AA1_Thyroid_diseases        : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
# Proper conversion: only once, and only after verifying the values
demo_data$Sex <- factor(demo_data$Sex,
                      levels = c(1, 2),
                      labels = c("Male", "Female"))
og_data$Sex <- factor(og_data$Sex,
                      levels = c(1, 2),
                      labels = c("Male", "Female"))

binary_vars <- c(
  "AA1_Anxiety", "AA1_Depression", "AA1_Diabetes", "AA1_Gastrointestinal_disease",
  "AA1_Hyperlipidemia", "AA1_Hypertension", "AA1_Kidney_disease",
  "AA1_Liver_disease", "AA1_Other_pain", "AA1_Other",
  "AA1_Respiratory_disease", "AA1_Sleep_disorders", "AA1_Stroke",
  "AA1_Teeth_grinding", "AA1_Thyroid_diseases"
)

# Convert all these columns to factors with labels
demo_data[binary_vars] <- lapply(demo_data[binary_vars], function(x) {
  factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})

demo_data[binary_vars] <- lapply(og_data[binary_vars], function(x) {
  factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})

# schauen ob die Variablen, die korrekte Klassifikation haben (Continous, categrocal...)
str(demo_data)
## tibble [1,468 × 29] (S3: tbl_df/tbl/data.frame)
##  $ Age_onset                   : num [1:1468] 32 18 12 21 6 14 28 20 23 14 ...
##  $ Disease_duration            : num [1:1468] 11 30 22 24 25 54 30 29 2 21 ...
##  $ Birthyear                   : num [1:1468] 1973 1967 1982 1970 1985 ...
##  $ Visit_Year                  : num [1:1468] 2016 2015 2016 2015 2016 ...
##  $ Sex                         : Factor w/ 2 levels "Male","Female": 2 1 2 2 2 2 2 2 2 2 ...
##  $ Height                      : num [1:1468] 172 180 175 158 167 172 159 169 175 168 ...
##  $ Age                         : num [1:1468] 43 48.5 33.2 45.7 30.3 67.6 57.5 48.7 24.3 34.4 ...
##  $ BMI                         : num [1:1468] 22.5 25.6 NA 22.9 25.1 ...
##  $ AA1_Anxiety                 : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ AA1_Depression              : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ AA1_Diabetes                : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Gastrointestinal_disease: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ AA1_Hyperlipidemia          : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Hypertension            : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
##  $ AA1_Kidney_disease          : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Kidney_disease_txt      : chr [1:1468] NA NA NA NA ...
##  $ AA1_Liver_disease           : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 2 ...
##  $ AA1_Liver_disease_txt       : chr [1:1468] NA NA NA NA ...
##  $ AA1_Other_pain              : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 2 ...
##  $ AA1_Other_pain_txt          : chr [1:1468] NA NA "Schmerzen in den Fingern und Handgelenken" NA ...
##  $ AA1_Other                   : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 2 1 1 2 2 ...
##  $ AA1_Other_txt1              : chr [1:1468] NA "Neurodermitis" "Hashimoto" NA ...
##  $ AA1_Other_txt2              : chr [1:1468] NA NA NA NA ...
##  $ AA1_Respiratory_disease     : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
##  $ AA1_Respiratory_disease_txt : chr [1:1468] NA "Asthma" NA NA ...
##  $ AA1_Sleep_disorders         : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 2 1 1 1 ...
##  $ AA1_Stroke                  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Teeth_grinding          : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 1 2 1 1 2 ...
##  $ AA1_Thyroid_diseases        : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
# generiere automatisches Summary 
summary(demo_data)
##    Age_onset     Disease_duration   Birthyear      Visit_Year       Sex      
##  Min.   : 2.00   Min.   : 0.00    Min.   :1930   Min.   :2015   Male  : 374  
##  1st Qu.:14.00   1st Qu.: 5.00    1st Qu.:1965   1st Qu.:2017   Female:1094  
##  Median :20.00   Median :16.00    Median :1976   Median :2019                
##  Mean   :24.72   Mean   :18.75    Mean   :1975   Mean   :2019                
##  3rd Qu.:33.00   3rd Qu.:29.00    3rd Qu.:1987   3rd Qu.:2021                
##  Max.   :85.00   Max.   :70.00    Max.   :2004   Max.   :2022                
##  NA's   :11      NA's   :11                                                  
##      Height           Age             BMI        AA1_Anxiety AA1_Depression
##  Min.   :120.0   Min.   :15.70   Min.   :15.32   No :1207    No :1142      
##  1st Qu.:164.0   1st Qu.:31.27   1st Qu.:21.37   Yes: 261    Yes: 326      
##  Median :169.0   Median :43.20   Median :24.09                             
##  Mean   :169.6   Mean   :43.41   Mean   :25.07                             
##  3rd Qu.:175.0   3rd Qu.:53.40   3rd Qu.:27.59                             
##  Max.   :198.0   Max.   :89.50   Max.   :62.11                             
##  NA's   :39                      NA's   :112                               
##  AA1_Diabetes AA1_Gastrointestinal_disease AA1_Hyperlipidemia AA1_Hypertension
##  No :1423     No :1294                     No :1354           No :1241        
##  Yes:  45     Yes: 174                     Yes: 114           Yes: 227        
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  AA1_Kidney_disease AA1_Kidney_disease_txt AA1_Liver_disease
##  No :1394           Length:1468            No :1416         
##  Yes:  74           Class :character       Yes:  52         
##                     Mode  :character                        
##                                                             
##                                                             
##                                                             
##                                                             
##  AA1_Liver_disease_txt AA1_Other_pain AA1_Other_pain_txt AA1_Other 
##  Length:1468           No :1187       Length:1468        No :1023  
##  Class :character      Yes: 281       Class :character   Yes: 445  
##  Mode  :character                     Mode  :character             
##                                                                    
##                                                                    
##                                                                    
##                                                                    
##  AA1_Other_txt1     AA1_Other_txt2     AA1_Respiratory_disease
##  Length:1468        Length:1468        No :1291               
##  Class :character   Class :character   Yes: 177               
##  Mode  :character   Mode  :character                          
##                                                               
##                                                               
##                                                               
##                                                               
##  AA1_Respiratory_disease_txt AA1_Sleep_disorders AA1_Stroke AA1_Teeth_grinding
##  Length:1468                 No :935             No :1435   No :1040          
##  Class :character            Yes:533             Yes:  33   Yes: 428          
##  Mode  :character                                                             
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  AA1_Thyroid_diseases
##  No :1209            
##  Yes: 259            
##                      
##                      
##                      
##                      
## 
summary_table_demo <- demo_data %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    statistic = list(
      all_continuous() ~ "{median} ({p25},{p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**")


summary_table_demo
Variable N = 1,4681
Age at Onset 20 (14,33)
Disease Duration 16 (5,29)
Sex
    Male 374 (25%)
    Female 1,094 (75%)
Age 43 (31,53)
BMI 24.1 (21.4,27.6)
Anxiety 261 (18%)
Depression 326 (22%)
Diabetes 45 (3.1%)
GI Disease 174 (12%)
Hyperlipidemia 114 (7.8%)
Hypertension 227 (15%)
Kidney Disease 74 (5.0%)
Liver Disease 52 (3.5%)
Other Pain 281 (19%)
Other Comorbidity 445 (30%)
Respiratory Disease 177 (12%)
Sleep Disorders 533 (36%)
Stroke 33 (2.2%)
Teeth Grinding 428 (29%)
Thyroid Disease 259 (18%)
1 Median (Q1,Q3); n (%)
summary_table_demo %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_table_demo.docx")

summary_table_demo_sex <- demo_data %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()  # Optional: add p-values for group comparisons

summary_table_demo_sex
Variable Male
N = 374
1
Female
N = 1,094
1
p-value2
Age at Onset 27 (14, 41) 19 (14, 30) <0.001
Disease Duration 10 (3, 23) 19 (8, 30) <0.001
Age 43 (31, 55) 43 (31, 53) 0.3
BMI 25.2 (23.1, 28.7) 23.4 (20.9, 27.2) <0.001
Anxiety 54 (14%) 207 (19%) 0.050
Depression 79 (21%) 247 (23%) 0.6
Diabetes 17 (4.5%) 28 (2.6%) 0.054
GI Disease 29 (7.8%) 145 (13%) 0.005
Hyperlipidemia 26 (7.0%) 88 (8.0%) 0.5
Hypertension 82 (22%) 145 (13%) <0.001
Kidney Disease 17 (4.5%) 57 (5.2%) 0.6
Liver Disease 14 (3.7%) 38 (3.5%) 0.8
Other Pain 52 (14%) 229 (21%) 0.003
Other Comorbidity 113 (30%) 332 (30%) >0.9
Respiratory Disease 45 (12%) 132 (12%) >0.9
Sleep Disorders 130 (35%) 403 (37%) 0.5
Stroke 10 (2.7%) 23 (2.1%) 0.5
Teeth Grinding 84 (22%) 344 (31%) <0.001
Thyroid Disease 13 (3.5%) 246 (22%) <0.001
1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test; Pearson’s Chi-squared test
summary_table_demo_sex %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_table_demo_sex.docx")

summary_migraine_only <- og_data %>%
  filter(HS_M == "Yes") %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()  # Optional: add p-values for group comparisons
## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.
summary_migraine_only
## Error: object 'summary_migraine_only' not found
summary_migraine_only %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_migraine_only.docx")
## Error: object 'summary_migraine_only' not found
summary_cluster_only <- og_data %>%
  filter(HS_C == "Yes") %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()
## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.
# Save to Word document
summary_cluster_only %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_cluster_only.docx")
## Error: object 'summary_cluster_only' not found
summary_tension_only <- og_data %>%
  filter(HS_T == "Yes") %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()
## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.
# Save to Word document
summary_tension_only %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_tension_only.docx")
## Error: object 'summary_tension_only' not found

Summary of Diagnostic recommendation

# recommended Diagnostics muss erstmal als korrekte Variable gelabelt werden
diag_vars <- c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG", 
               "DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture")

# Convert to factors with labels
og_data[diag_vars] <- lapply(og_data[diag_vars], function(x) {
  factor(x, levels = c(0, 1, 9), labels = c("No", "Yes", "Not applicable"))
})

summary_reco_diagnostics <- og_data %>%
  select(Sex, all_of(diag_vars)) %>%
  tbl_summary(
    by = Sex,
    statistic = all_categorical() ~ "{n} ({p}%)",
    missing = "no",
    label = list(
      DIAR_YN               = "Any Diagnostics Recommended",
      DIAR_CT               = "CT Scan",
      DIAR_MRI              = "MRI",
      DIAR_EEG              = "EEG",
      DIAR_ECG              = "ECG",
      DIAR_DOPPLER          = "Doppler Ultrasound",
      DIAR_Lumbar_puncture  = "Lumbar Puncture"
    )
  ) %>%
  modify_header(label = "**Diagnostic Recommendation**") %>%
  add_p()


summary_reco_diagnostics
Diagnostic Recommendation Male
N = 374
1
Female
N = 1,094
1
p-value2
Any Diagnostics Recommended

0.009
    No 293 (78%) 923 (84%)
    Yes 81 (22%) 171 (16%)
    Not applicable 0 (0%) 0 (0%)
CT Scan

0.028
    No 78 (21%) 165 (15%)
    Yes 2 (0.5%) 6 (0.5%)
    Not applicable 294 (79%) 923 (84%)
MRI

0.014
    No 35 (9.4%) 60 (5.5%)
    Yes 45 (12%) 111 (10%)
    Not applicable 294 (79%) 923 (84%)
EEG

0.021
    No 79 (21%) 165 (15%)
    Yes 1 (0.3%) 6 (0.5%)
    Not applicable 294 (79%) 923 (84%)
ECG

0.009
    No 75 (20%) 167 (15%)
    Yes 5 (1.3%) 4 (0.4%)
    Not applicable 294 (79%) 923 (84%)
Doppler Ultrasound

0.024
    No 77 (21%) 167 (15%)
    Yes 3 (0.8%) 4 (0.4%)
    Not applicable 294 (79%) 923 (84%)
Lumbar Puncture

0.031
    No 71 (19%) 156 (14%)
    Yes 9 (2.4%) 15 (1.4%)
    Not applicable 294 (79%) 923 (84%)
1 n (%)
2 Fisher’s exact test; Pearson’s Chi-squared test
summary_reco_diagnostics %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_reco_diagnostics.docx")


# Step 1: Pivot longer
diag_long <- og_data %>%
  select(Sex, all_of(diag_vars)) %>%
  pivot_longer(
    cols = -Sex,
    names_to = "Diagnosis",
    values_to = "Value"
  ) %>%
  filter(Value == "Yes")  # Only plot actual recommendations

# Join label names to your long data
diag_long <- diag_long %>%
  left_join(diagnosis_labels, by = "Diagnosis")
## Error: object 'diagnosis_labels' not found
diagnosis_labels <- tibble::tibble(
  Diagnosis = c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG", 
                "DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture"),
  Diagnosis_Label = c("Any Diagnostics", "CT Scan", "MRI", "EEG", 
                      "ECG", "Doppler Ultrasound", "Lumbar Puncture")
)

ggplot(diag_long, aes(x = Diagnosis_Label, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Diagnostic Recommendations by Sex",
    x = "Diagnostic Test",
    y = "Number of Recommendations",
    fill = "Sex"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )
## Error in `geom_bar()`:
## ! Problem while computing aesthetics.
## ℹ Error occurred in the 1st layer.
## Caused by error:
## ! object 'Diagnosis_Label' not found
# 1. Count total number of individuals per sex
sex_totals <- og_data %>%
  count(Sex, name = "total_n")

# 2. Pivot diagnostics to long and keep only "Yes"
diag_long <- og_data %>%
  select(Sex, all_of(diag_vars)) %>%
  pivot_longer(
    cols = -Sex,
    names_to = "Diagnosis",
    values_to = "Value"
  ) %>%
  filter(Value == "Yes")

# 3. Add readable labels
diagnosis_labels <- tibble::tibble(
  Diagnosis = c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG", 
                "DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture"),
  Diagnosis_Label = c("Any Diagnostics", "CT Scan", "MRI", "EEG", 
                      "ECG", "Doppler Ultrasound", "Lumbar Puncture")
)

diag_long <- diag_long %>%
  left_join(diagnosis_labels, by = "Diagnosis")

# 4. Count diagnosis occurrences and calculate percentages
diag_prop <- diag_long %>%
  count(Sex, Diagnosis_Label, name = "diagnosis_n") %>%
  left_join(sex_totals, by = "Sex") %>%
  mutate(percent = diagnosis_n / total_n * 100)

# 5. Plot percentages
ggplot(diag_prop, aes(x = Diagnosis_Label, y = percent, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(
    title = "Diagnostic Recommendations by Sex (Relative to Group Size)",
    x = "Diagnostic Test",
    y = "Percentage of Patients",
    fill = "Sex"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

ggsave("diagnostic_recommendations_by_sex.png", width = 10, height = 6, dpi = 300)

dev.off()
## null device 
##           1

Summary of diagnosis

# Define diagnosis variables
hs_diag_vars <- c("HS_M", "HS_T", "HS_C", "HS_O", "HS_N")

# Convert to factors with Yes/No labels
og_data[hs_diag_vars] <- lapply(og_data[hs_diag_vars], function(x) {
  factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})

summary_hs_diagnose <- og_data %>%
  select(Sex, all_of(hs_diag_vars)) %>%
  tbl_summary(
    by = Sex,
    statistic = all_categorical() ~ "{n} ({p}%)",
    missing = "no",
    label = list(
      HS_M ~ "Migraine",
      HS_T ~ "Tension-type Headache",
      HS_C ~ "Cluster Headache",
      HS_O ~ "Other Headache",
      HS_N ~ "No Diagnosis"
    )
  ) %>%
  modify_header(label = "**Specialist Diagnosis**") %>%
  add_p()
summary_hs_diagnose
Specialist Diagnosis Male
N = 374
1
Female
N = 1,094
1
p-value2
Migraine 210 (56%) 958 (88%) <0.001
Tension-type Headache 93 (25%) 134 (12%) <0.001
Cluster Headache 44 (12%) 18 (1.6%) <0.001
Other Headache 85 (23%) 182 (17%) 0.008
No Diagnosis 4 (1.1%) 4 (0.4%) 0.12
1 n (%)
2 Pearson’s Chi-squared test; Fisher’s exact test
summary_reco_diagnostics %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_hs_diagnose.docx" )

#PIE CHART

hs_long <- og_data %>%
  select(Sex, HS_M, HS_T, HS_C, HS_O, HS_N) %>%
  pivot_longer(
    cols      = starts_with("HS_"),
    names_to  = "Diagnosis",
    values_to = "Value"
  ) %>%
  filter(Value == "Yes")  # Only count confirmed diagnoses

# Step 2: Compute relative frequencies within each sex
hs_plot_data <- hs_long %>%
  count(Sex, Diagnosis) %>%
  group_by(Sex) %>%
  mutate(Percent = n / sum(n) * 100)

# Step 3: Plot normalized pie chart
ggplot(hs_plot_data, aes(x = "", y = Percent, fill = Diagnosis)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y") +
  facet_wrap(~ Sex) +
  labs(
    title = "Distribution of Specialist Diagnoses (Within Each Sex)",
    fill  = "Diagnosis"
  ) +
  theme_void() +
  theme(
    strip.text       = element_text(face = "bold", size = 12),
    plot.title       = element_text(hjust = 0.5),
    legend.position  = "right"
  )

ggsave("diagnosis_by_sex_pie.png", width = 8, height = 6, dpi = 300)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.