KSA_Gender

#IMPORT DATA
og_data<- read_excel("OG_Data_KSA_Gender_CH.xlsx", 
    sheet = "DATA_CH")


#TOTAL COHORT describtive
#als erstes Summary of demographic data
demo_vars <- c(
  "Age_onset", "Disease_duration", "Birthyear", "Visit_Year", "Sex", "Height", "Age", "BMI",
  "AA1_Anxiety", "AA1_Depression", "AA1_Diabetes", "AA1_Gastrointestinal_disease", "AA1_Hyperlipidemia",
  "AA1_Hypertension", "AA1_Kidney_disease", "AA1_Kidney_disease_txt", "AA1_Liver_disease", "AA1_Liver_disease_txt",
  "AA1_Other_pain", "AA1_Other_pain_txt", "AA1_Other", "AA1_Other_txt1", "AA1_Other_txt2",
  "AA1_Respiratory_disease", "AA1_Respiratory_disease_txt", "AA1_Sleep_disorders", "AA1_Stroke",
  "AA1_Teeth_grinding", "AA1_Thyroid_diseases"
)

# Extrahieren
demo_data <- og_data[, demo_vars]

# schauen ob die Variablen, die korrekte Klassifikation haben (Continous, categrocal...)
str(demo_data)

## tibble [1,468 × 29] (S3: tbl_df/tbl/data.frame)
##  $ Age_onset                   : num [1:1468] 32 18 12 21 6 14 28 20 23 14 ...
##  $ Disease_duration            : num [1:1468] 11 30 22 24 25 54 30 29 2 21 ...
##  $ Birthyear                   : num [1:1468] 1973 1967 1982 1970 1985 ...
##  $ Visit_Year                  : num [1:1468] 2016 2015 2016 2015 2016 ...
##  $ Sex                         : num [1:1468] 2 1 2 2 2 2 2 2 2 2 ...
##  $ Height                      : num [1:1468] 172 180 175 158 167 172 159 169 175 168 ...
##  $ Age                         : num [1:1468] 43 48.5 33.2 45.7 30.3 67.6 57.5 48.7 24.3 34.4 ...
##  $ BMI                         : num [1:1468] 22.5 25.6 NA 22.9 25.1 ...
##  $ AA1_Anxiety                 : num [1:1468] 0 0 0 0 0 0 0 0 1 0 ...
##  $ AA1_Depression              : num [1:1468] 1 0 1 0 0 0 0 0 0 0 ...
##  $ AA1_Diabetes                : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Gastrointestinal_disease: num [1:1468] 0 0 0 0 0 0 0 0 1 0 ...
##  $ AA1_Hyperlipidemia          : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Hypertension            : num [1:1468] 0 0 1 0 0 0 0 0 0 0 ...
##  $ AA1_Kidney_disease          : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Kidney_disease_txt      : chr [1:1468] NA NA NA NA ...
##  $ AA1_Liver_disease           : num [1:1468] 0 0 0 0 0 0 0 0 0 1 ...
##  $ AA1_Liver_disease_txt       : chr [1:1468] NA NA NA NA ...
##  $ AA1_Other_pain              : num [1:1468] 0 0 1 0 0 0 0 0 1 1 ...
##  $ AA1_Other_pain_txt          : chr [1:1468] NA NA "Schmerzen in den Fingern und Handgelenken" NA ...
##  $ AA1_Other                   : num [1:1468] 0 1 1 0 0 1 0 0 1 1 ...
##  $ AA1_Other_txt1              : chr [1:1468] NA "Neurodermitis" "Hashimoto" NA ...
##  $ AA1_Other_txt2              : chr [1:1468] NA NA NA NA ...
##  $ AA1_Respiratory_disease     : num [1:1468] 0 1 0 0 0 0 0 0 0 0 ...
##  $ AA1_Respiratory_disease_txt : chr [1:1468] NA "Asthma" NA NA ...
##  $ AA1_Sleep_disorders         : num [1:1468] 0 1 0 0 0 0 1 0 0 0 ...
##  $ AA1_Stroke                  : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
##  $ AA1_Teeth_grinding          : num [1:1468] 0 1 1 0 0 0 1 0 0 1 ...
##  $ AA1_Thyroid_diseases        : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...

# Proper conversion: only once, and only after verifying the values
demo_data$Sex <- factor(demo_data$Sex,
                      levels = c(1, 2),
                      labels = c("Male", "Female"))
og_data$Sex <- factor(og_data$Sex,
                      levels = c(1, 2),
                      labels = c("Male", "Female"))

binary_vars <- c(
  "AA1_Anxiety", "AA1_Depression", "AA1_Diabetes", "AA1_Gastrointestinal_disease",
  "AA1_Hyperlipidemia", "AA1_Hypertension", "AA1_Kidney_disease",
  "AA1_Liver_disease", "AA1_Other_pain", "AA1_Other",
  "AA1_Respiratory_disease", "AA1_Sleep_disorders", "AA1_Stroke",
  "AA1_Teeth_grinding", "AA1_Thyroid_diseases"
)

# Convert all these columns to factors with labels
demo_data[binary_vars] <- lapply(demo_data[binary_vars], function(x) {
  factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})

demo_data[binary_vars] <- lapply(og_data[binary_vars], function(x) {
  factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})

# schauen ob die Variablen, die korrekte Klassifikation haben (Continous, categrocal...)
str(demo_data)

## tibble [1,468 × 29] (S3: tbl_df/tbl/data.frame)
##  $ Age_onset                   : num [1:1468] 32 18 12 21 6 14 28 20 23 14 ...
##  $ Disease_duration            : num [1:1468] 11 30 22 24 25 54 30 29 2 21 ...
##  $ Birthyear                   : num [1:1468] 1973 1967 1982 1970 1985 ...
##  $ Visit_Year                  : num [1:1468] 2016 2015 2016 2015 2016 ...
##  $ Sex                         : Factor w/ 2 levels "Male","Female": 2 1 2 2 2 2 2 2 2 2 ...
##  $ Height                      : num [1:1468] 172 180 175 158 167 172 159 169 175 168 ...
##  $ Age                         : num [1:1468] 43 48.5 33.2 45.7 30.3 67.6 57.5 48.7 24.3 34.4 ...
##  $ BMI                         : num [1:1468] 22.5 25.6 NA 22.9 25.1 ...
##  $ AA1_Anxiety                 : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ AA1_Depression              : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
##  $ AA1_Diabetes                : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Gastrointestinal_disease: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
##  $ AA1_Hyperlipidemia          : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Hypertension            : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
##  $ AA1_Kidney_disease          : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Kidney_disease_txt      : chr [1:1468] NA NA NA NA ...
##  $ AA1_Liver_disease           : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 2 ...
##  $ AA1_Liver_disease_txt       : chr [1:1468] NA NA NA NA ...
##  $ AA1_Other_pain              : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 2 ...
##  $ AA1_Other_pain_txt          : chr [1:1468] NA NA "Schmerzen in den Fingern und Handgelenken" NA ...
##  $ AA1_Other                   : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 2 1 1 2 2 ...
##  $ AA1_Other_txt1              : chr [1:1468] NA "Neurodermitis" "Hashimoto" NA ...
##  $ AA1_Other_txt2              : chr [1:1468] NA NA NA NA ...
##  $ AA1_Respiratory_disease     : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
##  $ AA1_Respiratory_disease_txt : chr [1:1468] NA "Asthma" NA NA ...
##  $ AA1_Sleep_disorders         : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 2 1 1 1 ...
##  $ AA1_Stroke                  : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ AA1_Teeth_grinding          : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 1 2 1 1 2 ...
##  $ AA1_Thyroid_diseases        : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...

# generiere automatisches Summary 
summary(demo_data)

##    Age_onset     Disease_duration   Birthyear      Visit_Year       Sex      
##  Min.   : 2.00   Min.   : 0.00    Min.   :1930   Min.   :2015   Male  : 374  
##  1st Qu.:14.00   1st Qu.: 5.00    1st Qu.:1965   1st Qu.:2017   Female:1094  
##  Median :20.00   Median :16.00    Median :1976   Median :2019                
##  Mean   :24.72   Mean   :18.75    Mean   :1975   Mean   :2019                
##  3rd Qu.:33.00   3rd Qu.:29.00    3rd Qu.:1987   3rd Qu.:2021                
##  Max.   :85.00   Max.   :70.00    Max.   :2004   Max.   :2022                
##  NA's   :11      NA's   :11                                                  
##      Height           Age             BMI        AA1_Anxiety AA1_Depression
##  Min.   :120.0   Min.   :15.70   Min.   :15.32   No :1207    No :1142      
##  1st Qu.:164.0   1st Qu.:31.27   1st Qu.:21.37   Yes: 261    Yes: 326      
##  Median :169.0   Median :43.20   Median :24.09                             
##  Mean   :169.6   Mean   :43.41   Mean   :25.07                             
##  3rd Qu.:175.0   3rd Qu.:53.40   3rd Qu.:27.59                             
##  Max.   :198.0   Max.   :89.50   Max.   :62.11                             
##  NA's   :39                      NA's   :112                               
##  AA1_Diabetes AA1_Gastrointestinal_disease AA1_Hyperlipidemia AA1_Hypertension
##  No :1423     No :1294                     No :1354           No :1241        
##  Yes:  45     Yes: 174                     Yes: 114           Yes: 227        
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  AA1_Kidney_disease AA1_Kidney_disease_txt AA1_Liver_disease
##  No :1394           Length:1468            No :1416         
##  Yes:  74           Class :character       Yes:  52         
##                     Mode  :character                        
##                                                             
##                                                             
##                                                             
##                                                             
##  AA1_Liver_disease_txt AA1_Other_pain AA1_Other_pain_txt AA1_Other 
##  Length:1468           No :1187       Length:1468        No :1023  
##  Class :character      Yes: 281       Class :character   Yes: 445  
##  Mode  :character                     Mode  :character             
##                                                                    
##                                                                    
##                                                                    
##                                                                    
##  AA1_Other_txt1     AA1_Other_txt2     AA1_Respiratory_disease
##  Length:1468        Length:1468        No :1291               
##  Class :character   Class :character   Yes: 177               
##  Mode  :character   Mode  :character                          
##                                                               
##                                                               
##                                                               
##                                                               
##  AA1_Respiratory_disease_txt AA1_Sleep_disorders AA1_Stroke AA1_Teeth_grinding
##  Length:1468                 No :935             No :1435   No :1040          
##  Class :character            Yes:533             Yes:  33   Yes: 428          
##  Mode  :character                                                             
##                                                                               
##                                                                               
##                                                                               
##                                                                               
##  AA1_Thyroid_diseases
##  No :1209            
##  Yes: 259            
##                      
##                      
##                      
##                      
##

summary_table_demo <- demo_data %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    statistic = list(
      all_continuous() ~ "{median} ({p25},{p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**")


summary_table_demo

Variable	N = 1,468¹
Age at Onset	20 (14,33)
Disease Duration	16 (5,29)
Sex
Male	374 (25%)
Female	1,094 (75%)
Age	43 (31,53)
BMI	24.1 (21.4,27.6)
Anxiety	261 (18%)
Depression	326 (22%)
Diabetes	45 (3.1%)
GI Disease	174 (12%)
Hyperlipidemia	114 (7.8%)
Hypertension	227 (15%)
Kidney Disease	74 (5.0%)
Liver Disease	52 (3.5%)
Other Pain	281 (19%)
Other Comorbidity	445 (30%)
Respiratory Disease	177 (12%)
Sleep Disorders	533 (36%)
Stroke	33 (2.2%)
Teeth Grinding	428 (29%)
Thyroid Disease	259 (18%)
¹ Median (Q1,Q3); n (%)

summary_table_demo %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_table_demo.docx")

summary_table_demo_sex <- demo_data %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()  # Optional: add p-values for group comparisons

summary_table_demo_sex

Variable	Male N = 374¹	Female N = 1,094¹	p-value²
Age at Onset	27 (14, 41)	19 (14, 30)	<0.001
Disease Duration	10 (3, 23)	19 (8, 30)	<0.001
Age	43 (31, 55)	43 (31, 53)	0.3
BMI	25.2 (23.1, 28.7)	23.4 (20.9, 27.2)	<0.001
Anxiety	54 (14%)	207 (19%)	0.050
Depression	79 (21%)	247 (23%)	0.6
Diabetes	17 (4.5%)	28 (2.6%)	0.054
GI Disease	29 (7.8%)	145 (13%)	0.005
Hyperlipidemia	26 (7.0%)	88 (8.0%)	0.5
Hypertension	82 (22%)	145 (13%)	<0.001
Kidney Disease	17 (4.5%)	57 (5.2%)	0.6
Liver Disease	14 (3.7%)	38 (3.5%)	0.8
Other Pain	52 (14%)	229 (21%)	0.003
Other Comorbidity	113 (30%)	332 (30%)	>0.9
Respiratory Disease	45 (12%)	132 (12%)	>0.9
Sleep Disorders	130 (35%)	403 (37%)	0.5
Stroke	10 (2.7%)	23 (2.1%)	0.5
Teeth Grinding	84 (22%)	344 (31%)	<0.001
Thyroid Disease	13 (3.5%)	246 (22%)	<0.001
¹ Median (Q1, Q3); n (%)
² Wilcoxon rank sum test; Pearson’s Chi-squared test

summary_table_demo_sex %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_table_demo_sex.docx")

summary_migraine_only <- og_data %>%
  filter(HS_M == "Yes") %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()  # Optional: add p-values for group comparisons

## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.

summary_migraine_only

## Error: object 'summary_migraine_only' not found

summary_migraine_only %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_migraine_only.docx")

## Error: object 'summary_migraine_only' not found

summary_cluster_only <- og_data %>%
  filter(HS_C == "Yes") %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()

## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.

# Save to Word document
summary_cluster_only %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_cluster_only.docx")

## Error: object 'summary_cluster_only' not found

summary_tension_only <- og_data %>%
  filter(HS_T == "Yes") %>%
  select(
    `Age at Onset` = Age_onset,
    `Disease Duration` = Disease_duration,
    Sex,
    Age,
    BMI,
    `Anxiety` = AA1_Anxiety,
    `Depression` = AA1_Depression,
    `Diabetes` = AA1_Diabetes,
    `GI Disease` = AA1_Gastrointestinal_disease,
    `Hyperlipidemia` = AA1_Hyperlipidemia,
    `Hypertension` = AA1_Hypertension,
    `Kidney Disease` = AA1_Kidney_disease,
    `Liver Disease` = AA1_Liver_disease,
    `Other Pain` = AA1_Other_pain,
    `Other Comorbidity` = AA1_Other,
    `Respiratory Disease` = AA1_Respiratory_disease,
    `Sleep Disorders` = AA1_Sleep_disorders,
    `Stroke` = AA1_Stroke,
    `Teeth Grinding` = AA1_Teeth_grinding,
    `Thyroid Disease` = AA1_Thyroid_diseases
  ) %>%
  tbl_summary(
    by = Sex,
    statistic = list(
      all_continuous() ~ "{median} ({p25}, {p75})",
      all_categorical() ~ "{n} ({p}%)"
    ),
    missing = "no"
  ) %>%
  modify_header(label = "**Variable**") %>%
  add_p()

## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.

# Save to Word document
summary_tension_only %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_tension_only.docx")

## Error: object 'summary_tension_only' not found

Summary of Diagnostic recommendation

# recommended Diagnostics muss erstmal als korrekte Variable gelabelt werden
diag_vars <- c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG", 
               "DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture")

# Convert to factors with labels
og_data[diag_vars] <- lapply(og_data[diag_vars], function(x) {
  factor(x, levels = c(0, 1, 9), labels = c("No", "Yes", "Not applicable"))
})

summary_reco_diagnostics <- og_data %>%
  select(Sex, all_of(diag_vars)) %>%
  tbl_summary(
    by = Sex,
    statistic = all_categorical() ~ "{n} ({p}%)",
    missing = "no",
    label = list(
      DIAR_YN               = "Any Diagnostics Recommended",
      DIAR_CT               = "CT Scan",
      DIAR_MRI              = "MRI",
      DIAR_EEG              = "EEG",
      DIAR_ECG              = "ECG",
      DIAR_DOPPLER          = "Doppler Ultrasound",
      DIAR_Lumbar_puncture  = "Lumbar Puncture"
    )
  ) %>%
  modify_header(label = "**Diagnostic Recommendation**") %>%
  add_p()


summary_reco_diagnostics

Diagnostic Recommendation	Male N = 374¹	Female N = 1,094¹	p-value²
Any Diagnostics Recommended			0.009
No	293 (78%)	923 (84%)
Yes	81 (22%)	171 (16%)
Not applicable	0 (0%)	0 (0%)
CT Scan			0.028
No	78 (21%)	165 (15%)
Yes	2 (0.5%)	6 (0.5%)
Not applicable	294 (79%)	923 (84%)
MRI			0.014
No	35 (9.4%)	60 (5.5%)
Yes	45 (12%)	111 (10%)
Not applicable	294 (79%)	923 (84%)
EEG			0.021
No	79 (21%)	165 (15%)
Yes	1 (0.3%)	6 (0.5%)
Not applicable	294 (79%)	923 (84%)
ECG			0.009
No	75 (20%)	167 (15%)
Yes	5 (1.3%)	4 (0.4%)
Not applicable	294 (79%)	923 (84%)
Doppler Ultrasound			0.024
No	77 (21%)	167 (15%)
Yes	3 (0.8%)	4 (0.4%)
Not applicable	294 (79%)	923 (84%)
Lumbar Puncture			0.031
No	71 (19%)	156 (14%)
Yes	9 (2.4%)	15 (1.4%)
Not applicable	294 (79%)	923 (84%)
¹ n (%)
² Fisher’s exact test; Pearson’s Chi-squared test

summary_reco_diagnostics %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_reco_diagnostics.docx")


# Step 1: Pivot longer
diag_long <- og_data %>%
  select(Sex, all_of(diag_vars)) %>%
  pivot_longer(
    cols = -Sex,
    names_to = "Diagnosis",
    values_to = "Value"
  ) %>%
  filter(Value == "Yes")  # Only plot actual recommendations

# Join label names to your long data
diag_long <- diag_long %>%
  left_join(diagnosis_labels, by = "Diagnosis")

## Error: object 'diagnosis_labels' not found

diagnosis_labels <- tibble::tibble(
  Diagnosis = c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG", 
                "DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture"),
  Diagnosis_Label = c("Any Diagnostics", "CT Scan", "MRI", "EEG", 
                      "ECG", "Doppler Ultrasound", "Lumbar Puncture")
)

ggplot(diag_long, aes(x = Diagnosis_Label, fill = Sex)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Diagnostic Recommendations by Sex",
    x = "Diagnostic Test",
    y = "Number of Recommendations",
    fill = "Sex"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

## Error in `geom_bar()`:
## ! Problem while computing aesthetics.
## ℹ Error occurred in the 1st layer.
## Caused by error:
## ! object 'Diagnosis_Label' not found

# 1. Count total number of individuals per sex
sex_totals <- og_data %>%
  count(Sex, name = "total_n")

# 2. Pivot diagnostics to long and keep only "Yes"
diag_long <- og_data %>%
  select(Sex, all_of(diag_vars)) %>%
  pivot_longer(
    cols = -Sex,
    names_to = "Diagnosis",
    values_to = "Value"
  ) %>%
  filter(Value == "Yes")

# 3. Add readable labels
diagnosis_labels <- tibble::tibble(
  Diagnosis = c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG", 
                "DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture"),
  Diagnosis_Label = c("Any Diagnostics", "CT Scan", "MRI", "EEG", 
                      "ECG", "Doppler Ultrasound", "Lumbar Puncture")
)

diag_long <- diag_long %>%
  left_join(diagnosis_labels, by = "Diagnosis")

# 4. Count diagnosis occurrences and calculate percentages
diag_prop <- diag_long %>%
  count(Sex, Diagnosis_Label, name = "diagnosis_n") %>%
  left_join(sex_totals, by = "Sex") %>%
  mutate(percent = diagnosis_n / total_n * 100)

# 5. Plot percentages
ggplot(diag_prop, aes(x = Diagnosis_Label, y = percent, fill = Sex)) +
  geom_col(position = "dodge") +
  labs(
    title = "Diagnostic Recommendations by Sex (Relative to Group Size)",
    x = "Diagnostic Test",
    y = "Percentage of Patients",
    fill = "Sex"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

ggsave("diagnostic_recommendations_by_sex.png", width = 10, height = 6, dpi = 300)

dev.off()

## null device 
##           1

Summary of diagnosis

# Define diagnosis variables
hs_diag_vars <- c("HS_M", "HS_T", "HS_C", "HS_O", "HS_N")

# Convert to factors with Yes/No labels
og_data[hs_diag_vars] <- lapply(og_data[hs_diag_vars], function(x) {
  factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})

summary_hs_diagnose <- og_data %>%
  select(Sex, all_of(hs_diag_vars)) %>%
  tbl_summary(
    by = Sex,
    statistic = all_categorical() ~ "{n} ({p}%)",
    missing = "no",
    label = list(
      HS_M ~ "Migraine",
      HS_T ~ "Tension-type Headache",
      HS_C ~ "Cluster Headache",
      HS_O ~ "Other Headache",
      HS_N ~ "No Diagnosis"
    )
  ) %>%
  modify_header(label = "**Specialist Diagnosis**") %>%
  add_p()

summary_hs_diagnose

Specialist Diagnosis	Male N = 374¹	Female N = 1,094¹	p-value²
Migraine	210 (56%)	958 (88%)	<0.001
Tension-type Headache	93 (25%)	134 (12%)	<0.001
Cluster Headache	44 (12%)	18 (1.6%)	<0.001
Other Headache	85 (23%)	182 (17%)	0.008
No Diagnosis	4 (1.1%)	4 (0.4%)	0.12
¹ n (%)
² Pearson’s Chi-squared test; Fisher’s exact test

summary_reco_diagnostics %>% 
  as_flex_table() %>% 
  flextable::save_as_docx(path = "summary_hs_diagnose.docx" )

#PIE CHART

hs_long <- og_data %>%
  select(Sex, HS_M, HS_T, HS_C, HS_O, HS_N) %>%
  pivot_longer(
    cols      = starts_with("HS_"),
    names_to  = "Diagnosis",
    values_to = "Value"
  ) %>%
  filter(Value == "Yes")  # Only count confirmed diagnoses

# Step 2: Compute relative frequencies within each sex
hs_plot_data <- hs_long %>%
  count(Sex, Diagnosis) %>%
  group_by(Sex) %>%
  mutate(Percent = n / sum(n) * 100)

# Step 3: Plot normalized pie chart
ggplot(hs_plot_data, aes(x = "", y = Percent, fill = Diagnosis)) +
  geom_bar(stat = "identity", width = 1, color = "white") +
  coord_polar("y") +
  facet_wrap(~ Sex) +
  labs(
    title = "Distribution of Specialist Diagnoses (Within Each Sex)",
    fill  = "Diagnosis"
  ) +
  theme_void() +
  theme(
    strip.text       = element_text(face = "bold", size = 12),
    plot.title       = element_text(hjust = 0.5),
    legend.position  = "right"
  )

ggsave("diagnosis_by_sex_pie.png", width = 8, height = 6, dpi = 300)

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

KSA_Gender

2025-05-09

Summary of Diagnostic recommendation

Summary of diagnosis