#IMPORT DATA
og_data<- read_excel("OG_Data_KSA_Gender_CH.xlsx",
sheet = "DATA_CH")
#TOTAL COHORT describtive
#als erstes Summary of demographic data
demo_vars <- c(
"Age_onset", "Disease_duration", "Birthyear", "Visit_Year", "Sex", "Height", "Age", "BMI",
"AA1_Anxiety", "AA1_Depression", "AA1_Diabetes", "AA1_Gastrointestinal_disease", "AA1_Hyperlipidemia",
"AA1_Hypertension", "AA1_Kidney_disease", "AA1_Kidney_disease_txt", "AA1_Liver_disease", "AA1_Liver_disease_txt",
"AA1_Other_pain", "AA1_Other_pain_txt", "AA1_Other", "AA1_Other_txt1", "AA1_Other_txt2",
"AA1_Respiratory_disease", "AA1_Respiratory_disease_txt", "AA1_Sleep_disorders", "AA1_Stroke",
"AA1_Teeth_grinding", "AA1_Thyroid_diseases"
)
# Extrahieren
demo_data <- og_data[, demo_vars]
# schauen ob die Variablen, die korrekte Klassifikation haben (Continous, categrocal...)
str(demo_data)
## tibble [1,468 × 29] (S3: tbl_df/tbl/data.frame)
## $ Age_onset : num [1:1468] 32 18 12 21 6 14 28 20 23 14 ...
## $ Disease_duration : num [1:1468] 11 30 22 24 25 54 30 29 2 21 ...
## $ Birthyear : num [1:1468] 1973 1967 1982 1970 1985 ...
## $ Visit_Year : num [1:1468] 2016 2015 2016 2015 2016 ...
## $ Sex : num [1:1468] 2 1 2 2 2 2 2 2 2 2 ...
## $ Height : num [1:1468] 172 180 175 158 167 172 159 169 175 168 ...
## $ Age : num [1:1468] 43 48.5 33.2 45.7 30.3 67.6 57.5 48.7 24.3 34.4 ...
## $ BMI : num [1:1468] 22.5 25.6 NA 22.9 25.1 ...
## $ AA1_Anxiety : num [1:1468] 0 0 0 0 0 0 0 0 1 0 ...
## $ AA1_Depression : num [1:1468] 1 0 1 0 0 0 0 0 0 0 ...
## $ AA1_Diabetes : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
## $ AA1_Gastrointestinal_disease: num [1:1468] 0 0 0 0 0 0 0 0 1 0 ...
## $ AA1_Hyperlipidemia : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
## $ AA1_Hypertension : num [1:1468] 0 0 1 0 0 0 0 0 0 0 ...
## $ AA1_Kidney_disease : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
## $ AA1_Kidney_disease_txt : chr [1:1468] NA NA NA NA ...
## $ AA1_Liver_disease : num [1:1468] 0 0 0 0 0 0 0 0 0 1 ...
## $ AA1_Liver_disease_txt : chr [1:1468] NA NA NA NA ...
## $ AA1_Other_pain : num [1:1468] 0 0 1 0 0 0 0 0 1 1 ...
## $ AA1_Other_pain_txt : chr [1:1468] NA NA "Schmerzen in den Fingern und Handgelenken" NA ...
## $ AA1_Other : num [1:1468] 0 1 1 0 0 1 0 0 1 1 ...
## $ AA1_Other_txt1 : chr [1:1468] NA "Neurodermitis" "Hashimoto" NA ...
## $ AA1_Other_txt2 : chr [1:1468] NA NA NA NA ...
## $ AA1_Respiratory_disease : num [1:1468] 0 1 0 0 0 0 0 0 0 0 ...
## $ AA1_Respiratory_disease_txt : chr [1:1468] NA "Asthma" NA NA ...
## $ AA1_Sleep_disorders : num [1:1468] 0 1 0 0 0 0 1 0 0 0 ...
## $ AA1_Stroke : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
## $ AA1_Teeth_grinding : num [1:1468] 0 1 1 0 0 0 1 0 0 1 ...
## $ AA1_Thyroid_diseases : num [1:1468] 0 0 0 0 0 0 0 0 0 0 ...
# Proper conversion: only once, and only after verifying the values
demo_data$Sex <- factor(demo_data$Sex,
levels = c(1, 2),
labels = c("Male", "Female"))
og_data$Sex <- factor(og_data$Sex,
levels = c(1, 2),
labels = c("Male", "Female"))
binary_vars <- c(
"AA1_Anxiety", "AA1_Depression", "AA1_Diabetes", "AA1_Gastrointestinal_disease",
"AA1_Hyperlipidemia", "AA1_Hypertension", "AA1_Kidney_disease",
"AA1_Liver_disease", "AA1_Other_pain", "AA1_Other",
"AA1_Respiratory_disease", "AA1_Sleep_disorders", "AA1_Stroke",
"AA1_Teeth_grinding", "AA1_Thyroid_diseases"
)
# Convert all these columns to factors with labels
demo_data[binary_vars] <- lapply(demo_data[binary_vars], function(x) {
factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})
demo_data[binary_vars] <- lapply(og_data[binary_vars], function(x) {
factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})
# schauen ob die Variablen, die korrekte Klassifikation haben (Continous, categrocal...)
str(demo_data)
## tibble [1,468 × 29] (S3: tbl_df/tbl/data.frame)
## $ Age_onset : num [1:1468] 32 18 12 21 6 14 28 20 23 14 ...
## $ Disease_duration : num [1:1468] 11 30 22 24 25 54 30 29 2 21 ...
## $ Birthyear : num [1:1468] 1973 1967 1982 1970 1985 ...
## $ Visit_Year : num [1:1468] 2016 2015 2016 2015 2016 ...
## $ Sex : Factor w/ 2 levels "Male","Female": 2 1 2 2 2 2 2 2 2 2 ...
## $ Height : num [1:1468] 172 180 175 158 167 172 159 169 175 168 ...
## $ Age : num [1:1468] 43 48.5 33.2 45.7 30.3 67.6 57.5 48.7 24.3 34.4 ...
## $ BMI : num [1:1468] 22.5 25.6 NA 22.9 25.1 ...
## $ AA1_Anxiety : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
## $ AA1_Depression : Factor w/ 2 levels "No","Yes": 2 1 2 1 1 1 1 1 1 1 ...
## $ AA1_Diabetes : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ AA1_Gastrointestinal_disease: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 2 1 ...
## $ AA1_Hyperlipidemia : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ AA1_Hypertension : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 1 1 ...
## $ AA1_Kidney_disease : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ AA1_Kidney_disease_txt : chr [1:1468] NA NA NA NA ...
## $ AA1_Liver_disease : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 2 ...
## $ AA1_Liver_disease_txt : chr [1:1468] NA NA NA NA ...
## $ AA1_Other_pain : Factor w/ 2 levels "No","Yes": 1 1 2 1 1 1 1 1 2 2 ...
## $ AA1_Other_pain_txt : chr [1:1468] NA NA "Schmerzen in den Fingern und Handgelenken" NA ...
## $ AA1_Other : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 2 1 1 2 2 ...
## $ AA1_Other_txt1 : chr [1:1468] NA "Neurodermitis" "Hashimoto" NA ...
## $ AA1_Other_txt2 : chr [1:1468] NA NA NA NA ...
## $ AA1_Respiratory_disease : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 1 1 1 1 ...
## $ AA1_Respiratory_disease_txt : chr [1:1468] NA "Asthma" NA NA ...
## $ AA1_Sleep_disorders : Factor w/ 2 levels "No","Yes": 1 2 1 1 1 1 2 1 1 1 ...
## $ AA1_Stroke : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ AA1_Teeth_grinding : Factor w/ 2 levels "No","Yes": 1 2 2 1 1 1 2 1 1 2 ...
## $ AA1_Thyroid_diseases : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
# generiere automatisches Summary
summary(demo_data)
## Age_onset Disease_duration Birthyear Visit_Year Sex
## Min. : 2.00 Min. : 0.00 Min. :1930 Min. :2015 Male : 374
## 1st Qu.:14.00 1st Qu.: 5.00 1st Qu.:1965 1st Qu.:2017 Female:1094
## Median :20.00 Median :16.00 Median :1976 Median :2019
## Mean :24.72 Mean :18.75 Mean :1975 Mean :2019
## 3rd Qu.:33.00 3rd Qu.:29.00 3rd Qu.:1987 3rd Qu.:2021
## Max. :85.00 Max. :70.00 Max. :2004 Max. :2022
## NA's :11 NA's :11
## Height Age BMI AA1_Anxiety AA1_Depression
## Min. :120.0 Min. :15.70 Min. :15.32 No :1207 No :1142
## 1st Qu.:164.0 1st Qu.:31.27 1st Qu.:21.37 Yes: 261 Yes: 326
## Median :169.0 Median :43.20 Median :24.09
## Mean :169.6 Mean :43.41 Mean :25.07
## 3rd Qu.:175.0 3rd Qu.:53.40 3rd Qu.:27.59
## Max. :198.0 Max. :89.50 Max. :62.11
## NA's :39 NA's :112
## AA1_Diabetes AA1_Gastrointestinal_disease AA1_Hyperlipidemia AA1_Hypertension
## No :1423 No :1294 No :1354 No :1241
## Yes: 45 Yes: 174 Yes: 114 Yes: 227
##
##
##
##
##
## AA1_Kidney_disease AA1_Kidney_disease_txt AA1_Liver_disease
## No :1394 Length:1468 No :1416
## Yes: 74 Class :character Yes: 52
## Mode :character
##
##
##
##
## AA1_Liver_disease_txt AA1_Other_pain AA1_Other_pain_txt AA1_Other
## Length:1468 No :1187 Length:1468 No :1023
## Class :character Yes: 281 Class :character Yes: 445
## Mode :character Mode :character
##
##
##
##
## AA1_Other_txt1 AA1_Other_txt2 AA1_Respiratory_disease
## Length:1468 Length:1468 No :1291
## Class :character Class :character Yes: 177
## Mode :character Mode :character
##
##
##
##
## AA1_Respiratory_disease_txt AA1_Sleep_disorders AA1_Stroke AA1_Teeth_grinding
## Length:1468 No :935 No :1435 No :1040
## Class :character Yes:533 Yes: 33 Yes: 428
## Mode :character
##
##
##
##
## AA1_Thyroid_diseases
## No :1209
## Yes: 259
##
##
##
##
##
summary_table_demo <- demo_data %>%
select(
`Age at Onset` = Age_onset,
`Disease Duration` = Disease_duration,
Sex,
Age,
BMI,
`Anxiety` = AA1_Anxiety,
`Depression` = AA1_Depression,
`Diabetes` = AA1_Diabetes,
`GI Disease` = AA1_Gastrointestinal_disease,
`Hyperlipidemia` = AA1_Hyperlipidemia,
`Hypertension` = AA1_Hypertension,
`Kidney Disease` = AA1_Kidney_disease,
`Liver Disease` = AA1_Liver_disease,
`Other Pain` = AA1_Other_pain,
`Other Comorbidity` = AA1_Other,
`Respiratory Disease` = AA1_Respiratory_disease,
`Sleep Disorders` = AA1_Sleep_disorders,
`Stroke` = AA1_Stroke,
`Teeth Grinding` = AA1_Teeth_grinding,
`Thyroid Disease` = AA1_Thyroid_diseases
) %>%
tbl_summary(
statistic = list(
all_continuous() ~ "{median} ({p25},{p75})",
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) %>%
modify_header(label = "**Variable**")
summary_table_demo
| Variable | N = 1,4681 |
|---|---|
| Age at Onset | 20 (14,33) |
| Disease Duration | 16 (5,29) |
| Sex | |
| Â Â Â Â Male | 374 (25%) |
| Â Â Â Â Female | 1,094 (75%) |
| Age | 43 (31,53) |
| BMI | 24.1 (21.4,27.6) |
| Anxiety | 261 (18%) |
| Depression | 326 (22%) |
| Diabetes | 45 (3.1%) |
| GI Disease | 174 (12%) |
| Hyperlipidemia | 114 (7.8%) |
| Hypertension | 227 (15%) |
| Kidney Disease | 74 (5.0%) |
| Liver Disease | 52 (3.5%) |
| Other Pain | 281 (19%) |
| Other Comorbidity | 445 (30%) |
| Respiratory Disease | 177 (12%) |
| Sleep Disorders | 533 (36%) |
| Stroke | 33 (2.2%) |
| Teeth Grinding | 428 (29%) |
| Thyroid Disease | 259 (18%) |
| 1 Median (Q1,Q3); n (%) | |
summary_table_demo %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_table_demo.docx")
summary_table_demo_sex <- demo_data %>%
select(
`Age at Onset` = Age_onset,
`Disease Duration` = Disease_duration,
Sex,
Age,
BMI,
`Anxiety` = AA1_Anxiety,
`Depression` = AA1_Depression,
`Diabetes` = AA1_Diabetes,
`GI Disease` = AA1_Gastrointestinal_disease,
`Hyperlipidemia` = AA1_Hyperlipidemia,
`Hypertension` = AA1_Hypertension,
`Kidney Disease` = AA1_Kidney_disease,
`Liver Disease` = AA1_Liver_disease,
`Other Pain` = AA1_Other_pain,
`Other Comorbidity` = AA1_Other,
`Respiratory Disease` = AA1_Respiratory_disease,
`Sleep Disorders` = AA1_Sleep_disorders,
`Stroke` = AA1_Stroke,
`Teeth Grinding` = AA1_Teeth_grinding,
`Thyroid Disease` = AA1_Thyroid_diseases
) %>%
tbl_summary(
by = Sex,
statistic = list(
all_continuous() ~ "{median} ({p25}, {p75})",
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) %>%
modify_header(label = "**Variable**") %>%
add_p() # Optional: add p-values for group comparisons
summary_table_demo_sex
| Variable | Male N = 3741 |
Female N = 1,0941 |
p-value2 |
|---|---|---|---|
| Age at Onset | 27 (14, 41) | 19 (14, 30) | <0.001 |
| Disease Duration | 10 (3, 23) | 19 (8, 30) | <0.001 |
| Age | 43 (31, 55) | 43 (31, 53) | 0.3 |
| BMI | 25.2 (23.1, 28.7) | 23.4 (20.9, 27.2) | <0.001 |
| Anxiety | 54 (14%) | 207 (19%) | 0.050 |
| Depression | 79 (21%) | 247 (23%) | 0.6 |
| Diabetes | 17 (4.5%) | 28 (2.6%) | 0.054 |
| GI Disease | 29 (7.8%) | 145 (13%) | 0.005 |
| Hyperlipidemia | 26 (7.0%) | 88 (8.0%) | 0.5 |
| Hypertension | 82 (22%) | 145 (13%) | <0.001 |
| Kidney Disease | 17 (4.5%) | 57 (5.2%) | 0.6 |
| Liver Disease | 14 (3.7%) | 38 (3.5%) | 0.8 |
| Other Pain | 52 (14%) | 229 (21%) | 0.003 |
| Other Comorbidity | 113 (30%) | 332 (30%) | >0.9 |
| Respiratory Disease | 45 (12%) | 132 (12%) | >0.9 |
| Sleep Disorders | 130 (35%) | 403 (37%) | 0.5 |
| Stroke | 10 (2.7%) | 23 (2.1%) | 0.5 |
| Teeth Grinding | 84 (22%) | 344 (31%) | <0.001 |
| Thyroid Disease | 13 (3.5%) | 246 (22%) | <0.001 |
| 1 Median (Q1, Q3); n (%) | |||
| 2 Wilcoxon rank sum test; Pearson’s Chi-squared test | |||
summary_table_demo_sex %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_table_demo_sex.docx")
summary_migraine_only <- og_data %>%
filter(HS_M == "Yes") %>%
select(
`Age at Onset` = Age_onset,
`Disease Duration` = Disease_duration,
Sex,
Age,
BMI,
`Anxiety` = AA1_Anxiety,
`Depression` = AA1_Depression,
`Diabetes` = AA1_Diabetes,
`GI Disease` = AA1_Gastrointestinal_disease,
`Hyperlipidemia` = AA1_Hyperlipidemia,
`Hypertension` = AA1_Hypertension,
`Kidney Disease` = AA1_Kidney_disease,
`Liver Disease` = AA1_Liver_disease,
`Other Pain` = AA1_Other_pain,
`Other Comorbidity` = AA1_Other,
`Respiratory Disease` = AA1_Respiratory_disease,
`Sleep Disorders` = AA1_Sleep_disorders,
`Stroke` = AA1_Stroke,
`Teeth Grinding` = AA1_Teeth_grinding,
`Thyroid Disease` = AA1_Thyroid_diseases
) %>%
tbl_summary(
by = Sex,
statistic = list(
all_continuous() ~ "{median} ({p25}, {p75})",
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) %>%
modify_header(label = "**Variable**") %>%
add_p() # Optional: add p-values for group comparisons
## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.
summary_migraine_only
## Error: object 'summary_migraine_only' not found
summary_migraine_only %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_migraine_only.docx")
## Error: object 'summary_migraine_only' not found
summary_cluster_only <- og_data %>%
filter(HS_C == "Yes") %>%
select(
`Age at Onset` = Age_onset,
`Disease Duration` = Disease_duration,
Sex,
Age,
BMI,
`Anxiety` = AA1_Anxiety,
`Depression` = AA1_Depression,
`Diabetes` = AA1_Diabetes,
`GI Disease` = AA1_Gastrointestinal_disease,
`Hyperlipidemia` = AA1_Hyperlipidemia,
`Hypertension` = AA1_Hypertension,
`Kidney Disease` = AA1_Kidney_disease,
`Liver Disease` = AA1_Liver_disease,
`Other Pain` = AA1_Other_pain,
`Other Comorbidity` = AA1_Other,
`Respiratory Disease` = AA1_Respiratory_disease,
`Sleep Disorders` = AA1_Sleep_disorders,
`Stroke` = AA1_Stroke,
`Teeth Grinding` = AA1_Teeth_grinding,
`Thyroid Disease` = AA1_Thyroid_diseases
) %>%
tbl_summary(
by = Sex,
statistic = list(
all_continuous() ~ "{median} ({p25}, {p75})",
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) %>%
modify_header(label = "**Variable**") %>%
add_p()
## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.
# Save to Word document
summary_cluster_only %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_cluster_only.docx")
## Error: object 'summary_cluster_only' not found
summary_tension_only <- og_data %>%
filter(HS_T == "Yes") %>%
select(
`Age at Onset` = Age_onset,
`Disease Duration` = Disease_duration,
Sex,
Age,
BMI,
`Anxiety` = AA1_Anxiety,
`Depression` = AA1_Depression,
`Diabetes` = AA1_Diabetes,
`GI Disease` = AA1_Gastrointestinal_disease,
`Hyperlipidemia` = AA1_Hyperlipidemia,
`Hypertension` = AA1_Hypertension,
`Kidney Disease` = AA1_Kidney_disease,
`Liver Disease` = AA1_Liver_disease,
`Other Pain` = AA1_Other_pain,
`Other Comorbidity` = AA1_Other,
`Respiratory Disease` = AA1_Respiratory_disease,
`Sleep Disorders` = AA1_Sleep_disorders,
`Stroke` = AA1_Stroke,
`Teeth Grinding` = AA1_Teeth_grinding,
`Thyroid Disease` = AA1_Thyroid_diseases
) %>%
tbl_summary(
by = Sex,
statistic = list(
all_continuous() ~ "{median} ({p25}, {p75})",
all_categorical() ~ "{n} ({p}%)"
),
missing = "no"
) %>%
modify_header(label = "**Variable**") %>%
add_p()
## Error in `modify_header()`:
## ! Expecting `data` argument to have at least 1 row and 1 column.
# Save to Word document
summary_tension_only %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_tension_only.docx")
## Error: object 'summary_tension_only' not found
# recommended Diagnostics muss erstmal als korrekte Variable gelabelt werden
diag_vars <- c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG",
"DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture")
# Convert to factors with labels
og_data[diag_vars] <- lapply(og_data[diag_vars], function(x) {
factor(x, levels = c(0, 1, 9), labels = c("No", "Yes", "Not applicable"))
})
summary_reco_diagnostics <- og_data %>%
select(Sex, all_of(diag_vars)) %>%
tbl_summary(
by = Sex,
statistic = all_categorical() ~ "{n} ({p}%)",
missing = "no",
label = list(
DIAR_YN = "Any Diagnostics Recommended",
DIAR_CT = "CT Scan",
DIAR_MRI = "MRI",
DIAR_EEG = "EEG",
DIAR_ECG = "ECG",
DIAR_DOPPLER = "Doppler Ultrasound",
DIAR_Lumbar_puncture = "Lumbar Puncture"
)
) %>%
modify_header(label = "**Diagnostic Recommendation**") %>%
add_p()
summary_reco_diagnostics
| Diagnostic Recommendation | Male N = 3741 |
Female N = 1,0941 |
p-value2 |
|---|---|---|---|
| Any Diagnostics Recommended | 0.009 | ||
| Â Â Â Â No | 293 (78%) | 923 (84%) | |
| Â Â Â Â Yes | 81 (22%) | 171 (16%) | |
| Â Â Â Â Not applicable | 0 (0%) | 0 (0%) | |
| CT Scan | 0.028 | ||
| Â Â Â Â No | 78 (21%) | 165 (15%) | |
| Â Â Â Â Yes | 2 (0.5%) | 6 (0.5%) | |
| Â Â Â Â Not applicable | 294 (79%) | 923 (84%) | |
| MRI | 0.014 | ||
| Â Â Â Â No | 35 (9.4%) | 60 (5.5%) | |
| Â Â Â Â Yes | 45 (12%) | 111 (10%) | |
| Â Â Â Â Not applicable | 294 (79%) | 923 (84%) | |
| EEG | 0.021 | ||
| Â Â Â Â No | 79 (21%) | 165 (15%) | |
| Â Â Â Â Yes | 1 (0.3%) | 6 (0.5%) | |
| Â Â Â Â Not applicable | 294 (79%) | 923 (84%) | |
| ECG | 0.009 | ||
| Â Â Â Â No | 75 (20%) | 167 (15%) | |
| Â Â Â Â Yes | 5 (1.3%) | 4 (0.4%) | |
| Â Â Â Â Not applicable | 294 (79%) | 923 (84%) | |
| Doppler Ultrasound | 0.024 | ||
| Â Â Â Â No | 77 (21%) | 167 (15%) | |
| Â Â Â Â Yes | 3 (0.8%) | 4 (0.4%) | |
| Â Â Â Â Not applicable | 294 (79%) | 923 (84%) | |
| Lumbar Puncture | 0.031 | ||
| Â Â Â Â No | 71 (19%) | 156 (14%) | |
| Â Â Â Â Yes | 9 (2.4%) | 15 (1.4%) | |
| Â Â Â Â Not applicable | 294 (79%) | 923 (84%) | |
| 1 n (%) | |||
| 2 Fisher’s exact test; Pearson’s Chi-squared test | |||
summary_reco_diagnostics %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_reco_diagnostics.docx")
# Step 1: Pivot longer
diag_long <- og_data %>%
select(Sex, all_of(diag_vars)) %>%
pivot_longer(
cols = -Sex,
names_to = "Diagnosis",
values_to = "Value"
) %>%
filter(Value == "Yes") # Only plot actual recommendations
# Join label names to your long data
diag_long <- diag_long %>%
left_join(diagnosis_labels, by = "Diagnosis")
## Error: object 'diagnosis_labels' not found
diagnosis_labels <- tibble::tibble(
Diagnosis = c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG",
"DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture"),
Diagnosis_Label = c("Any Diagnostics", "CT Scan", "MRI", "EEG",
"ECG", "Doppler Ultrasound", "Lumbar Puncture")
)
ggplot(diag_long, aes(x = Diagnosis_Label, fill = Sex)) +
geom_bar(position = "dodge") +
labs(
title = "Diagnostic Recommendations by Sex",
x = "Diagnostic Test",
y = "Number of Recommendations",
fill = "Sex"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold")
)
## Error in `geom_bar()`:
## ! Problem while computing aesthetics.
## ℹ Error occurred in the 1st layer.
## Caused by error:
## ! object 'Diagnosis_Label' not found
# 1. Count total number of individuals per sex
sex_totals <- og_data %>%
count(Sex, name = "total_n")
# 2. Pivot diagnostics to long and keep only "Yes"
diag_long <- og_data %>%
select(Sex, all_of(diag_vars)) %>%
pivot_longer(
cols = -Sex,
names_to = "Diagnosis",
values_to = "Value"
) %>%
filter(Value == "Yes")
# 3. Add readable labels
diagnosis_labels <- tibble::tibble(
Diagnosis = c("DIAR_YN", "DIAR_CT", "DIAR_MRI", "DIAR_EEG",
"DIAR_ECG", "DIAR_DOPPLER", "DIAR_Lumbar_puncture"),
Diagnosis_Label = c("Any Diagnostics", "CT Scan", "MRI", "EEG",
"ECG", "Doppler Ultrasound", "Lumbar Puncture")
)
diag_long <- diag_long %>%
left_join(diagnosis_labels, by = "Diagnosis")
# 4. Count diagnosis occurrences and calculate percentages
diag_prop <- diag_long %>%
count(Sex, Diagnosis_Label, name = "diagnosis_n") %>%
left_join(sex_totals, by = "Sex") %>%
mutate(percent = diagnosis_n / total_n * 100)
# 5. Plot percentages
ggplot(diag_prop, aes(x = Diagnosis_Label, y = percent, fill = Sex)) +
geom_col(position = "dodge") +
labs(
title = "Diagnostic Recommendations by Sex (Relative to Group Size)",
x = "Diagnostic Test",
y = "Percentage of Patients",
fill = "Sex"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold")
)
ggsave("diagnostic_recommendations_by_sex.png", width = 10, height = 6, dpi = 300)
dev.off()
## null device
## 1
# Define diagnosis variables
hs_diag_vars <- c("HS_M", "HS_T", "HS_C", "HS_O", "HS_N")
# Convert to factors with Yes/No labels
og_data[hs_diag_vars] <- lapply(og_data[hs_diag_vars], function(x) {
factor(x, levels = c(0, 1), labels = c("No", "Yes"))
})
summary_hs_diagnose <- og_data %>%
select(Sex, all_of(hs_diag_vars)) %>%
tbl_summary(
by = Sex,
statistic = all_categorical() ~ "{n} ({p}%)",
missing = "no",
label = list(
HS_M ~ "Migraine",
HS_T ~ "Tension-type Headache",
HS_C ~ "Cluster Headache",
HS_O ~ "Other Headache",
HS_N ~ "No Diagnosis"
)
) %>%
modify_header(label = "**Specialist Diagnosis**") %>%
add_p()
summary_hs_diagnose
| Specialist Diagnosis | Male N = 3741 |
Female N = 1,0941 |
p-value2 |
|---|---|---|---|
| Migraine | 210 (56%) | 958 (88%) | <0.001 |
| Tension-type Headache | 93 (25%) | 134 (12%) | <0.001 |
| Cluster Headache | 44 (12%) | 18 (1.6%) | <0.001 |
| Other Headache | 85 (23%) | 182 (17%) | 0.008 |
| No Diagnosis | 4 (1.1%) | 4 (0.4%) | 0.12 |
| 1 n (%) | |||
| 2 Pearson’s Chi-squared test; Fisher’s exact test | |||
summary_reco_diagnostics %>%
as_flex_table() %>%
flextable::save_as_docx(path = "summary_hs_diagnose.docx" )
#PIE CHART
hs_long <- og_data %>%
select(Sex, HS_M, HS_T, HS_C, HS_O, HS_N) %>%
pivot_longer(
cols = starts_with("HS_"),
names_to = "Diagnosis",
values_to = "Value"
) %>%
filter(Value == "Yes") # Only count confirmed diagnoses
# Step 2: Compute relative frequencies within each sex
hs_plot_data <- hs_long %>%
count(Sex, Diagnosis) %>%
group_by(Sex) %>%
mutate(Percent = n / sum(n) * 100)
# Step 3: Plot normalized pie chart
ggplot(hs_plot_data, aes(x = "", y = Percent, fill = Diagnosis)) +
geom_bar(stat = "identity", width = 1, color = "white") +
coord_polar("y") +
facet_wrap(~ Sex) +
labs(
title = "Distribution of Specialist Diagnoses (Within Each Sex)",
fill = "Diagnosis"
) +
theme_void() +
theme(
strip.text = element_text(face = "bold", size = 12),
plot.title = element_text(hjust = 0.5),
legend.position = "right"
)
ggsave("diagnosis_by_sex_pie.png", width = 8, height = 6, dpi = 300)
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.