# ── Build missing data summary ────────────────────────────────────────────────
# Count NA values per column of `linelist` (after dropping case_id, generation,
# age_unit and age), reshape to one row per variable, tag each variable with a
# display category, and derive completeness metrics plus a concern level.
missing_summary <- linelist %>%
  select(-case_id, -generation, -age_unit, -age) %>%
  summarise(across(everything(), ~ sum(is.na(.)))) %>%
  pivot_longer(
    everything(),
    names_to = "Variable",
    values_to = "Missing_n"
  ) %>%
  # Assign every variable to a display category.
  mutate(
    variable_cat = case_when(
      Variable %in% c(
        "outcome",
        "infector",
        "source"
      ) ~ "OUTCOME & IDENTIFIERS",
      Variable %in% c(
        "date_infection",
        "date_outcome",
        "date_onset",
        "date_hospitalisation",
        "time_admission",
        "days_onset_hosp"
      ) ~ "DATES",
      Variable %in% c(
        "gender",
        "age_years",
        "age_cat",
        "age_cat5",
        "hospital"
      ) ~ "DEMOGRAPHICS",
      Variable %in% c(
        "temp",
        "fever",
        "chills",
        "cough",
        "aches",
        "vomit"
      ) ~ "CLINICAL",
      Variable %in% c(
        "ct_blood",
        "wt_kg",
        "ht_cm",
        "bmi",
        "lon",
        "lat"
      ) ~ "MEASUREMENTS (COMPLETE)",
      # Fallback: columns not listed above would otherwise get an NA category.
      TRUE ~ "OTHER"
    )
  ) %>%
  mutate(
    Total = nrow(linelist),
    Complete_n = Total - Missing_n,
    # Both rates are stored as proportions in [0, 1], not multiplied by 100.
    Complete_rate = Complete_n / Total,
    Missing_pct = Missing_n / Total,
    # Conditions are evaluated top to bottom; the first match wins.
    Concern = case_when(
      Complete_rate == 1.00 ~ "✅ Complete",
      Complete_rate >= 0.90 ~ "🟢 Low",
      Complete_rate >= 0.70 ~ "🟡 Moderate",
      TRUE ~ "🔴 High"
    )
  ) %>%
  # Least complete variables first.
  arrange(Complete_rate)
# ── Render with gt ────────────────────────────────────────────────────────────
# Render `missing_summary` as a gt table with one row group per variable
# category, a colour-coded completeness column, and bold text on rows that are
# less than 70% complete.
missing_summary %>%
  select(
    variable_cat, Variable, Missing_n, Complete_n,
    Complete_rate, Missing_pct, Concern
  ) %>%
  group_by(variable_cat) %>%
  gt() %>%
  tab_header(
    title = md("**Missing Data Summary**"),
    subtitle = md(
      glue::glue("*linelist* — {nrow(linelist)} total observations")
    )
  ) %>%
  # Single label definition per column (Complete_rate is shown as
  # "Completeness").
  cols_label(
    Variable = "Variable",
    Missing_n = "Missing (n)",
    Complete_n = "Complete (n)",
    Complete_rate = "Completeness",
    Missing_pct = "Missing (%)",
    Concern = "Concern Level"
  ) %>%
  fmt_number(columns = c(Missing_n, Complete_n), decimals = 0) %>%
  fmt_percent(columns = c(Complete_rate, Missing_pct), decimals = 2) %>%
  # Colour-code the completeness column from red (0) to green (1).
  data_color(
    columns = Complete_rate,
    method = "numeric",
    palette = c("#E24B4A", "#EF9F27", "#639922"),
    domain = c(0, 1)
  ) %>%
  # # Add an inline bar using gtExtras
  # gt_plt_bar_pct(
  #   column = Complete_rate,
  #   scaled = TRUE,
  #   fill = "steelblue"
  # ) %>%
  # Bold rows with high missingness (same cut-off as the "High" concern level).
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = Complete_rate < 0.70)
  ) %>%
  # Stripe rows
  opt_row_striping() %>%
  tab_options(
    table.font.size = 13,
    heading.align = "left",
    column_labels.font.weight = "bold"
  ) %>%
  # Completeness below 70% corresponds to more than 30% missing.
  tab_footnote(
    footnote = "High missingness (> 30%) may affect analysis validity.",
    locations = cells_column_labels(columns = Concern)
  )