PEU Data Analysis

Author

Dr Samuel Blay Nguah

Importing and cleaning data

Code
df_missed_1 <- 
    data1 %>% 
    janitor::clean_names() %>% 
    rename(
        missed_vac_prev_admin = for_children_who_have_missed_some_vaccinations_did_the_child_also_miss_opportunities_for_immunization_during_previous_admissions,
        missed_vac_prev_opp = in_children_with_missed_immunizations_did_the_child_miss_opportunities_for_immunization_during_previous_admissions_or_opd_consultations,
        deworm_6_mths = deworming_within_the_past_6_months_non_applicable_for_children_under_1_year, 
        eats_iron_rich_foods = does_the_child_eat_adequate_iron_rich_foods_regularly_e_g_meat_beans_leafy_vegetables, 
        eclusive_bf = was_exclusive_breastfeeding_done_for_at_least_4_months_in_older_children, 
        missed_vac_reason = if_missed_vaccination_give_reason_for_incomplete_vaccination_status, 
        med_history = does_the_child_have_a_history_of_the_following_check_all_that_applies,
        txn_3_months = recent_blood_transfusion_past_3_months_before_admission,
        adm_clinical_features = clinical_features_at_admission_select_all_that_apply, 
        blood_loss_3_mths = recent_blood_loss_past_3_months_before_admission,
        iron_supp_3mths = iron_supplementation_within_the_past_3_months,
        bld_film_comments = blood_film_comment_check_all_that_applies,
        anemia_type = type_of_anemia_diagnosed_if_applicable, 
        bld_txn_required = was_blood_transfusion_required, 
        dura_hosp = duration_of_hospital_stay_days,
        oth_med_hx = other_medical_history_specify, 
        mal_prev_measure = malaria_prevention_measures, 
        picu_admission = was_patient_admitted_to_picu, 
        disch_diag = discharge_diagnosis_list, 
        oth_lab_findings = other_laboratory_findings) %>% 
    mutate(
        sid = row_number(),
        missed_imm = case_when(
            immunization_status == "Fully vaccinated for age" ~ "No",
            immunization_status == "Not vaccinated" ~ "Yes",
            immunization_status == "Partially vaccinated for age" ~ "Yes"),
        age_months = interval(date_of_birth, date_of_admission) %>% 
            time_length("months"), 
        age_years = interval(date_of_birth, date_of_admission) %>% 
            time_length("yearss"),
        age_less_5yrs = case_when(age_years < 5 ~ "Yes", TRUE ~ "No"), 
        year_of_birth = year(date_of_birth),
        birth_cohort = case_when(
            year_of_birth <= 2020 ~ "Pre 2020", 
            year_of_birth <= 2021 ~ "2020-2021",
            year_of_birth > 2021 ~ "After 2021") %>% 
            factor(levels = c("2020-2021", "Pre 2020", "After 2021")),
        family_income = factor(
            family_income, 
            levels = c("Low", "Middle", "High")),
        outcome = case_when(
            outcome_at_discharge == "Died" ~ "Died", TRUE ~ "Survived") %>% 
            factor(levels = c("Survived", "Died")),
        died = case_when(outcome == "Died" ~ "Yes", TRUE ~ "No"),
        male = case_when(gender == "Male" ~ "Yes", TRUE ~ "No"),
        urban = case_when(residence == "Urban" ~ "Yes", TRUE ~ "No"), 
         dura_hosp2 = scale(dura_hosp)) %>% 
    select(-c(column_43, column_42)) 

df_missed_2 <- 
    data2 %>% 
    janitor::clean_names() %>% 
    rename(
        resp_diff = respiratory_difficulty_fast_breathing) %>% 
    mutate(
        scd = case_when(
            dd1 == "SCD" | dd2 == "SCD" | dd3 == "SCD" | 
                dd4 == "SCD" | dd5 == "SCD" ~ "Yes", 
            TRUE ~ "No"), 
        anemia = case_when(
            dd1 == "Anemia" | dd2 == "Anemia" | dd3 == "Anemia" | 
                dd4 == "Anemia" | dd5 == "Anemia" ~ "Yes", 
            TRUE ~ "No"),
        malaria = case_when(
            dd1 == "Malaria" | dd2 == "Malaria" | dd3 == "Malaria" | 
                dd4 == "Malaria" | dd5 == "Malaria" ~ "Yes", 
            TRUE ~ "No"),
        pneumonia = case_when(
            dd1 == "Pneumonia" | dd2 == "Pneumonia" | dd3 == "Pneumonia" | 
                dd4 == "Pneumonia" | dd5 == "Pneumonia" ~ "Yes", 
            TRUE ~ "No"))

# Determine the most common disgnosis
# data2 %>%
#     select(starts_with("dd")) %>% 
#     pivot_longer(cols = starts_with("dd")) %>% 
#     count(value) %>% 
#     arrange(desc(n)) %>% 
#     drop_na()



df_missed <- 
    df_missed_1 %>% 
    full_join(df_missed_2)
Joining with `by = join_by(study_id)`
Code
df_missed <- 
    df_missed %>% 
    labelled::set_variable_labels(
        year_of_birth = "Year of Birth",
        missed_imm = "Missed immunization",
        age_years = "Age in Years", 
        gender = "Sex",
        age_less_5yrs = "Age < 5 Years", 
        residence = "Residence type", 
        maternal_education = "Maternal Educational Level",
        paternal_education = "Paternal Educational Level",
        family_income = "Family Income",
        outcome = "Outcome of Admission", 
        dura_hosp = "Duration of Hospitalization", 
        dura_hosp2 = "Duration of Hospitalization", 
        picu_admission = "PICU Admission",
        chronic_condition = "Chronic Medical Condition", 
        hb_g_dl = "Hemoglobin (g.dL)",
        fever = "Fever", 
        weakness = "Weakness",
        resp_diff = "Respiratory Difficulty", 
        vomiting = "Vomiting",
        swelling_of_face = "Bodily Swelling", 
        anemia = "Anemia",
        pneumonia = "Pneumonia", 
        scd = "Sickle Cell Disease",
        malaria = "Severe Malaria", 
        died = "Outcome, Died",
        male = "Sex, Male", 
        urban = "Urban Residence", 
        birth_cohort = "Birth Cohort") 

Table 1: Descriptive

Note

Note that all continuous variables have been summarized and median(IQR)

Code
table_one <- 
    df_missed %>% 
    select(
        missed_imm, age_years, age_less_5yrs, gender, birth_cohort,
        residence, maternal_education, paternal_education, 
        family_income, outcome, picu_admission, dura_hosp, 
        hb_g_dl, chronic_condition, fever, weakness, 
        resp_diff, vomiting, scd, anemia, pneumonia, malaria) %>% 
    gtsummary::tbl_summary(
        by = missed_imm, 
        digits = gtsummary::all_categorical() ~ c(0,1), 
        statistic = list(
            gtsummary::all_categorical()~"{n} ({p})")) %>% 
    gtsummary::modify_spanning_header(
        gtsummary::all_stat_cols() ~ "**Missed Immunization**") %>%
    gtsummary::add_overall(last=T) %>%
    gtsummary::bold_labels()

table_one
Characteristic
Missed Immunization
Overall
N = 3061
No
N = 2281
Yes
N = 781
Age in Years 4.5 (2.0, 9.3) 1.0 (0.6, 1.8) 3.2 (1.2, 7.6)
Age < 5 Years 123 (53.9) 70 (89.7) 193 (63.1)
Sex


    Female 101 (44.3) 34 (43.6) 135 (44.1)
    Male 127 (55.7) 44 (56.4) 171 (55.9)
Birth Cohort


    2020-2021 21 (9.2) 1 (1.3) 22 (7.2)
    Pre 2020 126 (55.3) 8 (10.3) 134 (43.8)
    After 2021 81 (35.5) 69 (88.5) 150 (49.0)
Residence type


    Rural 97 (42.5) 29 (37.2) 126 (41.2)
    Urban 131 (57.5) 49 (62.8) 180 (58.8)
Maternal Educational Level


    None 25 (11.0) 7 (9.0) 32 (10.5)
    Primary 93 (40.8) 32 (41.0) 125 (40.8)
    Secondary 73 (32.0) 27 (34.6) 100 (32.7)
    Tertiary 37 (16.2) 12 (15.4) 49 (16.0)
Paternal Educational Level


    None 30 (13.2) 14 (17.9) 44 (14.4)
    Primary 74 (32.5) 19 (24.4) 93 (30.4)
    Secondary 78 (34.2) 31 (39.7) 109 (35.6)
    Tertiary 46 (20.2) 14 (17.9) 60 (19.6)
Family Income


    Low 155 (68.0) 60 (76.9) 215 (70.3)
    Middle 64 (28.1) 16 (20.5) 80 (26.1)
    High 9 (3.9) 2 (2.6) 11 (3.6)
Outcome of Admission


    Survived 212 (93.0) 65 (83.3) 277 (90.5)
    Died 16 (7.0) 13 (16.7) 29 (9.5)
PICU Admission 8 (3.5) 5 (6.4) 13 (4.2)
Duration of Hospitalization 6 (3, 12) 6 (3, 9) 6 (3, 10)
Hemoglobin (g.dL) 9.75 (8.05, 11.60) 10.10 (8.00, 11.20) 9.85 (8.00, 11.50)
Chronic Medical Condition 113 (49.6) 47 (60.3) 160 (52.3)
Fever 136 (59.6) 41 (52.6) 177 (57.8)
Weakness 91 (39.9) 26 (33.3) 117 (38.2)
Respiratory Difficulty 70 (30.7) 36 (46.2) 106 (34.6)
Vomiting 42 (18.4) 17 (21.8) 59 (19.3)
Sickle Cell Disease 31 (13.6) 8 (10.3) 39 (12.7)
Anemia 22 (9.6) 9 (11.5) 31 (10.1)
Pneumonia 17 (7.5) 10 (12.8) 27 (8.8)
Severe Malaria 23 (10.1) 6 (7.7) 29 (9.5)
1 Median (Q1, Q3); n (%)
Code
file.remove("table_one.docx")
[1] TRUE
Code
table_one %>% 
    gtsummary::as_gt() %>% 
    gt::gtsave("table_one.docx")

Table 2A: Univariate Regression

Note

Note that multivariate regression variables are based on known predictors in the literature and predictors with p<0.2

Code
tbl_one <- 
    df_missed %>% 
    mutate(
        missed_imm = factor(missed_imm)) %>% 
    select(
        missed_imm, age_less_5yrs, male, birth_cohort,
        urban, maternal_education, paternal_education, 
        family_income, died, picu_admission, dura_hosp2, 
        hb_g_dl, chronic_condition, fever, weakness, 
        resp_diff, vomiting, scd, anemia, pneumonia, malaria) %>% 
    gtsummary::tbl_uvregression(
        method = glm,
        y = missed_imm,
        method.args = family(binomial),
        exponentiate = TRUE,
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3),
        show_single_row = c(
            age_less_5yrs, chronic_condition, fever, weakness, 
            resp_diff, vomiting, scd, anemia, pneumonia, malaria, 
            picu_admission, died, male, urban), 
        hide_n = TRUE) %>% 
    gtsummary::modify_header(
        update = list(
            estimate ~ "**COR**",
            label ~ "**Variable**")) %>% 
    gtsummary::add_global_p() %>% 
    gtsummary::bold_p(t = 0.2) %>% 
    gtsummary::bold_labels()


tbl_one
Variable COR 95% CI p-value
Age < 5 Years 7.47 3.63, 17.5 <0.001
Sex, Male 1.03 0.61, 1.74 0.913
Birth Cohort

<0.001
    2020-2021
    Pre 2020 1.33 0.23, 25.4
    After 2021 17.9 3.60, 325
Urban Residence 1.25 0.74, 2.14 0.404
Maternal Educational Level

0.946
    None
    Primary 1.23 0.51, 3.32
    Secondary 1.32 0.53, 3.62
    Tertiary 1.16 0.41, 3.49
Paternal Educational Level

0.417
    None
    Primary 0.55 0.24, 1.25
    Secondary 0.85 0.40, 1.85
    Tertiary 0.65 0.27, 1.57
Family Income

0.314
    Low
    Middle 0.65 0.34, 1.18
    High 0.57 0.09, 2.31
Outcome, Died 2.65 1.19, 5.79 0.017
PICU Admission 1.88 0.55, 5.83 0.293
Duration of Hospitalization 0.70 0.49, 0.96 0.025
Hemoglobin (g.dL) 0.99 0.90, 1.09 0.819
Chronic Medical Condition 1.54 0.92, 2.62 0.101
Fever 0.75 0.45, 1.26 0.275
Weakness 0.75 0.43, 1.28 0.299
Respiratory Difficulty 1.93 1.14, 3.28 0.014
Vomiting 1.23 0.64, 2.29 0.519
Sickle Cell Disease 0.73 0.30, 1.59 0.436
Anemia 1.22 0.51, 2.70 0.637
Pneumonia 1.83 0.77, 4.12 0.164
Severe Malaria 0.74 0.27, 1.79 0.525
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Table 2B: multivariate Regression

Code
tbl_two <- 
    df_missed %>% 
    mutate(missed_imm = factor(missed_imm)) %>% 
    select(
        missed_imm, age_less_5yrs, died, dura_hosp2, resp_diff, 
        male, chronic_condition, pneumonia, maternal_education,
        paternal_education) %>%
    glm(missed_imm ~ ., data = ., family = binomial) %>% 
    gtsummary::tbl_regression(
        exponentiate = T,
        show_single_row = c(
            age_less_5yrs, resp_diff, died, male,
            chronic_condition, pneumonia),
        hide_n = TRUE, 
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)) %>% 
    gtsummary::modify_header(
        update = list(
            estimate ~ "**AOR**",
            label ~ "**Variable**")) %>%
    gtsummary::bold_p() %>% 
    gtsummary::bold_labels() 
! `broom::tidy()` failed to tidy the model.
✔ `tidy_parameters()` used instead.
ℹ Add `tidy_fun = broom.helpers::tidy_parameters` to quiet these messages.
Code
tbl_two
Variable AOR 95% CI p-value
Age < 5 Years 7.86 3.66, 19.2 <0.001
Outcome, Died 1.38 0.56, 3.33 0.479
Duration of Hospitalization 0.65 0.41, 0.96 0.043
Respiratory Difficulty 2.02 1.09, 3.76 0.025
Sex, Male 1.05 0.59, 1.91 0.860
Chronic Medical Condition 1.52 0.84, 2.78 0.169
Pneumonia 1.23 0.45, 3.20 0.676
Maternal Educational Level


    None
    Primary 1.93 0.62, 6.60 0.274
    Secondary 1.52 0.43, 5.71 0.519
    Tertiary 1.14 0.29, 4.68 0.855
Paternal Educational Level


    None
    Primary 0.30 0.10, 0.87 0.027
    Secondary 0.51 0.18, 1.44 0.200
    Tertiary 0.47 0.14, 1.56 0.218
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Sensitivity Analysis:

We analyse those who died first

Code
df_missed %>% 
    filter(died == "Yes") %>%
    mutate(
        missed_imm = factor(missed_imm)) %>% 
    select(
        missed_imm, age_years, birth_cohort) %>% 
    gtsummary::tbl_uvregression(
        method = glm,
        y = missed_imm,
        method.args = family(binomial),
        exponentiate = TRUE,
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)) %>% 
    gtsummary::modify_header(
        update = list(
            estimate ~ "**COR**",
            label ~ "**Variable**")) %>% 
    gtsummary::add_global_p() %>% 
    gtsummary::bold_p(t = 0.2) %>% 
    gtsummary::bold_labels()
Variable N COR 95% CI p-value
Age in Years 29 0.82 0.57, 1.02 0.084
Birth Cohort 29

0.124
    2020-2021

    Pre 2020
3,130,272 0.00,

    After 2021
18,781,633 0.00,

Abbreviations: CI = Confidence Interval, OR = Odds Ratio

And then analyse those who did not die

Code
df_missed %>% 
    filter(died == "No") %>% 
    mutate(
        missed_imm = factor(missed_imm)) %>% 
    select(
        missed_imm, age_years, birth_cohort) %>% 
    gtsummary::tbl_uvregression(
        method = glm,
        y = missed_imm,
        method.args = family(binomial),
        exponentiate = TRUE,
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)) %>% 
    gtsummary::modify_header(
        update = list(
            estimate ~ "**COR**",
            label ~ "**Variable**")) %>% 
    gtsummary::add_global_p() %>% 
    gtsummary::bold_p(t = 0.2) %>% 
    gtsummary::bold_labels()
Variable N COR 95% CI p-value
Age in Years 277 0.67 0.57, 0.77 <0.001
Birth Cohort 277

<0.001
    2020-2021

    Pre 2020
1.16 0.19, 22.2
    After 2021
16.1 3.20, 292
Abbreviations: CI = Confidence Interval, OR = Odds Ratio
Note

The sensitivity analysis indicated death did not make a difference

Multivariate analysis using the birth cohort as a covariate instead of age less than 5 years

Then we conduct a multivariate analysis looking for a relationship between missed immunizations and age, correcting for the birth cohort type. The effect we are seeing is co-linearity, and so cannot be used.

Code
df_missed %>% 
    mutate(missed_imm = factor(missed_imm)) %>% 
    select(
         missed_imm, birth_cohort, died, dura_hosp2, resp_diff, 
        male, chronic_condition, pneumonia, maternal_education,
        paternal_education) %>%
    glm(missed_imm ~ ., data = ., family = binomial) %>% 
    gtsummary::tbl_regression(
        exponentiate = T,
        hide_n = TRUE, 
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)) %>% 
    gtsummary::bold_p() %>% 
    gtsummary::bold_labels() 
! `broom::tidy()` failed to tidy the model.
✔ `tidy_parameters()` used instead.
ℹ Add `tidy_fun = broom.helpers::tidy_parameters` to quiet these messages.
Characteristic OR 95% CI p-value
Birth Cohort


    2020-2021
    Pre 2020 1.29 0.21, 25.1 0.817
    After 2021 21.0 3.99, 391 0.004
Outcome, Died


    No
    Yes 1.11 0.43, 2.84 0.822
Duration of Hospitalization 0.65 0.40, 0.99 0.061
Respiratory Difficulty


    No
    Yes 2.08 1.07, 4.10 0.031
Sex, Male


    No
    Yes 1.30 0.69, 2.47 0.420
Chronic Medical Condition


    No
    Yes 1.54 0.81, 2.96 0.185
Pneumonia


    No
    Yes 0.99 0.35, 2.74 0.991
Maternal Educational Level


    None
    Primary 2.05 0.60, 7.75 0.269
    Secondary 1.28 0.32, 5.31 0.729
    Tertiary 0.83 0.18, 3.81 0.806
Paternal Educational Level


    None
    Primary 0.18 0.05, 0.58 0.005
    Secondary 0.40 0.12, 1.25 0.115
    Tertiary 0.47 0.12, 1.73 0.254
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Spline analysis

Showing the non-linear relationship between age and missed immunization

Code
df_missed %>% 
    mutate(
        missed_imm = factor(missed_imm) %>% unclass()-1) %>% 
    ggplot(aes(x = age_years, y = missed_imm)) +
    geom_smooth(method = "gam", formula = y ~ splines::ns(x, 3))+
        labs(x = "Age in years", y = " Probablitly of Missed Immunization")+
    theme_bw()

Univariate analysis with generalised additive modeling (Spline)

Code
X <- 
    df_missed %>% 
    mutate(
        missed_imm = factor(missed_imm) %>% 
            unclass()-1)
gam_age <- mgcv::gam(missed_imm ~ s(age_years), family = binomial, data = X)
tbl_age <- gtsummary::tbl_regression(gam_age, exponentiate = TRUE)
gam_los <- mgcv::gam(missed_imm ~ s(dura_hosp2), family = binomial, data = X)
tbl_los <- gtsummary::tbl_regression(gam_los, exponentiate = TRUE)
tbl_all <-
    X %>% 
    select(
        missed_imm, male, birth_cohort, urban, maternal_education, 
        paternal_education, family_income, died, picu_admission,
        hb_g_dl, chronic_condition, fever, weakness, resp_diff, 
        vomiting, scd, anemia, pneumonia, malaria) %>% 
    gtsummary::tbl_uvregression(
        method = mgcv::gam,
        y = missed_imm,
        method.args = family(binomial),
        exponentiate = TRUE,
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3),
        show_single_row = c(
            chronic_condition, fever, weakness, 
            resp_diff, vomiting, scd, anemia, pneumonia, malaria, 
            picu_admission, died, male, urban), 
        hide_n = TRUE)    

gtsummary::tbl_stack(
    list(tbl_age, tbl_los, tbl_all)) %>% 
    gtsummary::modify_header(
        update = list(
            estimate ~ "**COR**",
            label ~ "**Variable**")) %>% 
    gtsummary::bold_p(t = 0.2) %>% 
    gtsummary::bold_labels()
Variable COR 95% CI p-value
s(age_years)

<0.001
s(dura_hosp2)

0.2
Sex, Male 1.03 0.61, 1.73 0.913
Birth Cohort


    2020-2021
    Pre 2020 1.33 0.16, 11.2 0.791
    After 2021 17.9 2.35, 136 0.005
Urban Residence 1.25 0.74, 2.12 0.406
Maternal Educational Level


    None
    Primary 1.23 0.49, 3.11 0.664
    Secondary 1.32 0.51, 3.41 0.565
    Tertiary 1.16 0.40, 3.35 0.786
Paternal Educational Level


    None
    Primary 0.55 0.24, 1.24 0.148
    Secondary 0.85 0.40, 1.82 0.678
    Tertiary 0.65 0.27, 1.56 0.337
Family Income


    Low
    Middle 0.65 0.35, 1.20 0.169
    High 0.57 0.12, 2.73 0.486
Outcome, Died 2.65 1.21, 5.80 0.015
PICU Admission 1.88 0.60, 5.94 0.280
Hemoglobin (g.dL) 0.99 0.90, 1.09 0.819
Chronic Medical Condition 1.54 0.91, 2.60 0.104
Fever 0.75 0.45, 1.26 0.275
Weakness 0.75 0.44, 1.29 0.303
Respiratory Difficulty 1.93 1.14, 3.28 0.014
Vomiting 1.23 0.66, 2.32 0.515
Sickle Cell Disease 0.73 0.32, 1.66 0.447
Anemia 1.22 0.54, 2.78 0.634
Pneumonia 1.83 0.80, 4.18 0.154
Severe Malaria 0.74 0.29, 1.90 0.534
Abbreviations: CI = Confidence Interval, OR = Odds Ratio

Multivariate analysis using age and duration of hsopitalization as a spline (3rd degree polynomial)

Code
mgcv::gam(
    missed_imm ~ s(age_years) + s(dura_hosp2) + paternal_education + 
        family_income + died + chronic_condition + resp_diff + 
        pneumonia,
        family = binomial(link = "logit"), 
        data = X) %>% 
    gtsummary::tbl_regression(
        exponentiate = TRUE,
        hide_n = FALSE, 
        pvalue_fun = function(x) gtsummary::style_pvalue(x, digits = 3)) %>% 
    gtsummary::modify_header(
        update = list(
            estimate ~ "**AOR**",
            label ~ "**Variable**")) %>%
    gtsummary::bold_p() %>% 
    gtsummary::bold_labels() 
Variable AOR 95% CI p-value
Paternal Educational Level


    None
    Primary 0.31 0.11, 0.89 0.030
    Secondary 0.60 0.21, 1.66 0.322
    Tertiary 0.62 0.16, 2.38 0.482
Family Income


    Low
    Middle 0.53 0.20, 1.37 0.190
    High 0.74 0.10, 5.55 0.768
Outcome, Died


    No
    Yes 1.09 0.42, 2.84 0.866
Chronic Medical Condition


    No
    Yes 1.59 0.82, 3.09 0.169
Respiratory Difficulty


    No
    Yes 1.97 0.99, 3.92 0.052
Pneumonia


    No
    Yes 0.73 0.25, 2.14 0.567
s(age_years)

<0.001
s(dura_hosp2)

0.154
Abbreviations: CI = Confidence Interval, OR = Odds Ratio