library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
library (lubridate)
library (ggplot2)
library (rmarkdown)
library(openxlsx)
library(writexl)
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(eeptools)
## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car
library(ggpubr)
library(readxl)
library(BlandAltmanLeh)
library(openxlsx)
PSFI_df_malnutrition <- read_xlsx("PSFI_final_malnutrition.xlsx")

Create malnutrition categories and rules

PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  filter(case_control == 1)
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%

  mutate(
    wfhz_outlier = wfhz < -5 | wfhz > 5,
    baz_outlier  = baz  < -5 | baz  > 5,
    wfhz_clean = ifelse(wfhz_outlier, NA, wfhz),
    baz_clean  = ifelse(baz_outlier, NA, baz)
  )
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    use_wfh  = age_years < 5,
    use_muac = age_months >= 3 & age_years < 5,
    use_baz  = age_years >= 2,

    has_valid_measure =
      (use_wfh  & !is.na(wfhz)) |
      (use_muac & !is.na(muac)) |
      (use_baz  & !is.na(baz)),

    mam_muac = use_muac & !is.na(muac) & muac >= 11.5 & muac < 12.5,
    sam_muac = use_muac & !is.na(muac) & muac < 11.5,

    mam_wfh = use_wfh & !is.na(wfhz) & wfhz >= -3 & wfhz < -2,
    sam_wfh = use_wfh & (
      (!is.na(wfhz) & wfhz < -3) |
      (!is.na(ht) & ht < 45)
    ),

    overweight_wfh = use_wfh & !is.na(wfhz) & wfhz > 2 & wfhz <= 3,
    obesity_wfh    = use_wfh & !is.na(wfhz) & wfhz > 3,

    mam_baz = use_baz & !is.na(baz) & baz >= -3 & baz < -2,
    sam_baz = use_baz & !is.na(baz) & baz < -3,

    overweight_baz_2_5 = use_baz & age_years <= 5 & !is.na(baz) & baz > 2 & baz <= 3,
    obesity_baz_2_5    = use_baz & age_years <= 5 & !is.na(baz) & baz > 3,

    overweight_baz_gt5 = use_baz & age_years > 5 & !is.na(baz) & baz > 1 & baz <= 2,
    obesity_baz_gt5    = use_baz & age_years > 5 & !is.na(baz) & baz > 2,

    overweight_baz = overweight_baz_2_5 | overweight_baz_gt5,
    obesity_baz    = obesity_baz_2_5 | obesity_baz_gt5,

    mam = mam_wfh | mam_baz | mam_muac,
    sam = sam_wfh | sam_baz | sam_muac,

    overweight = overweight_wfh | overweight_baz,
    obesity    = obesity_wfh | obesity_baz,

    malnutrition_who5 = case_when(
      sam ~ "SAM",
      mam ~ "MAM",
      obesity ~ "Obesity",
      overweight ~ "Overweight",
      has_valid_measure ~ "Normal",
      TRUE ~ NA_character_
    )
  )
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(

    measure_used = case_when(

      sam_muac ~ "MUAC",
      sam_wfh  ~ "WFH",
      sam_baz  ~ "BMI-for-age",

      mam_muac ~ "MUAC",
      mam_wfh  ~ "WFH",
      mam_baz  ~ "BMI-for-age",

      obesity_wfh | overweight_wfh ~ "WFH",
      obesity_baz | overweight_baz ~ "BMI-for-age",

      use_wfh & !is.na(wfhz) ~ "WFH",
      use_baz & !is.na(baz) ~ "BMI-for-age",
      use_muac & !is.na(muac) ~ "MUAC",

      has_valid_measure  ~ "Normal",
      TRUE ~ NA_character_
    )
  )
measure_used_table <- PSFI_df_malnutrition %>%
  count(measure_used) %>%
  mutate(
    percent = round(
      100 * n / sum(n),
      1
    )
  )

measure_used_table
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  filter(
    case_control == 1
  )

overview_table_long <- tibble(
  category = c(
    "SAM",
    "MAM",
    "Overweight",
    "Obesity",
    "Concurrent SAM + Overweight",
    "Concurrent MAM + Overweight",
    "Normal",
    "Total"
  ),

  n = c(
    sum(PSFI_df_malnutrition$sam, na.rm = TRUE),

    sum(
      PSFI_df_malnutrition$mam &
      !PSFI_df_malnutrition$sam,
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$overweight &
      !PSFI_df_malnutrition$obesity &
      !PSFI_df_malnutrition$sam &
      !PSFI_df_malnutrition$mam,
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$obesity &
      !PSFI_df_malnutrition$sam &
      !PSFI_df_malnutrition$mam,
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$sam &
      PSFI_df_malnutrition$overweight,
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$mam &
      PSFI_df_malnutrition$overweight &
      !PSFI_df_malnutrition$sam,
      na.rm = TRUE
    ),

    sum(
      !PSFI_df_malnutrition$sam &
      !PSFI_df_malnutrition$mam &
      !PSFI_df_malnutrition$overweight &
      !PSFI_df_malnutrition$obesity,
      na.rm = TRUE
    ),

    nrow(PSFI_df_malnutrition)
  )
) %>%
  mutate(
    percent = round(100 * n / nrow(PSFI_df_malnutrition), 1)
  )

overview_table_long
measure_table <- tibble(

  measure = c(
    "WFH used",
    "MUAC used",
    "BMI-for-age used",
    "WFH + MUAC overlap",
    "WFH + BMI overlap",
    "MUAC + BMI overlap",
    "All three measures"
  ),

  n = c(

    sum(
      PSFI_df_malnutrition$use_wfh &
      !is.na(PSFI_df_malnutrition$wfhz),
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$use_muac &
      !is.na(PSFI_df_malnutrition$muac),
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$use_baz &
      !is.na(PSFI_df_malnutrition$baz),
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$use_wfh &
      !is.na(PSFI_df_malnutrition$wfhz) &
      PSFI_df_malnutrition$use_muac &
      !is.na(PSFI_df_malnutrition$muac),
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$use_wfh &
      !is.na(PSFI_df_malnutrition$wfhz) &
      PSFI_df_malnutrition$use_baz &
      !is.na(PSFI_df_malnutrition$baz),
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$use_muac &
      !is.na(PSFI_df_malnutrition$muac) &
      PSFI_df_malnutrition$use_baz &
      !is.na(PSFI_df_malnutrition$baz),
      na.rm = TRUE
    ),

    sum(
      PSFI_df_malnutrition$use_wfh &
      !is.na(PSFI_df_malnutrition$wfhz) &
      PSFI_df_malnutrition$use_muac &
      !is.na(PSFI_df_malnutrition$muac) &
      PSFI_df_malnutrition$use_baz &
      !is.na(PSFI_df_malnutrition$baz),
      na.rm = TRUE
    )
  )
) %>%

  mutate(
    percent = round(
      100 * n / nrow(PSFI_df_malnutrition),
      1
    )
  )

measure_table

WHO5

PSFI_df_malnutrition <- PSFI_df_malnutrition

bar_x <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_who5 = factor(
      malnutrition_who5,
      levels = c(
        "SAM",
        "MAM",
        "Normal",
        "Overweight",
        "Obesity"
      )
    )
  ) %>%
  count(malnutrition_who5, .drop = FALSE)

print(bar_x)
## # A tibble: 6 × 2
##   malnutrition_who5     n
##   <fct>             <int>
## 1 SAM                 154
## 2 MAM                 107
## 3 Normal              378
## 4 Overweight           55
## 5 Obesity              60
## 6 <NA>                  1
barplot(
  bar_x$n,
  names.arg = bar_x$malnutrition_who5,
  col = rgb(1, 0.8, 0.8, 1),
  border = "white",
  ylab = "Count",
  las = 2,
  cex.names = 0.8
)

malnutrition_who5_table <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_who5 = factor(
      malnutrition_who5,
      levels = c(
        "SAM",
        "MAM",
        "Normal",
        "Overweight",
        "Obesity"
      )
    )
  ) %>%
  count(malnutrition_who5, .drop = FALSE) %>%
  mutate(
    percent = round(100 * n / sum(n), 1)
  )

malnutrition_who5_table
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(

    # WHO5-style hierarchical category
    # 0 = Normal
    # 1 = MAM
    # 2 = SAM
    # 3 = Overweight
    # 4 = Obesity

    malnutrition_who5 = case_when(

      sam ~ 2L,

      mam ~ 1L,

      obesity ~ 4L,

      overweight ~ 3L,
      
      has_valid_measure ~ 0L,
      TRUE ~ NA_integer_
    ),

    # Optional labels
    malnutrition_who5_label = factor(
      case_when(
        malnutrition_who5== 0 ~ "Normal",
        malnutrition_who5 == 1 ~ "MAM",
        malnutrition_who5 == 2 ~ "SAM",
        malnutrition_who5 == 3 ~ "Overweight",
        malnutrition_who5 == 4 ~ "Obesity"
      ),
      levels = c(
        "SAM",
        "MAM",
        "Normal",
        "Overweight",
        "Obesity"
      )
    )
  )

WHO3

PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(

    malnutrition_who3 = case_when(
      is.na(malnutrition_who5) ~ NA_integer_,
      malnutrition_who5 == 1 ~ 1L,                 # Malnutrition
      malnutrition_who5 == 2 ~ 1L,                 # Malnutrition
      malnutrition_who5 == 3 ~ 2L,          # Overweight
      malnutrition_who5 == 4 ~ 2L,             # Overweight
      TRUE ~ 0L                 # Normal
    ),

    malnutrition_who3_label = factor(
      case_when(
        malnutrition_who3 == 0 ~ "Normal",
        malnutrition_who3 == 1 ~ "Malnutrition",
        malnutrition_who3 == 2 ~ "Overweight"
      ),
      levels = c(
        "Malnutrition",
        "Normal",
        "Overweight"
      )
    )
  )
PSFI_df_malnutrition <- PSFI_df_malnutrition


bar_x <- PSFI_df_malnutrition %>%
  count(malnutrition_who3_label, .drop = FALSE)

print(bar_x)
## # A tibble: 4 × 2
##   malnutrition_who3_label     n
##   <fct>                   <int>
## 1 Malnutrition              261
## 2 Normal                    378
## 3 Overweight                115
## 4 <NA>                        1
barplot(
  bar_x$n,
  names.arg = bar_x$malnutrition_who3_label,
  col = rgb(1, 0.8, 0.8, 1),
  border = "white",
  ylab = "Count",
  las = 2,
  cex.names = 0.9
)

# =========================================
# WHO3 TABLE
# =========================================

malnutrition_cat_table_who3 <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_who3_label = factor(
      malnutrition_who3_label,
      levels = c(
        "Malnutrition",
        "Normal",
        "Overweight"
      )
    )
  ) %>%
  count(malnutrition_who3_label, .drop = FALSE) %>%
  mutate(
    percent = round(
      100 * n / sum(n),
      1
    )
  )

malnutrition_cat_table_who3
PSFI_df_malnutrition %>%
  summarise(
    missing_who5 = sum(is.na(malnutrition_who5)),
    missing_who3 = sum(is.na(malnutrition_who3))
  )
write.xlsx(PSFI_df_malnutrition, file = "PSFI_final_malnutrition2.xlsx")

Post analysis

discordant_cases <- PSFI_df_malnutrition %>%
  filter(

    # malnut_mod_sev = 1 but WHO5 says Normal/Overweight/Obesity
    (malnut_mod_sev == 1 &
       malnutrition_who5 %in% c(0, 3, 4)) |

    # malnut_mod_sev = 0 but WHO5 says MAM/SAM
    (malnut_mod_sev == 0 &
       malnutrition_who5 %in% c(1, 2))
  ) %>%

  select(
    record_id,
    malnut_mod_sev,
    malnutrition_who3_label, 
    malnutrition_who5_label,
    measure_used,
    age_months,
    muac,
    baz,
    bmiz,
    wfhz, 
    whz,
    sam, 
    ht, 
    wt, 
    ss_skin___5, 

  )

discordant_cases
discordant_summary <- discordant_cases %>%
  count(
    malnut_mod_sev,
    malnutrition_who5_label
  )

discordant_summary
malnutrition_categories_table <- PSFI_df_malnutrition %>%
  select(
    subject_id,
    record_id,
    malnut_mod_sev,
    malnutrition_who3,
    malnutrition_who5, 
    malnutrition_who3_label, 
    malnutrition_who5_label
  )

malnutrition_categories_table
write.xlsx(
  list(
    malnutrition_categories_table = malnutrition_categories_table,
    discordant_cases = discordant_cases
  ),
  file = "malnutrition_categories_v2.xlsx",
  overwrite = TRUE
)