library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.3     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
library (lubridate)
library (ggplot2)
library (rmarkdown)
library(openxlsx)
library(writexl)
library(rstatix)
## 
## Attaching package: 'rstatix'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(eeptools)
## Registered S3 method overwritten by 'lme4':
##   method           from
##   na.action.merMod car
library(ggpubr)
library(readxl)
library(BlandAltmanLeh)
library(openxlsx)
PSFI_df_malnutrition <- read_xlsx("PSFI_final_malnutrition.xlsx")

Create malnutrition z score

# ASSIGNS Z-SCORE TO MUAC BASED ON THE 11.5 & 12.5CM LIMITS, THIS IS A PROXY AS LATER A Z-SCORE OF 0 WILL EQUAL NO MALNUTRITION, <-2 WILL EQUAL MODERATE MALNUTRITION AND <-3 WILL EQUAL SEVERE MALNUTRITION
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    muacz1 = case_when(
      muac >= 12.5 ~ 0,
      muac >= 11.5 & muac < 12.5 ~ -2.5,
      muac < 11.5 ~ -4,
      TRUE ~ NA_real_
    )
  )
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    zscore_unified = case_when(
      age_group == 0 ~ wfhz,
      
      age_group == 1 & age_years < 2 ~ ifelse(
        is.na(wfhz) & is.na(muacz1), NA_real_,
        ifelse(
          is.na(muacz1) | (!is.na(wfhz) & abs(wfhz) >= abs(muacz1)),
          wfhz,
          muacz1
        )
      ),
      
      age_group == 1 & age_years >= 2 ~ ifelse(
        is.na(wfhz) & is.na(muacz1) & is.na(baz), NA_real_,
        ifelse(
          is.na(muacz1) & is.na(baz), wfhz,
          ifelse(
            is.na(baz) | (!is.na(wfhz) & abs(wfhz) >= abs(muacz1) & abs(wfhz) >= abs(baz)),
            wfhz,
            ifelse(
              is.na(baz) | (!is.na(muacz1) & abs(muacz1) >= abs(baz)),
              muacz1,
              baz
            )
          )
        )
      ),
      
      age_group == 2 ~ baz,
      TRUE ~ NA_real_
    )
  )
#Creates a filter for extreme z-scores
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    who_outlier = zscore_unified < -5 | zscore_unified > 5
  )
subset_zscore <- PSFI_df_malnutrition %>%
  filter(case_control == 1 & who_outlier == FALSE) %>%
  pull(zscore_unified)

# SUMMARY OF Z SCORE UNIFIED (CASES, NO OUTLIERS)
summary(subset_zscore)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -5.0000 -2.2200 -0.5450 -0.5281  1.0750  4.9200
# NUMBER OF CASES IN Z SCORE UNIFIED ()
length(subset_zscore)
## [1] 688
# Creates an extra category that outlines which anthropometric measure was used to display the z-score
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_source = case_when(
      age_group == 0 ~ "WFH",
      
      age_group == 1 & age_years < 2 ~ ifelse(
        is.na(wfhz) & is.na(muacz1), NA_character_,
        ifelse(
          is.na(muacz1) | (!is.na(wfhz) & abs(wfhz) >= abs(muacz1)),
          "WFH",
          "MUAC"
        )
      ),
      
      age_group == 1 & age_years >= 2 ~ ifelse(
        is.na(wfhz) & is.na(muacz1) & is.na(baz), NA_character_,
        ifelse(
          is.na(muacz1) & is.na(baz), "WFH",
          ifelse(
            is.na(baz) | (!is.na(wfhz) & abs(wfhz) >= abs(muacz1) & abs(wfhz) >= abs(baz)),
            "WFH",
            ifelse(
              is.na(baz) | (!is.na(muacz1) & abs(muacz1) >= abs(baz)),
              "MUAC",
              "BFA"
            )
          )
        )
      ),
      
      age_group == 2 ~ "BFA",
      TRUE ~ NA_character_
    )
  )
# SUMMARY OF MALNUTRITION SOURCE

malnutrition_source_summary <- PSFI_df_malnutrition %>%
  filter(case_control == 1, who_outlier == FALSE) %>%
  count(malnutrition_source) %>%
  mutate(
    percent = round(100 * n / sum(n), 1)
  )

malnutrition_source_summary
# CREATES MALNUTRITION CATEGORY BASED ON UNIFIED Z SCORES
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_full = case_when(
  is.na(zscore_unified) ~ NA_integer_,
  zscore_unified < -3 ~ 3L, 
  zscore_unified >= -3 & zscore_unified < -2 ~ 2L,
  zscore_unified >= -2 & zscore_unified < -1  ~ 1L, 
  zscore_unified >= -1 & zscore_unified <= 1  ~ 0L, 
  zscore_unified > 1 & zscore_unified <=2  ~ 4L, 
  zscore_unified > 2 & zscore_unified <=3  ~ 5L, 
  zscore_unified > 3  ~ 6L 
    )
  )

WHO_5

PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_who5 = case_when(
      
      is.na(malnutrition_full) ~ NA_integer_,
      
      malnutrition_source == "BFA" &
        age_years < 5 &
        malnutrition_full == 4 ~ 3L, # overweight
      
      malnutrition_source == "BFA" &
        age_years < 5 &
        malnutrition_full == 5 ~ 4L, # obesity
      
-      malnutrition_full == 0 ~ 0L, # no malnutrition
      malnutrition_full == 1 ~ 0L, # no malnutrition
      malnutrition_full == 2 ~ 1L, # moderate malnutrition
      malnutrition_full == 3 ~ 2L, # severe malnutrition
      malnutrition_full == 4 ~ 0L, # no malnutrition
      malnutrition_full == 5 ~ 3L, # overweight
      malnutrition_full == 6 ~ 4L, # obesity
      
      TRUE ~ NA_integer_
    )
  )
PSFI_cases <- PSFI_df_malnutrition %>%
  filter(case_control == 1, who_outlier == FALSE)

x <- PSFI_cases %>%
  pull(zscore_unified)

xmin <- -5
xmax <- 5

bar_x <- PSFI_cases %>%
  mutate(
    malnutrition_who5_label = factor(
      case_when(
        malnutrition_who5 == 0 ~ "No malnutrition",
        malnutrition_who5 == 1 ~ "Moderate malnutrition",
        malnutrition_who5 == 2 ~ "Severe malnutrition",
        malnutrition_who5 == 3 ~ "Overweight",
        malnutrition_who5 == 4 ~ "Obesity",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Severe malnutrition",
        "Moderate malnutrition",
        "No malnutrition",
        "Overweight",
        "Obesity"
      )
    )
  ) %>%
  count(malnutrition_who5_label, .drop = FALSE)

print(bar_x)
## # A tibble: 5 × 2
##   malnutrition_who5_label     n
##   <fct>                   <int>
## 1 Severe malnutrition        97
## 2 Moderate malnutrition      96
## 3 No malnutrition           396
## 4 Overweight                 59
## 5 Obesity                    40
layout(
  mat = matrix(c(1, 2), 2, 1, byrow = TRUE),
  heights = c(1, 8)
)

par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  x,
  horizontal = TRUE,
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE,
  xlim = c(xmin, xmax)
)

par(mar = c(8, 3.1, 1.1, 2.1))
barplot(
  bar_x$n,
  names.arg = bar_x$malnutrition_who5_label,
  col = rgb(1, 0.8, 0.8, 1),
  border = "white",
  main = "",
  ylab = "Count",
  las = 2,
  cex.names = 0.8
)

PSFI_cases <- PSFI_df_malnutrition %>%
  filter(case_control == 1, who_outlier == FALSE) %>%
  mutate(
    malnutrition_who5_label = case_when(
      malnutrition_who5 == 0 ~ "No malnutrition",
      malnutrition_who5 == 1 ~ "Moderate malnutrition",
      malnutrition_who5 == 2 ~ "Severe malnutrition",
      malnutrition_who5 == 3 ~ "Overweight",
      malnutrition_who5 == 4 ~ "Obesity",
      TRUE ~ NA_character_
    )
  )

malnutrition_cat_table_who5 <- PSFI_cases %>%
  group_by(malnutrition_who5_label) %>%
  summarise(
    n = n(),
    percent = round(100 * n() / nrow(PSFI_cases), 1),
    .groups = "drop"
  ) %>%
  arrange(
    factor(
      malnutrition_who5_label,
      levels = c(
        "Severe malnutrition",
        "Moderate malnutrition",
        "No malnutrition",
        "Overweight",
        "Obesity"
      )
    )
  )

malnutrition_cat_table_who5

WHO_3

PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
  mutate(
    malnutrition_who3 = case_when(
      
      is.na(malnutrition_full) ~ NA_integer_,
      
      # BMI-specific WHO 0-5y overweight/obesity rules
      malnutrition_source == "BFA" &
        age_years < 5 &
        malnutrition_full == 4 ~ 2L, # overweight
      
      # Standard recoding
      malnutrition_full == 0 ~ 0L, # no malnutrition
      malnutrition_full == 1 ~ 0L, # no malnutrition
      malnutrition_full == 2 ~ 1L, # malnutrition
      malnutrition_full == 3 ~ 1L, # malnutrition
      malnutrition_full == 4 ~ 0L, # no malnutrition
      malnutrition_full == 5 ~ 2L, # overweight
      malnutrition_full == 6 ~ 2L, # overweight
      
      TRUE ~ NA_integer_
    )
  )
# Creates a histogram-like bar plot of malnutrition categories based on malnutrition_who3

PSFI_cases <- PSFI_df_malnutrition %>%
  filter(case_control == 1, who_outlier == FALSE)

x <- PSFI_cases %>%
  pull(zscore_unified)

xmin <- -5
xmax <- 5

cat_counts <- PSFI_cases %>%
  mutate(
    malnutrition_who3_label = case_when(
      malnutrition_who3 == 0 ~ "No malnutrition",
      malnutrition_who3 == 1 ~ "Malnutrition",
      malnutrition_who3 == 2 ~ "Overweight",
      TRUE ~ NA_character_
    )
  ) %>%
  count(malnutrition_who3_label)

print(cat_counts)
## # A tibble: 3 × 2
##   malnutrition_who3_label     n
##   <chr>                   <int>
## 1 Malnutrition              193
## 2 No malnutrition           396
## 3 Overweight                 99
layout(
  mat = matrix(c(1, 2), 2, 1, byrow = TRUE),
  heights = c(1, 8)
)

par(mar = c(0, 3.1, 1.1, 2.1))

boxplot(
  x,
  horizontal = TRUE,
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE,
  xlim = c(xmin, xmax)
)

par(mar = c(6, 3.1, 1.1, 2.1))

bar_x <- PSFI_cases %>%
  mutate(
    malnutrition_who3_label = factor(
      case_when(
        malnutrition_who3 == 0 ~ "No malnutrition",
        malnutrition_who3 == 1 ~ "Malnutrition",
        malnutrition_who3 == 2 ~ "Overweight",
        TRUE ~ NA_character_
      ),
      levels = c("Malnutrition", "No malnutrition", "Overweight")
    )
  ) %>%
  filter(!is.na(malnutrition_who3_label)) %>%
  count(malnutrition_who3_label, .drop = FALSE)

barplot(
  bar_x$n,
  names.arg = bar_x$malnutrition_who3_label,
  col = rgb(1, 0.8, 0.8, 1),
  border = "white",
  main = "",
  xlab = "",
  ylab = "Count",
  las = 2,
  cex.names = 0.9
)

PSFI_cases <- PSFI_df_malnutrition %>%
  filter(case_control == 1, who_outlier == FALSE) %>%
  mutate(
    malnutrition_who3_label = case_when(
      malnutrition_who3 == 0 ~ "No malnutrition",
      malnutrition_who3 == 1 ~ "Malnutrition",
      malnutrition_who3 == 2 ~ "Overweight",
      TRUE ~ NA_character_
    )
  )

malnutrition_cat_table_who3 <- PSFI_cases %>%
  group_by(malnutrition_who3_label) %>%
  summarise(
    n = n(),
    percent = round(100 * n() / nrow(PSFI_cases), 1),
    .groups = "drop"
  ) %>%
  arrange(
    factor(
      malnutrition_who3_label,
      levels = c(
        "Malnutrition",
        "No malnutrition",
        "Overweight"
        )
    )
  )

malnutrition_cat_table_who3
write.xlsx(
  list(
    WHO_5 = malnutrition_cat_table_who5,
    WHO_3 = malnutrition_cat_table_who3
    ),
  file = "malnutrition_categories_original.xlsx",
  overwrite = TRUE
)
who_export <- PSFI_df_malnutrition %>%
  select(
    subject_id,
    record_id,
    malnutrition_who5,
    malnutrition_who3
  ) %>%
  rename(
    WHO_5_category = malnutrition_who5,
    WHO_3_category = malnutrition_who3
  )

write.csv(
  who_export,
  file = "WHO_malnutrition_categories.csv",
  row.names = FALSE
)

write.xlsx(
  who_export,
  file = "WHO_malnutrition_categories.xlsx",
  overwrite = TRUE
)
## Warning in file.create(to[okay]): cannot create file
## 'WHO_malnutrition_categories.xlsx', reason 'Permission denied'
PSFI_df_malnutrition %>%
  summarise(
    missing_who5 = sum(is.na(malnutrition_who5)),
    missing_who3 = sum(is.na(malnutrition_who3))
  )
view(PSFI_df_malnutrition)