1. Defining malnutrition
1.1. Prerequisites
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.3 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(zscorer)
library(readr)
library(dplyr)
library (lubridate)
library (ggplot2)
library (rmarkdown)
library(openxlsx)
library(writexl)
library(rstatix)
##
## Attaching package: 'rstatix'
##
## The following object is masked from 'package:stats':
##
## filter
library(eeptools)
## Registered S3 method overwritten by 'lme4':
## method from
## na.action.merMod car
library(ggpubr)
PSFI_df_malnutrition <- read_csv("Ben_cut_4_27.csv")
## Rows: 858 Columns: 959
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (241): poct_emd_initials, poct_hiv_result1, poct_hiv_result2, poct_hiv_r...
## dbl (717): record_id, hiv_pos, poct_malaria_pos, poct_istat___1, poct_istat_...
## lgl (1): lab_hco3
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# CREATE A NEW CATEGORY THAT CALCULATES AGE IN DAYS BASED ON DATE OF BIRTH AND DATE OF PRESENTATION
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
date_present = as.POSIXct(date_present, format = "%d%b%Y:%H:%M:%S", tz = "UTC"),
dob = mdy(dob),
age_days_exact = as.numeric(difftime(date_present, dob, units = "days"))
)
summary(PSFI_df_malnutrition$age_days_exact)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 32.03 289.19 604.61 1124.44 1461.64 5452.40
1.2. Anthroprometric analysis
# CREATE NEW CATEGORY THAT CALCULATES AGE IN YEARS BASED ON AGE IN DAYS
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(age_years = age_days_exact / 365.25)
summary(PSFI_df_malnutrition$age_years)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.08768 0.79176 1.65535 3.07854 4.00176 14.92786
# CREATES A SUMMARY OF CASES (ht=height, wt=weight, age_years= age in years, muac)
PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
select(ht, wt, age_years, muac) %>%
summary()
## ht wt age_years muac
## Min. : 20.00 Min. : 3.20 Min. : 0.08768 Min. : 9.00
## 1st Qu.: 67.00 1st Qu.: 7.00 1st Qu.: 0.73436 1st Qu.:14.00
## Median : 78.00 Median : 9.50 Median : 1.40141 Median :15.00
## Mean : 85.23 Mean :12.17 Mean : 2.83539 Mean :15.38
## 3rd Qu.: 98.00 3rd Qu.:14.00 3rd Qu.: 3.49267 3rd Qu.:17.00
## Max. :192.00 Max. :72.00 Max. :14.92786 Max. :27.00
## NAs :1 NAs :2
# CREATES A HISTOGRAM AND BOXPLOT OF HEIGHT (CASES)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$ht[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(0,200), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$ht[PSFI_df_malnutrition$case_control == 1]
, breaks=40 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Height (cm)", xlim=c(0,200))

# CREATES A HISTOGRAM AND BOXPLOT OF WEIGHT (CASES)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$wt[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(0,75), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$wt[PSFI_df_malnutrition$case_control == 1]
, breaks=40 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Weight (kg)", xlim=c(0,75))

# CREATES A HISTOGRAM AND BOXPLOT OF MUAC (CASES)
## BLUE LINE OUTLINES MODERATE MALNUTRITION (12.5cm)
## RED LINE OUTLINES SEVERE MALNUTRITION (11.5cm)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$muac[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(5,30), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$muac[PSFI_df_malnutrition$case_control == 1]
, breaks=20 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Mid-upper arm circumference (cm)", xlim=c(5,30))
abline(v = 11.5, col = "red", lwd = 2, lty = 2) # severe
abline(v = 12.5, col = "blue", lwd = 2, lty = 2) # moderate

# HISTOGRAM AND BOXPLOT OF AGE (CASES)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$age_years[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(0,15), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$age_years[PSFI_df_malnutrition$case_control == 1]
, breaks=40 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Age (years)", xlim=c(0,15))

1.3. Z-scorer
# CREATES A NEW CATEGORY DEFINING SEX AS 1/2, INSTEAD OF 0/1 (NECESSARY FOR ZSCORER PACKAGE)
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(sex_who = if_else(sex == 1, 1, 2))
summary(PSFI_df_malnutrition$sex_who)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 1.000 1.427 2.000 2.000
# CREATES A NEW CATEGORY CALCULATING AGE IN MONTHS BASED ON AGE IN DAYS
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(age_months = age_days_exact / 30.4375)
summary(PSFI_df_malnutrition$age_months)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.052 9.501 19.864 36.942 48.021 179.134
# CREATES A NEW CATEGORY (AGE<6M = 0, AGE 6M - 5J = 1, AGE > 5J = 2)
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
age_group = case_when(
age_months < 6 ~ 0L,
age_months >= 6 & age_years < 5 ~ 1L,
age_years >= 5 ~ 2L,
TRUE ~ NA_integer_
)
)
# ZSCORER PACKAGE CALCULATES ZSCORE OF WEIGHT FOR LENGTH (wflz), WEIGHT FOR AGE (wfaz), HEIGHT FOR AGE (hfaz), WEIGHT FOR HEIGHT (wfhz), BMI FOR AGE (baz)
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
wflz = addWGSR(
data = .,
sex = "sex_who",
firstPart = "wt",
secondPart = "ht",
index = "wfl"
)$wflz,
wfaz = addWGSR(
data = .,
sex = "sex_who",
firstPart = "wt",
secondPart = "age_days_exact",
index = "wfa"
)$wfaz,
hfaz = addWGSR(
data = .,
sex = "sex_who",
firstPart = "ht",
secondPart = "age_days_exact",
index = "hfa"
)$hfaz,
wfhz = addWGSR(
data = .,
sex = "sex_who",
firstPart = "wt",
secondPart = "ht",
index = "wfh"
)$wfhz,
baz = addWGSR(
data = .,
sex = "sex_who",
firstPart = "wt",
secondPart = "ht",
thirdPart = "age_days_exact",
index = "bfa"
)$bfaz
)
## ================================================================================
## ================================================================================
## ================================================================================
## ================================================================================
## ================================================================================
# ASSIGNS Z-SCORE TO MUAC BASED ON THE 11.5 & 12.5CM LIMITS, THIS IS A PROXY SINCE LATER A Z-SCORE OF 0 WILL EQUAL NO MALNUTRITION, -2.5 WILL EQUAL MODERATE MALNUTRITION AND -4 WILL EQUAL SEVERE MALNUTRITION
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
muacz = case_when(
muac >= 12.5 ~ 0,
muac >= 11.5 & muac < 12.5 ~ -2.5,
muac < 11.5 ~ -4,
TRUE ~ NA_real_
)
)
# CREATE MALNUTRITION ZSCORE BASED ON WFHL, BAZ, MUACZ (to be defined later)
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
zscore_unified = case_when(
age_group == 1 & ht >= 45 & ht < 65 ~ wflz,
age_group == 1 & ht >= 65 & ht < 120 ~ wfhz,
age_group == 1 & (ht < 45 | ht >= 120 | is.na(ht)) ~ muacz,
age_group == 2 ~ baz,
TRUE ~ NA_real_
)
)
# CREATE MALNUTRITION CATEGORY BASED ON PREVIOUS Z SCORE
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
malnutrition = case_when(
is.na(zscore_unified) ~ NA_integer_,
zscore_unified < -3 ~ 2L,
zscore_unified >= -3 & zscore_unified < -2 ~ 1L,
TRUE ~ 0L
)
)
# ADD A MALNUTRITION SOURCE SO WE KNOW WHICH ANTHROPOMETRIC MEASURE IS BEING USED TO DEFINE MALNUTRITION
PSFI_df_malnutrition <- PSFI_df_malnutrition %>%
mutate(
malnutrition_source = case_when(
age_group == 1 & ht >= 45 & ht < 65 ~ "WFL",
age_group == 1 & ht >= 65 & ht < 120 ~ "WFH",
age_group == 1 & (ht < 45 | ht >= 120 | is.na(ht)) ~ "MUAC",
age_group == 2 ~ "BFA",
TRUE ~ NA_character_
)
)
1.4 Z score check
subset_wfaz <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
pull(wfaz)
# SUMMARY OF WEIGHT FOR AGE Z-SCORE (CASES)
summary(subset_wfaz)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## -7.5400 -2.3050 -0.9700 -1.0971 0.0475 10.2500 49
# NUMBER OF CASES IN WEIGHT FOR AGE Z SCORE
length(subset_wfaz)
## [1] 755
subset_wfaz1 <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(wfaz) %>%
pull(wfaz)
# NUMBER OF OUTLIERS IN WEIGHT FOR AGE Z SCORE
length(subset_wfaz1)
## [1] 10
subset_hfaz <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
pull(hfaz)
# SUMMARY OF HEIGHT FOR AGE Z SCORE (CASES)
summary(subset_hfaz)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## -18.2600 -2.8700 -1.1800 -0.9338 0.6350 44.5600 1
# NUMBER OF CASES IN HEIGHT FOR AGE Z SCORE
length(subset_hfaz)
## [1] 755
subset_hfaz1 <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(hfaz) %>%
pull(hfaz)
# NUMBER OF OUTLIERS FOR HEIGHT FOR AGE Z SCORE
length(subset_hfaz1)
## [1] 29
subset_wfhz <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
pull(wfhz)
# SUMMARY OF WEIGHT FOR HEIGHT Z SCORE (CASES)
summary(subset_wfhz)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## -10.5900 -2.2300 -0.5700 -0.4132 1.1875 42.7700 93
# NUMBER OF CASES IN WEIGHT FOR HEIGHT Z SCORE
length(subset_wfhz)
## [1] 755
subset_wfhz1 <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(wfhz) %>%
pull(wfhz)
# NUMBER OF OUTLIERS IN WEIGHT FOR HEIGHT Z SCORE
length(subset_wfhz1)
## [1] 20
subset_baz <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
pull(baz)
# SUMMARY OF BMI FOR AGE Z SCORE (CASES)
summary(subset_baz)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## -12.560 -2.250 -0.695 -0.583 1.060 50.500 1
# NUMBER OF CASES IN BMI FOR AGE Z SCORE
length(subset_baz)
## [1] 755
subset_baz1 <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(baz) %>%
pull(baz)
# NUMBER OF OUTLIERS IN BMI FOR AGE Z SCORE
length(subset_baz1)
## [1] 29
subset_muac <- PSFI_df_malnutrition %>%
filter(age_group == 1 & case_control == 1) %>%
pull(muac)
# SUMMARY OF MUAC (CASES, 6-59 MONTHS)
summary(subset_muac)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
## 9.00 14.00 15.00 15.02 16.00 23.00 2
# NUMBER OF CASES IN MUAC
length(subset_muac)
## [1] 535
subset_muac1 <- PSFI_df_malnutrition %>%
filter(age_group == 1 & case_control == 1) %>%
identify_outliers(muac) %>%
pull(muac)
# NUMBER OF OUTLIERS IN MUAC
length(subset_muac1)
## [1] 18
# CREATES BOXPLOT AND HISTOGRAM FOR WEIGHT FOR HEIGHT Z SCORE (CASES)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$wfhz[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(-11,43), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$wfhz[PSFI_df_malnutrition$case_control == 1]
, breaks=50 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Weight-for-height (z-score)", xlim=c(-11,43))

# CREATES BOXPLOT AND HISTOGRAM OF WEIGHT FOR AGE (CASES)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$wfaz[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(-8,11), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$wfaz[PSFI_df_malnutrition$case_control == 1]
, breaks=50 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Weight-for-age (z-score)", xlim=c(-8,11))

# CREATES BOXPLOT AND HISTOGRAM OF BMI FOR AGE (CASES)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$baz[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(-13,51), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$baz[PSFI_df_malnutrition$case_control == 1]
, breaks=50 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="BMI-for-age (z-score)", xlim=c(-13,51))

# CREATE BOXPLOT AND HISTOGRAM OF HEIGHT FOR AGE
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$hfaz[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(-19,45), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$hfaz[PSFI_df_malnutrition$case_control == 1]
, breaks=50 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Height-for-age (z-score)", xlim=c(-19,45))

# CREATES HISTOGRAM AND BOXPLOT OF MUAC (CASES, 6-59 MONTHS)
layout(mat = matrix(c(1,2),2,1, byrow=TRUE), height = c(1,8))
par(mar=c(0, 3.1, 1.1, 2.1))
boxplot(PSFI_df_malnutrition$muac[PSFI_df_malnutrition$case_control == 1] , horizontal=TRUE , ylim=c(5,30), xaxt="n" , col=rgb(0.8,0.8,0,0.5) , frame=F)
par(mar=c(4, 3.1, 1.1, 2.1))
hist(PSFI_df_malnutrition$muac[PSFI_df_malnutrition$case_control == 1]
, breaks=20 , col=rgb(1,0.8,0.8,1) , border=F , main="" , xlab="Mid-upper arm circumference (cm)", xlim=c(5,30))
abline(v = 11.5, col = "red", lwd = 2, lty = 2) # severe
abline(v = 12.5, col = "blue", lwd = 2, lty = 2) # moderate

# MAKES A TABLE OF OUTLIERS FOR WEIGHT FOR HEIGHT
wfhz_outliers <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(wfhz) %>%
select(record_id, wt, ht, wfhz, age_months, is.outlier, is.extreme)
wfhz_outliers
## # A tibble: 20 × 7
## record_id wt ht wfhz age_months is.outlier is.extreme
## <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 26 45 115 9 145. TRUE FALSE
## 2 51 20 68 12.9 35.7 TRUE TRUE
## 3 56 4 70 -7.99 14.9 TRUE FALSE
## 4 140 8.45 55 7.71 6.26 TRUE FALSE
## 5 151 30 57 42.8 121. TRUE TRUE
## 6 154 7 100 -7.63 13.5 TRUE FALSE
## 7 203 39 115 7.61 86.9 TRUE FALSE
## 8 277 4 88 -10.6 26.4 TRUE FALSE
## 9 318 4.10 70 -7.8 12.7 TRUE FALSE
## 10 412 8.5 56 6.39 7.82 TRUE FALSE
## 11 430 6 86 -7.48 6.16 TRUE FALSE
## 12 617 14 62 9.76 54.5 TRUE FALSE
## 13 653 11 54 13.8 13.0 TRUE TRUE
## 14 655 10 56 8.84 7.54 TRUE FALSE
## 15 657 10 58 7.78 13.4 TRUE FALSE
## 16 663 21 75 10.7 93.2 TRUE FALSE
## 17 665 12.5 60 10.0 33.3 TRUE FALSE
## 18 672 14 69 6.77 23.1 TRUE FALSE
## 19 734 10 56 9.6 12.4 TRUE FALSE
## 20 885 16.5 71 8.09 21.0 TRUE FALSE
# MAKES A TABLE OF OUTLIERS FOR WEIGHT FOR AGE
wfaz_outliers <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(wfaz) %>%
select(record_id,ht, wt, age_months, wfaz, is.outlier, is.extreme)
wfaz_outliers
## # A tibble: 10 × 7
## record_id ht wt age_months wfaz is.outlier is.extreme
## <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 56 70 4 14.9 -6.89 TRUE FALSE
## 2 113 94 12 121. -6.02 TRUE FALSE
## 3 277 88 4 26.4 -7.54 TRUE FALSE
## 4 318 70 4.10 12.7 -6.55 TRUE FALSE
## 5 369 118 36 74.8 3.75 TRUE FALSE
## 6 486 74 12 7.15 3.63 TRUE FALSE
## 7 487 156 18 4.59 10.2 TRUE TRUE
## 8 552 89 17 19.4 3.8 TRUE FALSE
## 9 603 54 6 31.2 -5.97 TRUE FALSE
## 10 907 68 4.30 10.4 -6 TRUE FALSE
# MAKES A TABLE OF OUTLIERS FOR HEIGHT FOR AGE
hfaz_outliers <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(hfaz) %>%
select(record_id, ht, wt, age_months, hfaz, is.outlier, is.extreme)
hfaz_outliers
## # A tibble: 29 × 7
## record_id ht wt age_months hfaz is.outlier is.extreme
## <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 39 20 3.90 1.28 -18.3 TRUE TRUE
## 2 94 166 16 50.6 14.0 TRUE TRUE
## 3 96 110 9.5 22.3 8.02 TRUE FALSE
## 4 135 178 15 52.5 16.2 TRUE TRUE
## 5 136 44 15 62.2 -14.4 TRUE TRUE
## 6 138 53 6 15.2 -10.4 TRUE FALSE
## 7 151 57 30 121. -12.7 TRUE FALSE
## 8 154 100 7 13.5 9.08 TRUE FALSE
## 9 174 165 8.10 5.54 44.6 TRUE TRUE
## 10 214 40 7 7.88 -13.8 TRUE TRUE
## # ℹ 19 more rows
# MAKES A TABLE OF OUTLIERS FOR BMI FOR AGE
baz_outliers <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
identify_outliers(baz) %>%
select(record_id, wt, ht, baz, age_months, is.outlier, is.extreme)
baz_outliers
## # A tibble: 29 × 7
## record_id wt ht baz age_months is.outlier is.extreme
## <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 39 3.90 20 50.5 1.28 TRUE TRUE
## 2 51 20 68 15.2 35.7 TRUE TRUE
## 3 56 4 70 -8.04 14.9 TRUE FALSE
## 4 94 16 166 -8.9 50.6 TRUE FALSE
## 5 96 9.5 110 -7.47 22.3 TRUE FALSE
## 6 135 15 178 -9.87 52.5 TRUE FALSE
## 7 136 15 44 32.7 62.2 TRUE TRUE
## 8 140 8.45 55 6.05 6.26 TRUE FALSE
## 9 151 30 57 16.9 121. TRUE TRUE
## 10 154 7 100 -8.31 13.5 TRUE FALSE
## # ℹ 19 more rows
# MAKES A TABLE OF OUTLIERS FOR MUAC
muac_outliers <- PSFI_df_malnutrition %>%
filter(age_group == 1 & case_control == 1) %>%
identify_outliers(muac) %>%
select(record_id, ht, wt, age_months, muac, is.outlier, is.extreme)
muac_outliers
## # A tibble: 18 × 7
## record_id ht wt age_months muac is.outlier is.extreme
## <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl>
## 1 51 68 20 35.7 23 TRUE TRUE
## 2 106 86 10 16.9 20 TRUE FALSE
## 3 140 55 8.45 6.26 20 TRUE FALSE
## 4 160 94 14 35.8 20 TRUE FALSE
## 5 167 97 19 33.4 20 TRUE FALSE
## 6 277 88 4 26.4 10 TRUE FALSE
## 7 348 91 16.4 19.1 20 TRUE FALSE
## 8 390 70 9.40 12.6 20 TRUE FALSE
## 9 412 56 8.5 7.82 20 TRUE FALSE
## 10 552 89 17 19.4 20 TRUE FALSE
## 11 563 74 11 12.0 21 TRUE FALSE
## 12 611 75 8.40 48.0 10 TRUE FALSE
## 13 619 63 5 14.5 9 TRUE FALSE
## 14 674 100 24 56.6 20 TRUE FALSE
## 15 760 66 6.20 16.4 10 TRUE FALSE
## 16 784 132 16 53.6 22 TRUE FALSE
## 17 882 99 18 41.5 20 TRUE FALSE
## 18 884 112 25 52.6 20 TRUE FALSE
# CREATES AN EXCEL FILE OF ALL OUTLIER TABLES
write.xlsx(
list(
WFHZ = wfhz_outliers,
WFAZ = wfaz_outliers,
HAZ = hfaz_outliers,
MUAC = muac_outliers,
BAZ = baz_outliers
),
file = "anthropometric_outliers.xlsx"
)
# CREATE A TABLE OF ALL NA'S
na_table <- PSFI_df_malnutrition %>%
filter(case_control == 1) %>%
mutate(
hfaz_missing = is.na(hfaz),
wfaz_missing = is.na(wfaz),
wfhz_missing = is.na(wfhz),
muac_missing = is.na (muac),
baz_missing = is.na (baz)
) %>%
filter(hfaz_missing | wfaz_missing | wfhz_missing | baz_missing | muac_missing) %>%
select(
record_id,
ht,
wt,
age_months,
hfaz,
wfaz,
wfhz,
baz,
muac,
hfaz_missing,
wfaz_missing,
wfhz_missing,
baz_missing,
muac_missing
)
na_table
## # A tibble: 98 × 14
## record_id ht wt age_months hfaz wfaz wfhz baz muac hfaz_missing
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 2 68 6 7.57 -0.9 -3.11 -3.58 -3.61 NA FALSE
## 2 4 142 35 144. -1.37 NA NA -0.29 23 FALSE
## 3 26 115 45 145. -5.36 NA 9 3.28 18 FALSE
## 4 32 125 24 150. -4.22 NA NA -1.55 18 FALSE
## 5 39 20 3.90 1.28 -18.3 -1.58 NA 50.5 11 FALSE
## 6 40 139 35 157. -2.56 NA NA -0.31 19 FALSE
## 7 52 133 51 133. -1.59 NA NA 3.1 20 FALSE
## 8 54 127 21.3 98.0 -0.2 -1.41 NA -2.13 15 FALSE
## 9 62 127 25 92.1 0.41 0.23 NA -0.04 18 FALSE
## 10 82 141 30 112. 1.08 0.15 NA -0.65 20 FALSE
## # ℹ 88 more rows
## # ℹ 4 more variables: wfaz_missing <lgl>, wfhz_missing <lgl>,
## # baz_missing <lgl>, muac_missing <lgl>
anthro_vars <- c("hfaz", "wfaz", "wfhz", "baz", "muac")
cases_df <- PSFI_df_malnutrition %>%
filter(case_control == 1)
anthro_summary <- lapply(anthro_vars, function(var) {
x <- cases_df[[var]]
outlier_info <- cases_df %>%
select(all_of(var)) %>%
identify_outliers(!!sym(var))
tibble(
measure = var,
n = sum(!is.na(x)),
n_missing = sum(is.na(x)),
pct_missing = round(mean(is.na(x)) * 100, 2),
n_outliers = sum(outlier_info$is.outlier, na.rm = TRUE),
n_extreme_outliers = sum(outlier_info$is.extreme, na.rm = TRUE)
)
}) %>%
bind_rows()
anthro_summary
## # A tibble: 5 × 6
## measure n n_missing pct_missing n_outliers n_extreme_outliers
## <chr> <int> <int> <dbl> <int> <int>
## 1 hfaz 754 1 0.13 29 10
## 2 wfaz 706 49 6.49 10 1
## 3 wfhz 662 93 12.3 20 3
## 4 baz 754 1 0.13 29 8
## 5 muac 753 2 0.26 17 1
write.xlsx(
list(
Missing = na_table,
Summary = anthro_summary
),
file = "NA.xlsx")
1.5 Data check
PSFI_df_malnutrition %>%
group_by(malnutrition, case_control) %>%
summarize(
malnutrition_missing = sum(is.na(malnutrition))
)
## `summarise()` has regrouped the output.
## ℹ Summaries were computed grouped by malnutrition and case_control.
## ℹ Output is grouped by malnutrition.
## ℹ Use `summarise(.groups = "drop_last")` to silence this message.
## ℹ Use `summarise(.by = c(malnutrition, case_control))` for per-operation
## grouping (`?dplyr::dplyr_by`) instead.
## # A tibble: 8 × 3
## # Groups: malnutrition [4]
## malnutrition case_control malnutrition_missing
## <int> <dbl> <int>
## 1 0 0 0
## 2 0 1 0
## 3 1 0 0
## 4 1 1 0
## 5 2 0 0
## 6 2 1 0
## 7 NA 0 1
## 8 NA 1 93
#103 mort_inhosp_missing = controls
PSFI_df_malnutrition %>%
filter (case_control == 1) %>%
group_by(malnutrition_source) %>%
summarize(
count_malnut = n()
)
## # A tibble: 5 × 2
## malnutrition_source count_malnut
## <chr> <int>
## 1 BFA 128
## 2 MUAC 10
## 3 WFH 458
## 4 WFL 67
## 5 <NA> 92
# NA due to child with height of 20 cm (under WHO curve for wfh/wfl) and 38 days old (too young for MUAC)
PSFI_df_malnutrition %>%
filter (case_control == 1) %>%
group_by(malnutrition) %>%
summarize(
count_malnut = n()
)
## # A tibble: 4 × 2
## malnutrition count_malnut
## <int> <int>
## 1 0 470
## 2 1 76
## 3 2 116
## 4 NA 93
PSFI_df_malnutrition %>%
drop_na(malnutrition, mort_inhosp) %>%
ggplot(aes(x = factor(malnutrition))) +
geom_bar() +
labs(
x = "Malnutrition",
y = "N"
)

PSFI_df_malnutrition%>%
filter (case_control == 1) %>%
count(malnutrition, mort_inhosp) %>%
group_by(malnutrition) %>%
mutate(prop = n / sum(n))
## # A tibble: 8 × 4
## # Groups: malnutrition [4]
## malnutrition mort_inhosp n prop
## <int> <dbl> <int> <dbl>
## 1 0 0 405 0.862
## 2 0 1 65 0.138
## 3 1 0 56 0.737
## 4 1 1 20 0.263
## 5 2 0 82 0.707
## 6 2 1 34 0.293
## 7 NA 0 64 0.688
## 8 NA 1 29 0.312
PSFI_df_malnutrition %>%
drop_na(mort_inhosp, malnutrition) %>% #only cases & minus the one previously mentioned NA
ggplot +
(aes(x = factor(malnutrition), fill = factor(mort_inhosp))) +
geom_bar(stat = "count", position = "fill") +
labs(
x = "Malnutrition",
y = "Proportion",
fill = "Mortality"
)

PSFI_df_malnutrition %>%
count(malnutrition, case_control) %>%
group_by(malnutrition) %>%
mutate(prop = n / sum(n))
## # A tibble: 8 × 4
## # Groups: malnutrition [4]
## malnutrition case_control n prop
## <int> <dbl> <int> <dbl>
## 1 0 0 78 0.142
## 2 0 1 470 0.858
## 3 1 0 9 0.106
## 4 1 1 76 0.894
## 5 2 0 15 0.115
## 6 2 1 116 0.885
## 7 NA 0 1 0.0106
## 8 NA 1 93 0.989
PSFI_df_malnutrition %>%
drop_na(malnutrition) %>%
ggplot +
(aes(x = factor(malnutrition), fill = factor(case_control))) +
geom_bar(stat = "count", position = "fill") +
labs(
x = "Malnutrition",
y = "Proportion",
fill = "Case/Control"
)

PSFI_df_malnutrition %>%
ggplot +
(aes(x = whz, y = wfhz)) +
geom_point(alpha = 0.5) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
stat_cor(method = "pearson") +
theme_minimal()
## Warning: Removed 157 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 157 rows containing missing values or values outside the scale range
## (`geom_point()`).

PSFI_df_malnutrition %>%
ggplot +
(aes(x = waz, y = wfaz)) +
geom_point(alpha = 0.5) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
stat_cor(method = "pearson") +
theme_minimal()
## Warning: Removed 65 rows containing non-finite outside the scale range
## (`stat_cor()`).
## Warning: Removed 65 rows containing missing values or values outside the scale range
## (`geom_point()`).

write.xlsx(PSFI_df_malnutrition, file = "PSFI_final_malnutrition.xlsx")