1.2. Anthropometric analysis
# ADD AGE IN YEARS, DERIVED FROM THE EXACT AGE IN DAYS (365.25 DAYS PER YEAR)
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  age_years = age_days_exact / 365.25
)
# DISTRIBUTION OF AGE IN YEARS OVER ALL RECORDS
summary(PSFI_df_malnutrition[["age_years"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.08768 0.79176 1.65535 3.07854 4.00176 14.92786
# SUMMARY STATISTICS FOR CASES ONLY (case_control == 1)
# ht = HEIGHT, wt = WEIGHT, age_years = AGE IN YEARS, muac = MID-UPPER ARM CIRCUMFERENCE
summary(
  PSFI_df_malnutrition %>%
    filter(case_control == 1) %>%
    select(ht, wt, age_years, muac)
)
ht wt age_years muac
Min. : 20.00 Min. : 3.20 Min. : 0.08768 Min. : 9.00
1st Qu.: 67.00 1st Qu.: 7.00 1st Qu.: 0.73436 1st Qu.:14.00
Median : 78.00 Median : 9.50 Median : 1.40141 Median :15.00
Mean : 85.23 Mean :12.17 Mean : 2.83539 Mean :15.38
3rd Qu.: 98.00 3rd Qu.:14.00 3rd Qu.: 3.49267 3rd Qu.:17.00
Max. :192.00 Max. :72.00 Max. :14.92786 Max. :27.00
NAs :1 NAs :2
# HEIGHT OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME 0-200 AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, ht[case_control == 1]),
  horizontal = TRUE,
  ylim = c(0, 200),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, ht[case_control == 1]),
  breaks = 40,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Height (cm)",
  xlim = c(0, 200)
)

# WEIGHT OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME 0-75 AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, wt[case_control == 1]),
  horizontal = TRUE,
  ylim = c(0, 75),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, wt[case_control == 1]),
  breaks = 40,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Weight (kg)",
  xlim = c(0, 75)
)

# MUAC OF CASES (ALL AGES): BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
## BLUE DASHED LINE MARKS THE MODERATE MALNUTRITION CUT-OFF (12.5 CM)
## RED DASHED LINE MARKS THE SEVERE MALNUTRITION CUT-OFF (11.5 CM)
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, muac[case_control == 1]),
  horizontal = TRUE,
  ylim = c(5, 30),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS AND THE TWO CUT-OFF LINES
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, muac[case_control == 1]),
  breaks = 20,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Mid-upper arm circumference (cm)",
  xlim = c(5, 30)
)
abline(v = 11.5, col = "red", lwd = 2, lty = 2) # severe
abline(v = 12.5, col = "blue", lwd = 2, lty = 2) # moderate

# AGE OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME 0-15 YEAR AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, age_years[case_control == 1]),
  horizontal = TRUE,
  ylim = c(0, 15),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, age_years[case_control == 1]),
  breaks = 40,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Age (years)",
  xlim = c(0, 15)
)

1.3. Z-scorer
# RECODE SEX AS 1/2 INSTEAD OF 0/1 (NECESSARY FOR THE ZSCORER PACKAGE):
# sex == 1 STAYS 1, ANY OTHER NON-MISSING VALUE BECOMES 2
# NOTE(review): ASSUMES sex == 1 IS THE CATEGORY ZSCORER CODES AS 1 - CONFIRM AGAINST THE CODEBOOK
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  sex_who = if_else(sex == 1, true = 1, false = 2)
)
# DISTRIBUTION OF THE RECODED SEX VARIABLE OVER ALL RECORDS
summary(PSFI_df_malnutrition[["sex_who"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 1.000 1.000 1.427 2.000 2.000
# ADD AGE IN MONTHS, DERIVED FROM THE EXACT AGE IN DAYS (30.4375 DAYS PER MONTH = 365.25 / 12)
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  age_months = age_days_exact / 30.4375
)
# DISTRIBUTION OF AGE IN MONTHS OVER ALL RECORDS
summary(PSFI_df_malnutrition[["age_months"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.052 9.501 19.864 36.942 48.021 179.134
# ADD AN AGE GROUP CODE:
#   0 = YOUNGER THAN 6 MONTHS
#   1 = 6 MONTHS UP TO (NOT INCLUDING) 5 YEARS
#   2 = 5 YEARS OR OLDER
#   NA = NONE OF THE ABOVE (E.G. AGE MISSING)
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  age_group = case_when(
    age_months < 6 ~ 0L,
    age_months >= 6 & age_years < 5 ~ 1L,
    age_years >= 5 ~ 2L,
    TRUE ~ NA_integer_
  )
)
# Z-SCORES FROM THE ZSCORER PACKAGE (addWGSR), ONE CALL PER INDEX.
# EACH CALL RUNS ON THE FULL DATA FRAME AND ONLY ITS Z-SCORE COLUMN IS KEPT:
#   wflz = WEIGHT FOR LENGTH, wfaz = WEIGHT FOR AGE, hfaz = HEIGHT FOR AGE,
#   wfhz = WEIGHT FOR HEIGHT, baz = BMI FOR AGE (READ FROM THE bfaz COLUMN)
# SEX IS PASSED AS sex_who AND AGE AS age_days_exact
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  wflz = addWGSR(
    data = PSFI_df_malnutrition, sex = "sex_who",
    firstPart = "wt", secondPart = "ht",
    index = "wfl"
  )[["wflz"]],
  wfaz = addWGSR(
    data = PSFI_df_malnutrition, sex = "sex_who",
    firstPart = "wt", secondPart = "age_days_exact",
    index = "wfa"
  )[["wfaz"]],
  hfaz = addWGSR(
    data = PSFI_df_malnutrition, sex = "sex_who",
    firstPart = "ht", secondPart = "age_days_exact",
    index = "hfa"
  )[["hfaz"]],
  wfhz = addWGSR(
    data = PSFI_df_malnutrition, sex = "sex_who",
    firstPart = "wt", secondPart = "ht",
    index = "wfh"
  )[["wfhz"]],
  # BMI FOR AGE NEEDS THREE INPUTS: WEIGHT, HEIGHT AND AGE
  baz = addWGSR(
    data = PSFI_df_malnutrition, sex = "sex_who",
    firstPart = "wt", secondPart = "ht", thirdPart = "age_days_exact",
    index = "bfa"
  )[["bfaz"]]
)
============================================================================================================================================
============================================================================================================================================
============================================================================================================================================
============================================================================================================================================
============================================================================================================================================
# ASSIGN A PROXY Z-SCORE TO MUAC USING THE 11.5 AND 12.5 CM LIMITS, SO MUAC CAN BE
# CLASSIFIED LATER WITH THE SAME Z-SCORE CUT-OFFS AS THE OTHER INDICES:
#   muac < 11.5          -> -4   (SEVERE MALNUTRITION)
#   11.5 <= muac < 12.5  -> -2.5 (MODERATE MALNUTRITION)
#   muac >= 12.5         ->  0   (NO MALNUTRITION)
#   muac MISSING         -> NA
# BRANCHES ARE CHECKED TOP TO BOTTOM, SO EACH ONE ONLY SEES VALUES NOT YET MATCHED
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  muacz = case_when(
    muac < 11.5 ~ -4,
    muac < 12.5 ~ -2.5,
    muac >= 12.5 ~ 0,
    TRUE ~ NA_real_
  )
)
# CREATE ONE MALNUTRITION Z-SCORE PER CHILD FROM WFLZ, WFHZ, MUACZ OR BAZ (to be defined later)
#   age_group 1, 45 <= ht < 65     -> WEIGHT FOR LENGTH
#   age_group 1, 65 <= ht < 120    -> WEIGHT FOR HEIGHT
#   age_group 1, ANY OTHER HEIGHT  -> MUAC PROXY (ht < 45, ht >= 120 OR ht MISSING)
#   age_group 2                    -> BMI FOR AGE
#   EVERYTHING ELSE (INCLUDING age_group 0) -> NA
# BRANCHES ARE CHECKED TOP TO BOTTOM, SO THE THIRD BRANCH ONLY RECEIVES THE
# age_group 1 ROWS THAT THE TWO HEIGHT-RANGE BRANCHES DID NOT MATCH
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  zscore_unified = case_when(
    age_group == 1 & ht >= 45 & ht < 65 ~ wflz,
    age_group == 1 & ht >= 65 & ht < 120 ~ wfhz,
    age_group == 1 ~ muacz,
    age_group == 2 ~ baz,
    TRUE ~ NA_real_
  )
)
# CREATE THE MALNUTRITION CATEGORY FROM THE UNIFIED Z-SCORE:
#   z < -3        -> 2
#   -3 <= z < -2  -> 1
#   z >= -2       -> 0
#   z MISSING     -> NA
# BRANCHES ARE CHECKED TOP TO BOTTOM, SO EACH ONE ONLY SEES VALUES NOT YET MATCHED
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  malnutrition = case_when(
    zscore_unified < -3 ~ 2L,
    zscore_unified < -2 ~ 1L,
    !is.na(zscore_unified) ~ 0L,
    TRUE ~ NA_integer_
  )
)
# RECORD WHICH ANTHROPOMETRIC MEASURE SUPPLIES THE UNIFIED Z-SCORE FOR EACH ROW
# (SAME RULES AS zscore_unified):
#   age_group 1, 45 <= ht < 65     -> "WFL"
#   age_group 1, 65 <= ht < 120    -> "WFH"
#   age_group 1, ANY OTHER HEIGHT  -> "MUAC" (ht < 45, ht >= 120 OR ht MISSING)
#   age_group 2                    -> "BFA"
#   EVERYTHING ELSE                -> NA
# BRANCHES ARE CHECKED TOP TO BOTTOM, SO THE THIRD BRANCH ONLY RECEIVES THE
# age_group 1 ROWS THAT THE TWO HEIGHT-RANGE BRANCHES DID NOT MATCH
PSFI_df_malnutrition <- mutate(
  PSFI_df_malnutrition,
  malnutrition_source = case_when(
    age_group == 1 & ht >= 45 & ht < 65 ~ "WFL",
    age_group == 1 & ht >= 65 & ht < 120 ~ "WFH",
    age_group == 1 ~ "MUAC",
    age_group == 2 ~ "BFA",
    TRUE ~ NA_character_
  )
)
1.4. Z-score check
# WEIGHT FOR AGE Z-SCORES OF ALL CASES (MISSING VALUES ARE KEPT IN THE VECTOR)
subset_wfaz <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  pull(wfaz)
# SUMMARY OF WEIGHT FOR AGE Z-SCORE (CASES)
summary(subset_wfaz)
Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
-7.5400 -2.3050 -0.9700 -1.0971 0.0475 10.2500 49
# NUMBER OF CASES (length() ALSO COUNTS CASES WHOSE WEIGHT FOR AGE Z-SCORE IS MISSING)
length(subset_wfaz)
[1] 755
# WEIGHT FOR AGE Z-SCORES OF THE CASES RETURNED BY identify_outliers()
subset_wfaz1 <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(wfaz) %>%
  pull(wfaz)
# NUMBER OF OUTLIERS IN WEIGHT FOR AGE Z SCORE
length(subset_wfaz1)
[1] 10
# HEIGHT FOR AGE Z-SCORES OF ALL CASES (MISSING VALUES ARE KEPT IN THE VECTOR)
subset_hfaz <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  pull(hfaz)
# SUMMARY OF HEIGHT FOR AGE Z SCORE (CASES)
summary(subset_hfaz)
Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
-18.2600 -2.8700 -1.1800 -0.9338 0.6350 44.5600 1
# NUMBER OF CASES (length() ALSO COUNTS CASES WHOSE HEIGHT FOR AGE Z-SCORE IS MISSING)
length(subset_hfaz)
[1] 755
# HEIGHT FOR AGE Z-SCORES OF THE CASES RETURNED BY identify_outliers()
subset_hfaz1 <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(hfaz) %>%
  pull(hfaz)
# NUMBER OF OUTLIERS FOR HEIGHT FOR AGE Z SCORE
length(subset_hfaz1)
[1] 29
# WEIGHT FOR HEIGHT Z-SCORES OF ALL CASES (MISSING VALUES ARE KEPT IN THE VECTOR)
subset_wfhz <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  pull(wfhz)
# SUMMARY OF WEIGHT FOR HEIGHT Z SCORE (CASES)
summary(subset_wfhz)
Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
-10.5900 -2.2300 -0.5700 -0.4132 1.1875 42.7700 93
# NUMBER OF CASES (length() ALSO COUNTS CASES WHOSE WEIGHT FOR HEIGHT Z-SCORE IS MISSING)
length(subset_wfhz)
[1] 755
# WEIGHT FOR HEIGHT Z-SCORES OF THE CASES RETURNED BY identify_outliers()
subset_wfhz1 <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(wfhz) %>%
  pull(wfhz)
# NUMBER OF OUTLIERS IN WEIGHT FOR HEIGHT Z SCORE
length(subset_wfhz1)
[1] 20
# BMI FOR AGE Z-SCORES OF ALL CASES (MISSING VALUES ARE KEPT IN THE VECTOR)
subset_baz <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  pull(baz)
# SUMMARY OF BMI FOR AGE Z SCORE (CASES)
summary(subset_baz)
Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
-12.560 -2.250 -0.695 -0.583 1.060 50.500 1
# NUMBER OF CASES (length() ALSO COUNTS CASES WHOSE BMI FOR AGE Z-SCORE IS MISSING)
length(subset_baz)
[1] 755
# BMI FOR AGE Z-SCORES OF THE CASES RETURNED BY identify_outliers()
subset_baz1 <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(baz) %>%
  pull(baz)
# NUMBER OF OUTLIERS IN BMI FOR AGE Z SCORE
length(subset_baz1)
[1] 29
# MUAC OF CASES IN age_group 1 (6-59 MONTHS); MISSING VALUES ARE KEPT IN THE VECTOR
subset_muac <- PSFI_df_malnutrition %>%
  filter(age_group == 1, case_control == 1) %>%
  pull(muac)
# SUMMARY OF MUAC (CASES, 6-59 MONTHS)
summary(subset_muac)
Min. 1st Qu. Median Mean 3rd Qu. Max. NAs
9.00 14.00 15.00 15.02 16.00 23.00 2
# NUMBER OF CASES AGED 6-59 MONTHS (length() ALSO COUNTS CASES WHOSE MUAC IS MISSING)
length(subset_muac)
[1] 535
# MUAC OF THE 6-59 MONTH CASES RETURNED BY identify_outliers()
subset_muac1 <- PSFI_df_malnutrition %>%
  filter(age_group == 1, case_control == 1) %>%
  identify_outliers(muac) %>%
  pull(muac)
# NUMBER OF OUTLIERS IN MUAC
length(subset_muac1)
[1] 18
# WEIGHT FOR HEIGHT Z-SCORE OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME -11 TO 43 AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, wfhz[case_control == 1]),
  horizontal = TRUE,
  ylim = c(-11, 43),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, wfhz[case_control == 1]),
  breaks = 50,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Weight-for-height (z-score)",
  xlim = c(-11, 43)
)

# WEIGHT FOR AGE Z-SCORE OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME -8 TO 11 AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, wfaz[case_control == 1]),
  horizontal = TRUE,
  ylim = c(-8, 11),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, wfaz[case_control == 1]),
  breaks = 50,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Weight-for-age (z-score)",
  xlim = c(-8, 11)
)

# BMI FOR AGE Z-SCORE OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME -13 TO 51 AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, baz[case_control == 1]),
  horizontal = TRUE,
  ylim = c(-13, 51),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, baz[case_control == 1]),
  breaks = 50,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "BMI-for-age (z-score)",
  xlim = c(-13, 51)
)

# HEIGHT FOR AGE Z-SCORE OF CASES: BOXPLOT (TOP PANEL) STACKED ON A HISTOGRAM (BOTTOM PANEL)
# BOTH PANELS USE THE SAME -19 TO 45 AXIS RANGE AND SIDE MARGINS SO THEY LINE UP
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  with(PSFI_df_malnutrition, hfaz[case_control == 1]),
  horizontal = TRUE,
  ylim = c(-19, 45),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  with(PSFI_df_malnutrition, hfaz[case_control == 1]),
  breaks = 50,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Height-for-age (z-score)",
  xlim = c(-19, 45)
)

# CREATES HISTOGRAM AND BOXPLOT OF MUAC (CASES, 6-59 MONTHS)
## RESTRICTED TO age_group == 1, THE SAME SUBSET USED FOR THE MUAC SUMMARY AND THE
## MUAC OUTLIER TABLE; WITHOUT THIS FILTER THE PLOT WOULD SHOW CASES OF ALL AGES
## which() DROPS ROWS WHERE case_control OR age_group IS MISSING, AS filter() DOES
## RED LINE OUTLINES SEVERE MALNUTRITION (11.5cm)
## BLUE LINE OUTLINES MODERATE MALNUTRITION (12.5cm)
muac_cases_6_59m <- with(
  PSFI_df_malnutrition,
  muac[which(case_control == 1 & age_group == 1)]
)
layout(
  mat = matrix(c(1, 2), nrow = 2, ncol = 1, byrow = TRUE),
  heights = c(1, 8)
)
# TOP PANEL: NO BOTTOM MARGIN AND NO AXIS, SO THE BOXPLOT SITS ON THE HISTOGRAM
par(mar = c(0, 3.1, 1.1, 2.1))
boxplot(
  muac_cases_6_59m,
  horizontal = TRUE,
  ylim = c(5, 30),
  xaxt = "n",
  col = rgb(0.8, 0.8, 0, 0.5),
  frame = FALSE
)
# BOTTOM PANEL: HISTOGRAM WITH THE LABELLED AXIS AND THE TWO CUT-OFF LINES
par(mar = c(4, 3.1, 1.1, 2.1))
hist(
  muac_cases_6_59m,
  breaks = 20,
  col = rgb(1, 0.8, 0.8, 1),
  border = FALSE,
  main = "",
  xlab = "Mid-upper arm circumference (cm)",
  xlim = c(5, 30)
)
abline(v = 11.5, col = "red", lwd = 2, lty = 2) # severe
abline(v = 12.5, col = "blue", lwd = 2, lty = 2) # moderate

# OUTLIER TABLES FOR CASES. EACH TABLE HOLDS THE ROWS RETURNED BY identify_outliers()
# FOR ONE MEASURE, WITH THE RECORD ID, THE RAW MEASUREMENTS AND THE
# is.outlier / is.extreme FLAGS, AND IS PRINTED AFTER IT IS BUILT

# WEIGHT FOR HEIGHT
wfhz_outliers <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(wfhz) %>%
  select(record_id, wt, ht, wfhz, age_months, is.outlier, is.extreme)
wfhz_outliers

# WEIGHT FOR AGE
wfaz_outliers <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(wfaz) %>%
  select(record_id, ht, wt, age_months, wfaz, is.outlier, is.extreme)
wfaz_outliers

# HEIGHT FOR AGE
hfaz_outliers <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(hfaz) %>%
  select(record_id, ht, wt, age_months, hfaz, is.outlier, is.extreme)
hfaz_outliers

# BMI FOR AGE
baz_outliers <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  identify_outliers(baz) %>%
  select(record_id, wt, ht, baz, age_months, is.outlier, is.extreme)
baz_outliers

# MUAC (ONLY CASES IN age_group 1)
muac_outliers <- PSFI_df_malnutrition %>%
  filter(age_group == 1, case_control == 1) %>%
  identify_outliers(muac) %>%
  select(record_id, ht, wt, age_months, muac, is.outlier, is.extreme)
muac_outliers

# WRITE ALL OUTLIER TABLES TO ONE EXCEL FILE; THE LIST NAMES LABEL THE TABLES
write.xlsx(
  list(
    WFHZ = wfhz_outliers,
    WFAZ = wfaz_outliers,
    HAZ = hfaz_outliers,
    MUAC = muac_outliers,
    BAZ = baz_outliers
  ),
  file = "anthropometric_outliers.xlsx"
)
# TABLE OF CASES WITH AT LEAST ONE MISSING ANTHROPOMETRIC VALUE
# (hfaz, wfaz, wfhz, baz OR muac). ONE TRUE/FALSE FLAG PER MEASURE SHOWS WHICH
# VALUES ARE MISSING FOR EACH RECORD
na_table <- PSFI_df_malnutrition %>%
  filter(case_control == 1) %>%
  mutate(
    hfaz_missing = is.na(hfaz),
    wfaz_missing = is.na(wfaz),
    wfhz_missing = is.na(wfhz),
    muac_missing = is.na(muac),
    baz_missing = is.na(baz)
  ) %>%
  # KEEP ONLY THE RECORDS WHERE AT LEAST ONE FLAG IS TRUE
  filter(
    hfaz_missing | wfaz_missing | wfhz_missing | baz_missing | muac_missing
  ) %>%
  select(
    record_id, ht, wt, age_months,
    hfaz, wfaz, wfhz, baz, muac,
    hfaz_missing, wfaz_missing, wfhz_missing, baz_missing, muac_missing
  )
na_table
# PER-MEASURE DATA QUALITY SUMMARY FOR CASES: NUMBER OF NON-MISSING VALUES, NUMBER
# AND PERCENTAGE OF MISSING VALUES, AND NUMBER OF OUTLIERS / EXTREME OUTLIERS
# FLAGGED BY identify_outliers()
# NOTE(review): muac IS SUMMARISED OVER ALL CASES HERE, WHILE muac_outliers IS
# RESTRICTED TO age_group == 1 - CONFIRM THIS DIFFERENCE IS INTENDED
anthro_vars <- c("hfaz", "wfaz", "wfhz", "baz", "muac")
cases_df <- filter(PSFI_df_malnutrition, case_control == 1)
anthro_summary <- bind_rows(
  lapply(anthro_vars, function(measure_name) {
    # TRUE WHERE THE MEASURE IS MISSING FOR A CASE
    is_missing <- is.na(cases_df[[measure_name]])
    # ROWS OF THIS MEASURE RETURNED BY identify_outliers(), WITH THEIR FLAGS
    flagged <- identify_outliers(
      select(cases_df, all_of(measure_name)),
      !!sym(measure_name)
    )
    # ONE SUMMARY ROW PER MEASURE
    tibble(
      measure = measure_name,
      n = sum(!is_missing),
      n_missing = sum(is_missing),
      pct_missing = round(mean(is_missing) * 100, 2),
      n_outliers = sum(flagged$is.outlier, na.rm = TRUE),
      n_extreme_outliers = sum(flagged$is.extreme, na.rm = TRUE)
    )
  })
)
anthro_summary
# WRITE THE MISSING-VALUE TABLE AND THE PER-MEASURE SUMMARY TO ONE EXCEL FILE;
# THE LIST NAMES LABEL THE TWO TABLES
write.xlsx(
  list(Missing = na_table, Summary = anthro_summary),
  file = "NA.xlsx"
)