This document contains the dataset needed and calculation for the calcium intake and immediate health benefit. It contains the number of averted cases of preeclampsia and preterm birth

Library and settings

library(tidyverse)
library(readxl)
library(nutriR)
library(ggplot2)
#library(tabulizer)
library(rnaturalearth)
library(sf)
library(DT)
library(lhs)
library(stringr)
library(countrycode)
library(plotly)

Read in dataset and data cleaning

Country code and group data

country=read_excel("data/CLASS.xlsx", sheet=1) %>% janitor::clean_names()

datatable(
data = country,
caption = "Table 1: country code",
filter = "top",
style = "bootstrap4"
)

Calcuim nutrition data (\(a_i\))

Calculate the calcium intake gamma distribution shape for each region

calcium_distr=get_dists(nutrients="Calcium" ,  sexes="F", ages=15:50 ) %>% 
  drop_na() %>% 
  left_join(country, by=c("iso3"="code"))  %>%
  group_by(region, age_group) %>% 
  summarize(mean_shape=mean(g_shape))

calcium_world=calcium_distr %>% 
  group_by(age_group) %>% 
  summarize(means=mean(mean_shape))

Calculate the proportion of people take insufficient calcuim

calciumNew=read.csv(file="data/v36_cnty.csv") %>% 
  filter(age>=17.5 & age<=47.5 & female=="1" & urban=="999" & edu=="999" & year=="2018") %>% 
  rowwise() %>%
  left_join(country, by=c("iso3"="code")) %>% 
  mutate(
    age_group = case_when(
    age == 17.5 ~ "15-19",
    age == 22.5 ~ "20-24",
    age == 27.5 ~ "25-29",
    age == 32.5 ~ "30-34",
    age == 37.5 ~ "35-39",
    age == 42.5 ~ "40-44",
    age == 47.5 ~ "45-49",
    TRUE ~ as.character(age) 
  )
  ) %>% 
  left_join(calcium_distr, by=c("region"="region", "age_group"="age_group") ) %>% 
  mutate(
    mean_shape = case_when(
      is.na(mean_shape) & age_group == "15-19" ~ calcium_world$means[calcium_world$age_group == "15-19"],
      is.na(mean_shape) & age_group == "20-24" ~ calcium_world$means[calcium_world$age_group == "20-24"],
      is.na(mean_shape) & age_group == "25-29" ~ calcium_world$means[calcium_world$age_group == "25-29"],
      is.na(mean_shape) & age_group == "30-34" ~ calcium_world$means[calcium_world$age_group == "30-34"],
      is.na(mean_shape) & age_group == "35-39" ~ calcium_world$means[calcium_world$age_group == "35-39"],
      is.na(mean_shape) & age_group == "40-44" ~ calcium_world$means[calcium_world$age_group == "40-44"],
      is.na(mean_shape) & age_group == "45-49" ~ calcium_world$means[calcium_world$age_group == "45-49"],
      TRUE ~ mean_shape
    ),
    grate=mean_shape/median
  ) %>%  
  rowwise() %>% 
  mutate(pro_low=pgamma(800, shape = mean_shape, rate = grate)
           )
  
calciumNew$sev_result = NA # Create a new column to store the SEV results


for (i in seq_len(nrow(calciumNew))) {
  calciumNew$sev_result[i] = sev(
    ear=800,
    cv=0.1,
    shape=calciumNew$mean_shape[i],
    rate=calciumNew$grate[i]
  )
}

datatable(
data = calciumNew,
caption = "Table 2-1: Calcium data",
filter = "top",
style = "bootstrap4"
)

The age population distribution

file_path = "data/WPP2022_POP_F02_3_POPULATION_5-YEAR_AGE_GROUPS_FEMALE.xlsx"
pop = read_excel(file_path, skip=15) 
colnames(pop) = pop[1,]

pop_data = pop[-1, ] %>% 
  janitor::clean_names() %>% 
  filter(year=="2021") %>% 
  rename_with(~ gsub("x(\\d+)_", "\\1-", .), starts_with("x")) %>%
  dplyr::select(iso3_alpha_code, "0-4":x100) %>%
  mutate(across("0-4":x100, as.numeric)) %>%
  drop_na() %>%
  rowwise() %>% 
  mutate(total = sum(c_across("15-19":"45-49")),
         across("15-19":"45-49", ~ . / total, .names = "proportion_{.col}")) %>% 
 dplyr:: select(iso3_alpha_code, starts_with("proportion_")) %>% 
  pivot_longer(cols = starts_with("proportion_"), names_to = "age_group", values_to = "proportion", names_prefix = "proportion_")

datatable(
data = pop_data,
caption = "Table 2-2: Population distribution",
filter = "top",
style = "bootstrap4"
) 

Joint the calcium with the population data to calculate

calciummerge=calciumNew %>% 
  left_join(pop_data, by=c("iso3"="iso3_alpha_code", "age_group"="age_group")) %>% 
  mutate(frac_sev=sev_result*proportion*0.01,
         frac_own=pro_low*proportion,) %>% 
  group_by(iso3, economy) %>% 
  summarize(tfrac_sev=sum(frac_sev),
            tfrac_own=sum(frac_own)) %>% 
  mutate(
         low_intake_sev= case_when(
   tfrac_sev>= 0.25  ~ 1,
   TRUE ~ 0
  ),
  low_intake_own= case_when(
   tfrac_own>= 0.25  ~ 1,
   TRUE ~ 0
  )) %>% 
  left_join(country, by=c("iso3"="code")) %>% 
  dplyr::select(-economy.x) %>% 
  rename(country=economy.y)

datatable(
data = calciummerge,
caption = "Table 2-3: Insufficient calcium intake",
filter = "top",
style = "bootstrap4"
)  

Antenatal care(ANC) (\(b_i\))

Use the lastest UNICEF data –up to 2021, 24 NA, 192 total. The coverage year is from 2015 to 2020.

Anc calculation

For Mean

–The country with record –Take its own mean –done

–The country without record – Take the regional mean—done

For Sd

–The country with >= 3 records – take its own sd –done

–The country with < 3 records – take the mean of all the country sd—done

–The country without recored – Take the regional sd – done

path="data/fusion_GLOBAL_DATAFLOW_UNICEF_1.0_.MNCH_ANC4...csv"
ancf=read.csv(file=path) %>% 
  janitor::clean_names()  %>%
  group_by(ref_area_geographic_area) %>% 
  arrange(desc(time_period_time_period)) %>% 
  slice_head(n = 5) %>% 
  filter(time_period_time_period>=2015) %>% 
  mutate(obs_value_observation_value = obs_value_observation_value*0.01)  %>% 
  separate(ref_area_geographic_area, into = c("code", "country"), sep = ": ") %>% 
  mutate(code = str_trim(code),
         country = str_trim(country)) 
anc_region=
  ancf%>% 
  left_join(country) %>% 
  group_by(region) %>% 
  summarise(
    mean_region = mean(obs_value_observation_value, na.rm = TRUE),
    sd_region =sd(obs_value_observation_value, na.rm=TRUE)
  )

avg_sd = ancf %>%
  group_by(country, code) %>%
  filter(n() >= 3) %>%
  summarise(sd = sd(obs_value_observation_value, na.rm = TRUE)) %>%
  pull(sd) %>%
  mean(na.rm = TRUE)


anc_summary = ancf %>%
  group_by(country, code) %>%
  summarise(
    origin_mean = mean(obs_value_observation_value, na.rm = TRUE),
    origin_sd = if_else(n() < 3, avg_sd, sd(obs_value_observation_value, na.rm = TRUE))
  )


global_mean_anc <- mean(anc_region$mean_region, na.rm = TRUE)
global_sd_anc <- sd(anc_region$mean_region, na.rm = TRUE)



new_row_anc <- data.frame(
  region = "North America",
  mean_region = global_mean_anc,
  sd_region = global_sd_anc
)

anc_region<- rbind(anc_region, new_row_anc)
anc=anc_summary %>% 
  right_join(calciummerge, by=c("code"="iso3")) %>% 
  left_join(anc_region, by=c("region"="region")) %>% 
  mutate(
    origin_mean = ifelse(is.na(origin_mean), mean_region,origin_mean),
    origin_sd = ifelse(is.na(origin_sd), sd_region,origin_sd)) %>% 
  ungroup () %>% 
  dplyr::select( code, country.y, origin_mean, origin_sd) %>% 
  rename(country=country.y)
  

datatable(
data = anc,
caption = "Table 3: ANC",
filter = "top",
style = "bootstrap4"
)

plot to check the distribution

plot(density(anc$origin_mean))

Fertility (\(c_i\))

The number of live birth(in 1000) of 2024 with interval

birthlower=read_excel("data/UN_PPP2022_Output_Births.xlsx",sheet=1, skip = 14)
birthmean=read_excel("data/UN_PPP2022_Output_Births.xlsx",sheet=3, skip = 14)
birthupper=read_excel("data/UN_PPP2022_Output_Births.xlsx",sheet=5, skip = 14)

colnames(birthlower) <- birthlower[1,]
colnames(birthmean) <- birthmean[1,]
colnames(birthupper) <- birthupper[1,]

birthlower <- birthlower[-1, ] %>% 
  janitor::clean_names()
birthmean <- birthmean[-1, ] %>% 
  janitor::clean_names()
birthupper <- birthupper[-1, ] %>% 
  janitor::clean_names()

birthnum= birthlower %>% 
  rbind(birthmean) %>% 
  rbind(birthupper)
birthnum2024= birthnum %>% 
  dplyr::select(variant, iso3_alpha_code,region_subregion_country_or_area, x2024) %>% 
  drop_na() %>% 
  pivot_wider(names_from = variant, values_from = x2024) %>% 
  janitor::clean_names() %>% 
  rename(iso3=iso3_alpha_code, country=region_subregion_country_or_area, birthmean=median_pi, birthupper=upper_95_pi, birthlower=lower_95_pi)


datatable(
data = birthnum2024,
caption = "Table 4: Birth number(per 1000) in 2024",
filter = "top",
style = "bootstrap4"
)

Preterm birth rate(\(d_i\))

Check the preterm birth

pret_bir=read.csv("/Users/hec442/Desktop/harvard/calcium/data/PTBcountryEstimates - pCCFullModelP_dataType1DQC1_16000_Estimates_1.csv") %>% 
  filter(year=="2020") %>% 
  janitor::clean_names() %>% 
  dplyr:: select(iso, est_l,est,est_u) %>% 
  rename(iso3=iso,
         ptb_rate= est,
         ptb_low=est_l,
         ptb_upp=est_u) %>% 
  mutate(across(ptb_low:ptb_upp, ~.*0.001))



datatable(
data = pret_bir,
caption = "Table 5: Preterm birth rate (per live birth)",
filter = "top",
style = "bootstrap4"
) 

Probability of women experiencing preeclampsia in 2020(\(e_i\))

The preeclampsia per live birth with interval, the live birth number is taken from the IHME

preecdf=read.csv("data/IHME-GBD_2019_DATA-759d3379-1/IHME-GBD_2019_DATA-759d3379-1.csv") %>% 
  janitor::clean_names() %>% 
  filter(measure_id==6)
birth_preec=read.csv("data/IHME_GBD_2019_FERTILITY_1950_2019_LB_Y2020M08D05.CSV") %>% 
  janitor::clean_names() %>% 
  filter(year_id==2019)%>% 
  rename(birth_num=val) %>% 
  dplyr::select(location_id, location_name, age_group_id, birth_num)
preec=preecdf %>% 
  rename(country =  location_name) %>% 
  dplyr::select("country", location_id, age_id, age_name, val, upper, lower) %>%
  left_join(birth_preec, by=c("location_id"="location_id", "age_id"="age_group_id")) %>% 
  filter(age_id!=22) %>% 
  mutate(
    birth_num=as.numeric(birth_num),
    across(val:lower, ~ . / birth_num, .names = "precrate_{.col}")) %>% 
  group_by(country) %>% 
  summarize(preec_rate_mean=mean(precrate_val),
            preec_rate_upp=mean(precrate_upper),
            preec_rate_low=mean(precrate_lower)) %>% 
  dplyr::select(country, preec_rate_mean,preec_rate_upp, preec_rate_low) %>% 
  mutate(iso3 = countrycode(country, "country.name", "iso3c"))



datatable(
data = preec,
caption = "Table 6: Prevalence of preeclampsia in 2020",
filter = "top",
style = "bootstrap4"
)   

Risk Ration of Preterm birth(f) and preeclampsia(g)

The risk ration of preterm birth and preeclampsia. For each risk ratio, a agmma distribution was generated based on the mean and CI, the sample size is both 185(align with the calcium data).

Create a function to create the gamma distribution:

gammapar <- function(tgt) {
  tgt <- as.numeric(tgt)
  mn <- tgt[1]; cir <- (tgt[3]-tgt[2])
  xopt <- function(b,mn,cir) {
    cir2 <- qgamma(c(1,39)/40,mn*b,b); 
    cir2 <- cir2[2]-cir2[1]
    (cir2-cir)^2 }
  zz <- optimize(xopt,c(0.1,100000),mn=mn,cir=cir)$minimum
  c(zz*mn,zz) }

Generate the sample with LHS

lhs <- randomLHS(100000, 1)
rrpret_para= gammapar(c(0.76,0.6,0.97))
print(paste("The preterm birth RR has mean and ci:", toString(qgamma(c(20,1,39)/40,rrpret_para[1],rrpret_para[2]))))
## [1] "The preterm birth RR has mean and ci: 0.756085396542927, 0.586120989136937, 0.956121004703605"
rrpret=qgamma(lhs, rrpret_para[1],rrpret_para[2])


rrpreec_para= gammapar(c(0.45,0.31, 0.65))
print(paste("The preeclampsia RR has mean and ci:", toString(qgamma(c(20,1,39)/40, rrpreec_para[1],rrpreec_para[2]))))
## [1] "The preeclampsia RR has mean and ci: 0.44440340274776, 0.295894845160828, 0.635894902723988"
rrpreec=qgamma(lhs, rrpreec_para[1],rrpreec_para[2])

Intervention uptake (\(h_i^{full}, h_i^{partial}\))

Import the data and calculate the fraction of anc and iron again

dhs = read.csv(file="data/STATcompilerExport2023104_14201.csv") %>% 
  janitor::clean_names() %>% 
  mutate(iso3=countrycode(country, "country.name", "iso3c"),
         year = as.numeric(substr(survey,1,4))) %>% 
  filter(!is.na(apply(.[,4:8],1,sum)) & year>2009)  

uhc_n = read.csv(file = "data/uhc_coverage_index_who.csv") %>% 
  janitor::clean_names() %>% 
  filter(period=="2021") %>% 
  dplyr::select(spatial_dim_value_code, fact_value_numeric) %>% 
  rename(iso3=spatial_dim_value_code,
         uhc_value=fact_value_numeric)

Calculate the full and partial adherence

Calculate varables for:

1 full_ad 60+ days

2 partial_ad 1-59 days

3 any_ad 1+ days

4 full_if_any full as a fraction of any

dhs_uhc= dhs %>% 
  left_join(uhc_n) %>% 
  mutate(full_ad= (took_iron_0_90p+took_iron_60_89)/anc_received_iron,
         partial_ad = took_iron_1_59/anc_received_iron,
         any_ad = partial_ad+ full_ad,
         full_if_any = full_ad/any_ad)

Fit the regression

fit_any <- glm(any_ad  ~I(scale(uhc_value))+I(year-2015),data=dhs_uhc,family=gaussian("logit"))
fit_full_if_any <- glm(full_if_any~I(scale(uhc_value))+I(year-2015),data=dhs_uhc,family=gaussian("logit"))

summary(fit_any)
## 
## Call:
## glm(formula = any_ad ~ I(scale(uhc_value)) + I(year - 2015), 
##     family = gaussian("logit"), data = dhs_uhc)
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.43505    0.10817  22.511   <2e-16 ***
## I(scale(uhc_value))  0.15490    0.11413   1.357    0.178    
## I(year - 2015)      -0.04801    0.03522  -1.363    0.176    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.005653273)
## 
##     Null deviance: 0.55295  on 95  degrees of freedom
## Residual deviance: 0.52573  on 93  degrees of freedom
## AIC: -219.47
## 
## Number of Fisher Scoring iterations: 6
summary(fit_full_if_any)
## 
## Call:
## glm(formula = full_if_any ~ I(scale(uhc_value)) + I(year - 2015), 
##     family = gaussian("logit"), data = dhs_uhc)
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)          0.26625    0.10446   2.549   0.0124 *
## I(scale(uhc_value))  0.21861    0.10344   2.113   0.0372 *
## I(year - 2015)       0.05905    0.03506   1.684   0.0955 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 0.05665208)
## 
##     Null deviance: 5.6433  on 95  degrees of freedom
## Residual deviance: 5.2687  on 93  degrees of freedom
## AIC: 1.7896
## 
## Number of Fisher Scoring iterations: 4

Add predicted value of adherence with year 2022 and uhc

dhs_full = calciummerge %>% 
  dplyr::select(country, iso3) %>% 
  left_join(uhc_n) %>% 
  mutate(year=2022)

dhs_full$pred_any_ad <- predict(fit_any,newdata=dhs_full,type="response")
dhs_full$pred_full_if_any <- predict(fit_full_if_any,newdata=dhs_full,type="response")
dhs_full$pred_full <- dhs_full$pred_any_ad*dhs_full$pred_full_if_any

datatable(
data = dhs_full,
caption = "Table 7: Intervention uptake ",
filter = "top",
style = "bootstrap4"
)

Some plot

plot(dhs_full$uhc_value,dhs_full$pred_any_ad,ylim=0:1)
points(dhs_full$uhc_value,dhs_full$pred_full_if_any,col=2,pch=16)
points(dhs_full$uhc_value,dhs_full$pred_full,col=4,pch=16)

The full table

calcium=calciummerge %>% 
  left_join(anc, by=c("iso3"="code")) %>% 
  left_join(birthnum2024, by=c("iso3"="iso3")) %>% 
  dplyr::select(iso3, country, region,  tfrac_own, low_intake_own, origin_mean, origin_sd, birthlower, birthmean, birthupper,  income_group) %>% 
  left_join(pret_bir, by=c("iso3"="iso3"))  %>% 
  left_join(preec, by=c("iso3"="iso3")) %>% 
  left_join(dhs_full, by=c("iso3"="iso3")) %>% 
  dplyr::select(-country.x, -country.y, -year)

  

calcium$rrpret=0.76
calcium$rrpreec=0.45


# add the j_i
calcium$redu=0.5
calcium$redu_up=0.75
calcium$redu_low=0.25

summary(calcium)
##      iso3              region            tfrac_own        low_intake_own  
##  Length:185         Length:185         Min.   :0.000204   Min.   :0.0000  
##  Class :character   Class :character   1st Qu.:0.560989   1st Qu.:1.0000  
##  Mode  :character   Mode  :character   Median :0.824526   Median :1.0000  
##                                        Mean   :0.738262   Mean   :0.9514  
##                                        3rd Qu.:0.957759   3rd Qu.:1.0000  
##                                        Max.   :1.000000   Max.   :1.0000  
##                                                                           
##   origin_mean       origin_sd         birthlower         birthmean        
##  Min.   :0.2065   Min.   :0.002986   Length:185         Length:185        
##  1st Qu.:0.6490   1st Qu.:0.031751   Class :character   Class :character  
##  Median :0.8070   Median :0.053563   Mode  :character   Mode  :character  
##  Mean   :0.7719   Mean   :0.070588                                        
##  3rd Qu.:0.9023   3rd Qu.:0.090681                                        
##  Max.   :0.9990   Max.   :0.234572                                        
##                                                                           
##   birthupper        income_group          ptb_low            ptb_rate       
##  Length:185         Length:185         Min.   :0.001626   Min.   :0.004101  
##  Class :character   Class :character   1st Qu.:0.005295   1st Qu.:0.007159  
##  Mode  :character   Mode  :character   Median :0.005547   Median :0.008448  
##                                        Mean   :0.005943   Mean   :0.008415  
##                                        3rd Qu.:0.006721   3rd Qu.:0.009303  
##                                        Max.   :0.011764   Max.   :0.016164  
##                                        NA's   :1          NA's   :1         
##     ptb_upp         preec_rate_mean   preec_rate_upp    preec_rate_low   
##  Min.   :0.004433   Min.   :0.02313   Min.   :0.02907   Min.   :0.01799  
##  1st Qu.:0.008428   1st Qu.:0.09909   1st Qu.:0.13717   1st Qu.:0.06421  
##  Median :0.011949   Median :0.14466   Median :0.19993   Median :0.09702  
##  Mean   :0.012277   Mean   :0.15124   Mean   :0.19867   Mean   :0.10738  
##  3rd Qu.:0.014959   3rd Qu.:0.21338   3rd Qu.:0.27213   3rd Qu.:0.15232  
##  Max.   :0.039088   Max.   :0.26969   Max.   :0.33137   Max.   :0.22795  
##  NA's   :1                                                               
##    country            uhc_value      pred_any_ad     pred_full_if_any
##  Length:185         Min.   :29.40   Min.   :0.8526   Min.   :0.5484  
##  Class :character   1st Qu.:51.73   1st Qu.:0.8774   1st Qu.:0.6213  
##  Mode  :character   Median :69.45   Median :0.8945   Median :0.6756  
##                     Mean   :65.44   Mean   :0.8899   Mean   :0.6620  
##                     3rd Qu.:78.55   3rd Qu.:0.9024   3rd Qu.:0.7019  
##                     Max.   :91.04   Max.   :0.9124   Max.   :0.7358  
##                     NA's   :2       NA's   :2        NA's   :2       
##    pred_full          rrpret        rrpreec          redu        redu_up    
##  Min.   :0.4676   Min.   :0.76   Min.   :0.45   Min.   :0.5   Min.   :0.75  
##  1st Qu.:0.5451   1st Qu.:0.76   1st Qu.:0.45   1st Qu.:0.5   1st Qu.:0.75  
##  Median :0.6043   Median :0.76   Median :0.45   Median :0.5   Median :0.75  
##  Mean   :0.5899   Mean   :0.76   Mean   :0.45   Mean   :0.5   Mean   :0.75  
##  3rd Qu.:0.6334   3rd Qu.:0.76   3rd Qu.:0.45   3rd Qu.:0.5   3rd Qu.:0.75  
##  Max.   :0.6714   Max.   :0.76   Max.   :0.45   Max.   :0.5   Max.   :0.75  
##  NA's   :2                                                                  
##     redu_low   
##  Min.   :0.25  
##  1st Qu.:0.25  
##  Median :0.25  
##  Mean   :0.25  
##  3rd Qu.:0.25  
##  Max.   :0.25  
## 
datatable(
data = calcium,
caption = "Table 7: Insufficient calcium intake worldwide",
filter = "top",
style = "bootstrap4"
)

EDA

Calcium intake

The calcium intake insufficient fraction bar plot

regions  = unique(calciummerge$region)
for (region in regions) {
  region_data = calciummerge %>%
    filter(region == !!region)
  
  p <- ggplot(region_data, aes(x = reorder(country, -tfrac_own), y = tfrac_own, fill = income_group)) +
    geom_bar(stat = "identity", alpha = 0.8) +
    geom_hline(yintercept = 0.25, linetype = "dashed", color = "red") +
    coord_flip() +
    xlab('Country') +
    ylab('Fraction') +
    ggtitle(paste('Insufficient Fraction in', region)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
  
  print(p)
}

Worldwide calcium intake insufficient fraction map

world= read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
calcium_plot=world %>% 
  left_join(calciummerge, by = c("CODE" = "iso3"))

plot_ly(calcium_plot, type="choropleth", locations = calcium_plot$CODE, z=calcium_plot$tfrac_own, text=calcium_plot$COUNTRY, colorscale="Red") %>% 
  colorbar(title=list(text="Insufficent calcium intake fraction",font = list(size = 12)),
           tickfont = list(size = 8),
          len=0.5, x=1, y=0.85, thickness=10) %>% 
  layout(title =list(text= "Total calcium insufficent intake by Country", y=0.9),
         annotations = list(
      list(
        text = "Global Dietary Database  Results",
        x = 1,
        y= 0.15,
        xref = "paper",
        yref = "paper",
        showarrow = FALSE,
        font = list(size = 10)
      )))

The worldwide preterm birth rate map view

preterm_plot=world %>% 
  left_join(pret_bir, by = c("CODE" = "iso3"))

plot_ly(preterm_plot, type="choropleth", locations = preterm_plot$CODE, z=preterm_plot$ptb_rate, text=preterm_plot$COUNTRY, colorscale="Red") %>% 
  colorbar(title=list(text="Preterm Birth Rate",font = list(size = 12)),
           tickfont = list(size = 8),
          len=0.5, x=1, y=0.85, thickness=10) %>% 
  layout(title =list(text= "Worldwide preterm birth rate", y=0.9),
          annotations = list(
      list(
        text = "Global, regional, and national estimates of levels of preterm birth in 2014",
        x = 1,
        y= 0.1,
        xref = "paper",
        yref = "paper",
        showarrow = FALSE,
        font = list(size = 10)
      )))

preterm_plot = world %>% left_join( pret_bir %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”) )

ggplot(preterm_plot) + geom_sf(aes(fill = ptb_rate)) + scale_fill_gradient(low = “white”, high = “red”, na.value = “grey50”) + theme_minimal() + labs(fill = “Preterm Birth Rate”, title = “Worldwide preterm birth rate”, caption = “Global, regional, and national estimates of levels of preterm birth in 2014”)

The worldwide preeclampsia probability map view

preec_plot=world %>% 
  left_join(preec, by = c("CODE" = "iso3"))

plot_ly(preec_plot, type="choropleth", locations = preec_plot$CODE, z=preec_plot$preec_rate_mean, text=preec_plot$COUNTRY, colorscale="Red") %>% 
  colorbar(title=list(text="Total preeclampsia",font = list(size = 12)),
           tickfont = list(size = 8),
          len=0.5, x=1, y=0.85, thickness=10) %>% 
  layout(title =list(text= "Preeclampsia Probability of Pregnant Women by Country", y=0.9),
          annotations = list(
      list(
        text = "IHME",
        x = 1,
        y= 0.1,
        xref = "paper",
        yref = "paper",
        showarrow = FALSE,
        font = list(size = 10)
      )))

preec_plot <- world %>% left_join( preec %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”))

ggplot(preec_plot) + geom_sf(aes(fill = preec_rate_mean)) + scale_fill_gradient(low = “white”, high = “red”) + theme_minimal() + labs(fill = “Total preeclampsia”, title = “Total preeclampsia by Country”, caption = “Global Burden of Disease Study 2019 (GBD 2019) Results”)

Immediate health calculation

For each country, the number of cases of preterm birth averted (\(k_i\))

\[k_i= a_i*b_i*c_i*d_i*(1-f)*(h_i^{full}+h_i^{partial}*j)\]

imm_pret=calcium %>% 
  dplyr::select(iso3, country, region, low_intake_own, origin_mean, birthmean, ptb_rate, rrpret, redu, pred_full, pred_any_ad,pred_any_ad) %>% 
  mutate(
         birthmean=as.numeric(birthmean),
        health_pret=low_intake_own * origin_mean * birthmean * ptb_rate* (1-rrpret)*(pred_full + 0*redu)
  ) %>% 
  dplyr::select(iso3, country, region,health_pret, origin_mean, low_intake_own,  birthmean, ptb_rate, rrpret, pred_full, pred_any_ad,pred_any_ad, redu)

datatable(
data = imm_pret,
caption = "Table 8: Immediate health benefit of preterm birth",
filter = "top",
style = "bootstrap4"
)
immpret_plot=world %>% 
  left_join(imm_pret, by = c("CODE" = "iso3"))

plot_ly(immpret_plot, type="choropleth", locations = immpret_plot$CODE, z=immpret_plot$health_pret, text=immpret_plot$COUNTRY, colorscale="Red") %>% 
  colorbar(title=list(text="Immediate health benefit",font = list(size = 12)),
           tickfont = list(size = 8),
          len=0.5, x=1, y=0.85, thickness=10) %>% 
  layout(title =list(text= "Immediate Health Benefit for Preterm Birth", y=0.9),
          annotations = list(
      list(
        text = "Global, regional, and national estimates of levels of preterm birth in 2014",
        x = 1,
        y= 0.1,
        xref = "paper",
        yref = "paper",
        showarrow = FALSE,
        font = list(size = 10)
      )))

immpret_plot = world %>% left_join( imm_pret %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”))

ggplot(immpret_plot) + geom_sf(aes(fill = health_pret)) + scale_fill_gradient(low = “white”, high = “red”) + theme_minimal() + labs(fill = “Immediate health benefit”, title = “Immediate Health Benefit for Preterm Birth”, caption = “Global, regional, and national estimates of levels of preterm birth in 2014”)

For each country, the number of cases of preeclampsia averted (\(l_i\)):

\[l_i= a_i*b_i*c_i*e_i*(1-g)*(h_i^{full}+h_i^{partial}*j)\]

imm_prec=calcium %>% 
  dplyr::select(iso3, country, region, low_intake_own, origin_mean, birthmean,preec_rate_mean,  rrpreec, redu, pred_full, pred_any_ad,pred_any_ad) %>% 
  mutate(
         birthmean=as.numeric(birthmean),
        health_prec=low_intake_own * origin_mean * birthmean * preec_rate_mean* (1-rrpreec)*(pred_full + 0*redu)
  ) %>% 
  dplyr::select(iso3, country, region,health_prec, low_intake_own, origin_mean, birthmean,preec_rate_mean,  rrpreec, pred_full, pred_any_ad,pred_any_ad, redu)

datatable(
data = imm_prec,
caption = "Table 9: Immediate health benefit for preeclampsia",
filter = "top",
style = "bootstrap4"
)
immprec_plot=world %>% 
  left_join(imm_prec, by = c("CODE" = "iso3"))

plot_ly(immprec_plot, type="choropleth", locations = immprec_plot$CODE, z=immprec_plot$health_prec, text=immprec_plot$COUNTRY, colorscale="Red") %>% 
  colorbar(title=list(text="Immediate health benefit",font = list(size = 12)),
           tickfont = list(size = 8),
          len=0.5, x=1, y=0.85, thickness=10) %>% 
  layout(title =list(text= "Immediate Health Benefit for Preeclampsia", y=0.9),
          annotations = list(
      list(
        text = "IHME",
        x = 1,
        y= 0.1,
        xref = "paper",
        yref = "paper",
        showarrow = FALSE,
        font = list(size = 10)
      )))

immprec_plot = world %>% left_join( imm_prec %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”))

ggplot(immprec_plot) + geom_sf(aes(fill = health_prec)) + scale_fill_gradient(low = “white”, high = “red”) + theme_minimal() + labs(fill = “Immediate health benefit”, title = “Immediate Health Benefit for Preeclampsia”, caption = “IHME”)

 pret_bar= calcium %>% 
   dplyr:: select(country, ptb_rate, region, income_group)
  
  preec_bar= calcium %>% 
    dplyr::select(country, preec_rate_mean, region, income_group)
    
calcium_plot = calcium_plot %>% 
  rename(rate=tfrac_own)

preterm_plot = preterm_plot %>% 
  rename(rate=ptb_rate)

preec_plot = preec_plot %>% 
  rename(rate= preec_rate_mean)

Save all the data for the dashboard: save(country, calciummerge, pop_data, anc, birthnum2024, pret_bar, pret_bir, preec, preec_bar, dhs_full, calcium,imm_prec, imm_pret, calcium_plot, preterm_plot, preec_plot, immprec_plot, immpret_plot, file=“dashboard/immediate.RData”)

Supplementary

Country with missing data

country_left=anti_join(country, calciumNew, by=c("code"="iso3")) 

datatable(
data = country_left,
caption = "Table S1: Birth rate in 2021",
filter = "top",
style = "bootstrap4"
)

The look up table for the iso3 code and country name

unique_country_codes <- country %>% 
  arrange(economy) %>% 
  distinct(code, .keep_all = TRUE) %>% 
  select(code, economy)

unique_preec_country <- preec %>% 
  arrange(country) %>% 
  distinct(country, .keep_all = TRUE) %>% 
  select(country)
  

unique_preec_country$iso3 = countrycode(unique_preec_country$country, "country.name", "iso3c")
country_lookup=unique_country_codes %>% 
  full_join(unique_preec_country, by= c("code" = "iso3"))

datatable(
data = country_lookup,
caption = "Table S2: Country name look up table",
filter = "top",
style = "bootstrap4"
)

The birth rate data

birthrate=read_excel("data/UN_PPP2022_Input_TFR.xlsx", sheet=3, skip = 14)
colnames(birthrate) <- birthrate[1,]


birthrate <- birthrate[-1, ] %>% 
  janitor::clean_names()
birthrate2024= birthrate %>% 
  select(iso3_alpha_code,region_subregion_country_or_area, x2024) %>% 
  drop_na() %>% 
  rename(iso3=iso3_alpha_code, country=region_subregion_country_or_area, rate_2024=x2024) %>% 
  left_join(pop_data, by=c("iso3"="iso3_alpha_code" )) %>% 
  group_by(country, iso3) %>% 
  mutate(total = sum(proportion))

datatable(
data = birthrate2024,
caption = "Table S3: Birth rate in 2024",
filter = "top",
style = "bootstrap4"
)

The new ANC4 data–also updated until the 2021, 15 NA, 162 total

ancnew=read_excel("data/Data 2023-08-07 11-17.xlsx", sheet=2)

latest_recordnew <- ancnew %>%
  janitor::clean_names() %>% 
  group_by(country) %>% 
  arrange(desc(year)) %>% 
  slice_head(n = 5) %>% 
  summarise(
    mean_anc = mean(value_numeric, na.rm = TRUE),
    sd_anc = sd(value_numeric, na.rm = TRUE),
    lower_anc = mean_anc - sd_anc, 
    upper_anc = mean_anc + sd_anc 
  ) %>%
  ungroup()
  

datatable(
data = latest_recordnew,
caption = "Table S4: Latest ANC New record",
filter = "top",
style = "bootstrap4"
)

Still birth(just import)

#stillb = extract_tables("/Users/cuihening/Desktop/harvard/nutrition/data/stillbirth.pdf", pages = c(38, 39,40,41))

dhs other

path=“/Users/hec442/Desktop/harvard/calcium/data/STATcompilerExport2023822_172149.xlsx” dhs=read_excel(path, range = cell_cols(“A:L”), skip = 3) colnames(dhs) <- c(“country”, “year”, “anc0”, “anc1”, “anc23”,“anc4”, “anc8”, “iron”, “no_iron”, “iron60”, “iron60_89”, “iron90”)

dhs <- dhs[-c(1,2,3), ] %>% janitor::clean_names() %>% mutate(year=as.numeric(substr(year,1,4)), across(c(anc4, anc0, anc1, anc23, anc8, no_iron,iron60,iron60_89,iron90), as.numeric))%>% group_by(country) %>% arrange(year) %>% slice_tail(n = 1) %>% filter(year >=2010) %>% mutate(anc4frac=anc4/(anc0 + anc1 + anc23 + anc4), iron60frac=(iron90+iron60_89)/(no_iron+iron60+iron60_89+iron90), ironle60=iron60/(no_iron+iron60+iron60_89+iron90), full_ad=iron60frac/anc4frac, partial_ad=ironle60/anc4frac) %>% drop_na() %>% mutate(iso3=countrycode(country, “country.name”, “iso3c”))

Calculate the regional and world mean sd

dhs_re=dhs %>% left_join(country, by=c(“iso3”=“code”)) %>% group_by(region) %>% summarize(meanfullad=mean(full_ad), meanpartialad=mean(partial_ad), sdfullad=sd(full_ad), sdpartialad=sd(partial_ad))

global_mean_fullad <- mean(dhs\(full_ad, na.rm = TRUE) global_mean_partialad <- mean(dhs\)partial_ad, na.rm = TRUE) global_sdfullad <- sd(dhs\(full_ad, na.rm = TRUE) global_sdpartialad <- sd(dhs\)partial_ad, na.rm = TRUE)

Generate the beta distribution for each region

\[if\ var <\mu(1-\mu), \alpha=\mu(\frac{\mu(1-\mu)}{var}-1), \beta=\alpha\frac{1-\mu}{\mu}\]

var_full = global_sdfullad^2 var_partial = global_sdpartialad^2

check var< u*(1-u)

if (var_full < global_mean_fullad * (1 - global_mean_fullad)) { print(“full anc condition complete”) } else { print(“full anc condition not complete”) }

if (var_partial < global_mean_partialad * (1 - global_mean_partialad)) { print(“partial anc condition complete”) } else { print(“partial anc condition not complete”) }

both complete the condition, calculate based on the formula alpha_full <- ((1 - global_mean_fullad) / var_full - 1 / global_mean_fullad) * global_mean_fullad^2 beta_full <- alpha_full * ((1 / global_mean_fullad) - 1)

dhs_full_dis=qbeta(lhs, alpha_full, beta_full)

alpha_partial <- ((1 - global_mean_partialad) / var_partial - 1 / global_mean_partialad) * global_mean_partialad^2 beta_partial <- alpha_partial * ((1 / global_mean_partialad) - 1)

dhs_partial_dis=qbeta(lhs, alpha_partial, beta_partial)

Assign the h to each country Take the global mean for each country

dhs_full = dhs %>% dplyr::select(iso3,full_ad,partial_ad)

dhs_full = dhs_full %>% right_join(calciummerge)

dhs_full\(full_ad = global_mean_fullad dhs_full\)partial_ad=global_mean_partialad

datatable( data = dhs_full, caption = “Table 7: Intervention uptake”, filter = “top”, style = “bootstrap4” )