This document contains the dataset needed and calculation for the calcium intake and immediate health benefit. It contains the number of averted cases of preeclampsia and preterm birth
Library and settings
library(tidyverse)
library(readxl)
library(nutriR)
library(ggplot2)
#library(tabulizer)
library(rnaturalearth)
library(sf)
library(DT)
library(lhs)
library(stringr)
library(countrycode)
library(plotly)
country=read_excel("data/CLASS.xlsx", sheet=1) %>% janitor::clean_names()
datatable(
data = country,
caption = "Table 1: country code",
filter = "top",
style = "bootstrap4"
)
Calculate the calcium intake gamma distribution shape for each region
calcium_distr=get_dists(nutrients="Calcium" , sexes="F", ages=15:50 ) %>%
drop_na() %>%
left_join(country, by=c("iso3"="code")) %>%
group_by(region, age_group) %>%
summarize(mean_shape=mean(g_shape))
calcium_world=calcium_distr %>%
group_by(age_group) %>%
summarize(means=mean(mean_shape))
Calculate the proportion of people take insufficient calcuim
calciumNew=read.csv(file="data/v36_cnty.csv") %>%
filter(age>=17.5 & age<=47.5 & female=="1" & urban=="999" & edu=="999" & year=="2018") %>%
rowwise() %>%
left_join(country, by=c("iso3"="code")) %>%
mutate(
age_group = case_when(
age == 17.5 ~ "15-19",
age == 22.5 ~ "20-24",
age == 27.5 ~ "25-29",
age == 32.5 ~ "30-34",
age == 37.5 ~ "35-39",
age == 42.5 ~ "40-44",
age == 47.5 ~ "45-49",
TRUE ~ as.character(age)
)
) %>%
left_join(calcium_distr, by=c("region"="region", "age_group"="age_group") ) %>%
mutate(
mean_shape = case_when(
is.na(mean_shape) & age_group == "15-19" ~ calcium_world$means[calcium_world$age_group == "15-19"],
is.na(mean_shape) & age_group == "20-24" ~ calcium_world$means[calcium_world$age_group == "20-24"],
is.na(mean_shape) & age_group == "25-29" ~ calcium_world$means[calcium_world$age_group == "25-29"],
is.na(mean_shape) & age_group == "30-34" ~ calcium_world$means[calcium_world$age_group == "30-34"],
is.na(mean_shape) & age_group == "35-39" ~ calcium_world$means[calcium_world$age_group == "35-39"],
is.na(mean_shape) & age_group == "40-44" ~ calcium_world$means[calcium_world$age_group == "40-44"],
is.na(mean_shape) & age_group == "45-49" ~ calcium_world$means[calcium_world$age_group == "45-49"],
TRUE ~ mean_shape
),
grate=mean_shape/median
) %>%
rowwise() %>%
mutate(pro_low=pgamma(800, shape = mean_shape, rate = grate)
)
calciumNew$sev_result = NA # Create a new column to store the SEV results
for (i in seq_len(nrow(calciumNew))) {
calciumNew$sev_result[i] = sev(
ear=800,
cv=0.1,
shape=calciumNew$mean_shape[i],
rate=calciumNew$grate[i]
)
}
datatable(
data = calciumNew,
caption = "Table 2-1: Calcium data",
filter = "top",
style = "bootstrap4"
)
The age population distribution
file_path = "data/WPP2022_POP_F02_3_POPULATION_5-YEAR_AGE_GROUPS_FEMALE.xlsx"
pop = read_excel(file_path, skip=15)
colnames(pop) = pop[1,]
pop_data = pop[-1, ] %>%
janitor::clean_names() %>%
filter(year=="2021") %>%
rename_with(~ gsub("x(\\d+)_", "\\1-", .), starts_with("x")) %>%
dplyr::select(iso3_alpha_code, "0-4":x100) %>%
mutate(across("0-4":x100, as.numeric)) %>%
drop_na() %>%
rowwise() %>%
mutate(total = sum(c_across("15-19":"45-49")),
across("15-19":"45-49", ~ . / total, .names = "proportion_{.col}")) %>%
dplyr:: select(iso3_alpha_code, starts_with("proportion_")) %>%
pivot_longer(cols = starts_with("proportion_"), names_to = "age_group", values_to = "proportion", names_prefix = "proportion_")
datatable(
data = pop_data,
caption = "Table 2-2: Population distribution",
filter = "top",
style = "bootstrap4"
)
Joint the calcium with the population data to calculate
calciummerge=calciumNew %>%
left_join(pop_data, by=c("iso3"="iso3_alpha_code", "age_group"="age_group")) %>%
mutate(frac_sev=sev_result*proportion*0.01,
frac_own=pro_low*proportion,) %>%
group_by(iso3, economy) %>%
summarize(tfrac_sev=sum(frac_sev),
tfrac_own=sum(frac_own)) %>%
mutate(
low_intake_sev= case_when(
tfrac_sev>= 0.25 ~ 1,
TRUE ~ 0
),
low_intake_own= case_when(
tfrac_own>= 0.25 ~ 1,
TRUE ~ 0
)) %>%
left_join(country, by=c("iso3"="code")) %>%
dplyr::select(-economy.x) %>%
rename(country=economy.y)
datatable(
data = calciummerge,
caption = "Table 2-3: Insufficient calcium intake",
filter = "top",
style = "bootstrap4"
)
Use the lastest UNICEF data –up to 2021, 24 NA, 192 total. The coverage year is from 2015 to 2020.
Anc calculation
For Mean
–The country with record –Take its own mean –done
–The country without record – Take the regional mean—done
For Sd
–The country with >= 3 records – take its own sd –done
–The country with < 3 records – take the mean of all the country sd—done
–The country without recored – Take the regional sd – done
path="data/fusion_GLOBAL_DATAFLOW_UNICEF_1.0_.MNCH_ANC4...csv"
ancf=read.csv(file=path) %>%
janitor::clean_names() %>%
group_by(ref_area_geographic_area) %>%
arrange(desc(time_period_time_period)) %>%
slice_head(n = 5) %>%
filter(time_period_time_period>=2015) %>%
mutate(obs_value_observation_value = obs_value_observation_value*0.01) %>%
separate(ref_area_geographic_area, into = c("code", "country"), sep = ": ") %>%
mutate(code = str_trim(code),
country = str_trim(country))
anc_region=
ancf%>%
left_join(country) %>%
group_by(region) %>%
summarise(
mean_region = mean(obs_value_observation_value, na.rm = TRUE),
sd_region =sd(obs_value_observation_value, na.rm=TRUE)
)
avg_sd = ancf %>%
group_by(country, code) %>%
filter(n() >= 3) %>%
summarise(sd = sd(obs_value_observation_value, na.rm = TRUE)) %>%
pull(sd) %>%
mean(na.rm = TRUE)
anc_summary = ancf %>%
group_by(country, code) %>%
summarise(
origin_mean = mean(obs_value_observation_value, na.rm = TRUE),
origin_sd = if_else(n() < 3, avg_sd, sd(obs_value_observation_value, na.rm = TRUE))
)
global_mean_anc <- mean(anc_region$mean_region, na.rm = TRUE)
global_sd_anc <- sd(anc_region$mean_region, na.rm = TRUE)
new_row_anc <- data.frame(
region = "North America",
mean_region = global_mean_anc,
sd_region = global_sd_anc
)
anc_region<- rbind(anc_region, new_row_anc)
anc=anc_summary %>%
right_join(calciummerge, by=c("code"="iso3")) %>%
left_join(anc_region, by=c("region"="region")) %>%
mutate(
origin_mean = ifelse(is.na(origin_mean), mean_region,origin_mean),
origin_sd = ifelse(is.na(origin_sd), sd_region,origin_sd)) %>%
ungroup () %>%
dplyr::select( code, country.y, origin_mean, origin_sd) %>%
rename(country=country.y)
datatable(
data = anc,
caption = "Table 3: ANC",
filter = "top",
style = "bootstrap4"
)
plot to check the distribution
plot(density(anc$origin_mean))
The number of live birth(in 1000) of 2024 with interval
birthlower=read_excel("data/UN_PPP2022_Output_Births.xlsx",sheet=1, skip = 14)
birthmean=read_excel("data/UN_PPP2022_Output_Births.xlsx",sheet=3, skip = 14)
birthupper=read_excel("data/UN_PPP2022_Output_Births.xlsx",sheet=5, skip = 14)
colnames(birthlower) <- birthlower[1,]
colnames(birthmean) <- birthmean[1,]
colnames(birthupper) <- birthupper[1,]
birthlower <- birthlower[-1, ] %>%
janitor::clean_names()
birthmean <- birthmean[-1, ] %>%
janitor::clean_names()
birthupper <- birthupper[-1, ] %>%
janitor::clean_names()
birthnum= birthlower %>%
rbind(birthmean) %>%
rbind(birthupper)
birthnum2024= birthnum %>%
dplyr::select(variant, iso3_alpha_code,region_subregion_country_or_area, x2024) %>%
drop_na() %>%
pivot_wider(names_from = variant, values_from = x2024) %>%
janitor::clean_names() %>%
rename(iso3=iso3_alpha_code, country=region_subregion_country_or_area, birthmean=median_pi, birthupper=upper_95_pi, birthlower=lower_95_pi)
datatable(
data = birthnum2024,
caption = "Table 4: Birth number(per 1000) in 2024",
filter = "top",
style = "bootstrap4"
)
Check the preterm birth
pret_bir=read.csv("/Users/hec442/Desktop/harvard/calcium/data/PTBcountryEstimates - pCCFullModelP_dataType1DQC1_16000_Estimates_1.csv") %>%
filter(year=="2020") %>%
janitor::clean_names() %>%
dplyr:: select(iso, est_l,est,est_u) %>%
rename(iso3=iso,
ptb_rate= est,
ptb_low=est_l,
ptb_upp=est_u) %>%
mutate(across(ptb_low:ptb_upp, ~.*0.001))
datatable(
data = pret_bir,
caption = "Table 5: Preterm birth rate (per live birth)",
filter = "top",
style = "bootstrap4"
)
The preeclampsia per live birth with interval, the live birth number is taken from the IHME
preecdf=read.csv("data/IHME-GBD_2019_DATA-759d3379-1/IHME-GBD_2019_DATA-759d3379-1.csv") %>%
janitor::clean_names() %>%
filter(measure_id==6)
birth_preec=read.csv("data/IHME_GBD_2019_FERTILITY_1950_2019_LB_Y2020M08D05.CSV") %>%
janitor::clean_names() %>%
filter(year_id==2019)%>%
rename(birth_num=val) %>%
dplyr::select(location_id, location_name, age_group_id, birth_num)
preec=preecdf %>%
rename(country = location_name) %>%
dplyr::select("country", location_id, age_id, age_name, val, upper, lower) %>%
left_join(birth_preec, by=c("location_id"="location_id", "age_id"="age_group_id")) %>%
filter(age_id!=22) %>%
mutate(
birth_num=as.numeric(birth_num),
across(val:lower, ~ . / birth_num, .names = "precrate_{.col}")) %>%
group_by(country) %>%
summarize(preec_rate_mean=mean(precrate_val),
preec_rate_upp=mean(precrate_upper),
preec_rate_low=mean(precrate_lower)) %>%
dplyr::select(country, preec_rate_mean,preec_rate_upp, preec_rate_low) %>%
mutate(iso3 = countrycode(country, "country.name", "iso3c"))
datatable(
data = preec,
caption = "Table 6: Prevalence of preeclampsia in 2020",
filter = "top",
style = "bootstrap4"
)
The risk ration of preterm birth and preeclampsia. For each risk ratio, a agmma distribution was generated based on the mean and CI, the sample size is both 185(align with the calcium data).
Create a function to create the gamma distribution:
gammapar <- function(tgt) {
tgt <- as.numeric(tgt)
mn <- tgt[1]; cir <- (tgt[3]-tgt[2])
xopt <- function(b,mn,cir) {
cir2 <- qgamma(c(1,39)/40,mn*b,b);
cir2 <- cir2[2]-cir2[1]
(cir2-cir)^2 }
zz <- optimize(xopt,c(0.1,100000),mn=mn,cir=cir)$minimum
c(zz*mn,zz) }
Generate the sample with LHS
lhs <- randomLHS(100000, 1)
rrpret_para= gammapar(c(0.76,0.6,0.97))
print(paste("The preterm birth RR has mean and ci:", toString(qgamma(c(20,1,39)/40,rrpret_para[1],rrpret_para[2]))))
## [1] "The preterm birth RR has mean and ci: 0.756085396542927, 0.586120989136937, 0.956121004703605"
rrpret=qgamma(lhs, rrpret_para[1],rrpret_para[2])
rrpreec_para= gammapar(c(0.45,0.31, 0.65))
print(paste("The preeclampsia RR has mean and ci:", toString(qgamma(c(20,1,39)/40, rrpreec_para[1],rrpreec_para[2]))))
## [1] "The preeclampsia RR has mean and ci: 0.44440340274776, 0.295894845160828, 0.635894902723988"
rrpreec=qgamma(lhs, rrpreec_para[1],rrpreec_para[2])
Import the data and calculate the fraction of anc and iron again
dhs = read.csv(file="data/STATcompilerExport2023104_14201.csv") %>%
janitor::clean_names() %>%
mutate(iso3=countrycode(country, "country.name", "iso3c"),
year = as.numeric(substr(survey,1,4))) %>%
filter(!is.na(apply(.[,4:8],1,sum)) & year>2009)
uhc_n = read.csv(file = "data/uhc_coverage_index_who.csv") %>%
janitor::clean_names() %>%
filter(period=="2021") %>%
dplyr::select(spatial_dim_value_code, fact_value_numeric) %>%
rename(iso3=spatial_dim_value_code,
uhc_value=fact_value_numeric)
Calculate the full and partial adherence
Calculate varables for:
1 full_ad 60+ days
2 partial_ad 1-59 days
3 any_ad 1+ days
4 full_if_any full as a fraction of any
dhs_uhc= dhs %>%
left_join(uhc_n) %>%
mutate(full_ad= (took_iron_0_90p+took_iron_60_89)/anc_received_iron,
partial_ad = took_iron_1_59/anc_received_iron,
any_ad = partial_ad+ full_ad,
full_if_any = full_ad/any_ad)
Fit the regression
fit_any <- glm(any_ad ~I(scale(uhc_value))+I(year-2015),data=dhs_uhc,family=gaussian("logit"))
fit_full_if_any <- glm(full_if_any~I(scale(uhc_value))+I(year-2015),data=dhs_uhc,family=gaussian("logit"))
summary(fit_any)
##
## Call:
## glm(formula = any_ad ~ I(scale(uhc_value)) + I(year - 2015),
## family = gaussian("logit"), data = dhs_uhc)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.43505 0.10817 22.511 <2e-16 ***
## I(scale(uhc_value)) 0.15490 0.11413 1.357 0.178
## I(year - 2015) -0.04801 0.03522 -1.363 0.176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.005653273)
##
## Null deviance: 0.55295 on 95 degrees of freedom
## Residual deviance: 0.52573 on 93 degrees of freedom
## AIC: -219.47
##
## Number of Fisher Scoring iterations: 6
summary(fit_full_if_any)
##
## Call:
## glm(formula = full_if_any ~ I(scale(uhc_value)) + I(year - 2015),
## family = gaussian("logit"), data = dhs_uhc)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.26625 0.10446 2.549 0.0124 *
## I(scale(uhc_value)) 0.21861 0.10344 2.113 0.0372 *
## I(year - 2015) 0.05905 0.03506 1.684 0.0955 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.05665208)
##
## Null deviance: 5.6433 on 95 degrees of freedom
## Residual deviance: 5.2687 on 93 degrees of freedom
## AIC: 1.7896
##
## Number of Fisher Scoring iterations: 4
Add predicted value of adherence with year 2022 and uhc
dhs_full = calciummerge %>%
dplyr::select(country, iso3) %>%
left_join(uhc_n) %>%
mutate(year=2022)
dhs_full$pred_any_ad <- predict(fit_any,newdata=dhs_full,type="response")
dhs_full$pred_full_if_any <- predict(fit_full_if_any,newdata=dhs_full,type="response")
dhs_full$pred_full <- dhs_full$pred_any_ad*dhs_full$pred_full_if_any
datatable(
data = dhs_full,
caption = "Table 7: Intervention uptake ",
filter = "top",
style = "bootstrap4"
)
Some plot
plot(dhs_full$uhc_value,dhs_full$pred_any_ad,ylim=0:1)
points(dhs_full$uhc_value,dhs_full$pred_full_if_any,col=2,pch=16)
points(dhs_full$uhc_value,dhs_full$pred_full,col=4,pch=16)
calcium=calciummerge %>%
left_join(anc, by=c("iso3"="code")) %>%
left_join(birthnum2024, by=c("iso3"="iso3")) %>%
dplyr::select(iso3, country, region, tfrac_own, low_intake_own, origin_mean, origin_sd, birthlower, birthmean, birthupper, income_group) %>%
left_join(pret_bir, by=c("iso3"="iso3")) %>%
left_join(preec, by=c("iso3"="iso3")) %>%
left_join(dhs_full, by=c("iso3"="iso3")) %>%
dplyr::select(-country.x, -country.y, -year)
calcium$rrpret=0.76
calcium$rrpreec=0.45
# add the j_i
calcium$redu=0.5
calcium$redu_up=0.75
calcium$redu_low=0.25
summary(calcium)
## iso3 region tfrac_own low_intake_own
## Length:185 Length:185 Min. :0.000204 Min. :0.0000
## Class :character Class :character 1st Qu.:0.560989 1st Qu.:1.0000
## Mode :character Mode :character Median :0.824526 Median :1.0000
## Mean :0.738262 Mean :0.9514
## 3rd Qu.:0.957759 3rd Qu.:1.0000
## Max. :1.000000 Max. :1.0000
##
## origin_mean origin_sd birthlower birthmean
## Min. :0.2065 Min. :0.002986 Length:185 Length:185
## 1st Qu.:0.6490 1st Qu.:0.031751 Class :character Class :character
## Median :0.8070 Median :0.053563 Mode :character Mode :character
## Mean :0.7719 Mean :0.070588
## 3rd Qu.:0.9023 3rd Qu.:0.090681
## Max. :0.9990 Max. :0.234572
##
## birthupper income_group ptb_low ptb_rate
## Length:185 Length:185 Min. :0.001626 Min. :0.004101
## Class :character Class :character 1st Qu.:0.005295 1st Qu.:0.007159
## Mode :character Mode :character Median :0.005547 Median :0.008448
## Mean :0.005943 Mean :0.008415
## 3rd Qu.:0.006721 3rd Qu.:0.009303
## Max. :0.011764 Max. :0.016164
## NA's :1 NA's :1
## ptb_upp preec_rate_mean preec_rate_upp preec_rate_low
## Min. :0.004433 Min. :0.02313 Min. :0.02907 Min. :0.01799
## 1st Qu.:0.008428 1st Qu.:0.09909 1st Qu.:0.13717 1st Qu.:0.06421
## Median :0.011949 Median :0.14466 Median :0.19993 Median :0.09702
## Mean :0.012277 Mean :0.15124 Mean :0.19867 Mean :0.10738
## 3rd Qu.:0.014959 3rd Qu.:0.21338 3rd Qu.:0.27213 3rd Qu.:0.15232
## Max. :0.039088 Max. :0.26969 Max. :0.33137 Max. :0.22795
## NA's :1
## country uhc_value pred_any_ad pred_full_if_any
## Length:185 Min. :29.40 Min. :0.8526 Min. :0.5484
## Class :character 1st Qu.:51.73 1st Qu.:0.8774 1st Qu.:0.6213
## Mode :character Median :69.45 Median :0.8945 Median :0.6756
## Mean :65.44 Mean :0.8899 Mean :0.6620
## 3rd Qu.:78.55 3rd Qu.:0.9024 3rd Qu.:0.7019
## Max. :91.04 Max. :0.9124 Max. :0.7358
## NA's :2 NA's :2 NA's :2
## pred_full rrpret rrpreec redu redu_up
## Min. :0.4676 Min. :0.76 Min. :0.45 Min. :0.5 Min. :0.75
## 1st Qu.:0.5451 1st Qu.:0.76 1st Qu.:0.45 1st Qu.:0.5 1st Qu.:0.75
## Median :0.6043 Median :0.76 Median :0.45 Median :0.5 Median :0.75
## Mean :0.5899 Mean :0.76 Mean :0.45 Mean :0.5 Mean :0.75
## 3rd Qu.:0.6334 3rd Qu.:0.76 3rd Qu.:0.45 3rd Qu.:0.5 3rd Qu.:0.75
## Max. :0.6714 Max. :0.76 Max. :0.45 Max. :0.5 Max. :0.75
## NA's :2
## redu_low
## Min. :0.25
## 1st Qu.:0.25
## Median :0.25
## Mean :0.25
## 3rd Qu.:0.25
## Max. :0.25
##
datatable(
data = calcium,
caption = "Table 7: Insufficient calcium intake worldwide",
filter = "top",
style = "bootstrap4"
)
regions = unique(calciummerge$region)
for (region in regions) {
region_data = calciummerge %>%
filter(region == !!region)
p <- ggplot(region_data, aes(x = reorder(country, -tfrac_own), y = tfrac_own, fill = income_group)) +
geom_bar(stat = "identity", alpha = 0.8) +
geom_hline(yintercept = 0.25, linetype = "dashed", color = "red") +
coord_flip() +
xlab('Country') +
ylab('Fraction') +
ggtitle(paste('Insufficient Fraction in', region)) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(p)
}
world= read.csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
calcium_plot=world %>%
left_join(calciummerge, by = c("CODE" = "iso3"))
plot_ly(calcium_plot, type="choropleth", locations = calcium_plot$CODE, z=calcium_plot$tfrac_own, text=calcium_plot$COUNTRY, colorscale="Red") %>%
colorbar(title=list(text="Insufficent calcium intake fraction",font = list(size = 12)),
tickfont = list(size = 8),
len=0.5, x=1, y=0.85, thickness=10) %>%
layout(title =list(text= "Total calcium insufficent intake by Country", y=0.9),
annotations = list(
list(
text = "Global Dietary Database Results",
x = 1,
y= 0.15,
xref = "paper",
yref = "paper",
showarrow = FALSE,
font = list(size = 10)
)))
preterm_plot=world %>%
left_join(pret_bir, by = c("CODE" = "iso3"))
plot_ly(preterm_plot, type="choropleth", locations = preterm_plot$CODE, z=preterm_plot$ptb_rate, text=preterm_plot$COUNTRY, colorscale="Red") %>%
colorbar(title=list(text="Preterm Birth Rate",font = list(size = 12)),
tickfont = list(size = 8),
len=0.5, x=1, y=0.85, thickness=10) %>%
layout(title =list(text= "Worldwide preterm birth rate", y=0.9),
annotations = list(
list(
text = "Global, regional, and national estimates of levels of preterm birth in 2014",
x = 1,
y= 0.1,
xref = "paper",
yref = "paper",
showarrow = FALSE,
font = list(size = 10)
)))
preterm_plot = world %>% left_join( pret_bir %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”) )
ggplot(preterm_plot) + geom_sf(aes(fill = ptb_rate)) + scale_fill_gradient(low = “white”, high = “red”, na.value = “grey50”) + theme_minimal() + labs(fill = “Preterm Birth Rate”, title = “Worldwide preterm birth rate”, caption = “Global, regional, and national estimates of levels of preterm birth in 2014”)
preec_plot=world %>%
left_join(preec, by = c("CODE" = "iso3"))
plot_ly(preec_plot, type="choropleth", locations = preec_plot$CODE, z=preec_plot$preec_rate_mean, text=preec_plot$COUNTRY, colorscale="Red") %>%
colorbar(title=list(text="Total preeclampsia",font = list(size = 12)),
tickfont = list(size = 8),
len=0.5, x=1, y=0.85, thickness=10) %>%
layout(title =list(text= "Preeclampsia Probability of Pregnant Women by Country", y=0.9),
annotations = list(
list(
text = "IHME",
x = 1,
y= 0.1,
xref = "paper",
yref = "paper",
showarrow = FALSE,
font = list(size = 10)
)))
preec_plot <- world %>% left_join( preec %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”))
ggplot(preec_plot) + geom_sf(aes(fill = preec_rate_mean)) + scale_fill_gradient(low = “white”, high = “red”) + theme_minimal() + labs(fill = “Total preeclampsia”, title = “Total preeclampsia by Country”, caption = “Global Burden of Disease Study 2019 (GBD 2019) Results”)
\[k_i= a_i*b_i*c_i*d_i*(1-f)*(h_i^{full}+h_i^{partial}*j)\]
imm_pret=calcium %>%
dplyr::select(iso3, country, region, low_intake_own, origin_mean, birthmean, ptb_rate, rrpret, redu, pred_full, pred_any_ad,pred_any_ad) %>%
mutate(
birthmean=as.numeric(birthmean),
health_pret=low_intake_own * origin_mean * birthmean * ptb_rate* (1-rrpret)*(pred_full + 0*redu)
) %>%
dplyr::select(iso3, country, region,health_pret, origin_mean, low_intake_own, birthmean, ptb_rate, rrpret, pred_full, pred_any_ad,pred_any_ad, redu)
datatable(
data = imm_pret,
caption = "Table 8: Immediate health benefit of preterm birth",
filter = "top",
style = "bootstrap4"
)
immpret_plot=world %>%
left_join(imm_pret, by = c("CODE" = "iso3"))
plot_ly(immpret_plot, type="choropleth", locations = immpret_plot$CODE, z=immpret_plot$health_pret, text=immpret_plot$COUNTRY, colorscale="Red") %>%
colorbar(title=list(text="Immediate health benefit",font = list(size = 12)),
tickfont = list(size = 8),
len=0.5, x=1, y=0.85, thickness=10) %>%
layout(title =list(text= "Immediate Health Benefit for Preterm Birth", y=0.9),
annotations = list(
list(
text = "Global, regional, and national estimates of levels of preterm birth in 2014",
x = 1,
y= 0.1,
xref = "paper",
yref = "paper",
showarrow = FALSE,
font = list(size = 10)
)))
immpret_plot = world %>% left_join( imm_pret %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”))
ggplot(immpret_plot) + geom_sf(aes(fill = health_pret)) + scale_fill_gradient(low = “white”, high = “red”) + theme_minimal() + labs(fill = “Immediate health benefit”, title = “Immediate Health Benefit for Preterm Birth”, caption = “Global, regional, and national estimates of levels of preterm birth in 2014”)
\[l_i= a_i*b_i*c_i*e_i*(1-g)*(h_i^{full}+h_i^{partial}*j)\]
imm_prec=calcium %>%
dplyr::select(iso3, country, region, low_intake_own, origin_mean, birthmean,preec_rate_mean, rrpreec, redu, pred_full, pred_any_ad,pred_any_ad) %>%
mutate(
birthmean=as.numeric(birthmean),
health_prec=low_intake_own * origin_mean * birthmean * preec_rate_mean* (1-rrpreec)*(pred_full + 0*redu)
) %>%
dplyr::select(iso3, country, region,health_prec, low_intake_own, origin_mean, birthmean,preec_rate_mean, rrpreec, pred_full, pred_any_ad,pred_any_ad, redu)
datatable(
data = imm_prec,
caption = "Table 9: Immediate health benefit for preeclampsia",
filter = "top",
style = "bootstrap4"
)
immprec_plot=world %>%
left_join(imm_prec, by = c("CODE" = "iso3"))
plot_ly(immprec_plot, type="choropleth", locations = immprec_plot$CODE, z=immprec_plot$health_prec, text=immprec_plot$COUNTRY, colorscale="Red") %>%
colorbar(title=list(text="Immediate health benefit",font = list(size = 12)),
tickfont = list(size = 8),
len=0.5, x=1, y=0.85, thickness=10) %>%
layout(title =list(text= "Immediate Health Benefit for Preeclampsia", y=0.9),
annotations = list(
list(
text = "IHME",
x = 1,
y= 0.1,
xref = "paper",
yref = "paper",
showarrow = FALSE,
font = list(size = 10)
)))
immprec_plot = world %>% left_join( imm_prec %>% mutate(iso3 = case_when( iso3 == “SSD” ~ “SDS”, iso3 == “PSE” ~ “PSX”, TRUE ~ iso3 )), by = c(“adm0_a3” = “iso3”))
ggplot(immprec_plot) + geom_sf(aes(fill = health_prec)) + scale_fill_gradient(low = “white”, high = “red”) + theme_minimal() + labs(fill = “Immediate health benefit”, title = “Immediate Health Benefit for Preeclampsia”, caption = “IHME”)
pret_bar= calcium %>%
dplyr:: select(country, ptb_rate, region, income_group)
preec_bar= calcium %>%
dplyr::select(country, preec_rate_mean, region, income_group)
calcium_plot = calcium_plot %>%
rename(rate=tfrac_own)
preterm_plot = preterm_plot %>%
rename(rate=ptb_rate)
preec_plot = preec_plot %>%
rename(rate= preec_rate_mean)
Save all the data for the dashboard: save(country, calciummerge, pop_data, anc, birthnum2024, pret_bar, pret_bir, preec, preec_bar, dhs_full, calcium,imm_prec, imm_pret, calcium_plot, preterm_plot, preec_plot, immprec_plot, immpret_plot, file=“dashboard/immediate.RData”)
country_left=anti_join(country, calciumNew, by=c("code"="iso3"))
datatable(
data = country_left,
caption = "Table S1: Birth rate in 2021",
filter = "top",
style = "bootstrap4"
)
unique_country_codes <- country %>%
arrange(economy) %>%
distinct(code, .keep_all = TRUE) %>%
select(code, economy)
unique_preec_country <- preec %>%
arrange(country) %>%
distinct(country, .keep_all = TRUE) %>%
select(country)
unique_preec_country$iso3 = countrycode(unique_preec_country$country, "country.name", "iso3c")
country_lookup=unique_country_codes %>%
full_join(unique_preec_country, by= c("code" = "iso3"))
datatable(
data = country_lookup,
caption = "Table S2: Country name look up table",
filter = "top",
style = "bootstrap4"
)
birthrate=read_excel("data/UN_PPP2022_Input_TFR.xlsx", sheet=3, skip = 14)
colnames(birthrate) <- birthrate[1,]
birthrate <- birthrate[-1, ] %>%
janitor::clean_names()
birthrate2024= birthrate %>%
select(iso3_alpha_code,region_subregion_country_or_area, x2024) %>%
drop_na() %>%
rename(iso3=iso3_alpha_code, country=region_subregion_country_or_area, rate_2024=x2024) %>%
left_join(pop_data, by=c("iso3"="iso3_alpha_code" )) %>%
group_by(country, iso3) %>%
mutate(total = sum(proportion))
datatable(
data = birthrate2024,
caption = "Table S3: Birth rate in 2024",
filter = "top",
style = "bootstrap4"
)
ancnew=read_excel("data/Data 2023-08-07 11-17.xlsx", sheet=2)
latest_recordnew <- ancnew %>%
janitor::clean_names() %>%
group_by(country) %>%
arrange(desc(year)) %>%
slice_head(n = 5) %>%
summarise(
mean_anc = mean(value_numeric, na.rm = TRUE),
sd_anc = sd(value_numeric, na.rm = TRUE),
lower_anc = mean_anc - sd_anc,
upper_anc = mean_anc + sd_anc
) %>%
ungroup()
datatable(
data = latest_recordnew,
caption = "Table S4: Latest ANC New record",
filter = "top",
style = "bootstrap4"
)
#stillb = extract_tables("/Users/cuihening/Desktop/harvard/nutrition/data/stillbirth.pdf", pages = c(38, 39,40,41))
path=“/Users/hec442/Desktop/harvard/calcium/data/STATcompilerExport2023822_172149.xlsx” dhs=read_excel(path, range = cell_cols(“A:L”), skip = 3) colnames(dhs) <- c(“country”, “year”, “anc0”, “anc1”, “anc23”,“anc4”, “anc8”, “iron”, “no_iron”, “iron60”, “iron60_89”, “iron90”)
dhs <- dhs[-c(1,2,3), ] %>% janitor::clean_names() %>% mutate(year=as.numeric(substr(year,1,4)), across(c(anc4, anc0, anc1, anc23, anc8, no_iron,iron60,iron60_89,iron90), as.numeric))%>% group_by(country) %>% arrange(year) %>% slice_tail(n = 1) %>% filter(year >=2010) %>% mutate(anc4frac=anc4/(anc0 + anc1 + anc23 + anc4), iron60frac=(iron90+iron60_89)/(no_iron+iron60+iron60_89+iron90), ironle60=iron60/(no_iron+iron60+iron60_89+iron90), full_ad=iron60frac/anc4frac, partial_ad=ironle60/anc4frac) %>% drop_na() %>% mutate(iso3=countrycode(country, “country.name”, “iso3c”))
Calculate the regional and world mean sd
dhs_re=dhs %>% left_join(country, by=c(“iso3”=“code”)) %>% group_by(region) %>% summarize(meanfullad=mean(full_ad), meanpartialad=mean(partial_ad), sdfullad=sd(full_ad), sdpartialad=sd(partial_ad))
global_mean_fullad <- mean(dhs\(full_ad, na.rm = TRUE) global_mean_partialad <- mean(dhs\)partial_ad, na.rm = TRUE) global_sdfullad <- sd(dhs\(full_ad, na.rm = TRUE) global_sdpartialad <- sd(dhs\)partial_ad, na.rm = TRUE)
Generate the beta distribution for each region
\[if\ var <\mu(1-\mu), \alpha=\mu(\frac{\mu(1-\mu)}{var}-1), \beta=\alpha\frac{1-\mu}{\mu}\]
var_full = global_sdfullad^2 var_partial = global_sdpartialad^2
check var< u*(1-u)
if (var_full < global_mean_fullad * (1 - global_mean_fullad)) { print(“full anc condition complete”) } else { print(“full anc condition not complete”) }
if (var_partial < global_mean_partialad * (1 - global_mean_partialad)) { print(“partial anc condition complete”) } else { print(“partial anc condition not complete”) }
both complete the condition, calculate based on the formula alpha_full <- ((1 - global_mean_fullad) / var_full - 1 / global_mean_fullad) * global_mean_fullad^2 beta_full <- alpha_full * ((1 / global_mean_fullad) - 1)
dhs_full_dis=qbeta(lhs, alpha_full, beta_full)
alpha_partial <- ((1 - global_mean_partialad) / var_partial - 1 / global_mean_partialad) * global_mean_partialad^2 beta_partial <- alpha_partial * ((1 / global_mean_partialad) - 1)
dhs_partial_dis=qbeta(lhs, alpha_partial, beta_partial)
Assign the h to each country Take the global mean for each country
dhs_full = dhs %>% dplyr::select(iso3,full_ad,partial_ad)
dhs_full = dhs_full %>% right_join(calciummerge)
dhs_full\(full_ad = global_mean_fullad dhs_full\)partial_ad=global_mean_partialad
datatable( data = dhs_full, caption = “Table 7: Intervention uptake”, filter = “top”, style = “bootstrap4” )