#Change ACC #Number of missing in variable “Time to get to water source (hv204)”
# Weighted count of records whose hv204 value is missing, per DHS round.
# `is.na(hv204) * wt` equals `wt` where hv204 is NA and 0 elsewhere, so the
# sum is the weighted number of missing records (NA weights are skipped).
table_na_kable_1 <-
  haiti_all_long %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    `Missing Values (NAs)` = round(
      sum(is.na(hv204) * wt, na.rm = TRUE),
      2
    )
  ) %>%
  dplyr::ungroup() %>%
  kable(
    format = "html",
    caption = "Number of Missing Values by Group: Time to get to water source (hv204)"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)

# Print the styled table.
table_na_kable_1
| DHS | Missing Values (NAs) |
|---|---|
| DHS4 | 50.73 |
| DHS5 | 150.39 |
| DHS6 | 261.23 |
| DHS7 | 9.58 |
#Change ACC #School attendance
##Check the possibility to use the variable “Grade of education at the level of education attended during current school year” (hv123)
# Build 0/1 school-attendance indicators for records aged 5-15 (hv105),
# overall and split by the sex recorded in hv219.
# NOTE(review): in the standard DHS household recode hv219 is the sex of the
# household *head* and hv104 is the sex of the member -- confirm that hv219 is
# really the intended column for the girls/boys split.
aux <-
  haiti_all_long %>%
  dplyr::mutate(
    # Scratch flag for the 5-15 age window; dropped by the select() below.
    .age_5_15 = hv105 >= 5 & hv105 <= 15,
    girls_5_15 = if_else(
      (hv219 == "Female" | hv219 == "female") & .age_5_15, 1, 0
    ),
    boys_5_15 = if_else(
      (hv219 == "Male" | hv219 == "male") & .age_5_15, 1, 0
    ),
    child_5_15 = if_else(.age_5_15, 1, 0),
    # Recode hv121: 0 -> 0, 2 -> 1, 9 stays 9, anything else -> NA.
    att_school = dplyr::case_when(
      hv121 == 0 ~ 0,
      hv121 == 2 ~ 1,
      hv121 == 9 ~ 9,
      TRUE ~ NA_real_
    ),
    # Attendance restricted to each subgroup: 1/0 inside the subgroup, NA for
    # everyone outside it and for any att_school value other than 0 or 1.
    girls_5_15_att_school = dplyr::case_when(
      girls_5_15 == 1 & att_school == 1 ~ 1,
      girls_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    boys_5_15_att_school = dplyr::case_when(
      boys_5_15 == 1 & att_school == 1 ~ 1,
      boys_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    child_5_15_att_school = dplyr::case_when(
      child_5_15 == 1 & att_school == 1 ~ 1,
      child_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  # Keep only the survey round, the attendance indicators and the weight.
  dplyr::select(DHS, att_school, ends_with("att_school"), wt)
#' Weighted (population-style) standard deviation.
#'
#' Computes sqrt(sum(w * (x - m)^2) / sum(w)), where m is the weighted mean
#' sum(w * x) / sum(w). With integer weights this equals the population SD of
#' the vector obtained by repeating each x[i] w[i] times.
#'
#' @param x Numeric vector of values.
#' @param w Numeric vector of weights, same length as `x`.
#' @param na.rm If TRUE (default), drop every position where either `x` or
#'   `w` is NA before computing; if FALSE, NAs propagate to the result.
#' @return A single numeric value (NaN when the weights sum to zero).
weighted.sd <- function(x, w, na.rm = TRUE) {
  if (na.rm) {
    keep <- !(is.na(x) | is.na(w))
    x <- x[keep]
    w <- w[keep]
  }
  total_weight <- sum(w)
  weighted_mean <- sum(w * x) / total_weight
  # Each squared deviation is weighted exactly once; the previous version
  # multiplied by `w` twice, inflating/deflating the result for w != 1.
  sqrt(sum(w * (x - weighted_mean)^2) / total_weight)
}
# Weighted mean of each 0/1 attendance indicator per DHS round.
# The results are proportions on a 0-1 scale (rounded to 2 decimals), even
# though the column headers read "%".
table_na_kable_2 <-
  aux %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    "% of child" = round(
      weighted.mean(child_5_15_att_school, wt, na.rm = TRUE), 2
    ),
    #"% of child sd"=round(weighted.sd(child_5_15_att_school, wt, na.rm=T),2),
    "% of girls" = round(
      weighted.mean(girls_5_15_att_school, wt, na.rm = TRUE), 2
    ),
    "% of boys" = round(
      weighted.mean(boys_5_15_att_school, wt, na.rm = TRUE), 2
    )
  ) %>%
  dplyr::ungroup() %>%
  kable(
    format = "html",
    caption = "Attending school aged 5-15 (hv121)"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)

# Print the styled table.
table_na_kable_2
| DHS | % of child | % of girls | % of boys |
|---|---|---|---|
| DHS4 | 0.32 | 0.40 | 0.26 |
| DHS5 | 0.82 | 0.83 | 0.81 |
| DHS6 | 0.92 | 0.93 | 0.92 |
| DHS7 | 0.92 | 0.92 | 0.91 |
# Remove the scratch data frame `aux` from the workspace.
rm(aux)
#Women Employment - HTIR (Women Questionnaire) Variable v731: “Worked in last 12 months”
##Descriptive Statistics
# Build the "worked in the last 12 months" indicator for women aged 15-49.
# Note: despite its name, `women_5_15` flags ages 15-49 (see the condition).
# NOTE(review): only v731 codes 0, 1 and 2 are mapped; any other code
# becomes NA -- confirm against the v731 codebook that this is intended.
aux <-
  haiti_all_women %>%
  dplyr::mutate(
    women_5_15 = if_else(age >= 15 & age <= 49, 1, 0),
    # v731 of 1 or 2 -> worked (1); 0 -> did not work (0); otherwise NA.
    worked_12m = dplyr::case_when(
      v731 == 1 | v731 == 2 ~ 1,
      v731 == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    # Same indicator restricted to the age window; NA outside it.
    women_5_15_worked_12m = dplyr::case_when(
      women_5_15 == 1 & worked_12m == 1 ~ 1,
      women_5_15 == 1 & worked_12m == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  # Friendlier names for the breakdown variables used below.
  dplyr::rename(region = v024, urban = v025)
# Weighted mean of the 0/1 "worked in last 12 months" indicator per DHS
# round (a 0-1 proportion, rounded to 2 decimals).
table_na_kable_3_nat <-
  aux %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    "% woman 15-49" = round(
      weighted.mean(women_5_15_worked_12m, wt, na.rm = TRUE), 2
    )
  ) %>%
  dplyr::ungroup() %>%
  kable(
    format = "html",
    caption = "Women aged 15-49 who worked in last 12 months (731)"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)

# Print the styled table.
table_na_kable_3_nat
| DHS | % woman 15-49 |
|---|---|
| DHS4 | 0.50 |
| DHS5 | 0.52 |
| DHS6 | 0.51 |
| DHS7 | 0.56 |
# Same weighted proportion as the national table, split by DHS round and
# region code (long format: one row per round/region pair).
table_na_kable_3_reg <-
  aux %>%
  dplyr::group_by(DHS, region) %>%
  dplyr::summarise(
    "% woman 15-49" = round(
      weighted.mean(women_5_15_worked_12m, wt, na.rm = TRUE), 2
    )
  ) %>%
  dplyr::ungroup()
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape the regional summary to one row per region with one column per DHS
# round, then attach a text label for region codes 1-10.
# Codes outside 1-10 receive an NA label.
# NOTE(review): "reste-quest" / "Nord-Quest" look like misspellings of
# "Ouest"; left untouched here because they are part of the rendered output.
aux_1 <-
  table_na_kable_3_reg %>%
  pivot_wider(
    names_from = "DHS",
    values_from = `% woman 15-49`
  ) %>%
  dplyr::mutate(
    Region = dplyr::case_when(
      region == 1 ~ "Aire Metropolitaine/reste-quest",
      region == 2 ~ "Sud-Est",
      region == 3 ~ "Nord",
      region == 4 ~ "Nord-Est",
      region == 5 ~ "Artibonite",
      region == 6 ~ "Centre",
      region == 7 ~ "Sud",
      region == 8 ~ "Grand-Anse",
      region == 9 ~ "Nord-Quest",
      region == 10 ~ "Nippes",
      TRUE ~ NA_character_
    )
  )

# Replace the long-format summary with its styled HTML table and print it.
table_na_kable_3_reg <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "Women aged 15-49 who worked in last 12 months (731) by region"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_3_reg
| region | DHS4 | DHS5 | DHS6 | DHS7 | Region |
|---|---|---|---|---|---|
| 1 | 0.44 | 0.48 | 0.51 | 0.57 | Aire Metropolitaine/reste-quest |
| 2 | 0.54 | 0.55 | 0.53 | 0.56 | Sud-Est |
| 3 | 0.54 | 0.51 | 0.54 | 0.52 | Nord |
| 4 | 0.50 | 0.54 | 0.52 | 0.53 | Nord-Est |
| 5 | 0.52 | 0.60 | 0.54 | 0.60 | Artibonite |
| 6 | 0.49 | 0.57 | 0.51 | 0.56 | Centre |
| 7 | 0.64 | 0.56 | 0.46 | 0.52 | Sud |
| 8 | 0.50 | 0.50 | 0.51 | 0.55 | Grand-Anse |
| 9 | 0.55 | 0.50 | 0.50 | 0.54 | Nord-Quest |
| 10 | 0.52 | 0.50 | 0.50 | 0.56 | Nippes |
| 11 | NA | NA | 0.53 | NA | NA |
| 0 | NA | NA | NA | 0.57 | NA |
# Weighted proportion of women who worked in the last 12 months, split by
# DHS round and the urban/rural code (long format).
table_na_kable_3_urb <-
  aux %>%
  dplyr::group_by(DHS, urban) %>%
  dplyr::summarise(
    "% woman 15-49" = round(
      weighted.mean(women_5_15_worked_12m, wt, na.rm = TRUE), 2
    )
  ) %>%
  dplyr::ungroup()
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape to one row per urban/rural category with one column per DHS round,
# and replace the numeric code with its label (1 -> "Urban", 2 -> "Rural",
# anything else -> NA). The label column is kept first and named `urban`.
aux_1 <-
  table_na_kable_3_urb %>%
  pivot_wider(
    names_from = "DHS", # one output column per survey round
    values_from = `% woman 15-49`
  ) %>%
  dplyr::mutate(
    urban = as.numeric(urban),
    urban = dplyr::case_when(
      urban == 1 ~ "Urban",
      urban == 2 ~ "Rural",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::select(urban, everything())

# Replace the long-format summary with its styled HTML table and print it.
table_na_kable_3_urb <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "Women aged 15-49 who worked in last 12 months (731) by urban/rural"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_3_urb
| urban | DHS4 | DHS5 | DHS6 | DHS7 |
|---|---|---|---|---|
| Urban | 0.44 | 0.48 | 0.49 | 0.55 |
| Rural | 0.55 | 0.55 | 0.53 | 0.57 |
# Remove the scratch objects `aux` and `aux_1` from the workspace.
rm(aux, aux_1)
##Scatterplot/Box plot: fetching water X employment
# aux =
# haiti_all_long %>%
# dplyr::filter(DHS== "DHS5") %>%
# dplyr::mutate(women_15_49 = if_else((hv219=="Female" | hv219=="female")& (hv105>=15 & hv105<=49),1,0)) %>%
# dplyr::mutate(men_15_49 = if_else((hv219=="Male" | hv219=="male")& (hv105>=15 & hv105<=49),1,0)) %>%
# dplyr::mutate(total_15_49 = if_else(hv105>=15 & hv105<=49,1,0)) %>%
# dplyr::mutate(att_school = if_else(hv121==0,0,
# if_else(hv121==2,1,
# if_else(hv121==9,9,NA_real_)))) %>%
# dplyr::mutate(girls_5_15_att_school = if_else(girls_5_15==1 & att_school==1,1,if_else(girls_5_15==1 & att_school==0,0, NA_real_))) %>%
# dplyr::mutate(boys_5_15_att_school = if_else(boys_5_15==1 & att_school==1,1,if_else(boys_5_15==1 & att_school==0,0, NA_real_))) %>%
# dplyr::mutate(child_5_15_att_school = if_else(child_5_15==1 & att_school==1,1,if_else(child_5_15==1 & att_school==0,0, NA_real_))) %>%
# dplyr::select(DHS,att_school, ends_with(c("att_school")),wt)
##Scatterplot/Box plot: fetching water X attendance to school
# DHS5-only version of the school-attendance indicators, keeping hv236 so
# attendance can be broken down by it in the plots that follow.
# NOTE(review): in the standard DHS household recode hv219 is the sex of the
# household *head* and hv104 is the sex of the member -- confirm that hv219 is
# really the intended column for the girls/boys split.
aux <-
  haiti_all_long %>%
  dplyr::filter(DHS == "DHS5") %>%
  dplyr::mutate(
    # Scratch flag for the 5-15 age window; dropped by the select() below.
    .age_5_15 = hv105 >= 5 & hv105 <= 15,
    girls_5_15 = if_else(
      (hv219 == "Female" | hv219 == "female") & .age_5_15, 1, 0
    ),
    boys_5_15 = if_else(
      (hv219 == "Male" | hv219 == "male") & .age_5_15, 1, 0
    ),
    child_5_15 = if_else(.age_5_15, 1, 0),
    # Recode hv121: 0 -> 0, 2 -> 1, 9 stays 9, anything else -> NA.
    att_school = dplyr::case_when(
      hv121 == 0 ~ 0,
      hv121 == 2 ~ 1,
      hv121 == 9 ~ 9,
      TRUE ~ NA_real_
    ),
    # 1/0 inside each subgroup; NA outside it or when att_school is not 0/1.
    girls_5_15_att_school = dplyr::case_when(
      girls_5_15 == 1 & att_school == 1 ~ 1,
      girls_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    boys_5_15_att_school = dplyr::case_when(
      boys_5_15 == 1 & att_school == 1 ~ 1,
      boys_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    child_5_15_att_school = dplyr::case_when(
      child_5_15 == 1 & att_school == 1 ~ 1,
      child_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  dplyr::select(DHS, att_school, ends_with("att_school"), wt, hv236)
legend_title <- "Legend"

#' Stacked-proportion bar chart of one attendance indicator by hv236.
#'
#' Sums the weights `wt` for every (hv236, outcome) combination, converts
#' them to within-hv236 shares, and draws one filled bar per hv236 value.
#'
#' @param data Data frame with columns `hv236`, `wt` and the outcome column.
#' @param outcome Unquoted name of the 0/1 indicator column to plot.
#' @param title Plot title.
#' @param legend_name Title shown above the fill legend.
#' @return A ggplot object.
plot_attendance_by_fetcher <- function(data, outcome, title,
                                       legend_name = "Legend") {
  # NOTE(review): rows where the outcome is NA are kept as their own fill
  # category, exactly as in the original per-plot code -- confirm whether
  # they should be filtered out before computing shares.
  data %>%
    dplyr::group_by(hv236, {{ outcome }}) %>%
    # `.groups = "drop"` makes the regrouping below explicit and silences
    # the "summarise() has grouped output by" message.
    dplyr::summarise(
      weighted_count = sum(wt, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::group_by(hv236) %>%
    dplyr::mutate(weighted_percentage = weighted_count / sum(weighted_count)) %>%
    dplyr::ungroup() %>%
    ggplot(aes(
      x = hv236,
      y = weighted_percentage,
      fill = factor({{ outcome }})
    )) +
    geom_bar(stat = "identity", position = "fill", alpha = .5) +
    scale_y_continuous(labels = scales::percent) +
    labs(x = "", y = "Proportion", title = title) +
    scale_fill_manual(
      name = legend_name,
      values = c("0" = "orange", "1" = "#A288A6"),
      labels = c("0" = "No", "1" = "Yes")
    ) +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(angle = 90)
    )
}

# One panel per population: all children, girls only, boys only.
plot_child <- plot_attendance_by_fetcher(
  aux, child_5_15_att_school, "All children",
  legend_name = legend_title
)
plot_girl <- plot_attendance_by_fetcher(
  aux, girls_5_15_att_school, "Girls",
  legend_name = legend_title
)
plot_boy <- plot_attendance_by_fetcher(
  aux, boys_5_15_att_school, "Boys",
  legend_name = legend_title
)

# Arrange the three panels side by side with a single shared legend, then
# add the overall figure title.
plot <- ggpubr::ggarrange(
  plot_child, plot_girl, plot_boy,
  ncol = 3, common.legend = TRUE, legend = "bottom"
)
ggpubr::annotate_figure(
  plot,
  top = ggpubr::text_grob(
    "% of aged 5-15 attending school, by person who fetches water",
    color = "#2a9d8f",
    face = "bold",
    size = 14
  )
)
#Water-borne diseases – diarrhea - HTKR (Children’s Questionnaire) Variable H11: “Had diarrhea recently”
##Descriptive Statistics
# Build the analysis table for diarrhea: a 0/1 indicator from h11, a gender
# label from sex, and a three-level water-fetching-time category.
# NOTE(review): only h11 == 2 counts as "had diarrhea"; h11 == 1 (and any
# other code) becomes NA -- confirm against the h11 codebook.
aux <-
  haiti_all_child %>%
  dplyr::mutate(
    diarrhea = dplyr::case_when(
      h11 == 0 ~ 0,
      h11 == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    gender = dplyr::case_when(
      sex == 1 ~ "Boy",
      sex == 2 ~ "Girl",
      TRUE ~ NA_character_
    ),
    # <= 30 -> "Less than 30 min" (30 itself included); 31-499 -> "More than
    # 30 min"; the special value 996 -> "On premises"; everything else NA.
    time_water = dplyr::case_when(
      time_fetch_water <= 30 ~ "Less than 30 min",
      time_fetch_water > 30 & time_fetch_water < 500 ~ "More than 30 min",
      time_fetch_water == 996 ~ "On premises",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::select(DHS, diarrhea, h11, gender, wt, age, time_water)
# Weighted mean of the 0/1 diarrhea indicator per DHS round (a 0-1
# proportion, rounded to 2 decimals).
table_na_kable_4_all <-
  aux %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    "% child" = round(weighted.mean(diarrhea, wt, na.rm = TRUE), 2)
  ) %>%
  dplyr::ungroup() %>%
  kable(
    format = "html",
    caption = "% of children with diarrhea in the last 2 weeks"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)

# Print the styled table.
table_na_kable_4_all
| DHS | % child |
|---|---|
| DHS4 | 0.27 |
| DHS5 | 0.24 |
| DHS6 | 0.21 |
| DHS7 | 0.21 |
# Weighted diarrhea proportion by DHS round and gender (long format).
table_na_kable_4_gender <-
  aux %>%
  dplyr::group_by(DHS, gender) %>%
  dplyr::summarise(
    "% diarrhea" = round(weighted.mean(diarrhea, wt, na.rm = TRUE), 2)
  ) %>%
  dplyr::ungroup()
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape to one row per gender with one column per DHS round.
aux_1 <-
  table_na_kable_4_gender %>%
  pivot_wider(
    names_from = "DHS", # one output column per survey round
    values_from = `% diarrhea`
  )

# Replace the long-format summary with its styled HTML table and print it.
table_na_kable_4_gender <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "% of boys and girls with diarrhea"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_4_gender
| gender | DHS4 | DHS5 | DHS6 | DHS7 |
|---|---|---|---|---|
| Boy | 0.28 | 0.25 | 0.21 | 0.22 |
| Girl | 0.26 | 0.24 | 0.21 | 0.21 |
# Weighted diarrhea proportion by DHS round and water-fetching-time category
# (long format; the NA category is kept as its own group).
table_na_kable_4_fet_wat <-
  aux %>%
  dplyr::group_by(DHS, time_water) %>%
  dplyr::summarise(
    "% diarrhea" = round(weighted.mean(diarrhea, wt, na.rm = TRUE), 2)
  ) %>%
  dplyr::ungroup()
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape to one row per time category with one column per DHS round.
aux_1 <-
  table_na_kable_4_fet_wat %>%
  pivot_wider(
    names_from = "DHS", # one output column per survey round
    values_from = `% diarrhea`
  )

# Replace the long-format summary with its styled HTML table and print it.
table_na_kable_4_fet_wat <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "% of children with diarrhea, by time fetching water"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_4_fet_wat
| time_water | DHS4 | DHS5 | DHS6 | DHS7 |
|---|---|---|---|---|
| Less than 30 min | 0.28 | 0.26 | 0.22 | 0.21 |
| More than 30 min | 0.31 | 0.24 | 0.21 | 0.22 |
| On premises | 0.26 | 0.21 | 0.17 | 0.23 |
| NA | 0.18 | 0.20 | 0.29 | 0.21 |
#Check the existence of a variable related with “neighborhood safety”
#Working on this