#Change ACC #Number of missing values in variable “Time to get to water source (hv204)”

# Weighted count of records whose hv204 is missing, one row per value of
# `DHS`: each record with a missing hv204 contributes its weight `wt`
# (records with a missing weight are dropped by na.rm). The result is
# rounded to 2 decimals and rendered as an HTML table.
table_na_kable_1 <-
  haiti_all_long %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    `Missing Values (NAs)` = round(sum(wt * is.na(hv204), na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  kable(
    format = "html",
    caption = "Number of Missing Values by Group: Time to get to water source (hv204)"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_1
Number of Missing Values by Group: Time to get to water source (hv204)
DHS Missing Values (NAs)
DHS4 50.73
DHS5 150.39
DHS6 261.23
DHS7 9.58





#Change ACC #School attendance

##Check the possibility to use the variable “Grade of education at the level of education attended during current school year” (hv123)

# Per-record school-attendance indicators.
#   girls_5_15 / boys_5_15 / child_5_15:
#     1 when hv105 is between 5 and 15 (inclusive) and, for the girls/boys
#     flags, hv219 equals the given sex label; 0 otherwise.
#   att_school:
#     hv121 recoded as 0 -> 0, 2 -> 1, 9 -> 9; every other value becomes NA.
#   *_5_15_att_school:
#     att_school (0/1 only) for records inside the matching group, NA
#     elsewhere (this also turns the 9 code into NA).
# NOTE(review): in the standard DHS household recode hv219 is the sex of the
# household head and hv104 the sex of the household member - confirm that
# hv219 is the intended column for the girls/boys split.
# NOTE(review): any hv121 code other than 0/2/9 (e.g. 1) is treated as
# missing - confirm this matches the coding of every survey wave.
aux <-
  haiti_all_long %>%
  dplyr::mutate(
    girls_5_15 = dplyr::if_else(
      (hv219 == "Female" | hv219 == "female") & (hv105 >= 5 & hv105 <= 15),
      1, 0
    ),
    boys_5_15 = dplyr::if_else(
      (hv219 == "Male" | hv219 == "male") & (hv105 >= 5 & hv105 <= 15),
      1, 0
    ),
    child_5_15 = dplyr::if_else(hv105 >= 5 & hv105 <= 15, 1, 0),
    att_school = dplyr::case_when(
      hv121 == 0 ~ 0,
      hv121 == 2 ~ 1,
      hv121 == 9 ~ 9,
      TRUE ~ NA_real_
    ),
    girls_5_15_att_school = dplyr::case_when(
      girls_5_15 == 1 & att_school == 1 ~ 1,
      girls_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    boys_5_15_att_school = dplyr::case_when(
      boys_5_15 == 1 & att_school == 1 ~ 1,
      boys_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    child_5_15_att_school = dplyr::case_when(
      child_5_15 == 1 & att_school == 1 ~ 1,
      child_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  dplyr::select(DHS, att_school, dplyr::ends_with("att_school"), wt)

# Weighted (population-style) standard deviation.
#
# Args:
#   x:     numeric vector of values.
#   w:     numeric vector of weights, one per element of `x`.
#   na.rm: when TRUE (default), positions where either `x` or `w` is NA are
#          dropped before computing; when FALSE any NA propagates.
#
# Returns:
#   sqrt(sum(w * (x - m)^2) / sum(w)), with m = sum(w * x) / sum(w).
#   The result is NaN when the (remaining) weights sum to zero.
weighted.sd <- function(x, w, na.rm = TRUE) {
  if (na.rm) {
    keep <- !(is.na(x) | is.na(w))
    x <- x[keep]
    w <- w[keep]
  }

  total_weight <- sum(w)
  weighted_mean <- sum(w * x) / total_weight
  # Each squared deviation is weighted exactly once; the previous version
  # multiplied by `w` twice, which inflated/deflated the result whenever the
  # weights differed from 1.
  sqrt(sum(w * (x - weighted_mean)^2) / total_weight)
}

# Weighted mean of each 0/1 attendance indicator per value of `DHS`,
# rounded to 2 decimals and rendered as an HTML table. Records whose
# indicator is NA are ignored by weighted.mean(na.rm = TRUE).
# Note: the columns are labelled "%" but hold proportions between 0 and 1.
table_na_kable_2 <-
  aux %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    `% of child` = round(
      weighted.mean(child_5_15_att_school, wt, na.rm = TRUE), 2
    ),
    # `% of child sd` = round(
    #   weighted.sd(child_5_15_att_school, wt, na.rm = TRUE), 2
    # ),
    `% of girls` = round(
      weighted.mean(girls_5_15_att_school, wt, na.rm = TRUE), 2
    ),
    `% of boys` = round(
      weighted.mean(boys_5_15_att_school, wt, na.rm = TRUE), 2
    ),
    .groups = "drop"
  ) %>%
  kable(format = "html", caption = "Attending school aged 5-15 (hv121)") %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_2
Attending school aged 5-15 (hv121)
DHS % of child % of girls % of boys
DHS4 0.32 0.40 0.26
DHS5 0.82 0.83 0.81
DHS6 0.92 0.93 0.92
DHS7 0.92 0.92 0.91
# Remove the temporary `aux` object from the workspace; it is rebuilt from
# scratch in the next section.
rm(aux)





#Women Employment - HTIR (Women Questionnaire) Variable v731: “Worked in last 12 months”

##Descriptive Statistics

# Per-record employment indicators from the women's data set.
#   women_5_15:            1 when `age` is between 15 and 49 (inclusive),
#                          0 otherwise. The column name says 5_15 but the
#                          range tested is 15-49; the name is kept because
#                          later code refers to it.
#   worked_12m:            v731 recoded as 1 or 2 -> 1, 0 -> 0, else NA.
#   women_5_15_worked_12m: worked_12m for records with women_5_15 == 1,
#                          NA elsewhere.
# v024 and v025 are renamed to `region` and `urban`.
# NOTE(review): v731 codes other than 0/1/2 (e.g. 3) end up as NA - confirm
# that this is intended.
aux <-
  haiti_all_women %>%
  dplyr::mutate(
    women_5_15 = dplyr::if_else(age >= 15 & age <= 49, 1, 0),
    worked_12m = dplyr::case_when(
      v731 == 1 | v731 == 2 ~ 1,
      v731 == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    women_5_15_worked_12m = dplyr::case_when(
      women_5_15 == 1 & worked_12m == 1 ~ 1,
      women_5_15 == 1 & worked_12m == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  dplyr::rename(region = v024, urban = v025)

# Weighted share (as a 0-1 proportion, rounded to 2 decimals) of records with
# women_5_15_worked_12m == 1, per value of `DHS`, rendered as an HTML table.
table_na_kable_3_nat <-
  aux %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    `% woman 15-49` = round(
      weighted.mean(women_5_15_worked_12m, wt, na.rm = TRUE), 2
    ),
    .groups = "drop"
  ) %>%
  kable(
    format = "html",
    caption = "Women aged 15-49 who worked in last 12 months (731)"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_3_nat
Women aged 15-49 who worked in last 12 months (731)
DHS % woman 15-49
DHS4 0.50
DHS5 0.52
DHS6 0.51
DHS7 0.56
# Weighted share (0-1 proportion, rounded to 2 decimals) of records with
# women_5_15_worked_12m == 1 for every DHS x region combination.
# `.groups = "drop"` returns an ungrouped tibble directly, so summarise() no
# longer emits its "has grouped output by 'DHS'" message and no separate
# ungroup() step is needed. `TRUE` replaces the reassignable shorthand `T`.
table_na_kable_3_reg <-
  aux %>%
  dplyr::group_by(DHS, region) %>%
  dplyr::summarise(
    `% woman 15-49` = round(
      weighted.mean(women_5_15_worked_12m, wt, na.rm = TRUE), 2
    ),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape the DHS x region summary to one row per region code with one column
# per DHS value, then append a human-readable `Region` label.
# Label spelling fixed: "Ouest" was previously misspelled "Quest" in the
# labels for codes 1 and 9.
# Region codes without an entry below get an NA label (the rendered table
# shows such rows for codes 0 and 11).
# NOTE(review): those codes only appear in some DHS values, which suggests
# the region coding is not identical across surveys - confirm that codes 1-10
# mean the same region in every wave before comparing columns.
aux_1 <-
  table_na_kable_3_reg %>%
  tidyr::pivot_wider(
    names_from = "DHS",
    values_from = `% woman 15-49`
  ) %>%
  dplyr::mutate(
    Region = dplyr::case_when(
      region == 1 ~ "Aire Metropolitaine/reste-Ouest",
      region == 2 ~ "Sud-Est",
      region == 3 ~ "Nord",
      region == 4 ~ "Nord-Est",
      region == 5 ~ "Artibonite",
      region == 6 ~ "Centre",
      region == 7 ~ "Sud",
      region == 8 ~ "Grand-Anse",
      region == 9 ~ "Nord-Ouest",
      region == 10 ~ "Nippes",
      TRUE ~ NA_character_
    )
  )

# Render the labelled regional table as HTML.
table_na_kable_3_reg <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "Women aged 15-49 who worked in last 12 months (731) by region"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_3_reg
Women aged 15-49 who worked in last 12 months (731) by region
region DHS4 DHS5 DHS6 DHS7 Region
1 0.44 0.48 0.51 0.57 Aire Metropolitaine/reste-quest
2 0.54 0.55 0.53 0.56 Sud-Est
3 0.54 0.51 0.54 0.52 Nord
4 0.50 0.54 0.52 0.53 Nord-Est
5 0.52 0.60 0.54 0.60 Artibonite
6 0.49 0.57 0.51 0.56 Centre
7 0.64 0.56 0.46 0.52 Sud
8 0.50 0.50 0.51 0.55 Grand-Anse
9 0.55 0.50 0.50 0.54 Nord-Quest
10 0.52 0.50 0.50 0.56 Nippes
11 NA NA 0.53 NA NA
0 NA NA NA 0.57 NA
# Weighted share (0-1 proportion, rounded to 2 decimals) of records with
# women_5_15_worked_12m == 1 for every DHS x urban combination.
# `.groups = "drop"` returns an ungrouped tibble directly, so summarise() no
# longer emits its "has grouped output by 'DHS'" message and no separate
# ungroup() step is needed. `TRUE` replaces the reassignable shorthand `T`.
table_na_kable_3_urb <-
  aux %>%
  dplyr::group_by(DHS, urban) %>%
  dplyr::summarise(
    `% woman 15-49` = round(
      weighted.mean(women_5_15_worked_12m, wt, na.rm = TRUE), 2
    ),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape the DHS x urban summary to one row per `urban` value with one column
# per DHS value, and replace the numeric code by a label
# (1 -> "Urban", 2 -> "Rural", anything else -> NA). `urban` stays the
# first column.
aux_1 <-
  table_na_kable_3_urb %>%
  tidyr::pivot_wider(
    names_from = "DHS",
    values_from = `% woman 15-49`
  ) %>%
  dplyr::mutate(
    urban = dplyr::case_when(
      as.numeric(urban) == 1 ~ "Urban",
      as.numeric(urban) == 2 ~ "Rural",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::select(urban, dplyr::everything())

# Render the urban/rural table as HTML.
table_na_kable_3_urb <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "Women aged 15-49 who worked in last 12 months (731) by urban/rural"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_3_urb
Women aged 15-49 who worked in last 12 months (731) by urban/rural
urban DHS4 DHS5 DHS6 DHS7
Urban 0.44 0.48 0.49 0.55
Rural 0.55 0.55 0.53 0.57
# Remove the temporary `aux` and `aux_1` objects from the workspace; both
# names are reused by later sections.
rm(aux, aux_1)

##Scatterplot/Box plot: fetching water X employment

# aux =
#   haiti_all_long %>%
#   dplyr::filter(DHS== "DHS5") %>%
#   dplyr::mutate(women_15_49 = if_else((hv219=="Female" | hv219=="female")& (hv105>=15 & hv105<=49),1,0)) %>%
#   dplyr::mutate(men_15_49   = if_else((hv219=="Male"   | hv219=="male")&   (hv105>=15 & hv105<=49),1,0)) %>%
#   dplyr::mutate(total_15_49 = if_else(hv105>=15 & hv105<=49,1,0)) %>%
#   dplyr::mutate(att_school = if_else(hv121==0,0,
#                                      if_else(hv121==2,1,
#                                              if_else(hv121==9,9,NA_real_)))) %>%
#   dplyr::mutate(girls_5_15_att_school = if_else(girls_5_15==1 & att_school==1,1,if_else(girls_5_15==1 & att_school==0,0, NA_real_))) %>%
#   dplyr::mutate(boys_5_15_att_school  = if_else(boys_5_15==1  & att_school==1,1,if_else(boys_5_15==1  & att_school==0,0, NA_real_))) %>% 
#   dplyr::mutate(child_5_15_att_school = if_else(child_5_15==1 & att_school==1,1,if_else(child_5_15==1 & att_school==0,0, NA_real_))) %>%
#   dplyr::select(DHS,att_school, ends_with(c("att_school")),wt)

##Scatterplot/Box plot: fetching water X attendance to school

# School-attendance indicators for the records with DHS == "DHS5" only,
# keeping hv236 so attendance can be broken down by it.
#   girls_5_15 / boys_5_15 / child_5_15:
#     1 when hv105 is between 5 and 15 (inclusive) and, for the girls/boys
#     flags, hv219 equals the given sex label; 0 otherwise.
#   att_school:
#     hv121 recoded as 0 -> 0, 2 -> 1, 9 -> 9; every other value becomes NA.
#   *_5_15_att_school:
#     att_school (0/1 only) for records inside the matching group, NA
#     elsewhere.
# NOTE(review): in the standard DHS household recode hv219 is the sex of the
# household head and hv104 the sex of the household member - confirm that
# hv219 is the intended column for the girls/boys split.
aux <-
  haiti_all_long %>%
  dplyr::filter(DHS == "DHS5") %>%
  dplyr::mutate(
    girls_5_15 = dplyr::if_else(
      (hv219 == "Female" | hv219 == "female") & (hv105 >= 5 & hv105 <= 15),
      1, 0
    ),
    boys_5_15 = dplyr::if_else(
      (hv219 == "Male" | hv219 == "male") & (hv105 >= 5 & hv105 <= 15),
      1, 0
    ),
    child_5_15 = dplyr::if_else(hv105 >= 5 & hv105 <= 15, 1, 0),
    att_school = dplyr::case_when(
      hv121 == 0 ~ 0,
      hv121 == 2 ~ 1,
      hv121 == 9 ~ 9,
      TRUE ~ NA_real_
    ),
    girls_5_15_att_school = dplyr::case_when(
      girls_5_15 == 1 & att_school == 1 ~ 1,
      girls_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    boys_5_15_att_school = dplyr::case_when(
      boys_5_15 == 1 & att_school == 1 ~ 1,
      boys_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    ),
    child_5_15_att_school = dplyr::case_when(
      child_5_15 == 1 & att_school == 1 ~ 1,
      child_5_15 == 1 & att_school == 0 ~ 0,
      TRUE ~ NA_real_
    )
  ) %>%
  dplyr::select(DHS, att_school, dplyr::ends_with("att_school"), wt, hv236)

legend_title <- "Legend"

# Build one 100%-stacked bar chart of a 0/1 attendance indicator by hv236.
#
# Args:
#   data:        data frame with columns hv236, wt and the indicator column.
#   outcome:     name (string) of the 0/1 indicator column to plot.
#   title:       plot title.
#   legend_name: title of the fill legend.
#
# Returns:
#   A ggplot object with one bar per hv236 value, split into the weighted
#   shares of indicator == 0 ("No") and indicator == 1 ("Yes").
#
# Records whose indicator is NA (everyone outside the age/sex group being
# plotted) are dropped first. Previously they were counted in each bar's
# denominator and drawn as an extra, unlabelled NA segment, so the bars did
# not show the share of children attending school.
plot_attendance_by_fetcher <- function(data, outcome, title,
                                       legend_name = "Legend") {
  data %>%
    dplyr::mutate(attends = .data[[outcome]]) %>%
    dplyr::filter(!is.na(attends)) %>%
    dplyr::group_by(hv236, attends) %>%
    # Explicit .groups avoids the "summarise() has grouped output" message.
    dplyr::summarise(weighted_count = sum(wt, na.rm = TRUE), .groups = "drop") %>%
    dplyr::group_by(hv236) %>%
    dplyr::mutate(weighted_percentage = weighted_count / sum(weighted_count)) %>%
    dplyr::ungroup() %>%
    ggplot(aes(x = hv236, y = weighted_percentage, fill = factor(attends))) +
    geom_bar(stat = "identity", position = "fill", alpha = .5) +
    scale_y_continuous(labels = scales::percent) +
    labs(x = "", y = "Proportion", title = title) +
    scale_fill_manual(
      name = legend_name,
      values = c("0" = "orange", "1" = "#A288A6"),
      labels = c("0" = "No", "1" = "Yes")
    ) +
    theme(
      legend.position = "bottom",
      axis.text.x = element_text(angle = 90)
    )
}

plot_child <- plot_attendance_by_fetcher(
  aux, "child_5_15_att_school", "All children", legend_title
)
plot_girl <- plot_attendance_by_fetcher(
  aux, "girls_5_15_att_school", "Girls", legend_title
)
plot_boy <- plot_attendance_by_fetcher(
  aux, "boys_5_15_att_school", "Boys", legend_title
)

# Put the three panels side by side with one shared legend and a common title.
plot <- ggpubr::ggarrange(
  plot_child, plot_girl, plot_boy,
  ncol = 3, common.legend = TRUE, legend = "bottom"
)
ggpubr::annotate_figure(
  plot,
  top = ggpubr::text_grob(
    "% of aged 5-15 attending school, by person who fetches water",
    color = "#2a9d8f",
    face = "bold",
    size = 14
  )
)





#Water-borne diseases – diarrhea - HTKR (Children’s Questionnaire) Variable H11: “Had diarrhea recently”

##Descriptive Statistics

# Per-record diarrhea indicators from the children's data set.
#   diarrhea:   h11 recoded as 0 -> 0, 2 -> 1; every other value becomes NA.
#   gender:     sex 1 -> "Boy", 2 -> "Girl", else NA.
#   time_water: time_fetch_water <= 30 -> "Less than 30 min" (30 itself is
#               included), > 30 and < 500 -> "More than 30 min",
#               996 -> "On premises", anything else -> NA.
# NOTE(review): h11 == 1 is treated as missing here - confirm that no survey
# wave uses code 1 for a "yes" answer.
aux <-
  haiti_all_child %>%
  dplyr::mutate(
    diarrhea = dplyr::case_when(
      h11 == 0 ~ 0,
      h11 == 2 ~ 1,
      TRUE ~ NA_real_
    ),
    gender = dplyr::case_when(
      sex == 1 ~ "Boy",
      sex == 2 ~ "Girl",
      TRUE ~ NA_character_
    ),
    time_water = dplyr::case_when(
      time_fetch_water <= 30 ~ "Less than 30 min",
      time_fetch_water > 30 & time_fetch_water < 500 ~ "More than 30 min",
      time_fetch_water == 996 ~ "On premises",
      TRUE ~ NA_character_
    )
  ) %>%
  dplyr::select(DHS, diarrhea, h11, gender, wt, age, time_water)


# Weighted share (0-1 proportion, rounded to 2 decimals) of records with
# diarrhea == 1, per value of `DHS`, rendered as an HTML table.
table_na_kable_4_all <-
  aux %>%
  dplyr::group_by(DHS) %>%
  dplyr::summarise(
    `% child` = round(weighted.mean(diarrhea, wt, na.rm = TRUE), 2),
    .groups = "drop"
  ) %>%
  kable(
    format = "html",
    caption = "% of children with diarrhea in the last 2 weeks"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_4_all
% of children with diarrhea in the last 2 weeks
DHS % child
DHS4 0.27
DHS5 0.24
DHS6 0.21
DHS7 0.21





# Weighted share (0-1 proportion, rounded to 2 decimals) of records with
# diarrhea == 1 for every DHS x gender combination.
# `.groups = "drop"` returns an ungrouped tibble directly, so summarise() no
# longer emits its "has grouped output by 'DHS'" message and no separate
# ungroup() step is needed. `TRUE` replaces the reassignable shorthand `T`.
table_na_kable_4_gender <-
  aux %>%
  dplyr::group_by(DHS, gender) %>%
  dplyr::summarise(
    `% diarrhea` = round(weighted.mean(diarrhea, wt, na.rm = TRUE), 2),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape the DHS x gender summary to one row per gender with one column per
# DHS value, then render it as an HTML table.
aux_1 <- tidyr::pivot_wider(
  table_na_kable_4_gender,
  names_from = "DHS",
  values_from = `% diarrhea`
)
table_na_kable_4_gender <-
  aux_1 %>%
  kable(format = "html", caption = "% of boys and girls with diarrhea") %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_4_gender
% of boys and girls with diarrhea
gender DHS4 DHS5 DHS6 DHS7
Boy 0.28 0.25 0.21 0.22
Girl 0.26 0.24 0.21 0.21
# Weighted share (0-1 proportion, rounded to 2 decimals) of records with
# diarrhea == 1 for every DHS x time_water combination (time_water may be NA).
# `.groups = "drop"` returns an ungrouped tibble directly, so summarise() no
# longer emits its "has grouped output by 'DHS'" message and no separate
# ungroup() step is needed. `TRUE` replaces the reassignable shorthand `T`.
table_na_kable_4_fet_wat <-
  aux %>%
  dplyr::group_by(DHS, time_water) %>%
  dplyr::summarise(
    `% diarrhea` = round(weighted.mean(diarrhea, wt, na.rm = TRUE), 2),
    .groups = "drop"
  )
## `summarise()` has grouped output by 'DHS'. You can override using the `.groups`
## argument.
# Reshape the DHS x time_water summary to one row per time_water category
# with one column per DHS value, then render it as an HTML table.
aux_1 <- tidyr::pivot_wider(
  table_na_kable_4_fet_wat,
  names_from = "DHS",
  values_from = `% diarrhea`
)
table_na_kable_4_fet_wat <-
  aux_1 %>%
  kable(
    format = "html",
    caption = "% of children with diarrhea, by time fetching water"
  ) %>%
  kableExtra::kable_styling(full_width = FALSE)
table_na_kable_4_fet_wat
% of children with diarrhea, by time fetching water
time_water DHS4 DHS5 DHS6 DHS7
Less than 30 min 0.28 0.26 0.22 0.21
More than 30 min 0.31 0.24 0.21 0.22
On premises 0.26 0.21 0.17 0.23
NA 0.18 0.20 0.29 0.21

#Check the existence of a variable related with “neighborhood safety”

#Working on this