Testing DHS API

7/2/2020

Load libraries

library(ggplot2)
library(dplyr)
library(magrittr)
library(ggcharts)

Import data from DHS API

Import DHS indicator data. See https://api.dhsprogram.com/#/api-querybuilder.cfm for help building an API query. The list of indicators is here: https://api.dhsprogram.com/rest/dhs/indicators?returnFields=IndicatorId,Label,Definition&f=html

# Download the subnational breakdown of DHS indicator HC_AGEG_P_SNR for the
# Ghana 2014 DHS survey as CSV. Text columns are kept as character vectors
# (spelled out as FALSE, because `F` is an ordinary variable that can be
# reassigned).
ghana65plus <- read.csv(
  "https://api.dhsprogram.com/rest/dhs/data?breakdown=subnational&indicatorIds=HC_AGEG_P_SNR&countryIds=GH&surveyIds=GH2014DHS&lang=en&f=csv",
  stringsAsFactors = FALSE
)

Plot

# Lollipop chart of the indicator value per region. Only the "Total" rows are
# kept, and the combined "Northern, Upper West, Upper East" row is left out.
ghana65plus %>%
  filter(
    ByVariableLabel == "Total" &
      CharacteristicLabel != "Northern, Upper West, Upper East"
  ) %>%
  lollipop_chart(x = CharacteristicLabel, y = Value) +
  labs(
    title = "Ghana's Elderly Population",
    x = "Region",
    y = "Percentage of population above 65"
  ) +
  theme(
    # Typography: same font everywhere, bold title
    plot.title = element_text(family = "Avenir Next Condensed", face = "bold"),
    axis.title = element_text(family = "Avenir Next Condensed"),
    axis.text = element_text(family = "Avenir Next Condensed"),
    legend.text = element_text(family = "Avenir Next Condensed"),
    # Transparent canvas
    plot.background = element_rect(fill = "transparent")
  )

Repeat for other indicators

# Household size
# Download the subnational breakdown of DHS indicator HC_MEMB_H_MNM for the
# Ghana 2014 DHS survey. Text columns stay character (FALSE spelled out,
# because `F` can be reassigned).
ghanahousehold <- read.csv(
  "https://api.dhsprogram.com/rest/dhs/data?breakdown=subnational&indicatorIds=HC_MEMB_H_MNM&countryIds=GH&surveyIds=GH2014DHS&lang=en&f=csv",
  stringsAsFactors = FALSE
)

# Lollipop chart of the value per region; the combined
# "Northern, Upper West, Upper East" row is left out.
ghanahousehold %>%
  filter(CharacteristicLabel != "Northern, Upper West, Upper East") %>%
  lollipop_chart(x = CharacteristicLabel, y = Value) +
  labs(
    x = "Region",
    y = "Mean number of household members",
    title = "Household size"
  ) +
  theme(
    plot.background = element_rect(fill = "transparent"),
    plot.title = element_text(family = "Avenir Next Condensed", face = "bold"),
    axis.text = element_text(family = "Avenir Next Condensed"),
    axis.title = element_text(family = "Avenir Next Condensed"),
    legend.text = element_text(family = "Avenir Next Condensed")
  )

# Media access
# Download the subnational breakdown of DHS indicator ED_MDIA_W_3MD for the
# Ghana 2014 DHS survey. Text columns stay character (FALSE spelled out,
# because `F` can be reassigned).
ghanawomenaccess <- read.csv(
  "https://api.dhsprogram.com/rest/dhs/data?breakdown=subnational&indicatorIds=ED_MDIA_W_3MD&countryIds=GH&surveyIds=GH2014DHS&lang=en&f=csv",
  stringsAsFactors = FALSE
)

# Lollipop chart of the value per region; the combined
# "Northern, Upper West, Upper East" row is left out.
ghanawomenaccess %>%
  filter(CharacteristicLabel != "Northern, Upper West, Upper East") %>%
  lollipop_chart(x = CharacteristicLabel, y = Value) +
  labs(
    x = "Region",
    y = "% of women who access mass media (newspaper, television or radio) at least once a week",
    title = "Access to Media"
  ) +
  theme(
    plot.background = element_rect(fill = "transparent"),
    plot.title = element_text(family = "Avenir Next Condensed", face = "bold"),
    axis.text = element_text(family = "Avenir Next Condensed"),
    axis.title = element_text(family = "Avenir Next Condensed"),
    legend.text = element_text(family = "Avenir Next Condensed")
  )

# Handwashing access
# Download the subnational breakdown of DHS indicator WS_HNDW_P_BAS for the
# Ghana 2014 DHS survey. Text columns stay character (FALSE spelled out,
# because `F` can be reassigned).
ghanahandwash <- read.csv(
  "https://api.dhsprogram.com/rest/dhs/data?breakdown=subnational&indicatorIds=WS_HNDW_P_BAS&countryIds=GH&surveyIds=GH2014DHS&lang=en&f=csv",
  stringsAsFactors = FALSE
)

# Lollipop chart of the value per region; the combined
# "Northern, Upper West, Upper East" row is left out.
ghanahandwash %>%
  filter(CharacteristicLabel != "Northern, Upper West, Upper East") %>%
  lollipop_chart(x = CharacteristicLabel, y = Value) +
  labs(
    x = "Region",
    y = "% of households with handwashing facilities",
    title = "Hand Washing"
  ) +
  theme(
    plot.background = element_rect(fill = "transparent"),
    plot.title = element_text(family = "Avenir Next Condensed", face = "bold"),
    axis.text = element_text(family = "Avenir Next Condensed"),
    axis.title = element_text(family = "Avenir Next Condensed"),
    legend.text = element_text(family = "Avenir Next Condensed")
  )

Or we could do Regional vs National comparisons?

First, shrink the output from the API to smaller, less unwieldy data frames:

# Reduce each API result to three columns: CharacteristicLabel, Value and a
# constant VariableName naming the indicator. ByVariableLabel is carried
# through the pipeline and dropped at the end.

# Share of population over 65: only the "Total" rows are kept
ghana65plusmini <- ghana65plus %>%
  select(CharacteristicLabel, Value, ByVariableLabel) %>%
  filter(ByVariableLabel == "Total") %>%
  cbind(VariableName = "% of Population over 65") %>%
  select(-ByVariableLabel)

# Mean household size
ghanahouseholdmini <- ghanahousehold %>%
  select(CharacteristicLabel, Value, ByVariableLabel) %>%
  cbind(VariableName = "Mean Household Size") %>%
  select(-ByVariableLabel)

# Women's weekly access to mass media
ghanawomenaccessmini <- ghanawomenaccess %>%
  select(CharacteristicLabel, Value, ByVariableLabel) %>%
  cbind(VariableName = "% who Accessed Mass \n Media in the Last Week") %>%
  select(-ByVariableLabel)

# Households with handwashing facilities
ghanahandwashmini <- ghanahandwash %>%
  select(CharacteristicLabel, Value, ByVariableLabel) %>%
  cbind(VariableName = "% of Households with \n Handwashing Facilities") %>%
  select(-ByVariableLabel)

Then add a row to each data frame for the national average:

# Append a "National" row to each indicator table. Its value is the mean of
# the Value column already in that table, rounded to 2 decimals.
#
# The new row is built as a one-row data frame rather than with c(): c() would
# coerce the numeric mean to a string, and rbind() would then turn the whole
# Value column into character.
#
# NOTE(review): this is an unweighted mean of whatever rows each table holds
# (including any combined-region rows), not a population-weighted national
# estimate -- confirm this is the intended "national" figure.
ghana65plusmini <- rbind(
  ghana65plusmini,
  data.frame(
    CharacteristicLabel = "National",
    Value = round(mean(ghana65plusmini$Value), digits = 2),
    VariableName = "% of Population over 65",
    stringsAsFactors = FALSE
  )
)
ghanahouseholdmini <- rbind(
  ghanahouseholdmini,
  data.frame(
    CharacteristicLabel = "National",
    Value = round(mean(ghanahouseholdmini$Value), digits = 2),
    VariableName = "Mean Household Size",
    stringsAsFactors = FALSE
  )
)
ghanawomenaccessmini <- rbind(
  ghanawomenaccessmini,
  data.frame(
    CharacteristicLabel = "National",
    Value = round(mean(ghanawomenaccessmini$Value), digits = 2),
    VariableName = "% who Accessed Mass \n Media in the Last Week",
    stringsAsFactors = FALSE
  )
)
ghanahandwashmini <- rbind(
  ghanahandwashmini,
  data.frame(
    CharacteristicLabel = "National",
    Value = round(mean(ghanahandwashmini$Value), digits = 2),
    VariableName = "% of Households with \n Handwashing Facilities",
    stringsAsFactors = FALSE
  )
)

Combine all indicators into one data frame, change some names and some variable types:

# Stack the four per-indicator tables into one long data frame
ghana_indicators <- do.call(
  rbind,
  list(
    ghana65plusmini,
    ghanahouseholdmini,
    ghanawomenaccessmini,
    ghanahandwashmini
  )
)

# Give the two rows of interest readable labels
ghana_indicators$CharacteristicLabel <- replace(
  ghana_indicators$CharacteristicLabel,
  ghana_indicators$CharacteristicLabel %in% "..Northern",
  "Northern Region"
)
ghana_indicators$CharacteristicLabel <- replace(
  ghana_indicators$CharacteristicLabel,
  ghana_indicators$CharacteristicLabel %in% "National",
  "National Average"
)

# Set column types for plotting: region as factor, indicator name as text,
# value as number
ghana_indicators[["CharacteristicLabel"]] <- as.factor(ghana_indicators[["CharacteristicLabel"]])
ghana_indicators[["VariableName"]] <- as.character(ghana_indicators[["VariableName"]])
ghana_indicators[["Value"]] <- as.numeric(ghana_indicators[["Value"]])

Plot

# Grouped bar chart with one pair of bars per indicator, comparing the
# "Northern Region" row against the "National Average" row.
ghana_indicators %>%
  filter(CharacteristicLabel %in% c("Northern Region", "National Average")) %>%
  ggplot(aes(x = VariableName, y = Value, fill = CharacteristicLabel)) +
  geom_bar(stat = "identity", position = "dodge") +
  # Optional value labels above the bars (disabled):
  # geom_text(aes(label = Value), position = position_dodge(width = 1), vjust = -0.4) +
  scale_fill_manual(
    values = c(
      "National Average" = "#4d92ca",
      "Northern Region" = "#fc656d"
    )
  ) +
  labs(
    title = "Vulnerability Indicators for Ghana's Northern Region",
    x = "",
    y = "",
    fill = ""
  ) +
  theme_classic() +
  theme(
    # Text and fonts
    text = element_text(color = "gray20"),
    plot.title = element_text(family = "Avenir Next Condensed", face = "bold"),
    axis.text = element_text(family = "Avenir Next Condensed"),
    legend.text = element_text(family = "Avenir Next Condensed"),
    # legend.text = element_text(size = 11, color = "gray10"),
    # Legend below the plot, without a title
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.background = element_rect(fill = "transparent"),
    # Axis lines: x axis only
    axis.line = element_line(color = "gray40", size = 0.5),
    axis.line.y = element_blank(),
    # Faint horizontal grid lines only
    panel.grid.major = element_line(color = alpha("gray50", 0.2), size = 0.5),
    panel.grid.major.x = element_blank(),
    # panel.grid.major.y = element_blank(),
    # Backgrounds
    plot.background = element_rect(fill = alpha("grey", 0.05)),
    panel.background = element_rect(fill = "transparent")
  )

We could try to have one page of key graphs for each region in the reports. For example, for the Volta region in Ghana, we would have a page with an epicurve (regional vs national), cases disaggregated by age and sex, and then a plot comparing vulnerability indicators?

Could we map the indicator values to shapefiles?

The idea would be to reproduce something like what Claudia and Flavio’s group did here: https://www.medrxiv.org/content/10.1101/2020.03.19.20039131v2.full.pdf+html. But at a lower resolution.

vulnerability criteria

I don’t know how to do this at all.

And for other countries?

In theory, all of the above could be repeated for any country simply by changing the two-letter country code (and the matching survey ID) in the DHS API query.