Bootstrapping DHIS2

Jason Pickering

August 6, 2017

Introduction

Choice of tools

General approach to bootstrapping

Goal for this excercise

Let’s get started

require(httr)
require(jsonlite)
require(assertthat)
require(rlist)
require(reshape2)
require(ggplot2)

We will load a few helpful libraries

UID generation

generateUID <- function(codeSize = 11) {
  #Generate a random seed
  runif(1)
  allowedLetters <- c(LETTERS, letters)
  allowedChars <- c(LETTERS, letters, 0:9)
  #First character must be a letter according to the DHIS2 spec
  firstChar <- sample(allowedLetters, 1)
  otherChars <- sample(allowedChars, codeSize - 1)
  uid <- paste(c(firstChar,paste(
  otherChars, sep = "", collapse = "" )), sep = "", collapse = "")
  return(uid)
}

Logging in

startTime<-Sys.time()

setwd("/home/jason/development/dhis2-data-munging/")
baseurl<-"http://localhost:8080/dhis/"
username<-"admin"
password<-"district"
#This will give us a cookie we can use for later use. 

loginDHIS2<-function(baseurl,username,password) {
url<-paste0(baseurl,"api/me")
r<-GET(url,authenticate(username,password))
assert_that(r$status_code == 200L) }

loginDHIS2(baseurl,username,password)
## [1] TRUE

Loading our organisation units

#Get the OUS
ous<-fromJSON("https://raw.githubusercontent.com/jason-p-pickering/dhis2-data-munging/master/ous/ous.json")
#Create the URL
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE&atomicMode=NONE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(ous,auto_unbox = TRUE),content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE

Testing our organisation units are there

#Request all of the orgunit IDs from the server
url<-paste0(baseurl,"/api/organisationUnits?fields=id&paging=false")
ous_from_server<-fromJSON(content(GET(url),"text"))
assert_that(all.equal(sort(ous$organisationUnits$id) , 
                      sort(ous_from_server$organisationUnits$id)))
## [1] TRUE

Setting organisation unit levels

organisationUnitLevels<-data.frame(level=c(1,2,3),
                                   name=c("Global","Continent","Country"))
#Generate some UIDs
organisationUnitLevels$uid<-sapply(rep(11,nrow(organisationUnitLevels)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE&atomicMode=NONE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(organisationUnitLevels = organisationUnitLevels),
                        auto_unbox = TRUE),
                        content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE

Set your user’s organisation unit

#We want to be a global user
url<-paste0(baseurl,"/api/organisationUnits?filter=name:eq:Global&fields=id")
global_uid<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/me")
me<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/users/",me$id)
me<-fromJSON(content(GET(url),"text"))
me$organisationUnits<-list(list(id = global_uid$organisationUnits$id))
url<-paste0(baseurl,"api/27/metadata?importStrategy=UPDATE")
r<-POST(url,body=toJSON(list(users=list(me)),auto_unbox = TRUE),content_type_json() )
assert_that(r$status_code == 200L)
## [1] TRUE

Get the data elements

des<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=dictionary")
des$name<-substring(des$definition,0,230)
#Check and be sure that no names are duplicated.
assert_that(Reduce("&",duplicated(des$name)) == FALSE)
## [1] TRUE

Loading the data elements

#Data elements
set.seed(94005004)
des_import<-data.frame(name=des$name
                       ,code=des$variable_name
                       ,shortName=des$variable_name,
                       aggregationType="SUM",
                       valueType="NUMBER",
                       domainType="AGGREGATE")
des_import$id<-sapply(rep(11,nrow(des_import)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(dataElements = des_import),
                        auto_unbox = TRUE),
                        content_type_json())
#assert_that(fromJSON(content(r,"text"))$stats$created == nrow(des))

Data element groups

#Get the groups and assign some UIDs.
de_groups<-data.frame(name=unique(des$dataset))
de_groups$id<-sapply(rep(11,nrow(de_groups)),generateUID)

#Lets create a map of data elements and which data element groups they belong to
foo<-merge(des[,c("variable_name","dataset")],
           des_import[,c("code","id")],
           by.x="variable_name",by.y="code")

[{“name”:[“Community engagement”],“id”:[“V9b7hWU2ZPa”],“dataElements”:[{“id”:[“Rfjg6LEMGyb”]},{“id”:[“OBwq1fzkTm4”]},{“id”:[“v8gJOREcVnC”]},{“id”:[“A5

Transforming and importing the data

tb<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=estimates",
             stringsAsFactors=FALSE)
tb_cases<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=notifications",
                   stringsAsFactors = FALSE)
knitr::kable(tb[1:2,c("iso3","year","e_pop_num","e_inc_100k")])
iso3 year e_pop_num e_inc_100k
AFG 2000 19701940 190
AFG 2001 20531160 189
tb<-reshape2::melt(tb,id.vars=c("country","iso2","iso3","iso_numeric",
                                "g_whoregion","year"))
tb<-tb[!is.na(tb$value),]
tb_cases<-reshape2::melt(tb_cases,id.vars=c("country","iso2","iso3",
                                            "iso_numeric","g_whoregion","year"))
tb_cases<-tb_cases[!is.na(tb_cases$value),]
tb<-rbind(tb,tb_cases)
knitr::kable(tb_cases[1:2,])
country iso2 iso3 iso_numeric g_whoregion year variable value
18 Afghanistan AF AFG 4 EMR 1997 new_sp 618
19 Afghanistan AF AFG 4 EMR 1998 new_sp 1833

Merging it with the organisation units

tb<-merge(tb,des_import[,c("code","id")],by.x="variable",by.y="code")
knitr::kable(tb[1:2,])
variable country iso2 iso3 iso_numeric g_whoregion year value id
all_conf_xdr Afghanistan AF AFG 4 EMR 2015 1 hkZEgnPCNis
all_conf_xdr Albania AL ALB 8 EUR 2015 0 hkZEgnPCNis
#We need to get the Country codes
r <- GET(paste0(baseurl,"api/27/organisationUnits?paging=false&filter=level:eq:3&fields=id,code"))
r<- httr::content(r, "text")
ous<-jsonlite::fromJSON(r,flatten=TRUE)$organisationUnits
names(ous)<-c("ou_code","ou_id")
#Merge/INNER JOIN the OUs with the data
tb<-merge(tb,ous,by.x="iso3",by.y="ou_code")
knitr::kable(tb[1:2,])
iso3 variable country iso2 iso_numeric g_whoregion year value id ou_id
ABW e_pop_num Aruba AW 533 AMR 2005 100031 goH6vP5XtCq wFtLm5IujWA
ABW e_pop_num Aruba AW 533 AMR 2006 100830 goH6vP5XtCq wFtLm5IujWA

Fine tuning prior to import

tb_out<-tb[,c("id","year","ou_id","value")]
tb_out<-tb_out[!is.na(tb_out$value),]
tb_out<-plyr::colwise(as.character)(tb_out)
names(tb_out)<-c("dataElement","period","orgUnit","value")
knitr::kable(head(tb_out))
dataElement period orgUnit value
goH6vP5XtCq 2005 wFtLm5IujWA 100031
goH6vP5XtCq 2006 wFtLm5IujWA 100830
HLT8faniodR 2011 wFtLm5IujWA 0
RqKNafpPB3o 2000 wFtLm5IujWA 10
VjkTb4G6rwX 2012 wFtLm5IujWA 1
XGMfURZiW2V 2008 wFtLm5IujWA 0.64

Data import and analytics

#Import the data, skipping checks for existing values
url<-paste0(baseurl,"api/27/dataValueSets?preheatCache=true&skipExistingCheck=true")
r<-POST(url,body=toJSON(list(dataValues = tb_out),auto_unbox = TRUE),content_type_json())
#Lets trigger analytics
url<-paste0(baseurl,"api/27/resourceTables/analytics")
r<-POST(url)
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.

What did we end up with?

#Period dimensions
start_year<-min(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
end_year<-max(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
years<-paste(seq(start_year,end_year),sep="",collapse=";")
#Data element dimension
c_newinc<-fromJSON(content(GET(paste0(baseurl,"api/dataElements?filter=code:eq:c_newinc&fields=[id,name]")),"text"))$dataElements$id
#Orgunit dimension
sa<-fromJSON(content(GET(paste0(baseurl,
                          "api/organisationUnits?filter=name:eq:South%20Africa&fields=[id,name]")),
                     "text"))$organisationUnits$id
sl<-fromJSON(content(GET(paste0(baseurl,
                          "api/organisationUnits?filter=name:eq:Sierra%20Leone&fields=[id,name]")),
                     "text"))$organisationUnits$id
#Assemble the URL
url<-paste0(baseurl,"api/27/analytics.json?")
url<-paste0(url,"dimension=ou:",sa,";",sl)
url<-paste0(url,"&dimension=pe:",years)
url<-paste0(url,"&filter=dx:",c_newinc)
url<-paste0(url,"&displayProperty=NAME&skipMeta=false")
data<-fromJSON(content(GET(url),"text"))
#We need to munge the data a bit to get it into a suitable form.
metadata<-do.call(rbind,lapply(data$metaData$items,
                               data.frame,stringsAsFactors=FALSE))
metadata$from<-row.names(metadata)
this_data<-data.frame(data$rows)
names(this_data)<-data$headers$name
this_data$ou<-plyr::mapvalues(this_data$ou,metadata$from,
                              metadata$name,warn_missing=FALSE)
this_data$value<-as.numeric(as.character(this_data$value))/1000

Lets look at the data

#And, create the graph
g <- ggplot(data=this_data,aes(x=pe,y=value,group=ou, color=ou)) 
g <- g + geom_line() 
g <- g + geom_point()
g <- g +ggtitle("Reported TB cases in Sierra Leone and South Africa") 
g <- g + labs(x = "Year", y = "Number of cases (thousands)",fill = NULL)
g <- g + theme(axis.text.x = element_text(angle = 90, hjust = 1))
g

Summary


  1. https://en.wikipedia.org/wiki/Bootstrapping