Bootstrapping DHIS2

Jason Pickering

August 6, 2017

Introduction

Choice of tools

General approach to bootstrapping

Goal for this excercise

A word on the R language

A note on security and credentials

Let’s get started

require(httr)
require(jsonlite)
require(assertthat)
require(rlist)
require(reshape2)
require(ggplot2)

We will load a few helpful libraries:

UID generation

generateUID <- function(codeSize = 11) {
  #Generate a random seed
  runif(1)
  allowedLetters <- c(LETTERS, letters)
  allowedChars <- c(LETTERS, letters, 0:9)
  #First character must be a letter according to the DHIS2 spec
  firstChar <- sample(allowedLetters, 1)
  otherChars <- sample(allowedChars, codeSize - 1)
  uid <- paste(c(firstChar,paste(
  otherChars, sep = "", collapse = "" )), sep = "", collapse = "")
  return(uid)
}

Logging in

startTime<-Sys.time()

baseurl<-"http://localhost:8080/dhis/"
username<-"admin"
password<-"district"
#This will give us a cookie we can use for later use. 

loginDHIS2<-function(baseurl,username,password) {
url<-paste0(baseurl,"api/me")
r<-GET(url,authenticate(username,password))
assert_that(r$status_code == 200L) }

loginDHIS2(baseurl,username,password)
## [1] TRUE

Loading our organisation units

#Get the OUS
ous<-fromJSON("https://raw.githubusercontent.com/jason-p-pickering/dhis2-data-munging/master/ous/ous.json")
#Create the URL
url<-sprintf("%sapi/27/metadata?importStrategy=CREATE&atomicMode=NONE",baseurl)
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(ous,auto_unbox = TRUE),content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE

Testing our organisation units are there

#Request all of the orgunit IDs from the server
url<-paste0(baseurl,"/api/organisationUnits?fields=id&paging=false")
ous_from_server<-fromJSON(content(GET(url),"text"))
assert_that(all.equal(sort(ous$organisationUnits$id) , 
                      sort(ous_from_server$organisationUnits$id)))
## [1] TRUE

Setting organisation unit levels

organisationUnitLevels<-data.frame(level=c(1,2,3),
                                   name=c("Global","Continent","Country"))
#Generate some UIDs
organisationUnitLevels$uid<-sapply(rep(11,nrow(organisationUnitLevels)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE&atomicMode=NONE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(organisationUnitLevels = organisationUnitLevels),
                        auto_unbox = TRUE),
                        content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE

Set your user’s organisation unit

#We want to be a global user
url<-paste0(baseurl,"/api/organisationUnits?filter=name:eq:Global&fields=id")
global_uid<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/me")
me<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/users/",me$id)
me<-fromJSON(content(GET(url),"text"))
me$organisationUnits<-list(list(id = global_uid$organisationUnits$id))
url<-paste0(baseurl,"api/27/metadata?importStrategy=UPDATE")
r<-POST(url,body=toJSON(list(users=list(me)),auto_unbox = TRUE),content_type_json() )
assert_that(r$status_code == 200L)
## [1] TRUE

Create a data entry user role

set.seed(99377721)
userRole_UID<-generateUID()
require(XML)
## Loading required package: XML
dxf<-newXMLDoc()
metadata<-newXMLNode("metadata",namespaceDefinitions = c("http://dhis2.org/schema/dxf/2.0"),doc=dxf)
userRoles<-newXMLNode("userRoles",parent=metadata)
attribs<-c(name="Data entry clerk",id=userRole_UID)
userRole<-newXMLNode("userRole",attrs=attribs,parent=userRoles)
authorities<-newXMLNode("authorities",parent = userRole)
authorities_list<-c("F_DATAVALUE_DELETE",
"M_dhis-web-dataentry",
"M_dhis-web-mapping",
"M_dhis-web-validationrule",
"F_RUN_VALIDATION",
"M_dhis-web-dashboard-integration",
"F_DATAVALUE_ADD",
"M_dhis-web-visualizer")

for ( i in 1:length(authorities_list)) {
  authority<-newXMLNode("authority",authorities_list[i],parent=authorities)}

url<-paste0(baseurl,"/api/metadata")
r<-POST(url,body=as(dxf,"character"),content_type_xml())
dxf
## <?xml version="1.0"?>
## <metadata xmlns="http://dhis2.org/schema/dxf/2.0">
##   <userRoles>
##     <userRole name="Data entry clerk" id="heHcSyqKC3N">
##       <authorities>
##         <authority>F_DATAVALUE_DELETE</authority>
##         <authority>M_dhis-web-dataentry</authority>
##         <authority>M_dhis-web-mapping</authority>
##         <authority>M_dhis-web-validationrule</authority>
##         <authority>F_RUN_VALIDATION</authority>
##         <authority>M_dhis-web-dashboard-integration</authority>
##         <authority>F_DATAVALUE_ADD</authority>
##         <authority>M_dhis-web-visualizer</authority>
##       </authorities>
##     </userRole>
##   </userRoles>
## </metadata>
## 

Create some users

users_list<-read.csv("https://raw.githubusercontent.com/jason-p-pickering/dhis2-data-munging/master/bootstrap/users.csv")
head(users_list)
##   id first_name last_name                    email gender      ip_address
## 1  1      Jeana      Beat      jbeat0@china.com.cn Female    2.116.167.47
## 2  2    Lennard  McKerley      lmckerley1@ucsd.edu   Male 215.194.204.234
## 3  3     Cathee     Dicky cdicky2@chronoengine.com Female   162.43.77.104
## 4  4      Lanie    Tilney    ltilney3@cbslocal.com   Male   163.190.83.51
## 5  5     Vivyan    Reding    vreding4@facebook.com Female   101.169.69.42
## 6  6    Scottie     Hamon   shamon5@washington.edu   Male   97.143.37.147
## Users need passwords

genPassword<-function(passwordLength=8) {
a<-sample(LETTERS,1)
b<-sample(c(0:9),1)
c<-sample(letters,passwordLength-2)
d<-c(a,b,c)
password<-paste(sample(d,size=passwordLength,replace=FALSE),sep="",collapse="")
return(password)
}
set.seed(22884882)
users_list$password<-sapply(rep(8,nrow(users_list)),genPassword)
users_list$user_uid<-sapply(rep(11,nrow(users_list)),generateUID)
users_list$user_credentials_id<-sapply(rep(11,nrow(users_list)),generateUID)

head(users_list)
##   id first_name last_name                    email gender      ip_address
## 1  1      Jeana      Beat      jbeat0@china.com.cn Female    2.116.167.47
## 2  2    Lennard  McKerley      lmckerley1@ucsd.edu   Male 215.194.204.234
## 3  3     Cathee     Dicky cdicky2@chronoengine.com Female   162.43.77.104
## 4  4      Lanie    Tilney    ltilney3@cbslocal.com   Male   163.190.83.51
## 5  5     Vivyan    Reding    vreding4@facebook.com Female   101.169.69.42
## 6  6    Scottie     Hamon   shamon5@washington.edu   Male   97.143.37.147
##   password    user_uid user_credentials_id
## 1 pykf8haF kyfeitE2Zrl         ZGu28HkSJCF
## 2 S6mdzqkr ZvYXdpcmZAz         po1Hp47j0wS
## 3 dZuego2z O9XaQSvkNqV         OkItM4h6QYb
## 4 5xuraWtf T4n7ph2PifD         F3MJY4D6rx5
## 5 wm8gyeNd dSoM8Xn6bVw         phmUVP4J7NM
## 6 kpvn4ueO A2iF9mWetar         cXC7BgjWnS0

Create the XML for the users

require(XML)
dxf<-newXMLDoc()
metadata<-newXMLNode("metadata",namespaceDefinitions = c("http://dhis2.org/schema/dxf/2.0"),doc=dxf)
users<-newXMLNode("users",parent=metadata)

for (i in 1:nrow( users_list) ) {
  this.row<-users_list[i,]
  usercode<-paste0(this.row$first_name,this.row$last_name)
  attribs<-c(id=this.row$user_uid,code=as.character(usercode)) 
  user<-newXMLNode("user",attrs=attribs,parent=users)
  surname<-newXMLNode("surname",this.row$last_name,parent=user)
  firstName<-newXMLNode("firstName",this.row$first_name,parent=user)
  userCredentials<-newXMLNode("userCredentials",
                              attrs=c(code=usercode,
                                      id=this.row$user_credentials_id,
                                      created=format(Sys.time(),"%Y-%m-%dT%H:%M:%S+0000")),
                              parent=user)
  username.xml<-newXMLNode("username",usercode,parent=userCredentials)
  this.password<-newXMLNode("password",this.row$password,parent=userCredentials)
  selfRegistered<-newXMLNode("selfRegistered","false",parent=userCredentials)
  disabled<-newXMLNode("disabled","false",parent=userCredentials)
  userInfo_node<-newXMLNode("userInfo",attrs=c(id=this.row$user_uid),parent=userCredentials)
  user_node<-newXMLNode("user",attrs=c(id=this.row$user_uid),parent=userCredentials)
  userRoles<-newXMLNode("userRoles",parent=userCredentials)
  userAuthorityGroup<-newXMLNode("userRole",attrs=c(id=userRole_UID),parent=userRoles)
  organisationUnits<-newXMLNode("organisationUnits",parent=user)
  attribs<-c(id=as.character(global_uid$organisationUnits$id)) 
  orgunit<-newXMLNode("organisationUnit",attrs=attribs,parent=organisationUnits)
  dataViewOrganisationUnits<-newXMLNode("dataViewOrganisationUnits",parent=user)
  attribs<-c(id=as.character(global_uid$organisationUnits$id)) 
  orgunit<-newXMLNode("dataViewOrganisationUnit",attrs=attribs,parent=dataViewOrganisationUnits)

}

url<-paste0(baseurl,"/api/metadata")
r<-POST(url,body=as(dxf,"character"),content_type_xml())

Data element creation

des<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=dictionary")
url<-paste0(baseurl,"api/schemas/dataElement/name")
de_schema<-fromJSON(content(GET(url),"text"))
des$name<-substring(des$definition,0,de_schema$length)
#Check and be sure that no names are duplicated.
assert_that(Reduce("&",duplicated(des$name)) == FALSE)
## [1] TRUE

Loading the data elements

#Data elements
set.seed(94005004)
des_import<-data.frame(name=des$name
                       ,code=des$variable_name
                       ,shortName=des$variable_name,
                       aggregationType="SUM",
                       valueType="NUMBER",
                       domainType="AGGREGATE")
des_import$id<-sapply(rep(11,nrow(des_import)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(dataElements = des_import),
                        auto_unbox = TRUE),
                        content_type_json())
#assert_that(fromJSON(content(r,"text"))$stats$created == nrow(des))

Data element groups

#Get the groups and assign some UIDs.
de_groups<-data.frame(name=unique(des$dataset))
de_groups$dataset_id<-sapply(rep(11,nrow(de_groups)),generateUID)
names(de_groups)<-c("dataset","dataset_id")

#Lets create a map of data elements and which data element groups they belong to
des_degroups<-merge(des[,c("variable_name","dataset")],
           des_import[,c("code","id")],
           by.x="variable_name",by.y="code")
des_degroups<-merge(des_degroups,de_groups,by="dataset")

[{“name”:[“Community engagement”],“id”:[“V9b7hWU2ZPa”],“dataElements”:[{“id”:“v8gJOREcVnC”},{“id”:“Aw8mhaeS6gb”},{“id”:“Rfjg6LEMGyb”},{“id”:“OBwq1fzkT

Summary on metadata import

Transforming and importing some data

tb<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=estimates",
             stringsAsFactors=FALSE)
tb_cases<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=notifications",
                   stringsAsFactors = FALSE)
knitr::kable(tb[1:2,c("iso3","year","e_pop_num","e_inc_100k")])
iso3 year e_pop_num e_inc_100k
AFG 2000 19701940 190
AFG 2001 20531160 189
tb<-reshape2::melt(tb,id.vars=c("country","iso2","iso3","iso_numeric",
                                "g_whoregion","year"))
tb<-tb[!is.na(tb$value),]
tb_cases<-reshape2::melt(tb_cases,id.vars=c("country","iso2","iso3",
                                            "iso_numeric","g_whoregion","year"))
tb_cases<-tb_cases[!is.na(tb_cases$value),]
tb<-rbind(tb,tb_cases)
knitr::kable(tb_cases[1:2,])
country iso2 iso3 iso_numeric g_whoregion year variable value
18 Afghanistan AF AFG 4 EMR 1997 new_sp 618
19 Afghanistan AF AFG 4 EMR 1998 new_sp 1833

Merging the data with the organisation units

tb<-merge(tb,des_import[,c("code","id")],by.x="variable",by.y="code")
knitr::kable(tb[1:2,])
variable country iso2 iso3 iso_numeric g_whoregion year value id
all_conf_xdr Afghanistan AF AFG 4 EMR 2015 1 hkZEgnPCNis
all_conf_xdr Albania AL ALB 8 EUR 2015 0 hkZEgnPCNis
#We need to get the Country codes
r <- GET(paste0(baseurl,"api/27/organisationUnits?paging=false&filter=level:eq:3&fields=id,code"))
r<- httr::content(r, "text")
ous<-jsonlite::fromJSON(r,flatten=TRUE)$organisationUnits
names(ous)<-c("ou_code","ou_id")
#Merge/INNER JOIN the OUs with the data
tb<-merge(tb,ous,by.x="iso3",by.y="ou_code")
knitr::kable(tb[1:2,])
iso3 variable country iso2 iso_numeric g_whoregion year value id ou_id
ABW e_pop_num Aruba AW 533 AMR 2005 100031 goH6vP5XtCq wFtLm5IujWA
ABW e_pop_num Aruba AW 533 AMR 2006 100830 goH6vP5XtCq wFtLm5IujWA

Fine tuning prior to import

tb_out<-tb[,c("id","year","ou_id","value")]
tb_out<-tb_out[!is.na(tb_out$value),]
tb_out<-plyr::colwise(as.character)(tb_out)
names(tb_out)<-c("dataElement","period","orgUnit","value")
knitr::kable(head(tb_out))
dataElement period orgUnit value
goH6vP5XtCq 2005 wFtLm5IujWA 100031
goH6vP5XtCq 2006 wFtLm5IujWA 100830
HLT8faniodR 2011 wFtLm5IujWA 0
RqKNafpPB3o 2000 wFtLm5IujWA 10
VjkTb4G6rwX 2012 wFtLm5IujWA 1
XGMfURZiW2V 2008 wFtLm5IujWA 0.64

Data import and analytics

#Import the data, skipping checks for existing values
url<-paste0(baseurl,"api/27/dataValueSets?preheatCache=true&skipExistingCheck=true")
r<-POST(url,body=toJSON(list(dataValues = tb_out),auto_unbox = TRUE),content_type_json())
#Lets trigger analytics
url<-paste0(baseurl,"api/27/resourceTables/analytics")
r<-POST(url)
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.

What did we end up with?

#Period dimensions
start_year<-min(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
end_year<-max(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
years<-paste(seq(start_year,end_year),sep="",collapse=";")
#Data element dimension
c_newinc<-fromJSON(content(GET(paste0(baseurl,"api/dataElements?filter=code:eq:c_newinc&fields=[id,name]")),"text"))$dataElements$id
#Orgunit dimension
sa<-fromJSON(content(GET(paste0(baseurl,
                          "api/organisationUnits?filter=name:eq:South%20Africa&fields=[id,name]")),
                     "text"))$organisationUnits$id
sl<-fromJSON(content(GET(paste0(baseurl,
                          "api/organisationUnits?filter=name:eq:Sierra%20Leone&fields=[id,name]")),
                     "text"))$organisationUnits$id
#Assemble the URL
url<-paste0(baseurl,"api/27/analytics.json?")
url<-paste0(url,"dimension=ou:",sa,";",sl)
url<-paste0(url,"&dimension=pe:",years)
url<-paste0(url,"&filter=dx:",c_newinc)
url<-paste0(url,"&displayProperty=NAME&skipMeta=false")
data<-fromJSON(content(GET(url),"text"))
#We need to munge the data a bit to get it into a suitable form.
metadata<-do.call(rbind,lapply(data$metaData$items,
                               data.frame,stringsAsFactors=FALSE))
metadata$from<-row.names(metadata)
this_data<-data.frame(data$rows)
names(this_data)<-data$headers$name
this_data$ou<-plyr::mapvalues(this_data$ou,metadata$from,
                              metadata$name,warn_missing=FALSE)
this_data$value<-as.numeric(as.character(this_data$value))/1000

Lets look at the data

#And, create the graph
g <- ggplot(data=this_data,aes(x=pe,y=value,group=ou, color=ou)) 
g <- g + geom_line() 
g <- g + geom_point()
g <- g +ggtitle("Reported TB cases in Sierra Leone and South Africa") 
g <- g + labs(x = "Year", y = "Number of cases (thousands)",fill = NULL)
g <- g + theme(axis.text.x = element_text(angle = 90, hjust = 1))
g

Summary goes here.


  1. https://en.wikipedia.org/wiki/Bootstrapping

  2. https://renkun.me/blog/2014/01/28/difference-between-assignment-operators-in-r.html

  3. https://github.com/pgracio/dhis2-docker