Jason Pickering
August 6, 2017
We will load a few helpful libraries.
require(httr)
require(jsonlite)
require(assertthat)
require(rlist)
require(reshape2)
require(ggplot2)
generateUID <- function(codeSize = 11) {
  #Initialize the random number generator
  runif(1)
  allowedLetters <- c(LETTERS, letters)
  allowedChars <- c(allowedLetters, 0:9)
  #The first character must be a letter according to the DHIS2 spec
  firstChar <- sample(allowedLetters, 1)
  otherChars <- sample(allowedChars, codeSize - 1)
  paste0(firstChar, paste(otherChars, collapse = ""))
}
Every DHIS2 object is identified by an 11-character alphanumeric uid. The server can generate a uid like y4yFsgveSfu for you at api/system/id, but when creating many objects it is convenient to generate them locally.
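As a sketch of the server-side route (assuming the baseurl and credentials defined just below), you could request a batch of uids from api/system/id:
#Hedged example: ask the server for five fresh uids.
r <- GET(paste0(baseurl, "api/system/id?limit=5"),
         authenticate(username, password))
fromJSON(content(r, "text"))$codes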
startTime<-Sys.time()
setwd("/home/jason/development/dhis2-data-munging/")
baseurl<-"http://localhost:8080/dhis/"
username<-"admin"
password<-"district"
#This will authenticate and give us a session cookie we can reuse in subsequent requests.
loginDHIS2 <- function(baseurl, username, password) {
  url <- paste0(baseurl, "api/me")
  r <- GET(url, authenticate(username, password))
  assert_that(r$status_code == 200L)
}
loginDHIS2(baseurl,username,password)
## [1] TRUE
The default admin:district credentials worked, so let's proceed. Remember to setwd to a directory on your machine first.
#Get the OUs
ous<-fromJSON("https://raw.githubusercontent.com/jason-p-pickering/dhis2-data-munging/master/ous/ous.json")
#Create the URL
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE&atomicMode=NONE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(ous,auto_unbox = TRUE),content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE
We POST this to the metadata API as JSON, turning off atomicMode for now.
#Request all of the orgunit IDs from the server
url<-paste0(baseurl,"/api/organisationUnits?fields=id&paging=false")
ous_from_server<-fromJSON(content(GET(url),"text"))
assert_that(all.equal(sort(ous$organisationUnits$id) ,
sort(ous_from_server$organisationUnits$id)))
## [1] TRUE
organisationUnitLevels<-data.frame(level=c(1,2,3),
name=c("Global","Continent","Country"))
#Generate some UIDs
organisationUnitLevels$uid<-sapply(rep(11,nrow(organisationUnitLevels)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE&atomicMode=NONE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(organisationUnitLevels = organisationUnitLevels),
auto_unbox = TRUE),
content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE
To be able to see all of the data, we need to assign ourselves to the Global organisation unit. We GET our own user object, modify its organisation units, and POST the metadata back, requesting that it be updated.
#We want to be a global user
url<-paste0(baseurl,"/api/organisationUnits?filter=name:eq:Global&fields=id")
global_uid<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/me")
me<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/users/",me$id)
me<-fromJSON(content(GET(url),"text"))
me$organisationUnits<-list(list(id = global_uid$organisationUnits$id))
url<-paste0(baseurl,"api/27/metadata?importStrategy=UPDATE")
r<-POST(url,body=toJSON(list(users=list(me)),auto_unbox = TRUE),content_type_json() )
assert_that(r$status_code == 200L)
## [1] TRUE
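If you want to confirm the assignment took effect, a quick check (my addition, not part of the original post) is to re-read the user and compare:
#Hedged check: our user should now list the Global orgunit.
url <- paste0(baseurl, "api/users/", me$id, "?fields=organisationUnits")
check <- fromJSON(content(GET(url), "text"))
assert_that(global_uid$organisationUnits$id %in% check$organisationUnits$id)
Next, we download the WHO TB data dictionary, which defines the variables we will turn into data elements.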
des<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=dictionary")
des$name<-substring(des$definition,1,230)
#Check and be sure that no names are duplicated.
assert_that(!any(duplicated(des$name)))
## [1] TRUE
For data elements, name, shortName, aggregationType and valueType are required; consult the api/schemas/ endpoint for the details.
#Data elements
set.seed(94005004)
des_import<-data.frame(name=des$name
,code=des$variable_name
,shortName=des$variable_name,
aggregationType="SUM",
valueType="NUMBER",
domainType="AGGREGATE")
des_import$id<-sapply(rep(11,nrow(des_import)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(dataElements = des_import),
auto_unbox = TRUE),
content_type_json())
#assert_that(fromJSON(content(r,"text"))$stats$created == nrow(des))
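As a sanity check (an assumption on my part, not shown in the original post), we can count the data elements now on the server:
#Hedged check: the server should hold at least as many data elements as we sent.
url <- paste0(baseurl, "api/dataElements?paging=false&fields=id")
des_check <- fromJSON(content(GET(url), "text"))$dataElements
assert_that(nrow(des_import) <= nrow(des_check))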
Next, we create the data element groups and assign each data element to its group; see api/schemas/dataElement for details.
#Get the groups and assign some UIDs.
de_groups<-data.frame(name=unique(des$dataset))
de_groups$id<-sapply(rep(11,nrow(de_groups)),generateUID)
#Let's create a map of data elements and the data element groups they belong to
foo<-merge(des[,c("variable_name","dataset")],
des_import[,c("code","id")],
by.x="variable_name",by.y="code")
[{"name":["Community engagement"],"id":["V9b7hWU2ZPa"],"dataElements":[{"id":["Rfjg6LEMGyb"]},{"id":["OBwq1fzkTm4"]},{"id":["v8gJOREcVnC"]},{"id":["A5
tb<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=estimates",
stringsAsFactors=FALSE)
tb_cases<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=notifications",
stringsAsFactors = FALSE)
knitr::kable(tb[1:2,c("iso3","year","e_pop_num","e_inc_100k")])
| iso3 | year | e_pop_num | e_inc_100k |
|------|------|-----------|------------|
| AFG  | 2000 | 19701940  | 190        |
| AFG  | 2001 | 20531160  | 189        |
The data is in wide format, and we need it in long format: a single data element/period/organisation unit combination on a single row. Dropping the NULL values will also make things faster.
tb<-reshape2::melt(tb,id.vars=c("country","iso2","iso3","iso_numeric",
"g_whoregion","year"))
tb<-tb[!is.na(tb$value),]
tb_cases<-reshape2::melt(tb_cases,id.vars=c("country","iso2","iso3",
"iso_numeric","g_whoregion","year"))
tb_cases<-tb_cases[!is.na(tb_cases$value),]
tb<-rbind(tb,tb_cases)
knitr::kable(tb_cases[1:2,])
|    | country     | iso2 | iso3 | iso_numeric | g_whoregion | year | variable | value |
|----|-------------|------|------|-------------|-------------|------|----------|-------|
| 18 | Afghanistan | AF   | AFG  | 4           | EMR         | 1997 | new_sp   | 618   |
| 19 | Afghanistan | AF   | AFG  | 4           | EMR         | 1998 | new_sp   | 1833  |
We merge the long format data with the data element UIDs, matching the WHO variable names against the data element codes.
tb<-merge(tb,des_import[,c("code","id")],by.x="variable",by.y="code")
knitr::kable(tb[1:2,])
| variable     | country     | iso2 | iso3 | iso_numeric | g_whoregion | year | value | id          |
|--------------|-------------|------|------|-------------|-------------|------|-------|-------------|
| all_conf_xdr | Afghanistan | AF   | AFG  | 4           | EMR         | 2015 | 1     | hkZEgnPCNis |
| all_conf_xdr | Albania     | AL   | ALB  | 8           | EUR         | 2015 | 0     | hkZEgnPCNis |
#We need to get the Country codes
r <- GET(paste0(baseurl,"api/27/organisationUnits?paging=false&filter=level:eq:3&fields=id,code"))
r<- httr::content(r, "text")
ous<-jsonlite::fromJSON(r,flatten=TRUE)$organisationUnits
names(ous)<-c("ou_code","ou_id")
#Merge/INNER JOIN the OUs with the data
tb<-merge(tb,ous,by.x="iso3",by.y="ou_code")
knitr::kable(tb[1:2,])
| iso3 | variable  | country | iso2 | iso_numeric | g_whoregion | year | value  | id          | ou_id       |
|------|-----------|---------|------|-------------|-------------|------|--------|-------------|-------------|
| ABW  | e_pop_num | Aruba   | AW   | 533         | AMR         | 2005 | 100031 | goH6vP5XtCq | wFtLm5IujWA |
| ABW  | e_pop_num | Aruba   | AW   | 533         | AMR         | 2006 | 100830 | goH6vP5XtCq | wFtLm5IujWA |
Finally, we keep only the columns which the data value sets API expects and drop rows with NULL values.
tb_out<-tb[,c("id","year","ou_id","value")]
tb_out<-tb_out[!is.na(tb_out$value),]
tb_out<-plyr::colwise(as.character)(tb_out)
names(tb_out)<-c("dataElement","period","orgUnit","value")
knitr::kable(head(tb_out))
| dataElement | period | orgUnit     | value  |
|-------------|--------|-------------|--------|
| goH6vP5XtCq | 2005   | wFtLm5IujWA | 100031 |
| goH6vP5XtCq | 2006   | wFtLm5IujWA | 100830 |
| HLT8faniodR | 2011   | wFtLm5IujWA | 0      |
| RqKNafpPB3o | 2000   | wFtLm5IujWA | 10     |
| VjkTb4G6rwX | 2012   | wFtLm5IujWA | 1      |
| XGMfURZiW2V | 2008   | wFtLm5IujWA | 0.64   |
#Import the data, skipping checks for existing values
url<-paste0(baseurl,"api/27/dataValueSets?preheatCache=true&skipExistingCheck=true")
r<-POST(url,body=toJSON(list(dataValues = tb_out),auto_unbox = TRUE),content_type_json())
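It is worth inspecting the import summary that comes back; a hedged peek (field names may differ across DHIS2 versions):
#Hedged check: the response reports imported/updated/ignored counts.
import_summary <- fromJSON(content(r, "text"))
import_summary$importCount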
#Let's trigger analytics
url<-paste0(baseurl,"api/27/resourceTables/analytics")
r<-POST(url)
## No encoding supplied: defaulting to UTF-8.
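Analytics generation runs asynchronously, so we should wait for it to finish before querying. A minimal polling sketch, assuming the api/system/tasks endpoint and its completed flag behave as documented for DHIS2 2.2x:
#Hedged sketch: poll the analytics task until it reports completion.
repeat {
  url <- paste0(baseurl, "api/system/tasks/ANALYTICS_TABLE")
  task <- fromJSON(content(GET(url), "text"))
  if (length(task) > 0 && any(task$completed)) break
  Sys.sleep(5)
}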
#Period dimensions
start_year<-min(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
end_year<-max(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
years<-paste(seq(start_year,end_year),sep="",collapse=";")
#Data element dimension
c_newinc<-fromJSON(content(GET(paste0(baseurl,"api/dataElements?filter=code:eq:c_newinc&fields=[id,name]")),"text"))$dataElements$id
#Orgunit dimension
sa<-fromJSON(content(GET(paste0(baseurl,
"api/organisationUnits?filter=name:eq:South%20Africa&fields=[id,name]")),
"text"))$organisationUnits$id
sl<-fromJSON(content(GET(paste0(baseurl,
"api/organisationUnits?filter=name:eq:Sierra%20Leone&fields=[id,name]")),
"text"))$organisationUnits$id
#Assemble the URL
url<-paste0(baseurl,"api/27/analytics.json?")
url<-paste0(url,"dimension=ou:",sa,";",sl)
url<-paste0(url,"&dimension=pe:",years)
url<-paste0(url,"&filter=dx:",c_newinc)
url<-paste0(url,"&displayProperty=NAME&skipMeta=false")
data<-fromJSON(content(GET(url),"text"))
#We need to munge the data a bit to get it into a suitable form.
metadata<-do.call(rbind,lapply(data$metaData$items,
data.frame,stringsAsFactors=FALSE))
metadata$from<-row.names(metadata)
this_data<-data.frame(data$rows)
names(this_data)<-data$headers$name
this_data$ou<-plyr::mapvalues(this_data$ou,metadata$from,
metadata$name,warn_missing=FALSE)
this_data$value<-as.numeric(as.character(this_data$value))/1000
#And, create the graph
g <- ggplot(data=this_data,aes(x=pe,y=value,group=ou, color=ou))
g <- g + geom_line()
g <- g + geom_point()
g <- g + ggtitle("Reported TB cases in Sierra Leone and South Africa")
g <- g + labs(x = "Year", y = "Number of cases (thousands)",fill = NULL)
g <- g + theme(axis.text.x = element_text(angle = 90, hjust = 1))
g
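To keep a copy of the chart, ggsave works directly on the object (filename and dimensions here are arbitrary choices):
#Save the chart to disk.
ggsave("tb_cases_sl_za.png", g, width = 8, height = 5)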