Jason Pickering
August 6, 2017
<-
is the same as =
(sort of)2$
is the same as the .
operator in JavaScript when working with lists.paste0
is the same as +
or concatenate
GET
and POST
refer to HTTP verbsdata.frame
is basically a flat table of dataapply
family of functions basically applies a given function over a list. Similar to JS’s map
fromJSON
should be pretty clear, but creates an R object from a JSON string.merge
is similar to a JOIN
in SQL.require(httr)
require(jsonlite)
require(assertthat)
require(rlist)
require(reshape2)
require(ggplot2)
We will load a few helpful libraries:
libcurl
generateUID <- function(codeSize = 11) {
#Generate a random seed
runif(1)
allowedLetters <- c(LETTERS, letters)
allowedChars <- c(LETTERS, letters, 0:9)
#First character must be a letter according to the DHIS2 spec
firstChar <- sample(allowedLetters, 1)
otherChars <- sample(allowedChars, codeSize - 1)
uid <- paste(c(firstChar,paste(
otherChars, sep = "", collapse = "" )), sep = "", collapse = "")
return(uid)
}
uid
like dxuDvHFPm4r for you at api/system/id
startTime<-Sys.time()
baseurl<-"http://localhost:8080/dhis/"
username<-"admin"
password<-"district"
#This will give us a cookie we can use for later use.
loginDHIS2<-function(baseurl,username,password) {
url<-paste0(baseurl,"api/me")
r<-GET(url,authenticate(username,password))
assert_that(r$status_code == 200L) }
loginDHIS2(baseurl,username,password)
## [1] TRUE
admin:district
allowed us to login.#Get the OUS
ous<-fromJSON("https://raw.githubusercontent.com/jason-p-pickering/dhis2-data-munging/master/ous/ous.json")
#Create the URL
url<-sprintf("%sapi/27/metadata?importStrategy=CREATE&atomicMode=NONE",baseurl)
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(ous,auto_unbox = TRUE),content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE
POST
this to the metadata API, turning off atomicMode
for now.#Request all of the orgunit IDs from the server
url<-paste0(baseurl,"/api/organisationUnits?fields=id&paging=false")
ous_from_server<-fromJSON(content(GET(url),"text"))
assert_that(all.equal(sort(ous$organisationUnits$id) ,
sort(ous_from_server$organisationUnits$id)))
## [1] TRUE
organisationUnitLevels<-data.frame(level=c(1,2,3),
name=c("Global","Continent","Country"))
#Generate some UIDs
organisationUnitLevels$uid<-sapply(rep(11,nrow(organisationUnitLevels)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE&atomicMode=NONE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(organisationUnitLevels = organisationUnitLevels),
auto_unbox = TRUE),
content_type_json())
assert_that(r$status_code == 200L)
## [1] TRUE
Global
organisation unit.POST
the metadata back and request that it be updated.#We want to be a global user
url<-paste0(baseurl,"/api/organisationUnits?filter=name:eq:Global&fields=id")
global_uid<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/me")
me<-fromJSON(content(GET(url),"text"))
url<-paste0(baseurl,"api/users/",me$id)
me<-fromJSON(content(GET(url),"text"))
me$organisationUnits<-list(list(id = global_uid$organisationUnits$id))
url<-paste0(baseurl,"api/27/metadata?importStrategy=UPDATE")
r<-POST(url,body=toJSON(list(users=list(me)),auto_unbox = TRUE),content_type_json() )
assert_that(r$status_code == 200L)
## [1] TRUE
set.seed(99377721)
userRole_UID<-generateUID()
require(XML)
## Loading required package: XML
dxf<-newXMLDoc()
metadata<-newXMLNode("metadata",namespaceDefinitions = c("http://dhis2.org/schema/dxf/2.0"),doc=dxf)
userRoles<-newXMLNode("userRoles",parent=metadata)
attribs<-c(name="Data entry clerk",id=userRole_UID)
userRole<-newXMLNode("userRole",attrs=attribs,parent=userRoles)
authorities<-newXMLNode("authorities",parent = userRole)
authorities_list<-c("F_DATAVALUE_DELETE",
"M_dhis-web-dataentry",
"M_dhis-web-mapping",
"M_dhis-web-validationrule",
"F_RUN_VALIDATION",
"M_dhis-web-dashboard-integration",
"F_DATAVALUE_ADD",
"M_dhis-web-visualizer")
for ( i in 1:length(authorities_list)) {
authority<-newXMLNode("authority",authorities_list[i],parent=authorities)}
url<-paste0(baseurl,"/api/metadata")
r<-POST(url,body=as(dxf,"character"),content_type_xml())
dxf
## <?xml version="1.0"?>
## <metadata xmlns="http://dhis2.org/schema/dxf/2.0">
## <userRoles>
## <userRole name="Data entry clerk" id="heHcSyqKC3N">
## <authorities>
## <authority>F_DATAVALUE_DELETE</authority>
## <authority>M_dhis-web-dataentry</authority>
## <authority>M_dhis-web-mapping</authority>
## <authority>M_dhis-web-validationrule</authority>
## <authority>F_RUN_VALIDATION</authority>
## <authority>M_dhis-web-dashboard-integration</authority>
## <authority>F_DATAVALUE_ADD</authority>
## <authority>M_dhis-web-visualizer</authority>
## </authorities>
## </userRole>
## </userRoles>
## </metadata>
##
users_list<-read.csv("https://raw.githubusercontent.com/jason-p-pickering/dhis2-data-munging/master/bootstrap/users.csv")
head(users_list)
## id first_name last_name email gender ip_address
## 1 1 Jeana Beat jbeat0@china.com.cn Female 2.116.167.47
## 2 2 Lennard McKerley lmckerley1@ucsd.edu Male 215.194.204.234
## 3 3 Cathee Dicky cdicky2@chronoengine.com Female 162.43.77.104
## 4 4 Lanie Tilney ltilney3@cbslocal.com Male 163.190.83.51
## 5 5 Vivyan Reding vreding4@facebook.com Female 101.169.69.42
## 6 6 Scottie Hamon shamon5@washington.edu Male 97.143.37.147
## Users need passwords
genPassword<-function(passwordLength=8) {
a<-sample(LETTERS,1)
b<-sample(c(0:9),1)
c<-sample(letters,passwordLength-2)
d<-c(a,b,c)
password<-paste(sample(d,size=passwordLength,replace=FALSE),sep="",collapse="")
return(password)
}
set.seed(22884882)
users_list$password<-sapply(rep(8,nrow(users_list)),genPassword)
users_list$user_uid<-sapply(rep(11,nrow(users_list)),generateUID)
users_list$user_credentials_id<-sapply(rep(11,nrow(users_list)),generateUID)
head(users_list)
## id first_name last_name email gender ip_address
## 1 1 Jeana Beat jbeat0@china.com.cn Female 2.116.167.47
## 2 2 Lennard McKerley lmckerley1@ucsd.edu Male 215.194.204.234
## 3 3 Cathee Dicky cdicky2@chronoengine.com Female 162.43.77.104
## 4 4 Lanie Tilney ltilney3@cbslocal.com Male 163.190.83.51
## 5 5 Vivyan Reding vreding4@facebook.com Female 101.169.69.42
## 6 6 Scottie Hamon shamon5@washington.edu Male 97.143.37.147
## password user_uid user_credentials_id
## 1 pykf8haF kyfeitE2Zrl ZGu28HkSJCF
## 2 S6mdzqkr ZvYXdpcmZAz po1Hp47j0wS
## 3 dZuego2z O9XaQSvkNqV OkItM4h6QYb
## 4 5xuraWtf T4n7ph2PifD F3MJY4D6rx5
## 5 wm8gyeNd dSoM8Xn6bVw phmUVP4J7NM
## 6 kpvn4ueO A2iF9mWetar cXC7BgjWnS0
require(XML)
dxf<-newXMLDoc()
metadata<-newXMLNode("metadata",namespaceDefinitions = c("http://dhis2.org/schema/dxf/2.0"),doc=dxf)
users<-newXMLNode("users",parent=metadata)
for (i in 1:nrow( users_list) ) {
this.row<-users_list[i,]
usercode<-paste0(this.row$first_name,this.row$last_name)
attribs<-c(id=this.row$user_uid,code=as.character(usercode))
user<-newXMLNode("user",attrs=attribs,parent=users)
surname<-newXMLNode("surname",this.row$last_name,parent=user)
firstName<-newXMLNode("firstName",this.row$first_name,parent=user)
userCredentials<-newXMLNode("userCredentials",
attrs=c(code=usercode,
id=this.row$user_credentials_id,
created=format(Sys.time(),"%Y-%m-%dT%H:%M:%S+0000")),
parent=user)
username.xml<-newXMLNode("username",usercode,parent=userCredentials)
this.password<-newXMLNode("password",this.row$password,parent=userCredentials)
selfRegistered<-newXMLNode("selfRegistered","false",parent=userCredentials)
disabled<-newXMLNode("disabled","false",parent=userCredentials)
userInfo_node<-newXMLNode("userInfo",attrs=c(id=this.row$user_uid),parent=userCredentials)
user_node<-newXMLNode("user",attrs=c(id=this.row$user_uid),parent=userCredentials)
userRoles<-newXMLNode("userRoles",parent=userCredentials)
userAuthorityGroup<-newXMLNode("userRole",attrs=c(id=userRole_UID),parent=userRoles)
organisationUnits<-newXMLNode("organisationUnits",parent=user)
attribs<-c(id=as.character(global_uid$organisationUnits$id))
orgunit<-newXMLNode("organisationUnit",attrs=attribs,parent=organisationUnits)
dataViewOrganisationUnits<-newXMLNode("dataViewOrganisationUnits",parent=user)
attribs<-c(id=as.character(global_uid$organisationUnits$id))
orgunit<-newXMLNode("dataViewOrganisationUnit",attrs=attribs,parent=dataViewOrganisationUnits)
}
url<-paste0(baseurl,"/api/metadata")
r<-POST(url,body=as(dxf,"character"),content_type_xml())
api/metadata
as usual.des<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=dictionary")
url<-paste0(baseurl,"api/schemas/dataElement/name")
de_schema<-fromJSON(content(GET(url),"text"))
They are also restricted to 230 characters
Let’s be sure our names are not too long.
des$name<-substring(des$definition,0,de_schema$length)
#Check and be sure that no names are duplicated.
assert_that(Reduce("&",duplicated(des$name)) == FALSE)
## [1] TRUE
name
,shortName
,aggregationType
and valueType
are required.api/schemas/dataElement
endpoint for the details.#Data elements
set.seed(94005004)
des_import<-data.frame(name=des$name
,code=des$variable_name
,shortName=des$variable_name,
aggregationType="SUM",
valueType="NUMBER",
domainType="AGGREGATE")
des_import$id<-sapply(rep(11,nrow(des_import)),generateUID)
url<-paste0(baseurl,"api/27/metadata?importStrategy=CREATE")
#Post to the metadata API as JSON
r<-POST(url,body=toJSON(list(dataElements = des_import),
auto_unbox = TRUE),
content_type_json())
#assert_that(fromJSON(content(r,"text"))$stats$created == nrow(des))
set.seed
to ensure the process is reproducible.#Get the groups and assign some UIDs.
de_groups<-data.frame(name=unique(des$dataset))
de_groups$dataset_id<-sapply(rep(11,nrow(de_groups)),generateUID)
names(de_groups)<-c("dataset","dataset_id")
#Lets create a map of data elements and which data element groups they belong to
des_degroups<-merge(des[,c("variable_name","dataset")],
des_import[,c("code","id")],
by.x="variable_name",by.y="code")
des_degroups<-merge(des_degroups,de_groups,by="dataset")
[{“name”:[“Community engagement”],“id”:[“V9b7hWU2ZPa”],“dataElements”:[{“id”:“v8gJOREcVnC”},{“id”:“Aw8mhaeS6gb”},{“id”:“Rfjg6LEMGyb”},{“id”:“OBwq1fzkT
tb<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=estimates",
stringsAsFactors=FALSE)
tb_cases<-read.csv("https://extranet.who.int/tme/generateCSV.asp?ds=notifications",
stringsAsFactors = FALSE)
knitr::kable(tb[1:2,c("iso3","year","e_pop_num","e_inc_100k")])
iso3 | year | e_pop_num | e_inc_100k |
---|---|---|---|
AFG | 2000 | 19701940 | 190 |
AFG | 2001 | 20531160 | 189 |
wide
format, and we need to have it in long
formatLong
format means we need a single data element+period+organisation unit combination for each data value.NULLS
will make things fastertb<-reshape2::melt(tb,id.vars=c("country","iso2","iso3","iso_numeric",
"g_whoregion","year"))
tb<-tb[!is.na(tb$value),]
tb_cases<-reshape2::melt(tb_cases,id.vars=c("country","iso2","iso3",
"iso_numeric","g_whoregion","year"))
tb_cases<-tb_cases[!is.na(tb_cases$value),]
tb<-rbind(tb,tb_cases)
knitr::kable(tb_cases[1:2,])
country | iso2 | iso3 | iso_numeric | g_whoregion | year | variable | value | |
---|---|---|---|---|---|---|---|---|
18 | Afghanistan | AF | AFG | 4 | EMR | 1997 | new_sp | 618 |
19 | Afghanistan | AF | AFG | 4 | EMR | 1998 | new_sp | 1833 |
long
formattb<-merge(tb,des_import[,c("code","id")],by.x="variable",by.y="code")
knitr::kable(tb[1:2,])
variable | country | iso2 | iso3 | iso_numeric | g_whoregion | year | value | id |
---|---|---|---|---|---|---|---|---|
all_conf_xdr | Afghanistan | AF | AFG | 4 | EMR | 2015 | 1 | hkZEgnPCNis |
all_conf_xdr | Albania | AL | ALB | 8 | EUR | 2015 | 0 | hkZEgnPCNis |
#We need to get the Country codes
r <- GET(paste0(baseurl,"api/27/organisationUnits?paging=false&filter=level:eq:3&fields=id,code"))
r<- httr::content(r, "text")
ous<-jsonlite::fromJSON(r,flatten=TRUE)$organisationUnits
names(ous)<-c("ou_code","ou_id")
#Merge/INNER JOIN the OUs with the data
tb<-merge(tb,ous,by.x="iso3",by.y="ou_code")
knitr::kable(tb[1:2,])
iso3 | variable | country | iso2 | iso_numeric | g_whoregion | year | value | id | ou_id |
---|---|---|---|---|---|---|---|---|---|
ABW | e_pop_num | Aruba | AW | 533 | AMR | 2005 | 100031 | goH6vP5XtCq | wFtLm5IujWA |
ABW | e_pop_num | Aruba | AW | 533 | AMR | 2006 | 100830 | goH6vP5XtCq | wFtLm5IujWA |
NULL
valuestb_out<-tb[,c("id","year","ou_id","value")]
tb_out<-tb_out[!is.na(tb_out$value),]
tb_out<-plyr::colwise(as.character)(tb_out)
names(tb_out)<-c("dataElement","period","orgUnit","value")
knitr::kable(head(tb_out))
dataElement | period | orgUnit | value |
---|---|---|---|
goH6vP5XtCq | 2005 | wFtLm5IujWA | 100031 |
goH6vP5XtCq | 2006 | wFtLm5IujWA | 100830 |
HLT8faniodR | 2011 | wFtLm5IujWA | 0 |
RqKNafpPB3o | 2000 | wFtLm5IujWA | 10 |
VjkTb4G6rwX | 2012 | wFtLm5IujWA | 1 |
XGMfURZiW2V | 2008 | wFtLm5IujWA | 0.64 |
#Import the data, skipping checks for existing values
url<-paste0(baseurl,"api/27/dataValueSets?preheatCache=true&skipExistingCheck=true")
r<-POST(url,body=toJSON(list(dataValues = tb_out),auto_unbox = TRUE),content_type_json())
#Lets trigger analytics
url<-paste0(baseurl,"api/27/resourceTables/analytics")
r<-POST(url)
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
## No encoding supplied: defaulting to UTF-8.
#Period dimensions
start_year<-min(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
end_year<-max(unique(tb_cases[tb_cases$variable == "c_newinc",c("year")]))
years<-paste(seq(start_year,end_year),sep="",collapse=";")
#Data element dimension
c_newinc<-fromJSON(content(GET(paste0(baseurl,"api/dataElements?filter=code:eq:c_newinc&fields=[id,name]")),"text"))$dataElements$id
#Orgunit dimension
sa<-fromJSON(content(GET(paste0(baseurl,
"api/organisationUnits?filter=name:eq:South%20Africa&fields=[id,name]")),
"text"))$organisationUnits$id
sl<-fromJSON(content(GET(paste0(baseurl,
"api/organisationUnits?filter=name:eq:Sierra%20Leone&fields=[id,name]")),
"text"))$organisationUnits$id
#Assemble the URL
url<-paste0(baseurl,"api/27/analytics.json?")
url<-paste0(url,"dimension=ou:",sa,";",sl)
url<-paste0(url,"&dimension=pe:",years)
url<-paste0(url,"&filter=dx:",c_newinc)
url<-paste0(url,"&displayProperty=NAME&skipMeta=false")
data<-fromJSON(content(GET(url),"text"))
#We need to munge the data a bit to get it into a suitable form.
metadata<-do.call(rbind,lapply(data$metaData$items,
data.frame,stringsAsFactors=FALSE))
metadata$from<-row.names(metadata)
this_data<-data.frame(data$rows)
names(this_data)<-data$headers$name
this_data$ou<-plyr::mapvalues(this_data$ou,metadata$from,
metadata$name,warn_missing=FALSE)
this_data$value<-as.numeric(as.character(this_data$value))/1000
#And, create the graph
g <- ggplot(data=this_data,aes(x=pe,y=value,group=ou, color=ou))
g <- g + geom_line()
g <- g + geom_point()
g <- g +ggtitle("Reported TB cases in Sierra Leone and South Africa")
g <- g + labs(x = "Year", y = "Number of cases (thousands)",fill = NULL)
g <- g + theme(axis.text.x = element_text(angle = 90, hjust = 1))
g