ODM

The XML format for forms has advantages, but parsing it is hard.

Examples

library(xml2);library(magrittr);library(readr);library(tibble);library(knitr)   

# Sample CDISC ODM document, read straight from its raw GitHub URL.
fname <- "https://raw.githubusercontent.com/wardblonde/ODM-to-i2b2/master/odm/examples/CDISC_ODM_example_3.xml"
fname
## [1] "https://raw.githubusercontent.com/wardblonde/ODM-to-i2b2/master/odm/examples/CDISC_ODM_example_3.xml"
# Parse the file; `x` is the XML document that the XPath queries run against.
x <- read_xml(fname)

# Forms: collect the Name attribute of every FormDef element.
# Some documents declare a default namespace, and XPath then has to qualify
# element names with it -- hence the "d1:" prefix taken from xml_ns(x).
xpath <- "//d1:FormDef"
forms <- x %>%
  xml_find_all(xpath, ns = xml_ns(x)) %>%
  xml_attr("Name")

# Peek at the first few form names.
head(forms)
## [1] "Adverse Events"       "Concom Meds"          "Demography"          
## [4] "Treatment Assignment" "Pharmacokinetics"     "Physical Exam"
# How many forms are there?
length(forms)
## [1] 6
# Save the form names as a one-column CSV. as_data_frame() is deprecated in
# tibble, so the tibble is built explicitly; the column is still named `value`.
write_csv(tibble(value = forms), "forms.csv")

# Questions: every ItemDef element in the document.
# Keep the matched nodes themselves rather than only their Name attribute, so
# the other attributes (OID, DataType, ...) stay available. The names can be
# derived at any point with xml_attr(items, "Name").
xpath <- "//d1:ItemDef"
items <- x %>% xml_find_all(xpath, ns = xml_ns(x))
items
## {xml_nodeset (94)}
##  [1] <ItemDef OID="IT.ABNORM" Name="Normal/Abnormal/Not Done" DataType=" ...
##  [2] <ItemDef OID="IT.AEACTTRT" Name="Actions taken re study drug" DataT ...
##  [3] <ItemDef OID="IT.AECONTRT" Name="Actions taken, other" DataType="te ...
##  [4] <ItemDef OID="IT.AEENDAY" Name="Stop Day - Enter Two Digits 01-31"  ...
##  [5] <ItemDef OID="IT.AEENDT" Name="Derived Stop Date" DataType="text" L ...
##  [6] <ItemDef OID="IT.AEENMON" Name="Stop Month - Enter Two Digits 01-12 ...
##  [7] <ItemDef OID="IT.AEENYR" Name="Stop Year - Enter Four Digit Year" D ...
##  [8] <ItemDef OID="IT.AEOUT" Name="Outcome" DataType="text" Length="1" S ...
##  [9] <ItemDef OID="IT.AEREL" Name="Relationship to study drug" DataType= ...
## [10] <ItemDef OID="IT.AESEV" Name="Severity" DataType="text" Length="1"  ...
## [11] <ItemDef OID="IT.AESTDAY" Name="Start Day - Enter Two Digits 01-31" ...
## [12] <ItemDef OID="IT.AESTDT" Name="Derived Start Date" DataType="text"  ...
## [13] <ItemDef OID="IT.AESTMON" Name="Start Month - Enter Two Digits 01-1 ...
## [14] <ItemDef OID="IT.AESTYR" Name="Start Year - Enter Four Digit Year"  ...
## [15] <ItemDef OID="IT.AETERM" Name="Conmed Indication" DataType="text" L ...
## [16] <ItemDef OID="IT.AE_RELAT" Name="Check If Taken for AE" DataType="t ...
## [17] <ItemDef OID="IT.BODY_SYS" Name="Physical Exam Body System" DataTyp ...
## [18] <ItemDef OID="IT.COMMT1" Name="Comment" DataType="text" Length="200 ...
## [19] <ItemDef OID="IT.DOB" Name="Date of Birth" DataType="text" Length=" ...
## [20] <ItemDef OID="IT.DOSEUNIT" Name="Dose and Unit" DataType="text" Len ...
## ...

All forms listed:

# Render the form names as a table. tibble(value = ...) replaces the deprecated
# as_data_frame() and keeps the `value` column header.
kable(tibble(value = forms))

value

Adverse Events
Concom Meds
Demography
Treatment Assignment
Pharmacokinetics
Physical Exam