東京大学UMINセンター 岡田 昌史
2016.11.26
<ItemDef OID="IT.CM.CMDOSU" Name="CMDOSU" DataType="text" Length="8"
SASFieldName="CMDOSU">
<Description>
<TranslatedText xml:lang="en">Dose Units</TranslatedText>
</Description>
<CodeListRef CodeListOID="CL.C78417.CMDOSU"/>
<def:Origin Type="CRF">
<def:DocumentRef leafID="LF.blankcrf">
<def:PDFPageRef PageRefs="9 22" Type="PhysicalRef"/>
</def:DocumentRef>
</def:Origin>
</ItemDef>
library(validate)
# Rule set with a single check: Sepal.Length must be greater than 5.
v <- validator(Sepal.Length > 5)

# Apply the rule set to the built-in iris data, then tabulate the
# outcome (passes / fails / NA) per rule.
result <- confront(iris, v)
summary(result)
rule items passes fails nNA error warning expression
1 V1 150 118 32 0 FALSE FALSE Sepal.Length > 5
rules:
-
  expr: nchar(as.character(DOMAIN)) <= 2
  name: Length of DOMAIN
  description: DOMAIN変数は長さ2以下でなければならない
# Build the validator from the rules stored in a YAML file.
v <- validator(.file = "rules.yaml")
# The CMSTDTC variable must be in YYYY-MM-DDTHH:MM:SS format.
# regexpr() returns the start position of the match (-1 when there is none),
# so with the pattern anchored by ^...$ the comparison == 1 holds only when
# the whole value matches.
regexpr("^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})$",as.character(CMSTDTC)) == 1
# Build the validator from a plain-text rule file.
v <- validator(.file = "rules.txt")
library(magrittr)
# Three rules at once: a numeric bound, a set-membership check, and a
# relation between several columns.
v <- validator(
  Sepal.Length > 5,
  Species %in% c("setosa", "virginica"),
  Petal.Width + Sepal.Width < Petal.Length * 3
)

# Pipe the data through confront() and summarise the result per rule.
iris %>%
  confront(v) %>%
  summary()
rule items passes fails nNA error warning
1 V1 150 118 32 0 FALSE FALSE
2 V2 150 100 50 0 FALSE FALSE
3 V3 150 145 5 0 FALSE FALSE
expression
1 Sepal.Length > 5
2 Species %in% c("setosa", "virginica")
3 Petal.Width + Sepal.Width < Petal.Length * 3
# Same pipeline as for summary(), but hand the confrontation result to
# barplot() to plot it instead of tabulating it.
iris %>%
  confront(v) %>%
  barplot()
library(R4DSXML)
# Read the variable-level metadata (one row per ItemDef/ItemRef) from the
# Define-XML file.
md <- getVarMD("Odm_Define.xml")

# Show the first rows to inspect the available metadata columns.
head(md)
IR_ItemOID IGD_Name IR_OrderNumber IR_Mandatory IR_KeySequence
1 IT.CM.DOMAIN CM 2 Yes NA
2 IT.CM.CMSEQ CM 4 Yes NA
3 IT.CM.CMTRT CM 5 Yes 6
4 IT.CM.CMDOSE CM 6 No 7
5 IT.CM.CMDOSU CM 7 No 8
6 IT.CM.CMSTDTC CM 8 No 3
ID_Name ID_Length ID_SignificantDigits ID_DataType
1 DOMAIN 2 <NA> text
2 CMSEQ 2 <NA> integer
3 CMTRT 23 <NA> text
4 CMDOSE 4 <NA> integer
5 CMDOSU 8 <NA> text
6 CMSTDTC <NA> <NA> date
ID_Label ID_SASFieldName
1 Domain Abbreviation DOMAIN
2 Sequence Number CMSEQ
3 Reported Name of Drug, Med, or\n\t\t\t\t\t\tTherapy CMTRT
4 Dose CMDOSE
5 Dose Units CMDOSU
6 Start Date/Time of Medication CMSTDTC
ID_SASFormatName ID_CodeListOID ID_OriginType ID_OriginDescription
1 NA CL.DOMAIN Assigned NA
2 NA <NA> Derived NA
3 NA <NA> CRF NA
4 NA <NA> CRF NA
5 NA CL.C78417.CMDOSU CRF NA
6 NA <NA> CRF NA
# Read a Dataset-XML file into a data frame; the Define-XML file is passed
# as the second argument.
ds <- read.dataset.xml("Odm_LB.xml", "Odm_Define.xml")

# Show the first rows of the loaded dataset.
head(ds)
DOMAIN LBSEQ LBREFID LBTESTCD LBTEST LBCAT LBORRES
1 LB 1 B232115 BILI Bilirubin CHEMISTRY 0.443434
2 LB 2 B290028 BILI Bilirubin CHEMISTRY 0.3
3 LB 3 B232115 BUN Blood Urea Nitrogen CHEMISTRY 26
4 LB 4 B290028 BUN Blood Urea Nitrogen CHEMISTRY 18
5 LB 5 B232115 GLUC Glucose CHEMISTRY 100
6 LB 6 B290028 GLUC Glucose CHEMISTRY 87
LBORRESU LBORNRLO LBORNRHI LBSTRESC LBSTRESN LBSTRESU LBSTNRLO LBSTNRHI
1 mg/dL .0 1.1 6.8 6.80 umol/L 0.00 18.80
2 mg/dL .0 1.1 5.1 5.10 umol/L 0.00 18.80
3 mg/dL 9 24 9.28 9.28 mmol/L 3.21 8.57
4 mg/dL 9 24 6.43 6.43 mmol/L 3.21 8.57
5 mg/dL 68 110 5.5 5.50 mmol/L 3.80 6.10
6 mg/dL 68 110 4.8 4.80 mmol/L 3.80 6.10
LBNRIND LBSPEC LBBLFL LBFAST VISITNUM VISIT VISITDY LBDTC
1 NORMAL BLOOD Y Y 1 SCREEN -13 2003-04-15T11:20
2 NORMAL BLOOD <NA> N 10 WEEK 24 169 2003-10-13T11:55
3 HIGH BLOOD Y Y 1 SCREEN -13 2003-04-15T11:20
4 NORMAL BLOOD <NA> N 10 WEEK 24 169 2003-10-13T11:55
5 NORMAL BLOOD Y Y 1 SCREEN -13 2003-04-15T11:20
6 NORMAL BLOOD <NA> Y 10 WEEK 24 169 2003-10-13T11:55
LBDY
1 -14
2 168
3 -14
4 168
5 -14
6 168
library(devtools)
# Load define2validate() from a public gist.
# NOTE(review): this downloads and executes remote code. Consider pinning
# the gist content with the sha1 argument of source_gist() once the expected
# hash is known.
source_gist(
  "8c4c9d8a7b6338e11045055573a26303",
  filename = "define2validate.R"
)

# Domain whose dataset is validated; also used to build the dataset file name.
Domain <- "CM"

# Write a validate rule file for the domain based on the Define-XML, then
# load that file as a validator object.
define2validate(
  Domain,
  file = "exampleRules.yaml",
  definexml = "Odm_Define.xml"
)
v <- validator(.file = "exampleRules.yaml")

# Read the Dataset-XML of the domain ("Odm_<Domain>.xml").
data <- read.dataset.xml(paste0("Odm_", Domain, ".xml"), "Odm_Define.xml")

# getCT() is an R4DSXML function; it reads the controlled-terminology
# (codelist) part of the Define-XML. The generated codelist rules reference
# CT, so it is defined before confront() is called.
CT <- getCT("Odm_Define.xml")

cf <- confront(data, v)

# Widen the console so the summary columns wrap predictably.
options(width = 80)
summary(cf)
rule items passes fails nNA error
1 Length of DOMAIN 2 2 0 0 FALSE
2 DOMAIN is mandatory 2 2 0 0 FALSE
3 DOMAIN should follow codelist CL.DOMAIN 2 2 0 0 FALSE
4 Length of CMSEQ 2 2 0 0 FALSE
5 CMSEQ is mandatory 2 2 0 0 FALSE
6 CMSEQ should be integer 2 2 0 0 FALSE
7 Length of CMTRT 2 2 0 0 FALSE
8 CMTRT is mandatory 2 2 0 0 FALSE
9 Length of CMDOSE 2 2 0 0 FALSE
10 CMDOSE should be integer 2 2 0 0 FALSE
11 Length of CMDOSU 2 2 0 0 FALSE
12 CMDOSU should follow codelist CL.C78417.CMDOSU 2 1 1 0 FALSE
13 CMSTDTC should be Date 2 1 1 0 FALSE
warning
1 FALSE
2 FALSE
3 FALSE
4 FALSE
5 FALSE
6 FALSE
7 FALSE
8 FALSE
9 FALSE
10 FALSE
11 FALSE
12 FALSE
13 FALSE
expression
1 nchar(as.character(DOMAIN)) <= 2
2 !is.na(DOMAIN)
3 as.character(DOMAIN) %in% CT[CT[, "OID"] == "CL.DOMAIN", "CodedValue"]
4 nchar(as.character(CMSEQ)) <= 2
5 !is.na(CMSEQ)
6 regexpr("^[0-9-]+$", as.character(CMSEQ)) == 1
7 nchar(as.character(CMTRT)) <= 23
8 !is.na(CMTRT)
9 nchar(as.character(CMDOSE)) <= 4
10 regexpr("^[0-9-]+$", as.character(CMDOSE)) == 1
11 nchar(as.character(CMDOSU)) <= 8
12 as.character(CMDOSU) %in% CT[CT[, "OID"] == "CL.C78417.CMDOSU", "CodedValue"]
13 regexpr("^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})$", as.character(CMSTDTC)) == 1
<ItemGroupData ItemGroupOID="IG.CM" data:ItemGroupDataSeq="2">
<ItemData ItemOID="IT.STUDYID" Value="P-OpenDolphin-sample"/>
<ItemData ItemOID="IT.CM.DOMAIN" Value="CM"/>
<ItemData ItemOID="IT.USUBJID" Value="P-OpenDolphin-sample.100008"/>
<ItemData ItemOID="IT.CM.CMSEQ" Value="2"/>
<ItemData ItemOID="IT.CM.CMTRT" Value="クレストール錠"/>
<ItemData ItemOID="IT.CM.CMDOSE" Value="5"/>
<!-- "mgg"は単位用語の辞書に載っていない -->
<ItemData ItemOID="IT.CM.CMDOSU" Value="mgg"/>
<!-- 日付の最後（秒）が3桁になっており、YYYY-MM-DDTHH:MM:SS形式に合わない -->
<ItemData ItemOID="IT.CM.CMSTDTC" Value="2016-03-16T10:56:401"/>
</ItemGroupData>