knitr::opts_chunk$set(echo = TRUE)
library(Capr)
library(Eunomia)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::unit() masks Capr::unit()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
`cohort` is reserved (it is the Capr function used below), so we use `ch` as the variable name for a cohort; the same goes for `cs`, which builds a concept set, so concept set variables are named `xyzConceptSet`.
# Simple cohort: the entry event is a drug exposure drawn from the
# "infliximab" concept set (concept 937368 with descendants included);
# the exit strategy is observationExit().
ch <- cohort(
  entry = entry(
    drugExposure(cs(descendants(937368), name = "infliximab"))
  ),
  exit = exit(endStrategy = observationExit())
)

# Circe representation of the cohort as a nested R list.
chList <- toCirce(ch)

# The same definition serialised to JSON, in two flavours:
#   cohortJson  - the object returned by jsonlite::toJSON()
#   cohortJson2 - that same JSON text coerced to a plain character string
cohortJson <- jsonlite::toJSON(chList, pretty = TRUE, auto_unbox = TRUE)
cohortJson2 <- as.character(cohortJson)
# Inspect the structure of the Circe list, top-level elements only.
str(chList,max.level = 1)
## List of 9
## $ ConceptSets :List of 1
## $ PrimaryCriteria :List of 3
## $ QualifiedLimit :List of 1
## $ ExpressionLimit :List of 1
## $ InclusionRules : list()
## $ CensoringCriteria: list()
## $ CollapseSettings :List of 2
## $ CensorWindow : Named list()
## $ cdmVersionRange : chr ">=5.0.0"
# Same inspection, one nesting level deeper.
str(chList,max.level = 2)
## List of 9
## $ ConceptSets :List of 1
## ..$ :List of 3
## $ PrimaryCriteria :List of 3
## ..$ CriteriaList :List of 1
## ..$ ObservationWindow :List of 2
## ..$ PrimaryCriteriaLimit:List of 1
## $ QualifiedLimit :List of 1
## ..$ Type: chr "First"
## $ ExpressionLimit :List of 1
## ..$ Type: chr "First"
## $ InclusionRules : list()
## $ CensoringCriteria: list()
## $ CollapseSettings :List of 2
## ..$ CollapseType: chr "ERA"
## ..$ EraPad : int 0
## $ CensorWindow : Named list()
## $ cdmVersionRange : chr ">=5.0.0"
# Full listing: auto-print the entire nested list.
chList
## $ConceptSets
## $ConceptSets[[1]]
## $ConceptSets[[1]]$id
## [1] 0
##
## $ConceptSets[[1]]$name
## [1] "infliximab"
##
## $ConceptSets[[1]]$expression
## $ConceptSets[[1]]$expression$items
## $ConceptSets[[1]]$expression$items[[1]]
## $ConceptSets[[1]]$expression$items[[1]]$concept
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_ID
## [1] 937368
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_NAME
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$STANDARD_CONCEPT
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$STANDARD_CONCEPT_CAPTION
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$INVALID_REASON
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$INVALID_REASON_CAPTION
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_CODE
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$DOMAIN_ID
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$VOCABULARY_ID
## [1] ""
##
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_CLASS_ID
## [1] ""
##
##
## $ConceptSets[[1]]$expression$items[[1]]$isExcluded
## [1] FALSE
##
## $ConceptSets[[1]]$expression$items[[1]]$includeDescendants
## [1] TRUE
##
## $ConceptSets[[1]]$expression$items[[1]]$includeMapped
## [1] FALSE
##
##
##
##
##
##
## $PrimaryCriteria
## $PrimaryCriteria$CriteriaList
## $PrimaryCriteria$CriteriaList[[1]]
## $PrimaryCriteria$CriteriaList[[1]]$DrugExposure
## $PrimaryCriteria$CriteriaList[[1]]$DrugExposure$CodesetId
## [1] 0
##
##
##
##
## $PrimaryCriteria$ObservationWindow
## $PrimaryCriteria$ObservationWindow$PriorDays
## [1] 0
##
## $PrimaryCriteria$ObservationWindow$PostDays
## [1] 0
##
##
## $PrimaryCriteria$PrimaryCriteriaLimit
## $PrimaryCriteria$PrimaryCriteriaLimit$Type
## [1] "First"
##
##
##
## $QualifiedLimit
## $QualifiedLimit$Type
## [1] "First"
##
##
## $ExpressionLimit
## $ExpressionLimit$Type
## [1] "First"
##
##
## $InclusionRules
## list()
##
## $CensoringCriteria
## list()
##
## $CollapseSettings
## $CollapseSettings$CollapseType
## [1] "ERA"
##
## $CollapseSettings$EraPad
## [1] 0
##
##
## $CensorWindow
## named list()
##
## $cdmVersionRange
## [1] ">=5.0.0"
# Print the complete JSON serialisation of the same list.
cat(cohortJson)
## {
## "ConceptSets": [
## {
## "id": 0,
## "name": "infliximab",
## "expression": {
## "items": [
## {
## "concept": {
## "CONCEPT_ID": 937368,
## "CONCEPT_NAME": "",
## "STANDARD_CONCEPT": "",
## "STANDARD_CONCEPT_CAPTION": "",
## "INVALID_REASON": "",
## "INVALID_REASON_CAPTION": "",
## "CONCEPT_CODE": "",
## "DOMAIN_ID": "",
## "VOCABULARY_ID": "",
## "CONCEPT_CLASS_ID": ""
## },
## "isExcluded": false,
## "includeDescendants": true,
## "includeMapped": false
## }
## ]
## }
## }
## ],
## "PrimaryCriteria": {
## "CriteriaList": [
## {
## "DrugExposure": {
## "CodesetId": 0
## }
## }
## ],
## "ObservationWindow": {
## "PriorDays": 0,
## "PostDays": 0
## },
## "PrimaryCriteriaLimit": {
## "Type": "First"
## }
## },
## "QualifiedLimit": {
## "Type": "First"
## },
## "ExpressionLimit": {
## "Type": "First"
## },
## "InclusionRules": [],
## "CensoringCriteria": [],
## "CollapseSettings": {
## "CollapseType": "ERA",
## "EraPad": 0
## },
## "CensorWindow": {},
## "cdmVersionRange": ">=5.0.0"
## }
#same output
#cat(cohortJson2)
library(Capr)
# Better definition: build the concept set first, because the cohort
# definition needs to refer to it twice (once in the entry event and once
# in the exit strategy).
drugConceptSet <- cs(descendants(937368), name = "infliximab")

ch1 <- cohort(
  entry = entry(
    drugExposure(drugConceptSet),
    primaryCriteriaLimit = "All",
    qualifiedLimit = "All"
  ),
  exit = exit(
    endStrategy = drugExit(drugConceptSet, persistenceWindow = 30)
  )
)
## Loading required namespace: testthat
library(RJSONIO)
#further exploring the error
#RJSONIO::fromJSON(cohortJson2)
library(CirceR)
data(cohortDefinitionJson)
#cat(cohortDefinitionJson)
#run the cohort
library(Eunomia)
#giBleedCohortJson <- as.json(giBleedCohort)
# Build the cohort SQL from the Capr-generated JSON.
# Passing cohortJson (the object returned by jsonlite::toJSON) gives an error
# here; the version cast to character (cohortJson2) works.
sql <- CirceR::buildCohortQuery(
  expression = CirceR::cohortExpressionFromJson(cohortJson2),
  options = CirceR::createGenerateOptions(generateStats = FALSE)
)
# For comparison: the example JSON loaded with data(cohortDefinitionJson) also
# builds without error. The result is kept in its own variable so that it does
# not overwrite `sql`, which is the query registered in cohortsToCreate below.
sqlCirceExample <- CirceR::buildCohortQuery(
  expression = CirceR::cohortExpressionFromJson(cohortDefinitionJson),
  options = CirceR::createGenerateOptions(generateStats = FALSE)
)
# cat(sql)
# e1=CirceR::cohortExpressionFromJson(cohortDefinitionJson)
# e1=CirceR::cohortExpressionFromJson(cohortJson2)
# e1
# Connection details for the Eunomia demo database; the log lines below show
# the GiBleed dataset being downloaded and extracted to a local SQLite file.
connectionDetails <- Eunomia::getEunomiaConnectionDetails()
## attempting to download GiBleed
## attempting to extract and load: ~/vardb/eunomia_df/GiBleed_5.3.zip to: ~/vardb/eunomia_df/GiBleed_5.3.sqlite
#giBleedCohortJson <- as.json(giBleedCohort)
# Cohort definition table: starts empty and gains one row per new cohort.
cohortsToCreate <- data.frame(
  cohortId = integer(),
  cohortName = character(),
  sql = character()
)

# Register the first cohort, using the SQL currently held in `sql`.
cohortsToCreate <- cohortsToCreate |>
  bind_rows(
    data.frame(cohortId = 1, cohortName = "some name", sql = sql)
  )

# Show only the first two columns (cohortId and cohortName).
cohortsToCreate %>%
  select(1, 2)
## cohortId cohortName
## 1 1 some name
#install.packages('CohortGenerator')
library(CohortGenerator)
## Loading required package: DatabaseConnector
##
## Attaching package: 'DatabaseConnector'
## The following objects are masked from 'package:lubridate':
##
## day, month, year
## Loading required package: R6
# Derive the names of the cohort table and its companion tables. Nothing is
# created in the database at this point; the result is a list of table names.
cohortTableNames <- CohortGenerator::getCohortTableNames(
  cohortTable = "my_cohort_table"
)

# Inspect the generated names.
str(cohortTableNames)
## List of 7
## $ cohortTable : chr "my_cohort_table"
## $ cohortSampleTable : chr "my_cohort_table"
## $ cohortInclusionTable : chr "my_cohort_table_inclusion"
## $ cohortInclusionResultTable: chr "my_cohort_table_inclusion_result"
## $ cohortInclusionStatsTable : chr "my_cohort_table_inclusion_stats"
## $ cohortSummaryStatsTable : chr "my_cohort_table_summary_stats"
## $ cohortCensorStatsTable : chr "my_cohort_table_censor_stats"
# Now create the tables themselves (not just their names) in schema "main".
CohortGenerator::createCohortTables(
  connectionDetails = connectionDetails,
  cohortTableNames = cohortTableNames,
  cohortDatabaseSchema = "main"
)
## Connecting using SQLite driver
## Creating cohort tables
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table_inclusion
## - Created table main.my_cohort_table_inclusion_result
## - Created table main.my_cohort_table_inclusion_stats
## - Created table main.my_cohort_table_summary_stats
## - Created table main.my_cohort_table_censor_stats
## Creating cohort tables took 0.03secs
# Generate every cohort currently listed in cohortsToCreate. Both the CDM
# data and the cohort tables are addressed through schema "main".
cohortsGenerated <- CohortGenerator::generateCohortSet(
  connectionDetails = connectionDetails,
  cohortDefinitionSet = cohortsToCreate,
  cohortTableNames = cohortTableNames,
  cdmDatabaseSchema = "main",
  cohortDatabaseSchema = "main"
)
## Connecting using SQLite driver
## Initiating cluster consisting only of main thread
## 1/1- Generating cohort: some name (id = 1)
## | | | 0% | |=== | 4% | |===== | 8% | |======== | 12% | |=========== | 15% | |============= | 19% | |================ | 23% | |=================== | 27% | |====================== | 31% | |======================== | 35% | |=========================== | 38% | |============================== | 42% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |======================================== | 58% | |=========================================== | 62% | |============================================== | 65% | |================================================ | 69% | |=================================================== | 73% | |====================================================== | 77% | |========================================================= | 81% | |=========================================================== | 85% | |============================================================== | 88% | |================================================================= | 92% | |=================================================================== | 96% | |======================================================================| 100%
## Executing SQL took 0.00508 secs
## Generating cohort set took 0.03 secs
# Count entries and subjects per cohort in the main cohort table.
cohortCounts <- CohortGenerator::getCohortCounts(
  connectionDetails = connectionDetails,
  cohortTable = cohortTableNames$cohortTable,
  cohortDatabaseSchema = "main"
)
## Connecting using SQLite driver
## Counting cohorts took 0.0203 secs
# Print the counts.
cohortCounts
## [1] cohortId cohortEntries cohortSubjects
## <0 rows> (or 0-length row.names)
library(Capr)
# Concept set named "GIbleed": concept 192671, wrapped in descendants().
GIBleed <- cs(descendants(192671), name = "GIbleed")
# Print the concept set.
GIBleed
## ── <Capr Concept Set> GIbleed ──────────────────────────────────────────────────
## # A tibble: 1 × 9
## conceptId conceptCode conceptName domainId vocabularyId standardConcept
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 192671 "" "" "" "" ""
## # ℹ 3 more variables: includeDescendants <lgl>, isExcluded <lgl>,
## # includeMapped <lgl>
# GI bleed cohort: the entry event is a condition occurrence from the GIBleed
# concept set, with a continuousObservation(0L, 0L) window and the primary
# criteria limit set to "First"; the exit strategy is observationExit().
giBleedCohort <- cohort(
  entry = entry(
    conditionOccurrence(GIBleed),
    observationWindow = continuousObservation(0L, 0L),
    primaryCriteriaLimit = "First"
  ),
  exit = exit(endStrategy = observationExit())
)

# Print the cohort object.
giBleedCohort
## Formal class 'Cohort' [package "Capr"] with 4 slots
## ..@ entry :Formal class 'CohortEntry' [package "Capr"] with 5 slots
## ..@ attrition:Formal class 'CohortAttrition' [package "Capr"] with 2 slots
## ..@ exit :Formal class 'CohortExit' [package "Capr"] with 2 slots
## ..@ era :Formal class 'CohortEra' [package "Capr"] with 3 slots
#connectionDetails <- Eunomia::getEunomiaConnectionDetails()
# Serialise the GI bleed cohort to a plain-character JSON string.
giBleedCohortJson <- giBleedCohort |>
  toCirce() |>
  jsonlite::toJSON(pretty = TRUE, auto_unbox = TRUE) |>
  as.character()
#giBleedCohortJson <- as.json(giBleedCohort) %>% as.character()

# Turn the JSON into cohort SQL (this reassigns `sql`).
sql <- CirceR::buildCohortQuery(
  expression = CirceR::cohortExpressionFromJson(giBleedCohortJson),
  options = CirceR::createGenerateOptions(generateStats = FALSE)
)

# Register the GI bleed cohort as cohort 2.
cohortsToCreate <- bind_rows(
  cohortsToCreate,
  tibble::tibble(cohortId = 2, cohortName = "GI Bleed", sql = sql)
)
# Concept set named "outpatient visit": concept 9202, wrapped in descendants().
csVisit <- cs(descendants(9202), name = "outpatient visit")
# Print the concept set.
csVisit
## ── <Capr Concept Set> outpatient visit ─────────────────────────────────────────
## # A tibble: 1 × 9
## conceptId conceptCode conceptName domainId vocabularyId standardConcept
## <int> <chr> <chr> <chr> <chr> <chr>
## 1 9202 "" "" "" "" ""
## # ℹ 3 more variables: includeDescendants <lgl>, isExcluded <lgl>,
## # includeMapped <lgl>
# Visit cohort: the entry event is a visit from the csVisit concept set; the
# exit strategy is observationExit().
ch2 <- cohort(
  entry = entry(visit(csVisit)),
  exit = exit(endStrategy = observationExit())
)

# Register it as cohort 3. The SQL is built inline:
# cohort -> Circe list -> JSON string -> cohort expression -> query.
cohortsToCreate <- bind_rows(
  cohortsToCreate,
  tibble::tibble(
    cohortId = 3,
    cohortName = "visit",
    sql = CirceR::buildCohortQuery(
      expression = ch2 |>
        toCirce() |>
        jsonlite::toJSON(pretty = TRUE, auto_unbox = TRUE) |>
        as.character() |>
        CirceR::cohortExpressionFromJson(),
      options = CirceR::createGenerateOptions(generateStats = FALSE)
    )
  )
)

# Show only the first two columns (cohortId and cohortName).
cohortsToCreate %>%
  select(1, 2)
## cohortId cohortName
## 1 1 some name
## 2 2 GI Bleed
## 3 3 visit
# Repeat the table-name lookup and table creation before generating the
# full set of three cohorts.
cohortTableNames <- CohortGenerator::getCohortTableNames(
  cohortTable = "my_cohort_table"
)
CohortGenerator::createCohortTables(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames
)
## Connecting using SQLite driver
## Creating cohort tables
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table_inclusion
## - Created table main.my_cohort_table_inclusion_result
## - Created table main.my_cohort_table_inclusion_stats
## - Created table main.my_cohort_table_summary_stats
## - Created table main.my_cohort_table_censor_stats
## Creating cohort tables took 0.01secs
# Generate all cohorts now listed in cohortsToCreate (three at this point).
cohortsGenerated <- CohortGenerator::generateCohortSet(
  connectionDetails = connectionDetails,
  cohortDefinitionSet = cohortsToCreate,
  cohortTableNames = cohortTableNames,
  cdmDatabaseSchema = "main",
  cohortDatabaseSchema = "main"
)
## Connecting using SQLite driver
## Initiating cluster consisting only of main thread
## 1/3- Generating cohort: some name (id = 1)
## | | | 0% | |=== | 4% | |===== | 8% | |======== | 12% | |=========== | 15% | |============= | 19% | |================ | 23% | |=================== | 27% | |====================== | 31% | |======================== | 35% | |=========================== | 38% | |============================== | 42% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |======================================== | 58% | |=========================================== | 62% | |============================================== | 65% | |================================================ | 69% | |=================================================== | 73% | |====================================================== | 77% | |========================================================= | 81% | |=========================================================== | 85% | |============================================================== | 88% | |================================================================= | 92% | |=================================================================== | 96% | |======================================================================| 100%
## Executing SQL took 0.00546 secs
## 2/3- Generating cohort: GI Bleed (id = 2)
## | | | 0% | |=== | 4% | |===== | 8% | |======== | 12% | |=========== | 15% | |============= | 19% | |================ | 23% | |=================== | 27% | |====================== | 31% | |======================== | 35% | |=========================== | 38% | |============================== | 42% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |======================================== | 58% | |=========================================== | 62% | |============================================== | 65% | |================================================ | 69% | |=================================================== | 73% | |====================================================== | 77% | |========================================================= | 81% | |=========================================================== | 85% | |============================================================== | 88% | |================================================================= | 92% | |=================================================================== | 96% | |======================================================================| 100%
## Executing SQL took 0.0124 secs
## 3/3- Generating cohort: visit (id = 3)
## | | | 0% | |=== | 4% | |===== | 8% | |======== | 12% | |=========== | 15% | |============= | 19% | |================ | 23% | |=================== | 27% | |====================== | 31% | |======================== | 35% | |=========================== | 38% | |============================== | 42% | |================================ | 46% | |=================================== | 50% | |====================================== | 54% | |======================================== | 58% | |=========================================== | 62% | |============================================== | 65% | |================================================ | 69% | |=================================================== | 73% | |====================================================== | 77% | |========================================================= | 81% | |=========================================================== | 85% | |============================================================== | 88% | |================================================================= | 92% | |=================================================================== | 96% | |======================================================================| 100%
## Executing SQL took 0.00587 secs
## Generating cohort set took 0.07 secs
# Count entries and subjects per cohort after generating the full set.
cohortCounts <- CohortGenerator::getCohortCounts(
  connectionDetails = connectionDetails,
  cohortTable = cohortTableNames$cohortTable,
  cohortDatabaseSchema = "main"
)
## Connecting using SQLite driver
## Counting cohorts took 0.00849 secs
# Print the counts.
cohortCounts
## cohortId cohortEntries cohortSubjects
## 1 2 479 479
#more examples
https://forums.ohdsi.org/t/capr-error-and-phenotype-feb-idea-cohorts-by-code-may-be-faster/23268
https://forums.ohdsi.org/t/clarification-on-entry-criteria-for-cohort-definition-in-capr/23783
https://github.com/OHDSInflammation/StudyPackage/blob/main/R/support/CapRCohort.R#L20
https://github.com/ohdsi-studies/PhenotypePhebruaryCapr/blob/main/results/day3/R/afibCapr.R
For more information on the sub-components of a cohort definition in
circe-be, users should watch the videos at
https://www.youtube.com/@chrisknoll2007, created by Chris
Knoll, which outline these ideas. While these videos use ATLAS,
Capr follows the same principles.
https://github.com/ohdsi-studies/PhenotypePhebruaryCapr/blob/main/results/day3/R/afibCapr.R