knitr::opts_chunk$set(echo = TRUE)
library(Capr)
library(Eunomia)
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ✖ ggplot2::unit() masks Capr::unit()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Cohorts by code (Capr)

The name cohort is already taken by the Capr function cohort(), so we use ch as the variable name for a cohort. The same applies to cs (the concept set function), so concept set variables are named xyzConceptSet.

# Simple cohort: entry on a drug exposure from the "infliximab" concept set
# (concept 937368 including descendants); exit strategy is observationExit().
ch <- cohort(
  entry = entry(
    drugExposure(
      cs(descendants(937368), name = "infliximab")
    )
  ),
  exit = exit(endStrategy = observationExit())
)

# Circe representation of the cohort as a nested R list.
chList <- toCirce(ch)

# Two JSON renderings of the same definition. They print the same text, but
# cohortJson keeps the class attached by jsonlite::toJSON(), whereas
# cohortJson2 is converted to a plain character string.
cohortJson <- jsonlite::toJSON(
  toCirce(ch),
  pretty = TRUE,
  auto_unbox = TRUE
)

cohortJson2 <- as.character(
  jsonlite::toJSON(toCirce(ch), pretty = TRUE, auto_unbox = TRUE)
)

# Top-level structure of the Circe list.
str(chList, max.level = 1)

## List of 9
##  $ ConceptSets      :List of 1
##  $ PrimaryCriteria  :List of 3
##  $ QualifiedLimit   :List of 1
##  $ ExpressionLimit  :List of 1
##  $ InclusionRules   : list()
##  $ CensoringCriteria: list()
##  $ CollapseSettings :List of 2
##  $ CensorWindow     : Named list()
##  $ cdmVersionRange  : chr ">=5.0.0"

# Same structure, one nesting level deeper.
str(chList, max.level = 2)

## List of 9
##  $ ConceptSets      :List of 1
##   ..$ :List of 3
##  $ PrimaryCriteria  :List of 3
##   ..$ CriteriaList        :List of 1
##   ..$ ObservationWindow   :List of 2
##   ..$ PrimaryCriteriaLimit:List of 1
##  $ QualifiedLimit   :List of 1
##   ..$ Type: chr "First"
##  $ ExpressionLimit  :List of 1
##   ..$ Type: chr "First"
##  $ InclusionRules   : list()
##  $ CensoringCriteria: list()
##  $ CollapseSettings :List of 2
##   ..$ CollapseType: chr "ERA"
##   ..$ EraPad      : int 0
##  $ CensorWindow     : Named list()
##  $ cdmVersionRange  : chr ">=5.0.0"

# Full listing: print every element of the nested Circe list.
chList

## $ConceptSets
## $ConceptSets[[1]]
## $ConceptSets[[1]]$id
## [1] 0
## 
## $ConceptSets[[1]]$name
## [1] "infliximab"
## 
## $ConceptSets[[1]]$expression
## $ConceptSets[[1]]$expression$items
## $ConceptSets[[1]]$expression$items[[1]]
## $ConceptSets[[1]]$expression$items[[1]]$concept
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_ID
## [1] 937368
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_NAME
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$STANDARD_CONCEPT
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$STANDARD_CONCEPT_CAPTION
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$INVALID_REASON
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$INVALID_REASON_CAPTION
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_CODE
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$DOMAIN_ID
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$VOCABULARY_ID
## [1] ""
## 
## $ConceptSets[[1]]$expression$items[[1]]$concept$CONCEPT_CLASS_ID
## [1] ""
## 
## 
## $ConceptSets[[1]]$expression$items[[1]]$isExcluded
## [1] FALSE
## 
## $ConceptSets[[1]]$expression$items[[1]]$includeDescendants
## [1] TRUE
## 
## $ConceptSets[[1]]$expression$items[[1]]$includeMapped
## [1] FALSE
## 
## 
## 
## 
## 
## 
## $PrimaryCriteria
## $PrimaryCriteria$CriteriaList
## $PrimaryCriteria$CriteriaList[[1]]
## $PrimaryCriteria$CriteriaList[[1]]$DrugExposure
## $PrimaryCriteria$CriteriaList[[1]]$DrugExposure$CodesetId
## [1] 0
## 
## 
## 
## 
## $PrimaryCriteria$ObservationWindow
## $PrimaryCriteria$ObservationWindow$PriorDays
## [1] 0
## 
## $PrimaryCriteria$ObservationWindow$PostDays
## [1] 0
## 
## 
## $PrimaryCriteria$PrimaryCriteriaLimit
## $PrimaryCriteria$PrimaryCriteriaLimit$Type
## [1] "First"
## 
## 
## 
## $QualifiedLimit
## $QualifiedLimit$Type
## [1] "First"
## 
## 
## $ExpressionLimit
## $ExpressionLimit$Type
## [1] "First"
## 
## 
## $InclusionRules
## list()
## 
## $CensoringCriteria
## list()
## 
## $CollapseSettings
## $CollapseSettings$CollapseType
## [1] "ERA"
## 
## $CollapseSettings$EraPad
## [1] 0
## 
## 
## $CensorWindow
## named list()
## 
## $cdmVersionRange
## [1] ">=5.0.0"

# Print the complete cohort definition as JSON text.
cat(cohortJson)

## {
##   "ConceptSets": [
##     {
##       "id": 0,
##       "name": "infliximab",
##       "expression": {
##         "items": [
##           {
##             "concept": {
##               "CONCEPT_ID": 937368,
##               "CONCEPT_NAME": "",
##               "STANDARD_CONCEPT": "",
##               "STANDARD_CONCEPT_CAPTION": "",
##               "INVALID_REASON": "",
##               "INVALID_REASON_CAPTION": "",
##               "CONCEPT_CODE": "",
##               "DOMAIN_ID": "",
##               "VOCABULARY_ID": "",
##               "CONCEPT_CLASS_ID": ""
##             },
##             "isExcluded": false,
##             "includeDescendants": true,
##             "includeMapped": false
##           }
##         ]
##       }
##     }
##   ],
##   "PrimaryCriteria": {
##     "CriteriaList": [
##       {
##         "DrugExposure": {
##           "CodesetId": 0
##         }
##       }
##     ],
##     "ObservationWindow": {
##       "PriorDays": 0,
##       "PostDays": 0
##     },
##     "PrimaryCriteriaLimit": {
##       "Type": "First"
##     }
##   },
##   "QualifiedLimit": {
##     "Type": "First"
##   },
##   "ExpressionLimit": {
##     "Type": "First"
##   },
##   "InclusionRules": [],
##   "CensoringCriteria": [],
##   "CollapseSettings": {
##     "CollapseType": "ERA",
##     "EraPad": 0
##   },
##   "CensorWindow": {},
##   "cdmVersionRange": ">=5.0.0"
## }

#same output
#cat(cohortJson2)


library(Capr)
# A better definition: build the concept set once and reuse it, because the
# cohort definition refers to it twice (entry criterion and drug-based exit).
drugConceptSet <- cs(descendants(937368), name = "infliximab")

ch1 <- cohort(
  entry = entry(
    drugExposure(drugConceptSet),
    primaryCriteriaLimit = "All",
    qualifiedLimit = "All"
  ),
  exit = exit(
    endStrategy = drugExit(drugConceptSet, persistenceWindow = 30)
  )
)

## Loading required namespace: testthat

library(RJSONIO)

# Further exploring the error (kept for reference, not run):
# RJSONIO::fromJSON(cohortJson2)
library(CirceR)

# Load the sample data set `cohortDefinitionJson` into the workspace
# (presumably the example definition bundled with CirceR -- TODO confirm).
data(cohortDefinitionJson)
# cat(cohortDefinitionJson)

Executing it

# Run the cohort
library(Eunomia)


# giBleedCohortJson <- as.json(giBleedCohort)

# Demonstration: per the author's observation, passing cohortJson (the object
# returned by jsonlite::toJSON()) to CirceR gives an error, while the plain
# character string cohortJson2 works. The result is stored under its own name
# so that it is not silently overwritten by the assignment to `sql` below.
infliximabSql <- CirceR::buildCohortQuery(
  expression = CirceR::cohortExpressionFromJson(cohortJson2),
  options = CirceR::createGenerateOptions(generateStats = FALSE)
)

# The sample JSON loaded via data(cohortDefinitionJson) also builds without
# error. NOTE: this `sql` (the sample definition, not the infliximab cohort)
# is the query that is later registered as cohort 1 in cohortsToCreate.
sql <- CirceR::buildCohortQuery(
  expression = CirceR::cohortExpressionFromJson(cohortDefinitionJson),
  options = CirceR::createGenerateOptions(generateStats = FALSE)
)
# cat(sql)

# e1=CirceR::cohortExpressionFromJson(cohortDefinitionJson)
# e1=CirceR::cohortExpressionFromJson(cohortJson2)
# e1
 


# Connection details for the Eunomia example database; reused by all
# CohortGenerator calls below.
connectionDetails <- Eunomia::getEunomiaConnectionDetails()

## attempting to download GiBleed

## attempting to extract and load: ~/vardb/eunomia_df/GiBleed_5.3.zip to: ~/vardb/eunomia_df/GiBleed_5.3.sqlite

# giBleedCohortJson <- as.json(giBleedCohort)

# Start from an empty cohort definition table; each new cohort is appended to
# it as one row.
cohortsToCreate <- data.frame(
  cohortId = integer(),
  cohortName = character(),
  sql = character()
)

# Append the first cohort, using the SQL currently held in `sql`.
cohortsToCreate <- cohortsToCreate |>
  bind_rows(
    data.frame(cohortId = 1, cohortName = "some name", sql = sql)
  )

# Show the id and name columns only, omitting the sql column.
cohortsToCreate |> select(cohortId, cohortName)

##   cohortId cohortName
## 1        1  some name

#install.packages('CohortGenerator')
library(CohortGenerator)

## Loading required package: DatabaseConnector

## 
## Attaching package: 'DatabaseConnector'

## The following objects are masked from 'package:lubridate':
## 
##     day, month, year

## Loading required package: R6

# Build the list of table names (cohort table plus its companion tables) from
# a single base name. Nothing is created in the database at this point.
cohortTableNames <- CohortGenerator::getCohortTableNames(
  cohortTable = "my_cohort_table"
)

# Inspect the generated names.
str(cohortTableNames)

## List of 7
##  $ cohortTable               : chr "my_cohort_table"
##  $ cohortSampleTable         : chr "my_cohort_table"
##  $ cohortInclusionTable      : chr "my_cohort_table_inclusion"
##  $ cohortInclusionResultTable: chr "my_cohort_table_inclusion_result"
##  $ cohortInclusionStatsTable : chr "my_cohort_table_inclusion_stats"
##  $ cohortSummaryStatsTable   : chr "my_cohort_table_summary_stats"
##  $ cohortCensorStatsTable    : chr "my_cohort_table_censor_stats"

# Now create the tables themselves in the database (not just their names),
# in schema "main" of the connection described by connectionDetails.

CohortGenerator::createCohortTables(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames
)

## Connecting using SQLite driver

## Creating cohort tables
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table_inclusion
## - Created table main.my_cohort_table_inclusion_result
## - Created table main.my_cohort_table_inclusion_stats
## - Created table main.my_cohort_table_summary_stats
## - Created table main.my_cohort_table_censor_stats
## Creating cohort tables took 0.03secs

# Generate the cohorts: one generation run per row of cohortsToCreate.
# CDM data and the cohort tables both live in schema "main".
cohortsGenerated <- CohortGenerator::generateCohortSet(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = "main",
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames,
  cohortDefinitionSet = cohortsToCreate
)

## Connecting using SQLite driver

## Initiating cluster consisting only of main thread

## 1/1- Generating cohort: some name (id = 1)

##   |                                                                              |                                                                      |   0%  |                                                                              |===                                                                   |   4%  |                                                                              |=====                                                                 |   8%  |                                                                              |========                                                              |  12%  |                                                                              |===========                                                           |  15%  |                                                                              |=============                                                         |  19%  |                                                                              |================                                                      |  23%  |                                                                              |===================                                                   |  27%  |                                                                              |======================                                                |  31%  |                                                                              |========================                                              |  35%  |                                                                              |===========================                                           |  38%  |                                                                              |==============================                                        |  42%  |                                                                              
|================================                                      |  46%  |                                                                              |===================================                                   |  50%  |                                                                              |======================================                                |  54%  |                                                                              |========================================                              |  58%  |                                                                              |===========================================                           |  62%  |                                                                              |==============================================                        |  65%  |                                                                              |================================================                      |  69%  |                                                                              |===================================================                   |  73%  |                                                                              |======================================================                |  77%  |                                                                              |=========================================================             |  81%  |                                                                              |===========================================================           |  85%  |                                                                              |==============================================================        |  88%  |                                                                              |=================================================================     |  92%  |                        
                                                      |===================================================================   |  96%  |                                                                              |======================================================================| 100%

## Executing SQL took 0.00508 secs

## Generating cohort set took 0.03 secs

# Get the cohort counts from the cohort table in schema "main".
cohortCounts <- CohortGenerator::getCohortCounts(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTable = cohortTableNames$cohortTable
)

## Connecting using SQLite driver
## Counting cohorts took 0.0203 secs

# Print the counts (columns cohortId, cohortEntries, cohortSubjects).
cohortCounts

## [1] cohortId       cohortEntries  cohortSubjects
## <0 rows> (or 0-length row.names)

library(Capr)

# Concept set "GIbleed": concept 192671 including its descendants.
GIBleed <- cs(
  descendants(192671),
  name = "GIbleed"
)

# Print the concept set.
GIBleed

## ── <Capr Concept Set> GIbleed ──────────────────────────────────────────────────
## # A tibble: 1 × 9
##   conceptId conceptCode conceptName domainId vocabularyId standardConcept
##       <int> <chr>       <chr>       <chr>    <chr>        <chr>          
## 1    192671 ""          ""          ""       ""           ""             
## # ℹ 3 more variables: includeDescendants <lgl>, isExcluded <lgl>,
## #   includeMapped <lgl>

# GI bleed cohort: entry on a condition occurrence from the GIBleed concept
# set, with a (0, 0) continuous-observation window and the "First" primary
# criteria limit; exit strategy is observationExit().
giBleedEntry <- NULL # placeholder removed below; entry is built inline
rm(giBleedEntry)
giBleedCohort <- cohort(
  entry = entry(
    conditionOccurrence(GIBleed),
    observationWindow = continuousObservation(0L, 0L),
    primaryCriteriaLimit = "First"
  ),
  exit = exit(endStrategy = observationExit())
)

# Print the cohort object.
giBleedCohort

## Formal class 'Cohort' [package "Capr"] with 4 slots
##   ..@ entry    :Formal class 'CohortEntry' [package "Capr"] with 5 slots
##   ..@ attrition:Formal class 'CohortAttrition' [package "Capr"] with 2 slots
##   ..@ exit     :Formal class 'CohortExit' [package "Capr"] with 2 slots
##   ..@ era      :Formal class 'CohortEra' [package "Capr"] with 3 slots

# connectionDetails <- Eunomia::getEunomiaConnectionDetails()

# Serialise the GI bleed cohort to a plain character JSON string.
giBleedCohortJson <- as.character(
  jsonlite::toJSON(
    toCirce(giBleedCohort),
    pretty = TRUE,
    auto_unbox = TRUE
  )
)

# giBleedCohortJson <- as.json(giBleedCohort) %>% as.character()

# Build the cohort SQL from that JSON (this replaces the previous `sql`).
sql <- CirceR::buildCohortQuery(
  expression = CirceR::cohortExpressionFromJson(giBleedCohortJson),
  options = CirceR::createGenerateOptions(generateStats = FALSE)
)

# Append the GI bleed cohort as cohort 2.
cohortsToCreate <- cohortsToCreate |>
  bind_rows(
    tibble::tibble(cohortId = 2, cohortName = "GI Bleed", sql = sql)
  )



# Concept set "outpatient visit": concept 9202 including its descendants.
csVisit <- cs(
  descendants(9202),
  name = "outpatient visit"
)

# Print the concept set.
csVisit

## ── <Capr Concept Set> outpatient visit ─────────────────────────────────────────
## # A tibble: 1 × 9
##   conceptId conceptCode conceptName domainId vocabularyId standardConcept
##       <int> <chr>       <chr>       <chr>    <chr>        <chr>          
## 1      9202 ""          ""          ""       ""           ""             
## # ℹ 3 more variables: includeDescendants <lgl>, isExcluded <lgl>,
## #   includeMapped <lgl>

# Visit cohort: entry on a visit from the csVisit concept set; exit strategy
# is observationExit().
ch2 <- cohort(
  entry = entry(visit(csVisit)),
  exit = exit(endStrategy = observationExit())
)

# Append it as cohort 3. The SQL is built inline: Capr cohort -> Circe list ->
# JSON character string -> CirceR expression -> cohort query.
cohortsToCreate <- bind_rows(
  cohortsToCreate,
  tibble::tibble(
    cohortId = 3,
    cohortName = "visit",
    sql = CirceR::buildCohortQuery(
      expression = CirceR::cohortExpressionFromJson(
        as.character(
          jsonlite::toJSON(toCirce(ch2), pretty = TRUE, auto_unbox = TRUE)
        )
      ),
      options = CirceR::createGenerateOptions(generateStats = FALSE)
    )
  )
)






# List the registered cohorts by id and name, omitting the sql column.
cohortsToCreate |> select(cohortId, cohortName)

##   cohortId cohortName
## 1        1  some name
## 2        2   GI Bleed
## 3        3      visit

# Rebuild the table-name list and create the cohort tables again before
# generating the full cohort set.
cohortTableNames <- CohortGenerator::getCohortTableNames(
  cohortTable = "my_cohort_table"
)

CohortGenerator::createCohortTables(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames
)

## Connecting using SQLite driver
## Creating cohort tables
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table
## - Created table main.my_cohort_table_inclusion
## - Created table main.my_cohort_table_inclusion_result
## - Created table main.my_cohort_table_inclusion_stats
## - Created table main.my_cohort_table_summary_stats
## - Created table main.my_cohort_table_censor_stats
## Creating cohort tables took 0.01secs

# Generate the cohorts again, this time for all rows now in cohortsToCreate.
# CDM data and the cohort tables both live in schema "main".
cohortsGenerated <- CohortGenerator::generateCohortSet(
  connectionDetails = connectionDetails,
  cdmDatabaseSchema = "main",
  cohortDatabaseSchema = "main",
  cohortTableNames = cohortTableNames,
  cohortDefinitionSet = cohortsToCreate
)

## Connecting using SQLite driver

## Initiating cluster consisting only of main thread

## 1/3- Generating cohort: some name (id = 1)

##   |                                                                              |                                                                      |   0%  |                                                                              |===                                                                   |   4%  |                                                                              |=====                                                                 |   8%  |                                                                              |========                                                              |  12%  |                                                                              |===========                                                           |  15%  |                                                                              |=============                                                         |  19%  |                                                                              |================                                                      |  23%  |                                                                              |===================                                                   |  27%  |                                                                              |======================                                                |  31%  |                                                                              |========================                                              |  35%  |                                                                              |===========================                                           |  38%  |                                                                              |==============================                                        |  42%  |                                                                              
|================================                                      |  46%  |                                                                              |===================================                                   |  50%  |                                                                              |======================================                                |  54%  |                                                                              |========================================                              |  58%  |                                                                              |===========================================                           |  62%  |                                                                              |==============================================                        |  65%  |                                                                              |================================================                      |  69%  |                                                                              |===================================================                   |  73%  |                                                                              |======================================================                |  77%  |                                                                              |=========================================================             |  81%  |                                                                              |===========================================================           |  85%  |                                                                              |==============================================================        |  88%  |                                                                              |=================================================================     |  92%  |                        
                                                      |===================================================================   |  96%  |                                                                              |======================================================================| 100%

## Executing SQL took 0.00546 secs
## 2/3- Generating cohort: GI Bleed (id = 2)

##   |                                                                              |                                                                      |   0%  |                                                                              |===                                                                   |   4%  |                                                                              |=====                                                                 |   8%  |                                                                              |========                                                              |  12%  |                                                                              |===========                                                           |  15%  |                                                                              |=============                                                         |  19%  |                                                                              |================                                                      |  23%  |                                                                              |===================                                                   |  27%  |                                                                              |======================                                                |  31%  |                                                                              |========================                                              |  35%  |                                                                              |===========================                                           |  38%  |                                                                              |==============================                                        |  42%  |                                                                              
|================================                                      |  46%  |                                                                              |===================================                                   |  50%  |                                                                              |======================================                                |  54%  |                                                                              |========================================                              |  58%  |                                                                              |===========================================                           |  62%  |                                                                              |==============================================                        |  65%  |                                                                              |================================================                      |  69%  |                                                                              |===================================================                   |  73%  |                                                                              |======================================================                |  77%  |                                                                              |=========================================================             |  81%  |                                                                              |===========================================================           |  85%  |                                                                              |==============================================================        |  88%  |                                                                              |=================================================================     |  92%  |                        
                                                      |===================================================================   |  96%  |                                                                              |======================================================================| 100%

## Executing SQL took 0.0124 secs
## 3/3- Generating cohort: visit (id = 3)

##   |                                                                              |                                                                      |   0%  |                                                                              |===                                                                   |   4%  |                                                                              |=====                                                                 |   8%  |                                                                              |========                                                              |  12%  |                                                                              |===========                                                           |  15%  |                                                                              |=============                                                         |  19%  |                                                                              |================                                                      |  23%  |                                                                              |===================                                                   |  27%  |                                                                              |======================                                                |  31%  |                                                                              |========================                                              |  35%  |                                                                              |===========================                                           |  38%  |                                                                              |==============================                                        |  42%  |                                                                              
|================================                                      |  46%  |                                                                              |===================================                                   |  50%  |                                                                              |======================================                                |  54%  |                                                                              |========================================                              |  58%  |                                                                              |===========================================                           |  62%  |                                                                              |==============================================                        |  65%  |                                                                              |================================================                      |  69%  |                                                                              |===================================================                   |  73%  |                                                                              |======================================================                |  77%  |                                                                              |=========================================================             |  81%  |                                                                              |===========================================================           |  85%  |                                                                              |==============================================================        |  88%  |                                                                              |=================================================================     |  92%  |                        
                                                      |===================================================================   |  96%  |                                                                              |======================================================================| 100%

## Executing SQL took 0.00587 secs

## Generating cohort set took 0.07 secs

# Get the cohort counts from the cohort table in schema "main" after the
# second generation run.
cohortCounts <- CohortGenerator::getCohortCounts(
  connectionDetails = connectionDetails,
  cohortDatabaseSchema = "main",
  cohortTable = cohortTableNames$cohortTable
)

## Connecting using SQLite driver
## Counting cohorts took 0.00849 secs

# Print the counts. NOTE(review): in the recorded output only cohort 2 is
# listed; cohorts 1 and 3 appear to have produced no rows -- confirm.
cohortCounts

##   cohortId cohortEntries cohortSubjects
## 1        2           479            479

#more examples

Relevant forum posts

https://forums.ohdsi.org/t/capr-error-and-phenotype-feb-idea-cohorts-by-code-may-be-faster/23268

https://forums.ohdsi.org/t/clarification-on-entry-criteria-for-cohort-definition-in-capr/23783

Eli’s 2025 examples

https://github.com/OHDSInflammation/StudyPackage/blob/main/R/support/CapRCohort.R#L20

2022 code

https://github.com/ohdsi-studies/PhenotypePhebruaryCapr/blob/main/results/day3/R/afibCapr.R

https://github.com/ohdsi-studies/PhenotypePhebruaryCapr/blob/main/results/day1/R/Type%202%20diabetes%20mellitus.R

Concluding Remarks

For more information on the sub-components of a cohort definition in circe-be, watch the videos at

https://www.youtube.com/@chrisknoll2007 (created by Chris Knoll), which outline these ideas. While these videos use ATLAS, Capr follows the same principles.