#--specialty paper - OMOP code, to be executed on OMOPed Medicare
library(DatabaseConnector)
#install.packages('Eunomia')
library(Eunomia)
## Warning: package 'Eunomia' was built under R version 4.0.5
connectionDetails <- getEunomiaConnectionDetails()
connection <- connect(connectionDetails)
## Connecting using SQLite driver
#querySql(connection, "SELECT COUNT(*) FROM person;")
# COUNT(*)
#1 2694
t=getTableNames(connection,databaseSchema = 'main')
#disconnect(connection)
library(SqlRender)
p=querySql(connection, "SELECT * FROM provider;")
names(p)
## [1] "PROVIDER_ID" "PROVIDER_NAME"
## [3] "NPI" "DEA"
## [5] "SPECIALTY_CONCEPT_ID" "CARE_SITE_ID"
## [7] "YEAR_OF_BIRTH" "GENDER_CONCEPT_ID"
## [9] "PROVIDER_SOURCE_VALUE" "SPECIALTY_SOURCE_VALUE"
## [11] "SPECIALTY_SOURCE_CONCEPT_ID" "GENDER_SOURCE_VALUE"
## [13] "GENDER_SOURCE_CONCEPT_ID"
co=querySql(connection, "SELECT * FROM condition_occurrence limit 10;")
names(co)
## [1] "CONDITION_OCCURRENCE_ID" "PERSON_ID"
## [3] "CONDITION_CONCEPT_ID" "CONDITION_START_DATE"
## [5] "CONDITION_START_DATETIME" "CONDITION_END_DATE"
## [7] "CONDITION_END_DATETIME" "CONDITION_TYPE_CONCEPT_ID"
## [9] "STOP_REASON" "PROVIDER_ID"
## [11] "VISIT_OCCURRENCE_ID" "VISIT_DETAIL_ID"
## [13] "CONDITION_SOURCE_VALUE" "CONDITION_SOURCE_CONCEPT_ID"
## [15] "CONDITION_STATUS_SOURCE_VALUE" "CONDITION_STATUS_CONCEPT_ID"
c=querySql(connection, "SELECT * from concept")
vo=querySql(connection, "SELECT * from visit_occurrence limit 100000000")
# querySql(connection, "SELECT co.condition_concept_id,p.specialty_concept_id
# from condition_occurrence co
# join provider p on co.provider_id=p.provider_id
# join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurence_id
# limit 1;")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.0 v dplyr 1.0.5
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
#co %>% count(CONDITION_CONCEPT_ID) %>% rename(CONCEPT_ID=CONDITION_CONCEPT_ID) %>% left_join(c)
#only outpatient
# querySql(connection, "
# SELECT co.condition_concept_id,p.specialty_concept_id
# from condition_occurrence co join provider p on co.provider_id=p.provider_id
# -- where co.
# limit 10;")
# querySql(connection, "SELECT co.condition_concept_id,p.specialty_concept_id
# from condition_occurrence co
# join provider p on co.provider_id=p.provider_id
# join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id
# limit 1;")
#two version are made, for with nice OMOP columns and one with source value columns (made look like nice)
one_cid<-"SELECT co.condition_concept_id,p.specialty_concept_id \n from condition_occurrence co join provider p on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id \n where vo.visit_concept_id=9202 "
#cat(one_cid)
names(co)
## [1] "CONDITION_OCCURRENCE_ID" "PERSON_ID"
## [3] "CONDITION_CONCEPT_ID" "CONDITION_START_DATE"
## [5] "CONDITION_START_DATETIME" "CONDITION_END_DATE"
## [7] "CONDITION_END_DATETIME" "CONDITION_TYPE_CONCEPT_ID"
## [9] "STOP_REASON" "PROVIDER_ID"
## [11] "VISIT_OCCURRENCE_ID" "VISIT_DETAIL_ID"
## [13] "CONDITION_SOURCE_VALUE" "CONDITION_SOURCE_CONCEPT_ID"
## [15] "CONDITION_STATUS_SOURCE_VALUE" "CONDITION_STATUS_CONCEPT_ID"
names(p)
## [1] "PROVIDER_ID" "PROVIDER_NAME"
## [3] "NPI" "DEA"
## [5] "SPECIALTY_CONCEPT_ID" "CARE_SITE_ID"
## [7] "YEAR_OF_BIRTH" "GENDER_CONCEPT_ID"
## [9] "PROVIDER_SOURCE_VALUE" "SPECIALTY_SOURCE_VALUE"
## [11] "SPECIALTY_SOURCE_CONCEPT_ID" "GENDER_SOURCE_VALUE"
## [13] "GENDER_SOURCE_CONCEPT_ID"
one_sv<-"SELECT co.condition_source_value as condition_concept_id,p.specialty_source_value as specialty_concept_id \n from condition_occurrence co \n join provider p on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id \n where vo.visit_concept_id=9202 "
#you make the switch below
#one=one_cid
one=one_sv
#ba=querySql(connection, one)
#vo %>% count(VISIT_CONCEPT_ID) %>% rename(CONCEPT_ID=VISIT_CONCEPT_ID) %>% left_join(c)
names(c)
## [1] "CONCEPT_ID" "CONCEPT_NAME" "DOMAIN_ID" "VOCABULARY_ID"
## [5] "CONCEPT_CLASS_ID" "STANDARD_CONCEPT" "CONCEPT_CODE" "VALID_START_DATE"
## [9] "VALID_END_DATE" "INVALID_REASON"
#KEY CODE HERE TO DETERMINE THE SPANS
#use chunk option results = 'asis'
#specialty span of a dx
# glue::glue("select distinct condition_concept_id, specialty_concept_id FROM
# ({one}) one")
#put two spaces before new line to format it nice
span_dx=glue::glue("select condition_concept_id, count (*) as cnt from
(select distinct condition_concept_id, specialty_concept_id FROM
({one}) one
) two
group by condition_concept_id")
cat(span_dx)
select condition_concept_id, count (*) as cnt from
(select distinct condition_concept_id, specialty_concept_id FROM
(SELECT co.condition_source_value as condition_concept_id,p.specialty_source_value as specialty_concept_id
from condition_occurrence co
join provider p on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id
where vo.visit_concept_id=9202 ) one
) two
group by condition_concept_id
r1=querySql(connection,span_dx)
nrow(r1)
[1] 0
#diagnostic span of specialty (sx)
span_sx=glue::glue("select specialty_concept_id, count(*) as cnt from
(select distinct condition_concept_id, specialty_concept_id FROM
({one}) one
) two
group by specialty_concept_id")
cat(span_sx)
select specialty_concept_id, count(*) as cnt from
(select distinct condition_concept_id, specialty_concept_id FROM
(SELECT co.condition_source_value as condition_concept_id,p.specialty_source_value as specialty_concept_id
from condition_occurrence co
join provider p on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id
where vo.visit_concept_id=9202 ) one
) two
group by specialty_concept_id
#writeLines(span_sx)
r2=querySql(connection,span_dx)
nrow(r2)
[1] 0
#sx fingerprint (no age)
#glue is respecting new lines, no need to slash n
sxf_cid<-glue::glue("SELECT p.specialty_concept_id
from visit_occurrence vo join provider p on vo.provider_id=p.provider_id
where vo.visit_concept_id=9202 ")
names(p)
[1] “PROVIDER_ID” “PROVIDER_NAME”
[3] “NPI” “DEA”
[5] “SPECIALTY_CONCEPT_ID” “CARE_SITE_ID”
[7] “YEAR_OF_BIRTH” “GENDER_CONCEPT_ID”
[9] “PROVIDER_SOURCE_VALUE” “SPECIALTY_SOURCE_VALUE”
[11] “SPECIALTY_SOURCE_CONCEPT_ID” “GENDER_SOURCE_VALUE”
[13] “GENDER_SOURCE_CONCEPT_ID”
sxf_sv<-glue::glue("SELECT p.specialty_source_value as p.specialty_concept_id
from visit_occurrence vo join provider p on vo.provider_id=p.provider_id
where vo.visit_concept_id=9202")
sxf=sxf_cid
sxf2=glue::glue("select specialty_concept_id, count(*)
from (
{sxf}
) one
group by specialty_concept_id
")
cat(sxf2)
select specialty_concept_id, count(*) from ( SELECT p.specialty_concept_id
from visit_occurrence vo join provider p on vo.provider_id=p.provider_id where vo.visit_concept_id=9202
) one group by specialty_concept_id
r2=querySql(connection,sxf2)
nrow(r2)
[1] 0
#301 analysis is simpler fingerprint on provider table (325 is the same for specialty_source_concept_id)
disconnect(connection)