Specialty and diagnosis

#--specialty paper - OMOP code, to be executed on OMOPed Medicare
library(DatabaseConnector)


#install.packages('Eunomia')
library(Eunomia)
## Warning: package 'Eunomia' was built under R version 4.0.5
connectionDetails <- getEunomiaConnectionDetails()
connection <- connect(connectionDetails)
## Connecting using SQLite driver
#querySql(connection, "SELECT COUNT(*) FROM person;")
#  COUNT(*)
#1     2694

t=getTableNames(connection,databaseSchema = 'main')
#disconnect(connection)
library(SqlRender)

p=querySql(connection, "SELECT * FROM provider;")
names(p)
##  [1] "PROVIDER_ID"                 "PROVIDER_NAME"              
##  [3] "NPI"                         "DEA"                        
##  [5] "SPECIALTY_CONCEPT_ID"        "CARE_SITE_ID"               
##  [7] "YEAR_OF_BIRTH"               "GENDER_CONCEPT_ID"          
##  [9] "PROVIDER_SOURCE_VALUE"       "SPECIALTY_SOURCE_VALUE"     
## [11] "SPECIALTY_SOURCE_CONCEPT_ID" "GENDER_SOURCE_VALUE"        
## [13] "GENDER_SOURCE_CONCEPT_ID"
co=querySql(connection, "SELECT * FROM condition_occurrence limit 10;")
names(co)
##  [1] "CONDITION_OCCURRENCE_ID"       "PERSON_ID"                    
##  [3] "CONDITION_CONCEPT_ID"          "CONDITION_START_DATE"         
##  [5] "CONDITION_START_DATETIME"      "CONDITION_END_DATE"           
##  [7] "CONDITION_END_DATETIME"        "CONDITION_TYPE_CONCEPT_ID"    
##  [9] "STOP_REASON"                   "PROVIDER_ID"                  
## [11] "VISIT_OCCURRENCE_ID"           "VISIT_DETAIL_ID"              
## [13] "CONDITION_SOURCE_VALUE"        "CONDITION_SOURCE_CONCEPT_ID"  
## [15] "CONDITION_STATUS_SOURCE_VALUE" "CONDITION_STATUS_CONCEPT_ID"
c=querySql(connection, "SELECT * from concept") 

vo=querySql(connection, "SELECT * from visit_occurrence limit 100000000") 

# querySql(connection, "SELECT co.condition_concept_id,p.specialty_concept_id 
# from condition_occurrence co 
# join provider p  on co.provider_id=p.provider_id 
# join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurence_id
#          limit 1;")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
#co %>% count(CONDITION_CONCEPT_ID) %>% rename(CONCEPT_ID=CONDITION_CONCEPT_ID) %>%  left_join(c)
#only outpatient
# querySql(connection, "
# SELECT co.condition_concept_id,p.specialty_concept_id 
# from condition_occurrence co join provider p  on co.provider_id=p.provider_id 
# --  where co.
#   limit 10;")


# querySql(connection, "SELECT co.condition_concept_id,p.specialty_concept_id 
# from condition_occurrence co 
# join provider p  on co.provider_id=p.provider_id 
# join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id
#          limit 1;")




#two version are made, for with nice OMOP columns and one with source value columns (made look like nice)

one_cid<-"SELECT co.condition_concept_id,p.specialty_concept_id   \n from condition_occurrence co join provider p  on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id   \n where vo.visit_concept_id=9202 "
#cat(one_cid)
names(co)
##  [1] "CONDITION_OCCURRENCE_ID"       "PERSON_ID"                    
##  [3] "CONDITION_CONCEPT_ID"          "CONDITION_START_DATE"         
##  [5] "CONDITION_START_DATETIME"      "CONDITION_END_DATE"           
##  [7] "CONDITION_END_DATETIME"        "CONDITION_TYPE_CONCEPT_ID"    
##  [9] "STOP_REASON"                   "PROVIDER_ID"                  
## [11] "VISIT_OCCURRENCE_ID"           "VISIT_DETAIL_ID"              
## [13] "CONDITION_SOURCE_VALUE"        "CONDITION_SOURCE_CONCEPT_ID"  
## [15] "CONDITION_STATUS_SOURCE_VALUE" "CONDITION_STATUS_CONCEPT_ID"
names(p)
##  [1] "PROVIDER_ID"                 "PROVIDER_NAME"              
##  [3] "NPI"                         "DEA"                        
##  [5] "SPECIALTY_CONCEPT_ID"        "CARE_SITE_ID"               
##  [7] "YEAR_OF_BIRTH"               "GENDER_CONCEPT_ID"          
##  [9] "PROVIDER_SOURCE_VALUE"       "SPECIALTY_SOURCE_VALUE"     
## [11] "SPECIALTY_SOURCE_CONCEPT_ID" "GENDER_SOURCE_VALUE"        
## [13] "GENDER_SOURCE_CONCEPT_ID"
one_sv<-"SELECT  co.condition_source_value as condition_concept_id,p.specialty_source_value as specialty_concept_id  \n from condition_occurrence co   \n join provider p  on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id   \n where vo.visit_concept_id=9202  "


#you make the switch below
#one=one_cid
one=one_sv


#ba=querySql(connection, one)
#vo %>% count(VISIT_CONCEPT_ID) %>% rename(CONCEPT_ID=VISIT_CONCEPT_ID) %>%  left_join(c)
names(c)
##  [1] "CONCEPT_ID"       "CONCEPT_NAME"     "DOMAIN_ID"        "VOCABULARY_ID"   
##  [5] "CONCEPT_CLASS_ID" "STANDARD_CONCEPT" "CONCEPT_CODE"     "VALID_START_DATE"
##  [9] "VALID_END_DATE"   "INVALID_REASON"
#KEY CODE HERE TO DETERMINE THE SPANS

Spans

#use chunk option results = 'asis'

#specialty span of a dx
# glue::glue("select distinct condition_concept_id, specialty_concept_id FROM 
#            ({one}) one")


#put two spaces before new line to format it nice
span_dx=glue::glue("select condition_concept_id, count (*) as cnt from   
(select distinct condition_concept_id, specialty_concept_id FROM   
           ({one}) one  
) two  
group by condition_concept_id")

cat(span_dx)

select condition_concept_id, count (*) as cnt from
(select distinct condition_concept_id, specialty_concept_id FROM
(SELECT co.condition_source_value as condition_concept_id,p.specialty_source_value as specialty_concept_id
from condition_occurrence co
join provider p on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id
where vo.visit_concept_id=9202 ) one
) two
group by condition_concept_id

r1=querySql(connection,span_dx)
nrow(r1)

[1] 0

#diagnostic span of specialty (sx)

span_sx=glue::glue("select specialty_concept_id, count(*) as cnt from   
(select distinct condition_concept_id, specialty_concept_id FROM   
           ({one}) one  
) two  
group by specialty_concept_id")

cat(span_sx)

select specialty_concept_id, count(*) as cnt from
(select distinct condition_concept_id, specialty_concept_id FROM
(SELECT co.condition_source_value as condition_concept_id,p.specialty_source_value as specialty_concept_id
from condition_occurrence co
join provider p on co.provider_id=p.provider_id join visit_occurrence vo on co.visit_occurrence_id = vo.visit_occurrence_id
where vo.visit_concept_id=9202 ) one
) two
group by specialty_concept_id

#writeLines(span_sx)
r2=querySql(connection,span_dx)
nrow(r2)

[1] 0

#sx fingerprint (no age)
#glue is respecting new lines, no need to slash n
sxf_cid<-glue::glue("SELECT p.specialty_concept_id   
                    from visit_occurrence vo  join provider p  on vo.provider_id=p.provider_id 
                    where vo.visit_concept_id=9202  ")
names(p)

[1] “PROVIDER_ID” “PROVIDER_NAME”
[3] “NPI” “DEA”
[5] “SPECIALTY_CONCEPT_ID” “CARE_SITE_ID”
[7] “YEAR_OF_BIRTH” “GENDER_CONCEPT_ID”
[9] “PROVIDER_SOURCE_VALUE” “SPECIALTY_SOURCE_VALUE”
[11] “SPECIALTY_SOURCE_CONCEPT_ID” “GENDER_SOURCE_VALUE”
[13] “GENDER_SOURCE_CONCEPT_ID”

sxf_sv<-glue::glue("SELECT p.specialty_source_value as p.specialty_concept_id 
                   from visit_occurrence vo  join provider p  on vo.provider_id=p.provider_id
                   where vo.visit_concept_id=9202")
sxf=sxf_cid

sxf2=glue::glue("select specialty_concept_id, count(*) 
from (
      {sxf}
      ) one
group by specialty_concept_id
                ")
cat(sxf2)

select specialty_concept_id, count(*) from ( SELECT p.specialty_concept_id
from visit_occurrence vo join provider p on vo.provider_id=p.provider_id where vo.visit_concept_id=9202
) one group by specialty_concept_id

r2=querySql(connection,sxf2)
nrow(r2)

[1] 0

#301 analysis is simpler fingerprint on provider table (325 is the same for specialty_source_concept_id)

disconnect(connection)