Intro

Some of the data below are subject to copyright by the American Medical Association. Their website states that permission is granted to any individual to copy and use this material as long as the copyright statement is included, the contents are not changed, and the copies are not sold or licensed. We display the copyright in this Intro. We made no changes to the data.

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.1.0     v dplyr   1.0.5
## v tidyr   1.1.3     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(DT)
# Crosswalk between Medicare specialty codes and provider taxonomy codes,
# downloaded as CSV from data.cms.gov.
url <- "https://data.cms.gov/api/views/j75i-rw8y/rows.csv?accessType=DOWNLOAD"
d <- read_csv(file = url)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   `MEDICARE SPECIALTY CODE` = col_character(),
##   `MEDICARE PROVIDER/SUPPLIER TYPE DESCRIPTION` = col_character(),
##   `PROVIDER TAXONOMY CODE` = col_character(),
##   `PROVIDER TAXONOMY DESCRIPTION` = col_character()
## )
# Convert the column headers to syntactic R names (spaces and "/" become "."),
# so the columns can be referenced with `$` without backticks.
# `rename_with()` is the current replacement for the superseded `rename_all()`.
d <- d %>% rename_with(make.names)
# Preview the first rows of the renamed crosswalk.
head(d)
## # A tibble: 6 x 4
##   MEDICARE.SPECIAL~ MEDICARE.PROVIDER.~ PROVIDER.TAXONOM~ PROVIDER.TAXONOMY.DES~
##   <chr>             <chr>               <chr>             <chr>                 
## 1 01                Physician/General ~ 208D00000X        Allopathic & Osteopat~
## 2 02                Physician/General ~ 208600000X        Allopathic & Osteopat~
## 3 02                Physician/General ~ 2086H0002X        Allopathic & Osteopat~
## 4 02                Physician/General ~ 2086S0120X        Allopathic & Osteopat~
## 5 02                Physician/General ~ 2086S0122X        Allopathic & Osteopat~
## 6 02                Physician/General ~ 2086S0105X        Allopathic & Osteopat~
# Show the full column names (the head() printout above truncates them).
colnames(d)
## [1] "MEDICARE.SPECIALTY.CODE"                    
## [2] "MEDICARE.PROVIDER.SUPPLIER.TYPE.DESCRIPTION"
## [3] "PROVIDER.TAXONOMY.CODE"                     
## [4] "PROVIDER.TAXONOMY.DESCRIPTION"
# Total number of rows in the crosswalk as downloaded.
nrow(d)
## [1] 568
# Distinct provider taxonomy codes referenced by the crosswalk.
dc <- unique(d$PROVIDER.TAXONOMY.CODE)

# HPTC (provider taxonomy) code list, downloaded as CSV from nucc.org.
# It has 3 levels -- presumably the Grouping, Classification and
# Specialization columns.
url2 <- "https://nucc.org/images/stories/CSV/nucc_taxonomy_210.csv"
h <- read_csv(file = url2)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Code = col_character(),
##   Grouping = col_character(),
##   Classification = col_character(),
##   Specialization = col_character(),
##   Definition = col_character(),
##   `Effective Date` = col_character(),
##   `Deactivation Date` = col_character(),
##   `Last Modified Date` = col_character(),
##   Notes = col_character(),
##   `Display Name` = col_character()
## )
# Make the taxonomy column names syntactic (e.g. `Effective Date` becomes
# Effective.Date). `rename_with()` is the current replacement for the
# superseded `rename_all()`.
h <- h %>% rename_with(make.names)
# Distinct taxonomy codes defined in the NUCC list.
hc <- unique(h$Code)
# Column names after renaming.
names(h)
##  [1] "Code"               "Grouping"           "Classification"    
##  [4] "Specialization"     "Definition"         "Effective.Date"    
##  [7] "Deactivation.Date"  "Last.Modified.Date" "Notes"             
## [10] "Display.Name"
# Compare the number of distinct taxonomy codes referenced by the crosswalk
# (dc) with the number of distinct codes in the NUCC taxonomy list (hc).
length(dc)
## [1] 450
length(hc)
## [1] 865
# One row per distinct Medicare specialty code / description pair; the `n`
# column holds the number of crosswalk rows that fall under that pair.
# The 568 crosswalk rows are simplified to far fewer specialty codes.
# Note: the data is provided with footnotes, and footnotes are not true data
# rows.
a <- count(
  d,
  MEDICARE.SPECIALTY.CODE,
  MEDICARE.PROVIDER.SUPPLIER.TYPE.DESCRIPTION
)
nrow(a)
## [1] 166
# Interactive table of the specialty pairs, sorted by specialty code in
# descending order (the code is a character column).
datatable(arrange(a, desc(MEDICARE.SPECIALTY.CODE)))
# Distinct taxonomy code / description pairs found in the crosswalk, with the
# number of crosswalk rows for each, shown as an interactive table.
b <- count(d, PROVIDER.TAXONOMY.CODE, PROVIDER.TAXONOMY.DESCRIPTION)
datatable(b)
# Unique Classification / Specialization combinations from the NUCC list,
# shown as an interactive table.
h2 <- distinct(select(h, Classification, Specialization))
datatable(h2)
# Collapse the specializations of each Classification into a single
# "|"-separated string (one row per Classification).
# Missing specializations are dropped before pasting: paste() would otherwise
# convert NA to the literal text "NA" and put it inside the list. A
# Classification with no non-missing specialization gets an empty string.
h3 <- h2 %>%
  group_by(Classification) %>%
  summarise(
    spec = paste(Specialization[!is.na(Specialization)], collapse = "|"),
    .groups = "drop"
  )
h3 %>% datatable()
# Note from the original analysis: this does not result in a good list.