This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

#getting some packages
#install.packages("readxl")
library(readxl)
#library(xlsx)
#detach(package:readxl)
#devtools::install_github("hadley/readxl")

#getting the file
url='http://dashboard.healthit.gov/data/data/MU_REPORT.xlsx'
#a tempororary file; it has to have the right extension
file=tempfile(fileext = '.xlsx')
#to download the file, we have to use the right mode (otherwise the import will fail)
download.file(url,destfile = file,mode="wb")
#just checking what is the path to the file
file
## [1] "C:\\Users\\huserv\\AppData\\Local\\Temp\\1\\Rtmpw7mPFZ\\file23d8702b4700.xlsx"
#reading the actual data into data.frame
d=read_excel(file,sheet = 1)
#d=read.xlsx(file,sheetIndex = 1)

Now we have the data in variable d

#some quick lookups
nrow(d)
## [1] 1028716
table(d$`Provider Stage Number`)
## 
## Stage 1 Stage 2 
##  934142   94574
table(d$`Provider Type`)
## 
##       EP Hospital 
##   965358    63358
table(d$`Product Setting`)
## 
## Ambulatory  Inpatient 
##     932268      96448
#what vendors there are
vendors=as.data.frame(table(d$`Vendor Name`))
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(magrittr)
vendors %<>%  arrange(desc(Freq))

#all vendor names (too long to output, commented out)
#unique(d$`Vendor Name`)
#http://stackoverflow.com/questions/3744178/ggplot2-sorting-a-plot
#plotting vendors
library(ggplot2)
#we have to order it for ggplot correctly
vendors <- transform(vendors, Var1=reorder(Var1, -Freq) ) 
ggplot(head(vendors,30),aes(y=Freq,x=Var1))+geom_point()+ geom_point() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

#better way (more complex graph)
tta<-d %>% group_by(`Provider Type`,`Vendor Name`,`Attestation Year`) %>% summarise(count = n()) %>% arrange(-count) %>% filter(count>800) %>% rename(pt=`Provider Type`,year=`Attestation Year`)

tta
## Source: local data frame [142 x 4]
## Groups: pt, Vendor Name
## 
##    pt                Vendor Name year count
## 1  EP                ACOM Health 2014   937
## 2  EP Acumen Physician Solutions 2014   955
## 3  EP                AllMeds Inc 2014  1340
## 4  EP                 Allscripts 2014 38033
## 5  EP                 Allscripts 2015 21387
## 6  EP                 Allscripts 2013 19476
## 7  EP                 Allscripts 2012 17215
## 8  EP                 Allscripts 2011  3732
## 9  EP        Altos Solutions Inc 2014   925
## 10 EP      AmazingCharts.com Inc 2014  1693
## .. ..                        ...  ...   ...
ggplot(tta,aes(y=count,x=`Vendor Name`))+geom_point()+ geom_point() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

#color
ggplot(tta,aes(y=count,x=`Vendor Name`,color=pt))+geom_point()+ geom_point() + theme(axis.text.x = element_text(angle = 90, hjust = 1))

#facet
#backticks do not work for faceitn
p<-ggplot(tta,aes(y=count,x=`Vendor Name`)) +geom_point()       + geom_point() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

p<-ggplot(tta,aes(y=count,x=`Vendor Name`)) +geom_point()       + geom_point() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

#p +facet_wrap(~pt )
#make the x axis different for each facet
p +facet_wrap(~pt,scales='free' )

#not so good
p + facet_grid(year~pt, scales = 'free')

#better for year will be color
p<-ggplot(tta,aes(y=count,x=`Vendor Name`,color=year)) +geom_point()       + geom_point() + theme(axis.text.x = element_text(angle = 90, hjust = 1)) 

p +facet_wrap(~pt,scales='free' )

#dplyr  is a good data science library to use
library(dplyr)
#looking at one vendor data
d %>% filter(`Vendor Name`=='Allscripts')
## Source: local data frame [101,205 x 21]
## 
##    EHR Certification Number Vendor Name EHR Product CHP Id
## 1           A014E01O5FAMEAN  Allscripts         CHP-021796
## 2           30000004TQGMEAQ  Allscripts         CHP-008032
## 3           30000004TQGMEAQ  Allscripts         CHP-008032
## 4           1314E01P3YKUEAB  Allscripts         CHP-021796
## 5           A000001DYO2NEAT  Allscripts         CHP-009573
## 6           A000001DYO2NEAT  Allscripts         CHP-009573
## 7           30000001SVGHEAS  Allscripts         CHP-007427
## 8           1314E01OUSICEAV  Allscripts         CHP-023059
## 9           30000004QR1ZEAS  Allscripts         CHP-006453
## 10          30000004QR1ZEAS  Allscripts         CHP-006453
## ..                      ...         ...                ...
## Variables not shown: EHR Product Name (chr), EHR Product Version (chr),
##   Product Classification (chr), Product Setting (chr), Product
##   Certification Edition Yr (dbl), Attestation Month (dbl), Attestation
##   Year (dbl), Business State/Territory (chr), Provider Type (chr),
##   Specialty (chr), Program Year (dbl), Payment Year (dbl), Provider Stage
##   Number (chr), Program Type (chr), Stage 2 Scheduled 2014 (dbl), MU
##   Definition 2014 (dbl), Attestation_ID (dbl), NPI (chr)
#simpler view and only hospitals
oneVendor= d %>% filter(`Vendor Name`=='Allscripts' & `Provider Type`=='Hospital') %>% select(`Vendor Name`,`EHR Product Name`) %>% distinct()
#dplyr sometimes shortens the list to top 10, so we convert to traditional data.frame
as.data.frame(oneVendor)
##    Vendor Name                                  EHR Product Name
## 1   Allscripts                                Sunrise Acute Care
## 2   Allscripts                         Allscripts FollowMyHealth
## 3   Allscripts                           Sunrise Ambulatory Care
## 4   Allscripts                                     Allscripts ED
## 5   Allscripts                     Sunrise Acute Care Module Set
## 6   Allscripts Sunrise Patient Portal and Sunrise Acute Care EHR
## 7   Allscripts                 Sunrise Emergency Care Module Set
## 8   Allscripts                            Sunrise Emergency Care
## 9   Allscripts                                         dbMotion?
## 10  Allscripts                                          dbMotion
#we see 138 products from this vendor

oneVendorData = d %>% filter(`Vendor Name`=='Allscripts' & `Provider Type`=='Hospital'& `EHR Product Name` =='Sunrise Acute Care') %>% arrange(`Attestation Year`)
oneVendorData
## Source: local data frame [686 x 21]
## 
##    EHR Certification Number Vendor Name EHR Product CHP Id
## 1           30000001SWXCEA0  Allscripts         CHP-007983
## 2           30000001SWXCEA0  Allscripts         CHP-007983
## 3           30000001SWXCEA0  Allscripts         CHP-007983
## 4           30000001SWXCEA0  Allscripts         CHP-007983
## 5           30000001SWXCEA0  Allscripts         CHP-007983
## 6           30000001SWXCEA0  Allscripts         CHP-007983
## 7           30000002EDF8EAA  Allscripts         CHP-007983
## 8           30000002EDF8EAA  Allscripts         CHP-007983
## 9           30000001SWXCEA0  Allscripts         CHP-007983
## 10          30000001SWXCEA0  Allscripts         CHP-007983
## ..                      ...         ...                ...
## Variables not shown: EHR Product Name (chr), EHR Product Version (chr),
##   Product Classification (chr), Product Setting (chr), Product
##   Certification Edition Yr (dbl), Attestation Month (dbl), Attestation
##   Year (dbl), Business State/Territory (chr), Provider Type (chr),
##   Specialty (chr), Program Year (dbl), Payment Year (dbl), Provider Stage
##   Number (chr), Program Type (chr), Stage 2 Scheduled 2014 (dbl), MU
##   Definition 2014 (dbl), Attestation_ID (dbl), NPI (chr)
library(gmodels)
CrossTable(d$`Payment Year`,d$`Provider Type`)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1028716 
## 
##  
##                  | d$`Provider Type` 
## d$`Payment Year` |        EP |  Hospital | Row Total | 
## -----------------|-----------|-----------|-----------|
##                1 |    378702 |     18122 |    396824 | 
##                  |   107.199 |  1633.338 |           | 
##                  |     0.954 |     0.046 |     0.386 | 
##                  |     0.392 |     0.286 |           | 
##                  |     0.368 |     0.018 |           | 
## -----------------|-----------|-----------|-----------|
##                2 |    318385 |     22354 |    340739 | 
##                  |     5.853 |    89.187 |           | 
##                  |     0.934 |     0.066 |     0.331 | 
##                  |     0.330 |     0.353 |           | 
##                  |     0.309 |     0.022 |           | 
## -----------------|-----------|-----------|-----------|
##                3 |    212309 |     17262 |    229571 | 
##                  |    45.268 |   689.735 |           | 
##                  |     0.925 |     0.075 |     0.223 | 
##                  |     0.220 |     0.272 |           | 
##                  |     0.206 |     0.017 |           | 
## -----------------|-----------|-----------|-----------|
##                4 |     55962 |      5620 |     61582 | 
##                  |    57.773 |   880.265 |           | 
##                  |     0.909 |     0.091 |     0.060 | 
##                  |     0.058 |     0.089 |           | 
##                  |     0.054 |     0.005 |           | 
## -----------------|-----------|-----------|-----------|
##     Column Total |    965358 |     63358 |   1028716 | 
##                  |     0.938 |     0.062 |           | 
## -----------------|-----------|-----------|-----------|
## 
## 
#nicer format
CrossTable(d$`Payment Year`,d$`Provider Type`,prop.chisq=F,format='SPSS',digits=1)
## 
##    Cell Contents
## |-------------------------|
## |                   Count |
## |             Row Percent |
## |          Column Percent |
## |           Total Percent |
## |-------------------------|
## 
## Total Observations in Table:  1028716 
## 
##                  | d$`Provider Type` 
## d$`Payment Year` |       EP  | Hospital  | Row Total | 
## -----------------|-----------|-----------|-----------|
##                1 |   378702  |    18122  |   396824  | 
##                  |     95.4% |      4.6% |     38.6% | 
##                  |     39.2% |     28.6% |           | 
##                  |     36.8% |      1.8% |           | 
## -----------------|-----------|-----------|-----------|
##                2 |   318385  |    22354  |   340739  | 
##                  |     93.4% |      6.6% |     33.1% | 
##                  |     33.0% |     35.3% |           | 
##                  |     30.9% |      2.2% |           | 
## -----------------|-----------|-----------|-----------|
##                3 |   212309  |    17262  |   229571  | 
##                  |     92.5% |      7.5% |     22.3% | 
##                  |     22.0% |     27.2% |           | 
##                  |     20.6% |      1.7% |           | 
## -----------------|-----------|-----------|-----------|
##                4 |    55962  |     5620  |    61582  | 
##                  |     90.9% |      9.1% |      6.0% | 
##                  |      5.8% |      8.9% |           | 
##                  |      5.4% |      0.5% |           | 
## -----------------|-----------|-----------|-----------|
##     Column Total |   965358  |    63358  |  1028716  | 
##                  |     93.8% |      6.2% |           | 
## -----------------|-----------|-----------|-----------|
## 
##