Install and load the required libraries, then read in the participants dataset.

#install.packages("tidyverse")
#install.packages("plotly")
#install.packages("ggplot2")
#install.packages("dplyr")
#install.packages("lubridate")
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(ggplot2)
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Read the participant list. stringsAsFactors is set explicitly because its
# default changed from TRUE to FALSE in R 4.0.0; the factor-level counts in the
# summary output below rely on the text columns being read as factors.
# NOTE(review): zip is parsed as a number (see the summary), so any leading
# zeros are lost -- read it as character if zip is ever used downstream.
participation <- read.csv("BPCIAparticipants.csv", stringsAsFactors = TRUE)
# Number of rows and columns.
dim(participation)
## [1] 1086   10
# Per-column overview: level counts for factors, quantiles for numeric columns.
summary(participation)
##         bpid                                orglegalname
##  1001-0000:   1   REMEDY BPCI PARTNERS, LLC       :228  
##  1006-0000:   1   NAVIHEALTH, INC.                :104  
##  1011-0001:   1   FUSION5, INC.                   : 91  
##  1019-0000:   1   STRYKER                         : 81  
##  1045-0000:   1   UNITED HEALTHCARE SERVICES, INC.: 64  
##  1048-0000:   1   SIGNATURE MEDICAL GROUP, INC.   : 39  
##  (Other)  :1080   (Other)                         :479  
##                             orgdbaname           role     participanttype
##  REMEDY PARTNERS                 :228   Convener   :835   ACH  :131      
##  NAVIHEALTH, INC.                :104   Participant:251   Other:771      
##  FUSION5                         : 91                     PGP  :184      
##  STRYKER PERFORMANCE SOLUTIONS   : 81                                    
##  UNITED HEALTHCARE SERVICES, INC.: 64                                    
##  SIGNATURE CARE MANAGEMENT       : 39                                    
##  (Other)                         :479                                    
##                              address1        address2            city    
##  1120 POST ROAD                  :164            :822   DARIEN     :228  
##  210 WESTWOOD PLACE, SUITE 400   :104   SUITE 300:106   BRENTWOOD  :124  
##  117 SOUTH MAIN STREET           : 91   SUITE 100: 36   CHICAGO    :102  
##  350 NORTH ORLEANS ST., SUITE 650: 70   N/A      : 22   ST. CHARLES: 91  
##  1120 POST RD                    : 64   SUITE 120: 19   MINNETONKA : 64  
##  9900 BREN ROAD EAST             : 64   SUITE 270: 13   LOS ANGELES: 55  
##  (Other)                         :529   (Other)  : 68   (Other)    :422  
##      state          zip       
##  CT     :230   Min.   : 1605  
##  MO     :135   1st Qu.: 6820  
##  IL     :130   Median :43215  
##  TN     :130   Mean   :42034  
##  CA     : 79   3rd Qu.:63141  
##  MN     : 68   Max.   :99508  
##  (Other):314

Select the variables we want to look at, relabel the "Participant" role as "Non-Convener", and assign the result back to the participation dataset.

#install.packages("car")
#library(car)
#install.packages("forcats")
#library(forcats)
#install.packages("stringr")
#library(stringr)
# Keep only the identifier, organisation, role, type and location columns;
# address1, address2 and zip are dropped.
participation <- select(
  participation,
  bpid, orglegalname, orgdbaname, role, participanttype, city, state
)
# Relabel the role "Participant" as "Non-Convener"; "Convener" is kept as is.
# A value matching neither condition becomes NA.
participation <- mutate(
  participation,
  role = case_when(
    role == "Convener" ~ "Convener",
    role == "Participant" ~ "Non-Convener"
  )
)

Count the number of Conveners and Non-Conveners.

#participantsbyrole <- participation %>%
  #summarize(role=n())

Chart the top 5 conveners (Conveners with the largest number of BPIDs).

#Conveners5 <- participation %>%
 # group_by(orglegalname)
#Conveners5

Create a bar chart of the number of participants by role.

# Bar chart of the number of participants in each role.
# role is categorical, so geom_bar() (which counts rows per category) is the
# right layer; `bins` only applies to histograms of continuous variables and
# there is no `geom_bar` argument, so neither is passed.
roleplot <- ggplot(participation, aes(x = role, fill = role)) +
  geom_bar() +
  xlab("Participant Role") +
  ylab("Number of Participants")
roleplot

Plot the number of participants by participant type.

# Bar chart of the number of participants of each participant type.
# participanttype is categorical, so geom_bar() counts rows per category;
# histogram-only settings such as `bins` do not apply and are not passed.
typesplot <- ggplot(
  participation,
  aes(x = participanttype, fill = participanttype)
) +
  geom_bar() +
  xlab("Participant Type") +
  ylab("Number of Participants")
typesplot

Plot the number of Conveners by type (ACHs, PGPs, and Other).

# Rows whose role is "Convener".
allconveners <- participation %>%
  filter(role == "Convener")
# Bar chart of the number of Conveners of each participant type.
# geom_bar() counts rows per category; histogram-only settings such as `bins`
# do not apply to a categorical x and are not passed.
allconvenersplot <- ggplot(
  allconveners,
  aes(x = participanttype, fill = participanttype)
) +
  geom_bar() +
  xlab("Participant Type") +
  ylab("Number of Conveners")
allconvenersplot

Plot the number of Non-Conveners by type (ACHs, PGPs, and Other).

# Rows whose role is "Non-Convener".
allparticipants <- participation %>%
  filter(role == "Non-Convener")
# Bar chart of the number of Non-Conveners of each participant type.
# geom_bar() counts rows per category; histogram-only settings such as `bins`
# do not apply to a categorical x and are not passed.
allparticipantsplot <- ggplot(
  allparticipants,
  aes(x = participanttype, fill = participanttype)
) +
  geom_bar() +
  xlab("Participant Type") +
  ylab("Number of Non-Conveners")
allparticipantsplot