The objective of this notebook is to create an analytic infrastructure for a project that considers the relationship between contracted insurer-provider networks and referral networks among physician subspecialties.
# Set up R environment
library(devtools)
library(knitr)
library(tidyverse)
library(rlang)
library(readxl)
library(network)
library(tidygraph)
library(ggraph)
library(igraph)
#library(jagmisc) # !!!!!! internal only.
library(tidyr)
library(patchwork) # devtools::install_github("thomasp85/patchwork")
library(plotly)
load_all("../network.referral")
The basis for our analysis of insurance networks is the TK 2017 Vericred data. These data provide information on which plans an individual
We further limit our anlayses to office-based physicians based on the SK&A market research data for TK.
# Download the CMS referral data from the URL (will only be done once)
# get_cms_referral_data(year = 2015, days = 30)
# get_cms_referral_data(year = 2015, days = 60)
# get_cms_referral_data(year = 2015, days = 90)
# Load CMS edge and node list data.
EE30 <- readRDS("../data/cms-referral/edge-list-2015-d30.rds")
VV30 <- readRDS("../data/cms-referral/node-list-2015-d30.rds") %>%
mutate(npi = paste0(npi)) %>%
rename(id = node_id)
EE60 <- readRDS("../data/cms-referral/edge-list-2015-d60.rds")
VV60 <- readRDS("../data/cms-referral/node-list-2015-d60.rds") %>%
mutate(npi = paste0(npi)) %>%
rename(id = node_id)
# Load Penn Provider and Provider Network Data (basis is SK&A and Vericred data)
penn_npi <- read_xls("../data/Data_Share_Graves/OH_2017/npi_oh2017.xls",sheet=1) %>%
select(npi , specialty = spec1_cat, city) %>%
mutate(primcare = as.integer(specialty %in%
c("FAMILY MEDICINE","GENERAL PRACTITIONER","INTERNIST"))) %>%
mutate(npi = as.character(npi))
penn_network <- read_xls("../data/Data_Share_Graves/OH_2017/npi_network_oh2017.xls") %>%
mutate(npi = as.character(npi))
The raw CMS shared patient files are constructed as an “edge-list” file containing data on each physician pair and the number of shared encounters and unique patients between them over the specified time window (e.g., 30, 60, 90 or 180 days). I chose to use the 30-day window as the basis for these analyses, though we can easily refine using a different window if we so choose. Below, I show that the choice of 30 or 60 days, for example, doesn’t appear to make any meaningful difference. One benefit of a longer winder is that it provides more time for a relationship to be revealed given the 11-patient CMS cell reporting rule.
The bipartite physician-plan network matricies provide binary information on which plans an individual NPI is in. The rows are NPI numbers (both PCP and subspecialty) and the columns are for individual plans available in the geographic area.
penn_bipart_network <- penn_network %>%
select(-net_onmarket, -state) %>%
mutate(innet = 1) %>%
tidyr::gather(key,value,-npi,-network_id) %>%
tidyr::unite(col = key , key , network_id) %>%
tidyr::spread(key,value) %>%
select(-contains("innet_NA"))
penn_bipart_network[is.na(penn_bipart_network)] <- 0
penn_bipart_network <- penn_bipart_network %>%
mutate_at(vars(contains("innet_")),funs(as.factor))
penn_bipart_network %>%
head() %>%
select(1:6) %>%
kable(caption = "First 6 rows and columns of Bipartite MD-Plan Network for Ohio")
| npi | innet_100081 | innet_100162 | innet_100226 | innet_100236 | innet_100255 |
|---|---|---|---|---|---|
| 1003007741 | 0 | 0 | 0 | 0 | 0 |
| 1003013004 | 1 | 0 | 0 | 0 | 0 |
| 1003034281 | 0 | 0 | 1 | 1 | 0 |
| 1003059197 | 0 | 1 | 1 | 1 | 0 |
| 1003059684 | 0 | 0 | 1 | 1 | 0 |
| 1003066390 | 0 | 0 | 1 | 1 | 0 |
Next, we will construct a bipartite network that compares referreal relationships between primary care physicians and cardiologists. Each “connection” reflects at least 11 shared FFS Medicare patients over a 30 day period in 2015, though in sensitivity analyses we can also consider alternative time windows (e.g., 30, 60, 90 or 180 days).
Items to consider:
What geographic scope to we consider? The state networks are very large to visualize, though there may be some value in estimating various network measures at the stae level (e.g., overall network size).
I would propose we include rating area indicators in the underlying Penn data, so that we can consider networks within each rating area. It is very likely that, for a given network, we will find heterogeneity across rating areas (e.g., few overlapping in-network referrals in one area, but more in another).
# The EE and VV data frames are the global frames.
# at this point this means all PCPs and Cardiologists
# in Ohio, but in the future this could be a data frame
# with national scope and that contains all kinds of
# physician subspecialties.
# Cities with at least 25 cardiologists.
geography <-
penn_npi %>%
filter(primcare==0) %>%
count(city) %>%
arrange(desc(n)) %>%
filter(n>25) %>%
pull(city)
# Function to get all physicians with a subspecialty by geographic area.
ee_get_subspec <- function(ee_city,ee_primcare) {
penn_npi %>% filter(primcare == ee_primcare) %>%
## !!!!! NOTE WHEN WE SWITCH TO RATING AREA THIS WILL NEED TO CHANGE
filter(city == ee_city) %>%
left_join(penn_bipart_network,"npi")
}
# Get the primary care physicians for each city.
ee_pcp_geo <- geography %>%
map(~ee_get_subspec(ee_city = .x, ee_primcare = 1)) %>%
set_names(geography)
# Ge the cardiologists for each city.
ee_card_geo <- geography %>%
map(~ee_get_subspec(ee_city = .x, ee_primcare = 0)) %>%
set_names(geography)
# Construct bipartite referral matricies separately by city.
# 30-day window
net_pcp_card_d30 <-
geography %>%
map(~construct_bipartite_referral_network(EE = EE30 ,
VV = VV30 ,
ee_from = ee_pcp_geo[[.x]],
ee_to = ee_card_geo[[.x]])) %>%
set_names(geography)
# 60-day window
net_pcp_card_d60 <-
geography %>%
map(~construct_bipartite_referral_network(EE = EE60 ,
VV = VV60 ,
ee_from = ee_pcp_geo[[.x]],
ee_to = ee_card_geo[[.x]])) %>%
set_names(geography)
# Example Network
p.example_net_d30 <- net_pcp_card_d30[["CLEVELAND"]] %>%
visualize_referral_network(plan = "100528") +
scale_shape_manual(name = "Physician Type", values= c(19,17,2),
labels = c("IN-NETWORK PCP","IN-NETWORK CARDIOLOGIST","OUT-OF-NETWORK\nCARDIOLOGIST")) +
ggtitle("30-Day Window\nNetwork ID = 100528") +
geom_edge_density(aes(fill = innet_100528.y)) +
scale_edge_fill_manual(name= "Density of Connections", values = c("red","blue"),labels= c("Out of Network","In Network"))
p.example_net_d60 <- net_pcp_card_d60[["CLEVELAND"]] %>%
visualize_referral_network(plan = "100528") +
scale_shape_manual(name = "Physician Type", values= c(19,17,2),
labels = c("IN-NETWORK PCP","IN-NETWORK CARDIOLOGIST","OUT-OF-NETWORK\nCARDIOLOGIST")) +
ggtitle("60-Day Window\nNetwork ID = 100528") +
geom_edge_density(aes(fill = innet_100528.y)) +
scale_edge_fill_manual(name= "Density of Connections", values = c("red","blue"),labels= c("Out of Network","In Network"))
p.example_net_d30 + p.example_net_d60 + plot_layout(ncol = 1)
Visualization of Bipartite Subspecialty Network (PCP-Cardiology) for Cleveland, OH plan ID 100528
# Example Network 2
p.example_net_2 <- net_pcp_card_d30[["CLEVELAND"]] %>%
visualize_referral_network(plan = "100953") +
scale_shape_manual(name = "Physician Type", values= c(19,17,2),
labels = c("IN-NETWORK PCP","IN-NETWORK CARDIOLOGIST","OUT-OF-NETWORK\nCARDIOLOGIST")) +
ggtitle("Broad Network Example\nNetwork ID = 100953") +
geom_edge_density(aes(fill = innet_100953.y)) +
scale_edge_fill_manual(name= "Density of Connections", values = c("red","blue"),labels= c("Out of Network","In Network"))
p.example_net_2
Visualization of Bipartite Subspecialty Network (PCP-Cardiology) for Cleveland, OH plan ID 100953
# Show how the networks vary dramatically geographically for the same plan.
tmp_fnc <- function(x) {
net_pcp_card_d30[[x]] %>%
visualize_referral_network(plan = "100528") +
scale_shape_manual(name = "Physician Type", values= c(19,17,2),
labels = c("IN-NETWORK PCP","IN-NETWORK CARDIOLOGIST","OUT-OF-NETWORK\nCARDIOLOGIST")) +
ggtitle(paste0(x) )+
geom_edge_density(aes(fill = innet_100528.y)) +
scale_edge_fill_manual(name= "Density of Connections", values = c("red","blue"),labels= c("Out of Network","In Network")) +
theme(legend.position="none")
}
p.geog <-
geography %>% map(~tmp_fnc(x = .x)) %>%
set_names(geography)
p.geog[["CINCINNATI"]] + p.geog[["COLUMBUS"]] +
p.geog[["CLEVELAND"]] + p.geog[["DAYTON"]] + p.geog[["TOLEDO"]] +
p.geog[["AKRON"]] + plot_layout(ncol = 3, nrow = 2)
Even for the same plan (100528) we see significant heterogeneity in both network size and in-network referrals
While the network visualizations can be helpful, ultimately we will need to settle on some key measures we can then summarize across all plans/networks, nationally or by state.
# Temporary function to filter data to plans with at least three of each provider type in
# the geographic area.
tmp_fnc <- function(x) {
net_pcp_card_d30[[x]] %>%
filter_to_area_networks(minimum_in_net=3) %>%
activate(nodes) %>%
select(contains("innet_")) %>%
tbl_df() %>%
names() %>%
gsub("innet_","",.)
}
plans <-
geography %>%
map(~tmp_fnc(.x)) %>%
set_names(geography)
tmp_fnc <- function(g) {
plans[[g]] %>%
map(~ estimate_shared_referrals(.x,network = net_pcp_card_d30[[g]])) %>%
bind_rows() %>%
mutate(geography = g)
}
shared_referrals <-
geography %>% map(~ tmp_fnc(.x)) %>%
bind_rows() %>%
arrange(geography,desc(frac_shared))
# Summary stats.
summ_geo_shared_referrals <-
shared_referrals %>%
group_by(geography) %>%
summarize(weighted_mean = weighted.mean(frac_shared,total),
mean = mean(frac_shared))
summ_shared_referrals <-
shared_referrals %>%
summarize(weighted_mean= weighted.mean(frac_shared,total),
mean = mean(frac_shared))
shared_referrals %>% filter(geography == "CLEVELAND") %>% kable(caption = "Fraction of Shared Referrals by Plan and Area" )
| total | shared | frac_shared | frac_shared_sq | network_id | geography |
|---|---|---|---|---|---|
| 13953 | 13896 | 0.9959149 | 0.9918464 | 100953 | CLEVELAND |
| 11073 | 9157 | 0.8269665 | 0.6838736 | 100950 | CLEVELAND |
| 8083 | 6419 | 0.7941358 | 0.6306517 | 101127 | CLEVELAND |
| 9551 | 7020 | 0.7350016 | 0.5402273 | 100236 | CLEVELAND |
| 5892 | 3736 | 0.6340801 | 0.4020576 | 100507 | CLEVELAND |
| 5871 | 3392 | 0.5777551 | 0.3338009 | 100972 | CLEVELAND |
| 6629 | 3806 | 0.5741439 | 0.3296412 | 100226 | CLEVELAND |
| 5303 | 2712 | 0.5114086 | 0.2615388 | 100162 | CLEVELAND |
| 2188 | 320 | 0.1462523 | 0.0213897 | 100528 | CLEVELAND |
tmp_fnc <- function(g) {
plans[[g]] %>%
map(~ get_concentration_index(.x,network = net_pcp_card_d30[[g]])) %>%
bind_rows() %>%
mutate(geography = g)
}
concentration <-
geography %>% map(~ tmp_fnc(.x)) %>%
bind_rows() %>%
ungroup()
# Plot the referral concentration distribution.
concentration %>%
select(geography,network_id,conc_in_0,conc_in_1) %>%
gather(key, value, -geography, -network_id) %>%
filter(value > 0) %>%
with(Hmisc::histboxp(x=value,group=key,sd=TRUE,bins=100,xlab="Referral Concentration Index"))
tmp_fnc <- function(g) {
plans[[g]] %>% map(~get_network_size(network = net_pcp_card_d30[[g]], plan = .x)) %>%
bind_rows() %>%
mutate(geography = g)
}
network_size <- geography %>% map(~tmp_fnc(.x)) %>% bind_rows()
shared_referrals %>% left_join(network_size, c("geography","network_id")) %>%
ggplot(aes(x = network_size_from,y = frac_shared)) + geom_point() + theme_bw() +
facet_wrap(~geography)
Relationship between network size and fraction of in-network referrals.
# Temporary function to obtain the modularity scores by plan in each area.
tmp_fnc <- function(g) {
plans[[g]] %>%
map( ~ get_modularity_by_plan(network = net_pcp_card_d30[[g]] , plan = .x)) %>%
bind_rows() %>%
mutate(network_id = plans[[g]]) %>% set_names(c("mod_all","mod_in_net", "network_id")) %>%
select(network_id, everything()) %>% mutate(geography = g)
}
modularity_est <-
geography %>% map(~tmp_fnc(.x)) %>%
bind_rows()
modularity_est %>% ggplot(aes(y=network_id,x=mod_all)) + geom_point(aes(colour=geography)) +
theme_bw()
Heterogeneity in Modularity Scores by Plan and Geographic Area
tmp_fnc <- function(g) {
foo <- plans[[g]] %>%
map(~get_community_membership(plan = ., network = net_pcp_card_d30[[g]])) %>%
bind_rows() %>%
mutate(geography = g) %>%
select(network_id, geography, everything())
}
community_membership <-
geography %>% map(~tmp_fnc(.x)) %>%
bind_rows() %>%
group_by(geography,network_id,community) %>%
mutate(n = n()) %>%
select(-geography) %>%
gather(key, value, -name,-network_id) %>%
unite(key,c("key","network_id"),sep = "_") %>%
spread(key,value)