Summary

The Affordable Care Act (ACA), signed into law in 2010, along with other recent changes affecting healthcare reimbursement models, has caused significant shifts in incentives in the operations of large hospitals and health care systems. For this project, we will examine annual operational statistics related to California hospitals, including numbers of inpatient beds and inpatient patient census days and surgical procedure volume, to analyze trends from 2013 to 2016. In particular, we will focus on changes in inpatient and outpatient surgical volumes.

1. Data Preparation

  • Load libraries
library(knitr)
library(kableExtra)
library(prettydoc)
library(RCurl)
library(dplyr)
library(ggplot2)
library(tidyr)
library(tidyverse)
library(sqldf)
  • Load the data as a dataframe

Note: The file as downloaded from HealthData.gov exceed 85 MB. To fit the requirements of being under 25 MB for posting on Github, columns with extraneous information such as the contact information for the person who compiled data at each facility was removed. Additionally, measures and metrics that are not of interest for this analysis were removed. The full dataset can be view here.

#load the csv into R
df_hosputil <- read.csv("https://raw.githubusercontent.com/littlejohnjeff/Data_606/master/2012-2016-hosputilmr_edited.csv")
#Display top rows of dataframe
head(df_hosputil)
##   Year  OSHPD_ID         FAC_NAME FAC_ADDRESS_ONE FAC_ADDRESS_TWO FAC_CITY
## 1 2012 106010735 ALAMEDA HOSPITAL    2070 CLINTON                  ALAMEDA
## 2 2012 106010735 ALAMEDA HOSPITAL    2070 CLINTON                  ALAMEDA
## 3 2012 106010735 ALAMEDA HOSPITAL    2070 CLINTON                  ALAMEDA
## 4 2012 106010735 ALAMEDA HOSPITAL    2070 CLINTON                  ALAMEDA
## 5 2012 106010735 ALAMEDA HOSPITAL    2070 CLINTON                  ALAMEDA
## 6 2012 106010735 ALAMEDA HOSPITAL    2070 CLINTON                  ALAMEDA
##   FAC_ZIPCODE FAC_OPER_CURRYR BEG_DATE   END_DATE PARENT_NAME PARENT_ZIP_9
## 1       94501             Yes 1/1/2012 12/31/2012                         
## 2       94501             Yes 1/1/2012 12/31/2012                         
## 3       94501             Yes 1/1/2012 12/31/2012                         
## 4       94501             Yes 1/1/2012 12/31/2012                         
## 5       94501             Yes 1/1/2012 12/31/2012                         
## 6       94501             Yes 1/1/2012 12/31/2012                         
##   LIC_STATUS LIC_STATUS_DATE TRAUMA_CTR TEACH_HOSP ACLAIMS_NO
## 1       Open        1/1/1946                    NO  140000011
## 2       Open        1/1/1946                    NO  140000011
## 3       Open        1/1/1946                    NO  140000011
## 4       Open        1/1/1946                    NO  140000011
## 5       Open        1/1/1946                    NO  140000011
## 6       Open        1/1/1946                    NO  140000011
##   ASSEMBLY_DIST SENATE_DIST CONGRESS_DIST CENS_TRACT HEALTH_SVC_AREA
## 1            16           9            13 6001428500   05 - East Bay
## 2            16           9            13 6001428500   05 - East Bay
## 3            16           9            13 6001428500   05 - East Bay
## 4            16           9            13 6001428500   05 - East Bay
## 5            16           9            13 6001428500   05 - East Bay
## 6            16           9            13 6001428500   05 - East Bay
##    COUNTY LICENSE_NO  FACILITY_LEVEL LONGITUDE LATITUDE           TYPE_LIC
## 1 Alameda  140000002 Parent Facility -122.2536 37.76295 General Acute Care
## 2 Alameda  140000002 Parent Facility -122.2536 37.76295 General Acute Care
## 3 Alameda  140000002 Parent Facility -122.2536 37.76295 General Acute Care
## 4 Alameda  140000002 Parent Facility -122.2536 37.76295 General Acute Care
## 5 Alameda  140000002 Parent Facility -122.2536 37.76295 General Acute Care
## 6 Alameda  140000002 Parent Facility -122.2536 37.76295 General Acute Care
##           TYPE_SVC_PRINCIPAL Measure.Variable          Description
## 1 General Medical / Surgical            3.1.1     MED_SURG_BED_LIC
## 2 General Medical / Surgical            3.1.2  MED_SURG_LICBED_DAY
## 3 General Medical / Surgical            3.1.3         MED_SURG_DIS
## 4 General Medical / Surgical            3.1.5    MED_SURG_CENS_DAY
## 5 General Medical / Surgical           3.25.1    HOSP_BED_LIC_TOTL
## 6 General Medical / Surgical           3.25.2 HOSP_LICBED_DAY_TOTL
##   Response Amount
## 1             204
## 2           49104
## 3            2619
## 4            9457
## 5             401
## 6          121206
  • Subset columns
#subset to our columns of interest
df_hosputil_2 <- df_hosputil[c(2,3,1,31,33)]
#remove rows with certain measure
df_hosputil_2 <- df_hosputil_2[df_hosputil_2$Description != "AMB_SURG_PROG",]
head(df_hosputil_2)
##    OSHPD_ID         FAC_NAME Year          Description Amount
## 1 106010735 ALAMEDA HOSPITAL 2012     MED_SURG_BED_LIC    204
## 2 106010735 ALAMEDA HOSPITAL 2012  MED_SURG_LICBED_DAY  49104
## 3 106010735 ALAMEDA HOSPITAL 2012         MED_SURG_DIS   2619
## 4 106010735 ALAMEDA HOSPITAL 2012    MED_SURG_CENS_DAY   9457
## 5 106010735 ALAMEDA HOSPITAL 2012    HOSP_BED_LIC_TOTL    401
## 6 106010735 ALAMEDA HOSPITAL 2012 HOSP_LICBED_DAY_TOTL 121206
  • Tidy data
#turn Description field into columns; amount as Key using Spread 
df_hosputil_3 <- df_hosputil_2 %>%
  spread(key = Description, value = Amount)

head(df_hosputil_3)
##    OSHPD_ID                                    FAC_NAME Year
## 1 106010735                            ALAMEDA HOSPITAL 2012
## 2 106010735                            ALAMEDA HOSPITAL 2013
## 3 106010735                            ALAMEDA HOSPITAL 2014
## 4 106010735                            ALAMEDA HOSPITAL 2015
## 5 106010735                            ALAMEDA HOSPITAL 2016
## 6 106010739 ALTA BATES SUMMIT MED CTR-ALTA BATES CAMPUS 2012
##   HOSP_BED_LIC_TOTL HOSP_CENS_DAY_TOTL HOSP_DIS_TOTL HOSP_LICBED_DAY_TOTL
## 1               401              46073          3006               121206
## 2               281              72058          3026               102565
## 3               281              47310          1882               102565
## 4               281              70488          2476               102565
## 5               281              71354          2154               102846
## 6               347              77913         17003               127002
##   MED_SURG_BED_LIC MED_SURG_CENS_DAY MED_SURG_DIS MED_SURG_LICBED_DAY
## 1              204              9457         2619               49104
## 2               84              9853         2575               30660
## 3               84              5619         1547               30660
## 4               84              6410         2045               30660
## 5               84              7140         1964               30744
## 6              146             34109         7896               53436
##   SURG_IP SURG_OP
## 1   43099   74637
## 2     568    1477
## 3     544     843
## 4     630     840
## 5     635    1560
## 6  394287  321180

We will likely do some more analysis with above data. For now, our big question is how annual inpatient and outpatient surgical volumes changed from 2013 to 2016, so we’re going to create some calculations to show the delta between 2013 and 2016 for these measures for each facility. This will likely take some time and require a smarter approach, but here is a start.

#objective - get difference between 2016 and 2012 values for these measures
df_hosputil_4 <- df_hosputil_3 %>%
  group_by(FAC_NAME) %>%
  mutate(HOSP_CENS_DAY_DIFF = HOSP_CENS_DAY_TOTL - lag(HOSP_CENS_DAY_TOTL, default = HOSP_CENS_DAY_TOTL[4]))

df_hosputil_4
## Source: local data frame [2,547 x 14]
## Groups: FAC_NAME [661]
## 
## # A tibble: 2,547 x 14
##    OSHPD_ID FAC_NAME  Year HOSP_BED_LIC_TO~ HOSP_CENS_DAY_T~ HOSP_DIS_TOTL
##       <int> <fct>    <int>            <int>            <int>         <int>
##  1   1.06e8 ALAMEDA~  2012              401            46073          3006
##  2   1.06e8 ALAMEDA~  2013              281            72058          3026
##  3   1.06e8 ALAMEDA~  2014              281            47310          1882
##  4   1.06e8 ALAMEDA~  2015              281            70488          2476
##  5   1.06e8 ALAMEDA~  2016              281            71354          2154
##  6   1.06e8 ALTA BA~  2012              347            77913         17003
##  7   1.06e8 ALTA BA~  2013              347            75824         16428
##  8   1.06e8 ALTA BA~  2014              347            76018         15039
##  9   1.06e8 ALTA BA~  2015              347            67924         13725
## 10   1.06e8 ALTA BA~  2016              347            66822         15809
## # ... with 2,537 more rows, and 8 more variables:
## #   HOSP_LICBED_DAY_TOTL <int>, MED_SURG_BED_LIC <int>,
## #   MED_SURG_CENS_DAY <int>, MED_SURG_DIS <int>,
## #   MED_SURG_LICBED_DAY <int>, SURG_IP <int>, SURG_OP <int>,
## #   HOSP_CENS_DAY_DIFF <int>

2. Research Question

Are changes in inpatient surgical volumes from 2012 to 2016 predictive of changes to outpatient surgical volumes at the same hospital?

3. Cases

#count number of facilities in dataset
sqldf("select count(distinct(FAC_NAME)) from df_hosputil_3")
## Loading required package: tcltk
##   count(distinct(FAC_NAME))
## 1                       661

Each case represents a California hospital. Note that a multi-facility health system will be represented by one case per facility. There are 661 distinct facilities in the dataset that met our initial criteria. We might later exclude certain facilities due to missing or suspicious data. Thus, we start with 661 cases but expect to have fewer at the conclusion of our analysis.

4. Data Collection

Individual hospitals and health systems are required to self-report this facility data on an annual basis. Reported numbers do receive regulatory and other review, so there are strong incentives to report accurate information. However, possibilities of error and ambiguities of interpretation are entirely possible.

Annual utilization data are published here:

https://healthdata.gov/dataset/hospital-annual-utilization-report-pivot-tables

Note that for the purposes of this exercise we will be using the Hospital Annual Utilization Data - Machine-Readable Format CSV, which combines annual data from 2012 to 2016 in a format more friendly to analysis than the individual annual files.

Why are we looking at 2013 instead of 2012? The 2012 values for certain hospitals and measures seem abnormally high, either due to error or them being reported as an aggregation of pre-2013 annual values.

5. Response Variable

What is the response variable, and what type is it (numerical/categorical)?

The response variable is the change in outpatient surgical volume from 2013 to 2016.

6. Explanatory Variable

What is the response variable, and what type is it (numerical/categorical)?

The explanatory variable is the change in inpatient surgical volume from 2013 to 2016.

7. Relevant Summary Statistics

Summary 2013 and 2016 statistics

df_hosp_13 <- df_hosputil_3[df_hosputil_3$Year == 2013,]
df_hosp_16 <- df_hosputil_3[df_hosputil_3$Year == 2016,]

summary(df_hosp_13)
##     OSHPD_ID                                         FAC_NAME  
##  Min.   :106010735   ALVARADO HOSPITAL MEDICAL CENTER    :  2  
##  1st Qu.:106190328   ADVENTIST MEDICAL CENTER            :  1  
##  Median :106301279   ADVENTIST MEDICAL CENTER-SELMA      :  1  
##  Mean   :106871906   ADVENTIST MEDICAL CENTER - REEDLEY  :  1  
##  3rd Qu.:106380842   AHMC ANAHEIM REGIONAL MEDICAL CENTER:  1  
##  Max.   :206351814   ALAMEDA COUNTY MEDICAL CENTER       :  1  
##                      (Other)                             :502  
##       Year      HOSP_BED_LIC_TOTL HOSP_CENS_DAY_TOTL HOSP_DIS_TOTL  
##  Min.   :2013   Min.   :  10.0    Min.   :   300     Min.   :   15  
##  1st Qu.:2013   1st Qu.:  67.0    1st Qu.: 12299     1st Qu.: 1194  
##  Median :2013   Median : 140.0    Median : 27024     Median : 4428  
##  Mean   :2013   Mean   : 202.5    Mean   : 41813     Mean   : 6969  
##  3rd Qu.:2013   3rd Qu.: 269.0    3rd Qu.: 56060     3rd Qu.:10940  
##  Max.   :2013   Max.   :1500.0    Max.   :511585     Max.   :47332  
##                                   NA's   :24         NA's   :24     
##  HOSP_LICBED_DAY_TOTL MED_SURG_BED_LIC MED_SURG_CENS_DAY  MED_SURG_DIS  
##  Min.   :   288       Min.   :  4.0    Min.   :    35    Min.   :    5  
##  1st Qu.: 24455       1st Qu.: 47.0    1st Qu.:  7422    1st Qu.: 1385  
##  Median : 51100       Median : 96.0    Median : 17077    Median : 4238  
##  Mean   : 74230       Mean   :122.4    Mean   : 24060    Mean   : 5766  
##  3rd Qu.: 99645       3rd Qu.:176.0    3rd Qu.: 34713    3rd Qu.: 9174  
##  Max.   :547500       Max.   :591.0    Max.   :177330    Max.   :36670  
##                       NA's   :94       NA's   :117       NA's   :117    
##  MED_SURG_LICBED_DAY    SURG_IP           SURG_OP     
##  Min.   :  1460      Min.   :    1.0   Min.   :    3  
##  1st Qu.: 16790      1st Qu.:  691.8   1st Qu.: 1110  
##  Median : 35040      Median : 1645.0   Median : 2283  
##  Mean   : 44894      Mean   : 2363.5   Mean   : 3291  
##  3rd Qu.: 64331      3rd Qu.: 3439.0   3rd Qu.: 4118  
##  Max.   :219730      Max.   :16213.0   Max.   :21678  
##  NA's   :93          NA's   :135       NA's   :142
summary(df_hosp_16)
##     OSHPD_ID        
##  Min.   :106010735  
##  1st Qu.:106190324  
##  Median :106301310  
##  Mean   :106678916  
##  3rd Qu.:106380863  
##  Max.   :206351814  
##                     
##                                                FAC_NAME        Year     
##  ALVARADO HOSPITAL MEDICAL CENTER                  :  2   Min.   :2016  
##  FOUNTAIN VALLEY REGIONAL HOSPITAL AND MEDICAL CENT:  2   1st Qu.:2016  
##  ADVENTIST HEALTH MEDICAL CENTER TEHACHAPI VALLEY  :  1   Median :2016  
##  ADVENTIST MEDICAL CENTER                          :  1   Mean   :2016  
##  ADVENTIST MEDICAL CENTER - CENTRAL VALLEY         :  1   3rd Qu.:2016  
##  ADVENTIST MEDICAL CENTER - REEDLEY                :  1   Max.   :2016  
##  (Other)                                           :498                 
##  HOSP_BED_LIC_TOTL HOSP_CENS_DAY_TOTL HOSP_DIS_TOTL   HOSP_LICBED_DAY_TOTL
##  Min.   :  10.0    Min.   :    51     Min.   :    5   Min.   :  3234      
##  1st Qu.:  64.0    1st Qu.: 12060     1st Qu.: 1077   1st Qu.: 23150      
##  Median : 139.0    Median : 27841     Median : 4594   Median : 51240      
##  Mean   : 197.5    Mean   : 42569     Mean   : 6961   Mean   : 72057      
##  3rd Qu.: 270.0    3rd Qu.: 57851     3rd Qu.:10822   3rd Qu.: 98820      
##  Max.   :1500.0    Max.   :572258     Max.   :49736   Max.   :549000      
##                    NA's   :14         NA's   :14                          
##  MED_SURG_BED_LIC MED_SURG_CENS_DAY  MED_SURG_DIS   MED_SURG_LICBED_DAY
##  Min.   :  4.0    Min.   :     9    Min.   :    1   Min.   :  1464     
##  1st Qu.: 48.0    1st Qu.:  7852    1st Qu.: 1492   1st Qu.: 17202     
##  Median : 99.0    Median : 18556    Median : 4410   Median : 36051     
##  Mean   :122.1    Mean   : 25855    Mean   : 5963   Mean   : 44464     
##  3rd Qu.:176.0    3rd Qu.: 37119    3rd Qu.: 8863   3rd Qu.: 64050     
##  Max.   :591.0    Max.   :191110    Max.   :39285   Max.   :216306     
##  NA's   :98       NA's   :121       NA's   :121     NA's   :96         
##     SURG_IP           SURG_OP     
##  Min.   :    2.0   Min.   :   13  
##  1st Qu.:  660.5   1st Qu.: 1104  
##  Median : 1731.0   Median : 2416  
##  Mean   : 2381.6   Mean   : 3683  
##  3rd Qu.: 3411.5   3rd Qu.: 4634  
##  Max.   :15280.0   Max.   :27682  
##  NA's   :135       NA's   :143
  • Total Numbers
# boxplot of census days
boxplot(df_hosputil_3$HOSP_CENS_DAY_TOTL ~ df_hosputil_3$Year, col="blue", main="Distribution of Annual Total Census Days", ylab="Annual Census Days", xlab="Years") 

#histogram of outpatient surgeries for 2012
histogram(df_hosp_13$SURG_OP)

#histogram of outpatient surgeries for 2016
histogram(df_hosp_16$SURG_OP)