Create a .CSV file (or optionally, a MySQL database!) that includes all of the information included in the dataset. You’re encouraged to use a “wide” structure similar to how the information appears in the discussion item, so that you can practice tidying and transformations as described below.
Read the information from your .CSV file into R, and use tidyr and dplyr as needed to tidy and transform your data. [Most of your grade will be based on this step!]
Perform the analysis requested in the discussion item.
Your code should be in an R Markdown file, posted to rpubs.com, and should include narrative descriptions of your data cleanup work, analysis, and conclusions.
Data Source 1: Hospital Readmissions Reduction Program
Background: In October 2012, CMS began reducing Medicare payments for Inpatient Prospective Payment System hospitals with excess readmissions. Excess readmissions are measured by a ratio, by dividing a hospital’s number of “predicted” 30-day readmissions for heart attack, heart failure, and pneumonia by the number that would be “expected,” based on an average hospital with similar patients. A ratio greater than 1 indicates excess readmissions.
library(tidyr)
library(ggplot2)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
# Medicaid inpatient readmission reduce program data
# Source: https://data.medicare.gov/Hospital-Compare/Hospital-Readmissions-Reduction-Program/9n3s-kdb3#SaveAs
dataMed <- read.csv('Hospital_Readmissions_Reduction_Program.csv',
na.strings = c("Not Available", "NA", "Too Few to Report"),
header = TRUE, stringsAsFactors = FALSE)
dim(dataMed)
## [1] 19878 12
str(dataMed)
## 'data.frame': 19878 obs. of 12 variables:
## $ Hospital.Name : chr "SOUTHEAST ALABAMA MEDICAL CENTER" "SOUTHEAST ALABAMA MEDICAL CENTER" "SOUTHEAST ALABAMA MEDICAL CENTER" "SOUTHEAST ALABAMA MEDICAL CENTER" ...
## $ Provider.Number : int 10001 10001 10001 10001 10001 10001 10005 10005 10005 10005 ...
## $ State : chr "AL" "AL" "AL" "AL" ...
## $ Measure.Name : chr "READM-30-AMI-HRRP" "READM-30-CABG-HRRP" "READM-30-COPD-HRRP" "READM-30-HF-HRRP" ...
## $ Number.of.Discharges : int 781 273 709 983 335 664 NA NA 667 389 ...
## $ Footnote : int NA NA NA NA NA NA NA 5 NA NA ...
## $ Excess.Readmission.Ratio : num 0.984 1.062 1.046 0.951 1.12 ...
## $ Predicted.Readmission.Rate: num 15.4 13.9 19.8 20.3 5.6 ...
## $ Expected.Readmission.Rate : num 15.6 13.1 18.9 21.3 5 ...
## $ Number.of.Readmissions : int 119 40 143 196 21 119 NA NA 107 84 ...
## $ Start.Date : chr "1-Jul-12" "1-Jul-12" "1-Jul-12" "1-Jul-12" ...
## $ End.Date : chr "30-Jun-15" "30-Jun-15" "30-Jun-15" "30-Jun-15" ...
head(dataMed, 3)
## Hospital.Name Provider.Number State
## 1 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 2 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 3 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## Measure.Name Number.of.Discharges Footnote
## 1 READM-30-AMI-HRRP 781 NA
## 2 READM-30-CABG-HRRP 273 NA
## 3 READM-30-COPD-HRRP 709 NA
## Excess.Readmission.Ratio Predicted.Readmission.Rate
## 1 0.9837 15.3580
## 2 1.0618 13.8887
## 3 1.0455 19.7525
## Expected.Readmission.Rate Number.of.Readmissions Start.Date End.Date
## 1 15.6121 119 1-Jul-12 30-Jun-15
## 2 13.0809 40 1-Jul-12 30-Jun-15
## 3 18.8932 143 1-Jul-12 30-Jun-15
tail(dataMed, 3)
## Hospital.Name Provider.Number State Measure.Name
## 19876 WEIMAR MEDICAL CENTER 670114 TX READM-30-HF-HRRP
## 19877 WEIMAR MEDICAL CENTER 670114 TX READM-30-HIP-KNEE-HRRP
## 19878 WEIMAR MEDICAL CENTER 670114 TX READM-30-PN-HRRP
## Number.of.Discharges Footnote Excess.Readmission.Ratio
## 19876 NA 5 NA
## 19877 NA 5 NA
## 19878 NA 5 NA
## Predicted.Readmission.Rate Expected.Readmission.Rate
## 19876 NA NA
## 19877 NA NA
## 19878 NA NA
## Number.of.Readmissions Start.Date End.Date
## 19876 NA 1-Jul-12 30-Jun-15
## 19877 NA 1-Jul-12 30-Jun-15
## 19878 NA 1-Jul-12 30-Jun-15
as_tibble(dataMed)
## # A tibble: 19,878 × 12
## Hospital.Name Provider.Number State
## <chr> <int> <chr>
## 1 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 2 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 3 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 4 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 5 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 6 SOUTHEAST ALABAMA MEDICAL CENTER 10001 AL
## 7 MARSHALL MEDICAL CENTER SOUTH 10005 AL
## 8 MARSHALL MEDICAL CENTER SOUTH 10005 AL
## 9 MARSHALL MEDICAL CENTER SOUTH 10005 AL
## 10 MARSHALL MEDICAL CENTER SOUTH 10005 AL
## # ... with 19,868 more rows, and 9 more variables: Measure.Name <chr>,
## # Number.of.Discharges <int>, Footnote <int>,
## # Excess.Readmission.Ratio <dbl>, Predicted.Readmission.Rate <dbl>,
## # Expected.Readmission.Rate <dbl>, Number.of.Readmissions <int>,
## # Start.Date <chr>, End.Date <chr>
# create ratio of number of readmission to total of discharges
df <- dataMed %>%
select(Hospital.Name, Measure.Name, Number.of.Discharges, Number.of.Readmissions, State) %>%
mutate(ratio.Readmi.dischar = round(Number.of.Readmissions / Number.of.Discharges, 2))
# Readmissions by procedure
ggplot(df, aes(x = Measure.Name, y = Number.of.Readmissions)) +
geom_boxplot() + coord_flip()
## Warning: Removed 8272 rows containing non-finite values (stat_boxplot).
# Ratio of readmission to discharge by procedure
ggplot(df, aes(x = Measure.Name, y = ratio.Readmi.dischar)) +
geom_boxplot() + coord_flip()
## Warning: Removed 8272 rows containing non-finite values (stat_boxplot).
# top 5 hospitals with the highest ratio of readmission to discharge
top_df <- df %>%
dplyr::group_by(Hospital.Name) %>%
dplyr::summarise(ratio = round(sum(Number.of.Readmissions, na.rm = TRUE)/sum(Number.of.Discharges, na.rm = TRUE), 2)) %>%
arrange(desc(ratio)) %>%
top_n(5)
## Selecting by ratio
ggplot(top_df, aes(reorder(Hospital.Name, ratio), ratio)) +
geom_bar(stat="identity", position="stack") +
coord_flip()