Introduction

This report analyses U.S. hospital outcome data and ranks hospitals by 30-day mortality, both within a single state and across all states.

Dataset

The data used for this analysis can be downloaded here: US Hospital data

The data for this project comes from the Hospital Compare web site run by the U.S. Department of Health and Human Services. The purpose of the web site is to provide data and information about the quality of care at over 4,000 Medicare-certified hospitals in the U.S. This dataset essentially covers all major U.S. hospitals. This dataset is used for a variety of purposes, including determining whether hospitals should be fined for not providing high quality care to patients (see for some background on this particular topic).

The data inside the zip contains the following files:

  • outcome-of-care-measures.csv: Contains information about 30-day mortality and readmission rates for heart attacks, heart failure, and pneumonia for over 4,000 hospitals.
  • hospital-data.csv: Contains information about each hospital.
  • Hospital_Revised_Flatfiles.pdf: Descriptions of the variables in each file (i.e. the code book).
library(ggplot2)
library(data.table)
knitr::opts_chunk$set(echo = TRUE)

First, we load the data and look at its dimensions and a summary of its columns to get a better understanding of its structure.

# Machine-specific folder that holds the unzipped data files.
data_dir <- "C:/Users/virar/Desktop/ranga/R/US Hospital Data"
# Only switch directory when that folder exists; otherwise stay in the current
# working directory so the script does not abort on another machine before it
# even tries to read the file.
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# Read every column as text; numeric columns are converted where they are used.
data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
dim(data)
## [1] 4706   46
# Because every column was read as character, summary() reports only the
# length, class and mode of each column.
summary(data)
##  Provider.Number    Hospital.Name       Address.1          Address.2        
##  Length:4706        Length:4706        Length:4706        Length:4706       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##   Address.3             City              State             ZIP.Code        
##  Length:4706        Length:4706        Length:4706        Length:4706       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##  County.Name        Phone.Number      
##  Length:4706        Length:4706       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##  Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
##  Length:4706                                              
##  Class :character                                         
##  Mode  :character                                         
##  Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
##  Length:4706                                                                        
##  Class :character                                                                   
##  Mode  :character                                                                   
##  Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
##  Length:4706                                                                         
##  Class :character                                                                    
##  Mode  :character                                                                    
##  Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
##  Length:4706                                                                         
##  Class :character                                                                    
##  Mode  :character                                                                    
##  Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
##  Length:4706                                                                   
##  Class :character                                                              
##  Mode  :character                                                              
##  Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
##  Length:4706                                                         
##  Class :character                                                    
##  Mode  :character                                                    
##  Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
##  Length:4706                                               
##  Class :character                                          
##  Mode  :character                                          
##  Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
##  Length:4706                                                                         
##  Class :character                                                                    
##  Mode  :character                                                                    
##  Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
##  Length:4706                                                                          
##  Class :character                                                                     
##  Mode  :character                                                                     
##  Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
##  Length:4706                                                                          
##  Class :character                                                                     
##  Mode  :character                                                                     
##  Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
##  Length:4706                                                                    
##  Class :character                                                               
##  Mode  :character                                                               
##  Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
##  Length:4706                                                          
##  Class :character                                                     
##  Mode  :character                                                     
##  Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
##  Length:4706                                           
##  Class :character                                      
##  Mode  :character                                      
##  Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
##  Length:4706                                                                     
##  Class :character                                                                
##  Mode  :character                                                                
##  Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
##  Length:4706                                                                      
##  Class :character                                                                 
##  Mode  :character                                                                 
##  Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
##  Length:4706                                                                      
##  Class :character                                                                 
##  Mode  :character                                                                 
##  Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
##  Length:4706                                                                
##  Class :character                                                           
##  Mode  :character                                                           
##  Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
##  Length:4706                                                      
##  Class :character                                                 
##  Mode  :character                                                 
##  Hospital.30.Day.Readmission.Rates.from.Heart.Attack
##  Length:4706                                        
##  Class :character                                   
##  Mode  :character                                   
##  Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
##  Length:4706                                                                  
##  Class :character                                                             
##  Mode  :character                                                             
##  Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
##  Length:4706                                                                     
##  Class :character                                                                
##  Mode  :character                                                                
##  Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
##  Length:4706                                                                     
##  Class :character                                                                
##  Mode  :character                                                                
##  Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
##  Length:4706                                                             
##  Class :character                                                        
##  Mode  :character                                                        
##  Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
##  Length:4706                                                   
##  Class :character                                              
##  Mode  :character                                              
##  Hospital.30.Day.Readmission.Rates.from.Heart.Failure
##  Length:4706                                         
##  Class :character                                    
##  Mode  :character                                    
##  Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
##  Length:4706                                                                   
##  Class :character                                                              
##  Mode  :character                                                              
##  Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
##  Length:4706                                                                      
##  Class :character                                                                 
##  Mode  :character                                                                 
##  Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
##  Length:4706                                                                      
##  Class :character                                                                 
##  Mode  :character                                                                 
##  Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
##  Length:4706                                                              
##  Class :character                                                         
##  Mode  :character                                                         
##  Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
##  Length:4706                                                    
##  Class :character                                               
##  Mode  :character                                               
##  Hospital.30.Day.Readmission.Rates.from.Pneumonia
##  Length:4706                                     
##  Class :character                                
##  Mode  :character                                
##  Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
##  Length:4706                                                               
##  Class :character                                                          
##  Mode  :character                                                          
##  Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
##  Length:4706                                                                  
##  Class :character                                                             
##  Mode  :character                                                             
##  Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
##  Length:4706                                                                  
##  Class :character                                                             
##  Mode  :character                                                             
##  Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Pneumonia
##  Length:4706                                                          
##  Class :character                                                     
##  Mode  :character                                                     
##  Footnote...Hospital.30.Day.Readmission.Rates.from.Pneumonia
##  Length:4706                                                
##  Class :character                                           
##  Mode  :character

In this assignment, we’ll primarily focus on deaths due to heart attack, heart failure and pneumonia. To get a better understanding of the data, we’ll plot histograms of their 30-day mortality rates.

# Draw the three 30-day mortality histograms side by side, then put the
# graphics layout back the way it was so later plots are not affected.
old_par <- par(mfrow = c(1, 3))

# One entry per histogram: column index in `data`, plot title, bar colour.
mortality_plots <- list(
  list(column = 11, title = "30-day mortality rates for heart attack",
       fill = "red"),
  list(column = 17, title = "30-day mortality rates for heart failure",
       fill = "salmon"),
  list(column = 23, title = "30-day mortality rates for pneumonia",
       fill = "yellow")
)

for (plot_spec in mortality_plots) {
  # The columns were read as character; entries that are not numbers become NA
  # here (hist() ignores them), so the coercion warning is silenced on purpose.
  rates <- suppressWarnings(as.numeric(data[, plot_spec[["column"]]]))
  hist(rates, xlab = "", main = plot_spec[["title"]], col = plot_spec[["fill"]])
}

par(old_par)

Best Hospital in State

This function takes two arguments:

  • 2-character abbreviated name of a state
  • outcome name

The function will return a character vector with the name of the hospital that has the best (i.e. lowest) 30-day mortality for the specified outcome in that state. The hospital name is the name provided in the Hospital.Name variable. The outcomes can be one of “heart attack”, “heart failure”, or “pneumonia”. Hospitals that do not have data on a particular outcome are excluded from the set of hospitals when deciding the rankings.

Handling ties: If there is a tie for the best hospital for a given outcome, then the hospital names are sorted in alphabetical order and the first hospital in that set is chosen (i.e. if hospitals “b”, “c”, and “f” are tied for best, then hospital “b” should be returned).

The function will check the validity of its arguments. If an invalid state value is passed to best, the function should throw an error via the stop function with the exact message “invalid state”. If an invalid outcome value is passed to best, the function should throw an error via the stop function with the exact message “invalid outcome”.

#' Find the hospital with the lowest 30-day mortality in a state
#'
#' Reads "outcome-of-care-measures.csv" from the working directory and picks,
#' among the hospitals of `state` that have a numeric rate for `outcome`, the
#' one with the lowest rate. Ties are broken alphabetically by hospital name.
#'
#' @param state 2-character state abbreviation; must occur in the `State`
#'   column of the file.
#' @param outcome One of "heart attack", "heart failure" or "pneumonia".
#' @return A length-one character vector with the hospital name (column 2 of
#'   the file), or `NA_character_` when no hospital in the state has data.
#' @details Stops with "invalid state" or "invalid outcome" on bad arguments;
#'   the state is checked first.
best <- function(state, outcome) {
  outcome1 <- read.csv("outcome-of-care-measures.csv",
                       colClasses = "character")

  # Length/NA guards keep a malformed argument from reaching `if` as NA.
  if (length(state) != 1L || is.na(state) || !(state %in% outcome1$State)) {
    stop("invalid state")
  }
  valid_outcomes <- c("heart attack", "heart failure", "pneumonia")
  if (length(outcome) != 1L || !(outcome %in% valid_outcomes)) {
    # Plain stop(): wrapping the message in print() also wrote it to stdout.
    stop("invalid outcome")
  }

  # Column positions of the 30-day mortality rate for each outcome.
  colnum <- switch(outcome,
                   "heart attack" = 11,
                   "heart failure" = 17,
                   "pneumonia" = 23)

  in_state <- outcome1[outcome1$State == state, ]
  # Entries that are not numbers become NA and are excluded below, so the
  # coercion warning carries no information here.
  rates <- suppressWarnings(as.numeric(in_state[, colnum]))
  has_rate <- !is.na(rates)
  if (!any(has_rate)) {
    return(NA_character_)
  }

  hospitals <- in_state[has_rate, 2]
  rates <- rates[has_rate]
  # Lowest rate first; equal rates fall back to alphabetical hospital name.
  hospitals[order(rates, hospitals)][1]
}

Ranking of hospitals in one State

This function takes three arguments:

  • 2-character abbreviated name of a state (state)
  • outcome (outcome)
  • ranking of a hospital in that state for that outcome (num).

The function will return a character vector with the name of the hospital that has the ranking specified by the num argument. For example, the call rankhospital(“MD”, “heart failure”, 5) would return a character vector containing the name of the hospital with the 5th lowest 30-day death rate for heart failure. The num argument can take values “best”, “worst”, or an integer indicating the ranking (smaller numbers are better). If the number given by num is larger than the number of hospitals in that state, then the function should return NA. Hospitals that do not have data on a particular outcome are excluded from the set of hospitals when deciding the rankings.

Handling ties: It may occur that multiple hospitals have the same 30-day mortality rate for a given cause of death. In those cases ties are broken by using the hospital name.

The function will check the validity of its arguments. If an invalid state value is passed to rankhospital, the function should throw an error via the stop function with the exact message “invalid state”. If an invalid outcome value is passed to rankhospital, the function should throw an error via the stop function with the exact message “invalid outcome”.

#' Find the hospital holding a given mortality rank within a state
#'
#' Reads "outcome-of-care-measures.csv" from the working directory, keeps the
#' hospitals of `state` that have a numeric 30-day mortality rate for
#' `outcome`, orders them by rate (ties broken alphabetically by hospital
#' name) and returns the name at position `num`.
#'
#' @param state 2-character state abbreviation; must occur in the `State`
#'   column of the file.
#' @param outcome One of "heart attack", "heart failure" or "pneumonia".
#' @param num "best" (rank 1), "worst" (last rank) or a single number giving
#'   the rank; smaller ranks mean lower mortality.
#' @return A length-one character vector with the hospital name, or
#'   `NA_character_` when the requested rank does not exist in the state.
#' @details Stops with "invalid state", "invalid outcome" or "invalid num" on
#'   bad arguments; the state is checked first.
rankhospital <- function(state, outcome, num = "best") {
  outcome1 <- read.csv("outcome-of-care-measures.csv",
                       colClasses = "character")

  # Length/NA guards keep a malformed argument from reaching `if` as NA.
  if (length(state) != 1L || is.na(state) || !(state %in% outcome1$State)) {
    stop("invalid state")
  }
  valid_outcomes <- c("heart attack", "heart failure", "pneumonia")
  if (length(outcome) != 1L || !(outcome %in% valid_outcomes)) {
    # Plain stop(): wrapping the message in print() also wrote it to stdout.
    stop("invalid outcome")
  }
  is_keyword <- is.character(num) && length(num) == 1L &&
    num %in% c("best", "worst")
  is_rank <- is.numeric(num) && length(num) == 1L && !is.na(num)
  if (!is_keyword && !is_rank) {
    stop("invalid num")
  }

  # Column positions of the 30-day mortality rate for each outcome.
  colnum <- switch(outcome,
                   "heart attack" = 11,
                   "heart failure" = 17,
                   "pneumonia" = 23)

  in_state <- outcome1[outcome1$State == state, ]
  # Entries that are not numbers become NA and are dropped from the ranking,
  # so the coercion warning carries no information here.
  rates <- suppressWarnings(as.numeric(in_state[, colnum]))
  has_rate <- !is.na(rates)
  hospitals <- in_state[has_rate, 2]
  rates <- rates[has_rate]
  ranked <- hospitals[order(rates, hospitals)]

  position <- if (is_rank) {
    trunc(num)
  } else if (num == "best") {
    1
  } else {
    length(ranked)
  }

  # Covers ranks past the end, ranks below 1 and states without any data.
  if (position < 1 || position > length(ranked)) {
    return(NA_character_)
  }
  ranked[position]
}

Ranking of Hospitals in all States

This function takes two arguments:

  • outcome name (outcome)
  • hospital ranking (num)

The function will return a 2-column data frame containing the hospital in each state that has the ranking specified in num. For example, the function call rankall(“heart attack”, “best”) would return a data frame containing the names of the hospitals that are the best in their respective states for 30-day heart attack death rates. The function will return a value for every state (some may be NA). The first column in the data frame is named hospital, which contains the hospital name, and the second column is named state, which contains the 2-character abbreviation for the state name. Hospitals that do not have data on a particular outcome should be excluded from the set of hospitals when deciding the rankings.

Handling ties: The rankall function should handle ties in the 30-day mortality rates in the same way that the rankhospital function handles ties.

The function checks the validity of its arguments. If an invalid outcome value is passed to rankall, the function should throw an error via the stop function with the exact message “invalid outcome”. The num variable can take values “best”, “worst”, or an integer indicating the ranking (smaller numbers are better). If the number given by num is larger than the number of hospitals in that state, then the function should return NA.

#' Find the hospital holding a given mortality rank in every state
#'
#' Reads "outcome-of-care-measures.csv" from the working directory. Within
#' each state, hospitals that have a numeric 30-day mortality rate for
#' `outcome` are ordered by rate (ties broken alphabetically by hospital name)
#' and the hospital at position `num` is reported.
#'
#' @param outcome One of "heart attack", "heart failure" or "pneumonia".
#' @param num "best" (rank 1), "worst" (last rank) or a single number giving
#'   the rank; smaller ranks mean lower mortality.
#' @return A data frame with one row per state found in the file, sorted by
#'   state and using the state as row name. Both columns are character:
#'   `hospital` (the name, or `NA` when the state has no hospital at that
#'   rank) and `state`.
#' @details Stops with "invalid outcome" or "invalid num" on bad arguments.
#'   Both checks run before the file is read.
rankall <- function(outcome, num = "best") {
  valid_outcomes <- c("heart attack", "heart failure", "pneumonia")
  if (length(outcome) != 1L || !(outcome %in% valid_outcomes)) {
    stop("invalid outcome")
  }
  is_keyword <- is.character(num) && length(num) == 1L &&
    num %in% c("best", "worst")
  is_rank <- is.numeric(num) && length(num) == 1L && !is.na(num)
  if (!is_keyword && !is_rank) {
    stop("invalid num")
  }

  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  # Column positions of the 30-day mortality rate for each outcome.
  colnum <- switch(outcome,
                   "heart attack" = 11,
                   "heart failure" = 17,
                   "pneumonia" = 23)

  fd <- data.frame(
    hospital = data[, 2],
    state = data[, 7],
    # Entries that are not numbers become NA and are excluded from the
    # ranking below, so the coercion warning carries no information here.
    rate = suppressWarnings(as.numeric(data[, colnum])),
    stringsAsFactors = FALSE
  )

  # Take the state list from the unfiltered data so that a state whose
  # hospitals all lack data still gets a row (with an NA hospital).
  states <- sort(unique(fd$state))

  # Name of the hospital at the requested rank in one state, or NA.
  pick_hospital <- function(one_state) {
    rated <- fd[fd$state == one_state & !is.na(fd$rate), ]
    ranked <- rated$hospital[order(rated$rate, rated$hospital)]
    position <- if (is_rank) {
      trunc(num)
    } else if (num == "best") {
      1
    } else {
      length(ranked)
    }
    if (position < 1 || position > length(ranked)) {
      return(NA_character_)
    }
    ranked[position]
  }

  hospital <- vapply(states, pick_hospital, character(1), USE.NAMES = FALSE)
  data.frame(hospital = hospital, state = states, row.names = states,
             stringsAsFactors = FALSE)
}