Introduction

The data come from the Hospital Compare web site (http://hospitalcompare.hhs.gov) run by the U.S. Department of Health and Human Services. The purpose of the web site is to provide data and information about the quality of care at over 4,000 Medicare-certified hospitals in the U.S. This dataset essentially covers all major U.S. hospitals. This dataset is used for a variety of purposes, including determining whether hospitals should be fined for not providing high quality care to patients (see http://goo.gl/jAXFX for some background on this particular topic).

Getting data

## Fetch the assignment archive and unpack it into the working directory.
## The download is skipped when the archive is already present, so re-running
## the script does not hit the network again.
if (!file.exists('prog3data.zip')) {
    ## mode = 'wb': the archive is binary and must not be written in text mode
    download.file('https://d396qusza40orc.cloudfront.net/rprog%2Fdata%2FProgAssignment3-data.zip',
                  destfile = 'prog3data.zip', mode = 'wb')
}
unzip('prog3data.zip')

1 30-day mortality rates for heart attack

A simple histogram of the 30-day death rates from heart attack

## Read every column as text, then convert column 11 to numbers so it can be
## plotted; entries that are not numeric become NA (hence the warning below).
outcome <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
outcome[[11]] <- as.numeric(outcome[[11]])
## Warning: NAs introduced by coercion
hist(outcome[[11]], xlab = names(outcome)[11], main = "")

2 The best hospital in a state

The function best takes two arguments: the 2-character abbreviated name of a state and an outcome name. The function reads the outcome-of-care-measures.csv file and returns a character vector with the name of the hospital that has the best (i.e. lowest) 30-day mortality for the specified outcome in that state. The outcome must be one of “heart attack”, “heart failure”, or “pneumonia”.

## Find the hospital with the lowest 30-day mortality rate in a state.
##
## Arguments:
##   state   - 2-character state abbreviation; must occur in the State column.
##   outcome - one of 'heart attack', 'heart failure' or 'pneumonia'.
##
## Returns a length-1 character vector holding the hospital name. Ties are
## resolved by taking the alphabetically first name. NA is returned when no
## hospital in the state has a numeric rate for the outcome.
##
## Stops with 'invalid state' or 'invalid outcome' on bad arguments.
best <- function(state, outcome) {
    ## Read outcome data
    data <- read.csv('outcome-of-care-measures.csv', colClasses = 'character')

    ## Check that state and outcome are valid
    if (!(state %in% unique(data[, 'State']))) {
        stop('invalid state')
    }
    ## Column holding the 30-day mortality rate for each supported outcome
    ## (the same columns rankhospital and rankall use)
    rate_cols <- c('heart attack' = 11, 'heart failure' = 17, 'pneumonia' = 23)
    if (!(outcome %in% names(rate_cols))) {
        stop('invalid outcome')
    }

    in_state <- which(data$State == state)
    hospitals <- data$Hospital.Name[in_state]
    ## The file is read as text, so the rates have to be converted before they
    ## are compared; otherwise min() compares strings and "10.1" < "9.8".
    ## Non-numeric entries become NA, which is expected here.
    rate <- suppressWarnings(as.numeric(data[in_state, rate_cols[[outcome]]]))

    has_rate <- !is.na(rate)
    if (!any(has_rate)) {
        return(NA_character_)
    }
    lowest <- min(rate[has_rate])

    ## Return hospital name in that state with lowest 30-day death
    ## rate
    sort(unique(hospitals[has_rate & rate == lowest]))[1]
}

Testing:

best("MD", "pneumonia")
## [1] "PRINCE GEORGES HOSPITAL CENTER"

3 Ranking hospitals by outcome in a state

The function rankhospital takes three arguments: the 2-character abbreviated name of a state (state), an outcome (outcome), and the ranking of a hospital in that state for that outcome (num). The function reads the outcome-of-care-measures.csv file and returns a character vector with the name of the hospital that has the ranking specified by the num argument. The num argument can take the values “best” or “worst”, or an integer indicating the ranking.

## Return the hospital holding a given rank for an outcome within a state.
##
## Arguments:
##   state   - 2-character state abbreviation; must occur in the State column.
##   outcome - one of 'heart attack', 'heart failure' or 'pneumonia'.
##   num     - 'best', 'worst', or a number giving the rank (1 = lowest rate).
##
## Hospitals are ranked by ascending 30-day mortality rate, ties broken by
## hospital name. Hospitals without a numeric rate are not ranked. Returns a
## length-1 character vector, or NA when the requested rank does not exist.
##
## Stops with 'invalid state', 'invalid outcome' or 'num is not valid'.
rankhospital <- function(state, outcome, num = 'best') {
    ## Read outcome data
    data <- read.csv('outcome-of-care-measures.csv', colClasses = 'character')

    ## Check that state and outcome are valid
    if (!(state %in% unique(data[, 'State']))) {
        stop('invalid state')
    }
    ## Column holding the 30-day mortality rate for each supported outcome
    rate_cols <- c('heart attack' = 11, 'heart failure' = 17, 'pneumonia' = 23)
    if (!(outcome %in% names(rate_cols))) {
        stop('invalid outcome')
    }

    in_state <- which(data$State == state)
    hospitals <- data$Hospital.Name[in_state]
    ## Silence only the expected coercion warning instead of switching
    ## warnings off for the whole session with options(warn = -1).
    rate <- suppressWarnings(as.numeric(data[in_state, rate_cols[[outcome]]]))

    ## Drop hospitals with no rate so they can never be returned for a rank
    has_rate <- !is.na(rate)
    hospitals <- hospitals[has_rate]
    rate <- rate[has_rate]
    ranked <- hospitals[order(rate, hospitals)]

    if (is.numeric(num)) {
        idx <- num
    } else if (num == 'best') {
        idx <- 1
    } else if (num == 'worst') {
        ## Last entry of the ascending ranking: highest rate, and among ties
        ## the alphabetically last name
        idx <- length(ranked)
    } else {
        stop('num is not valid')
    }

    if (is.na(idx) || idx < 1 || idx > length(ranked)) {
        return(NA_character_)
    }
    ranked[idx]
}

Testing:

rankhospital("MD", "heart attack", "worst")
## [1] "HARFORD MEMORIAL HOSPITAL"

4 Ranking hospitals in all states

The function rankall takes two arguments: an outcome name (outcome) and a hospital ranking (num). The function reads the outcome-of-care-measures.csv file and returns a 2-column data frame containing the hospital in each state that has the ranking specified in num.

## For every state, return the hospital holding a given rank for an outcome.
##
## Arguments:
##   outcome - one of 'heart attack', 'heart failure' or 'pneumonia'.
##   num     - 'best' (default), 'worst', or a number giving the rank
##             (1 = lowest rate).
##
## Within each state hospitals are ranked by ascending 30-day mortality rate,
## ties broken by hospital name. Hospitals without a numeric rate are not
## ranked. Returns a data frame with one row per state and the columns
## 'hospital' and 'state'; 'hospital' is NA where a state has no hospital at
## the requested rank.
##
## Stops with 'invalid outcome' or 'num is not valid'.
rankall <- function(outcome, num='best') {
    ## Read outcome data
    data <- read.csv('outcome-of-care-measures.csv', colClasses = 'character')

    ## Column holding the 30-day mortality rate for each supported outcome
    rate_cols <- c('heart attack' = 11, 'heart failure' = 17, 'pneumonia' = 23)
    if (!(outcome %in% names(rate_cols))) {
        stop('invalid outcome')
    }
    if (!is.numeric(num) && !(num %in% c('best', 'worst'))) {
        stop('num is not valid')
    }

    ## Silence only the expected coercion warning instead of switching
    ## warnings off for the whole session with options(warn = -1).
    rate <- suppressWarnings(as.numeric(data[, rate_cols[[outcome]]]))

    ## Column 2 is the hospital name, column 7 the state
    per_state <- split(
        data.frame(hospital = data[, 2], rate = rate, stringsAsFactors = FALSE),
        data[, 7]
    )

    ## Pick the requested rank from one state's hospitals
    pick <- function(x) {
        ## Hospitals with no rate are never ranked
        x <- x[!is.na(x$rate), , drop = FALSE]
        ranked <- x$hospital[order(x$rate, x$hospital)]
        if (is.numeric(num)) {
            idx <- num
        } else if (num == 'best') {
            idx <- 1
        } else {
            ## 'worst': last entry of the ascending ranking
            idx <- length(ranked)
        }
        if (is.na(idx) || idx < 1 || idx > length(ranked)) {
            return(NA_character_)
        }
        ranked[idx]
    }

    vec <- vapply(per_state, pick, character(1))
    data.frame(hospital=vec, state=names(vec))
}

Testing:

head(rankall("heart attack", 20), 10)
##                               hospital state
## AK                                <NA>    AK
## AL      D W MCMILLAN MEMORIAL HOSPITAL    AL
## AR   ARKANSAS METHODIST MEDICAL CENTER    AR
## AZ JOHN C LINCOLN DEER VALLEY HOSPITAL    AZ
## CA               SHERMAN OAKS HOSPITAL    CA
## CO            SKY RIDGE MEDICAL CENTER    CO
## CT             MIDSTATE MEDICAL CENTER    CT
## DC                                <NA>    DC
## DE                                <NA>    DE
## FL      SOUTH FLORIDA BAPTIST HOSPITAL    FL