The data come from the Hospital Compare web site (http://hospitalcompare.hhs.gov) run by the U.S. Department of Health and Human Services. The purpose of the web site is to provide data and information about the quality of care at over 4,000 Medicare-certified hospitals in the U.S. This dataset essentially covers all major U.S. hospitals. This dataset is used for a variety of purposes, including determining whether hospitals should be fined for not providing high quality care to patients (see http://goo.gl/jAXFX for some background on this particular topic).
## Fetch the assignment archive into the working directory and extract it.
download.file(
  url = "https://d396qusza40orc.cloudfront.net/rprog%2Fdata%2FProgAssignment3-data.zip",
  destfile = "prog3data.zip"
)
unzip(zipfile = "prog3data.zip")
A simple histogram of the 30-day death rates from heart attack
## Load every column as text, then convert column 11 to numeric so it can be
## plotted; entries that are not numbers become NA (hence the warning below).
outcome <- read.csv(file = "outcome-of-care-measures.csv", colClasses = "character")
outcome[[11]] <- as.numeric(outcome[[11]])
## Warning: NAs introduced by coercion
## Histogram of column 11, labelled with that column's name and no title.
hist(outcome[[11]], main = "", xlab = names(outcome)[11])
Function called best takes two arguments: the 2-character abbreviated name of a state and an outcome name. The function reads the outcome-of-care-measures.csv file and returns a character vector with the name of the hospital that has the best (i.e. lowest) 30-day mortality for the specified outcome in that state. The outcome must be one of “heart attack”, “heart failure”, or “pneumonia”.
## Find the hospital with the lowest 30-day mortality rate in a state.
##
## Arguments:
##   state   - 2-character state abbreviation; must occur in the State column
##             of outcome-of-care-measures.csv.
##   outcome - one of "heart attack", "heart failure" or "pneumonia".
##
## Returns a length-one character vector holding the hospital name. Ties on
## the lowest rate are broken alphabetically. NA_character_ is returned when
## no hospital in the state has a numeric rate for the outcome.
##
## Stops with "invalid state" or "invalid outcome" on bad arguments.
best <- function(state, outcome) {
  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## Check that state and outcome are valid
  if (!(state %in% data[, "State"])) {
    stop("invalid state")
  }
  ## Column positions of the 30-day mortality rate for each outcome.
  ## Pneumonia is column 23, the same column rankhospital() and rankall()
  ## rank on.
  rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
  if (!(outcome %in% names(rate_columns))) {
    stop("invalid outcome")
  }

  in_state <- data[, "State"] == state
  hospitals <- data$Hospital.Name[in_state]
  ## Rates are read as text; convert them so that min() and == compare
  ## numbers rather than strings ("10.1" sorts before "9.5" as text).
  ## Non-numeric entries become NA, and that expected warning is silenced.
  rate <- suppressWarnings(as.numeric(data[in_state, rate_columns[[outcome]]]))

  has_rate <- !is.na(rate)
  if (!any(has_rate)) {
    return(NA_character_)
  }
  lowest <- min(rate[has_rate])
  candidates <- unique(hospitals[has_rate & rate == lowest])

  ## Return hospital name in that state with lowest 30-day death
  ## rate
  sort(candidates)[1]
}
Testing:
## Example call; the line after it is the output recorded with this document.
## NOTE(review): re-run to confirm the recorded output still matches the
## current best() definition.
best(state = "MD", outcome = "pneumonia")
## [1] "PRINCE GEORGES HOSPITAL CENTER"
Function called rankhospital takes three arguments: the 2-character abbreviated name of a state (state), an outcome (outcome), and the ranking of a hospital in that state for that outcome (num). The function reads the outcome-of-care-measures.csv file and returns a character vector with the name of the hospital that has the ranking specified by the num argument. The num argument can take values “best”, “worst”, or an integer indicating the ranking.
## Return the hospital holding a given rank for an outcome within a state.
##
## Hospitals are ranked by ascending 30-day mortality rate, with ties broken
## alphabetically by hospital name. Hospitals without a numeric rate are
## excluded from the ranking.
##
## Arguments:
##   state   - 2-character state abbreviation; must occur in the State column
##             of outcome-of-care-measures.csv.
##   outcome - one of "heart attack", "heart failure" or "pneumonia".
##   num     - "best", "worst", or a number giving the rank to return.
##
## Returns a length-one character vector with the hospital name, or
## NA_character_ when the requested rank does not exist among the hospitals
## that have a rate.
##
## Stops with "invalid state", "invalid outcome" or "num is not valid".
rankhospital <- function(state, outcome, num) {
  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## Check that state and outcome are valid
  if (!(state %in% data[, "State"])) {
    stop("invalid state")
  }
  rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
  if (!(outcome %in% names(rate_columns))) {
    stop("invalid outcome")
  }

  in_state <- data[, "State"] == state
  hospitals <- data$Hospital.Name[in_state]
  ## Non-numeric entries become NA. Only this coercion warning is silenced,
  ## rather than switching warnings off for the whole session via options().
  rate <- suppressWarnings(as.numeric(data[in_state, rate_columns[[outcome]]]))

  ## Drop hospitals without a rate so they can never occupy a rank
  ## (order() would otherwise just park them at the end of the list).
  has_rate <- !is.na(rate)
  hospitals <- hospitals[has_rate]
  rate <- rate[has_rate]
  ranked <- hospitals[order(rate, hospitals)]

  if (is.numeric(num)) {
    position <- num
  } else if (num == "best") {
    position <- 1
  } else if (num == "worst") {
    ## Last entry of the ascending ranking: highest rate, and among tied
    ## hospitals the alphabetically last one.
    position <- length(ranked)
  } else {
    stop("num is not valid")
  }

  if (is.na(position) || position < 1 || position > length(ranked)) {
    return(NA_character_)
  }
  ranked[position]
}
Testing:
## Example call; the line after it is the output recorded with this document.
rankhospital(state = "MD", outcome = "heart attack", num = "worst")
## [1] "HARFORD MEMORIAL HOSPITAL"
Function called rankall takes two arguments: an outcome name (outcome) and a hospital ranking (num). The function reads the outcome-of-care-measures.csv file and returns a 2-column data frame containing the hospital in each state that has the ranking specified in num.
## For every state, return the hospital holding a given rank for an outcome.
##
## Within each state, hospitals are ranked by ascending 30-day mortality
## rate, with ties broken alphabetically by hospital name. Hospitals without
## a numeric rate are excluded from the ranking.
##
## Arguments:
##   outcome - one of "heart attack", "heart failure" or "pneumonia".
##   num     - "best" (default), "worst", or a number giving the rank.
##
## Returns a data frame with one row per state (row names are the state
## abbreviations) and two columns: hospital and state. hospital is NA for
## states where the requested rank does not exist.
##
## Stops with "invalid outcome" or "num is not valid".
rankall <- function(outcome, num = "best") {
  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  rate_columns <- c("heart attack" = 11, "heart failure" = 17, "pneumonia" = 23)
  if (!(outcome %in% names(rate_columns))) {
    stop("invalid outcome")
  }
  if (!is.numeric(num) && !(num %in% c("best", "worst"))) {
    stop("num is not valid")
  }

  ## Non-numeric entries become NA. Only this coercion warning is silenced,
  ## rather than switching warnings off for the whole session via options().
  rate <- suppressWarnings(as.numeric(data[, rate_columns[[outcome]]]))
  by_state <- split(
    data.frame(hospital = data[, 2], rate = rate, stringsAsFactors = FALSE),
    data$State
  )

  ## Pick the requested rank from one state's hospitals.
  pick_ranked <- function(state_rows) {
    ## Drop hospitals without a rate so they can never occupy a rank.
    state_rows <- state_rows[!is.na(state_rows$rate), , drop = FALSE]
    ranked <- state_rows$hospital[order(state_rows$rate, state_rows$hospital)]
    if (is.numeric(num)) {
      position <- num
    } else if (num == "best") {
      position <- 1
    } else {
      ## "worst": last entry of the ascending ranking.
      position <- length(ranked)
    }
    if (is.na(position) || position < 1 || position > length(ranked)) {
      return(NA_character_)
    }
    ranked[position]
  }

  ## vapply guarantees a character vector named by state.
  vec <- vapply(by_state, pick_ranked, character(1))
  data.frame(hospital = vec, state = names(vec))
}
Testing:
## Example call: first 10 rows of the rank-20 result for heart attack; the
## lines after it are the output recorded with this document.
head(rankall(outcome = "heart attack", num = 20), n = 10)
## hospital state
## AK <NA> AK
## AL D W MCMILLAN MEMORIAL HOSPITAL AL
## AR ARKANSAS METHODIST MEDICAL CENTER AR
## AZ JOHN C LINCOLN DEER VALLEY HOSPITAL AZ
## CA SHERMAN OAKS HOSPITAL CA
## CO SKY RIDGE MEDICAL CENTER CO
## CT MIDSTATE MEDICAL CENTER CT
## DC <NA> DC
## DE <NA> DE
## FL SOUTH FLORIDA BAPTIST HOSPITAL FL