OBJECTIVE
Report the quality of care at more than 4,000 Medicare-certified hospitals in the US. This data set covers essentially all major hospitals in the United States.
DESCRIPTION
To make the report we will use three data files:
• outcome-of-care-measures.csv: Contains information on 30-day mortality and readmission rates for heart attacks, heart failure, and pneumonia for more than 4,000 hospitals.
• hospital-data.csv: Contains information about each hospital.
• Hospital_Revised_Flatfiles.pdf: Describes the variables used in the *.csv files.
TASKS
# We will use the RStudio IDE.
# setwd("create a work folder")

# Download and extract the assignment data only when the archive is not
# already present in the working directory.
file.zip <- "rprog_data_ProgAssignment3-data.zip"
if (!file.exists(file.zip)) {
  url.file <- "https://d396qusza40orc.cloudfront.net/rprog%2Fdata%2FProgAssignment3-data.zip"
  # The archive is binary: request a binary-mode transfer explicitly so the
  # download is byte-exact on every platform.
  download.file(url.file, file.zip, mode = "wb")
  unzip(file.zip)
}
# Show the files now available in the working directory.
list.files()
## [1] "best.R"
## [2] "hospital-data.csv"
## [3] "Hospital_Revised_Flatfiles.pdf"
## [4] "magz.png"
## [5] "outcome-of-care-measures.csv"
## [6] "rankall.R"
## [7] "rankhospital.R"
## [8] "README.md"
## [9] "rprog_data_ProgAssignment3-data.zip"
## [10] "r_programming_01_c.html"
## [11] "r_programming_01_c.Rmd"
## [12] "rsconnect"
# Load the outcome file into the variable `data`. Every column is read as
# character, so non-numeric entries such as "Not Available" are kept as text
# and must be converted explicitly before any numeric comparison.
data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
# Number of columns
ncol(data)
## [1] 46
# Number of rows
nrow(data)
## [1] 4706
# Show the columns we are going to use: 2 (Hospital.Name), 7 (State) and
# 11 (30-day death rate from heart attack)
head(data[,c(2,7,11)])
## Hospital.Name State
## 1 SOUTHEAST ALABAMA MEDICAL CENTER AL
## 2 MARSHALL MEDICAL CENTER SOUTH AL
## 3 ELIZA COFFEE MEMORIAL HOSPITAL AL
## 4 MIZELL MEMORIAL HOSPITAL AL
## 5 CRENSHAW COMMUNITY HOSPITAL AL
## 6 MARSHALL MEDICAL CENTER NORTH AL
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 14.3
## 2 18.5
## 3 18.1
## 4 Not Available
## 5 Not Available
## 6 Not Available
3.1. Plot the 30-day mortality rates for heart attack
The following code returns, for each state, the hospital with the lowest 30-day mortality rate for heart attack (the best hospital in that state for this outcome).
# Best hospital per state for 30-day heart attack mortality.
#
# Reads the data frame `data` from the enclosing environment (the outcome
# file loaded earlier in this script). Takes no arguments.
#
# Returns a data frame with one row per state, columns `hospital` and
# `state`, and the state abbreviations as row names. A state with no usable
# rate gets NA as its hospital.
rankall <- function() {
  # Column 11 = Hospital 30-Day Death (Mortality) Rates from Heart Attack
  rate_col <- 11
  # Every distinct state code (column 7), in sorted order
  states <- sort(unique(data[, 7]))

  # Name of the top-ranked hospital within a single state
  top_hospital <- function(st) {
    rows <- data[data$State == st, ]
    # Non-numeric entries turn into NA; the coercion warning is silenced
    rates <- suppressWarnings(as.numeric(rows[, rate_col]))
    # Lowest rate first, ties broken by hospital name; NA rates are dropped
    ranking <- order(rates, rows[["Hospital.Name"]],
                     decreasing = FALSE, na.last = NA)
    rows[ranking, ][1, "Hospital.Name"]
  }

  winners <- vapply(states, top_hospital, character(1), USE.NAMES = FALSE)

  # One row per state, keyed by the state abbreviation
  data.frame(hospital = winners, state = states, row.names = states)
}
# Preview the first six states of the per-state ranking
head(x = rankall(), n = 6L)
## hospital state
## AK PROVIDENCE ALASKA MEDICAL CENTER AK
## AL CRESTWOOD MEDICAL CENTER AL
## AR ARKANSAS HEART HOSPITAL AR
## AZ MAYO CLINIC HOSPITAL AZ
## CA GLENDALE ADVENTIST MEDICAL CENTER CA
## CO ST MARYS HOSPITAL AND MEDICAL CENTER CO
3.2 Finding the best hospital in a state
Write a function called best that takes two arguments: the 2-character abbreviated name of a state and an outcome name.
Example: best("NY", "heart attack")
# Find the best hospital in a state for a given outcome.
#
# Arguments:
#   state   - 2-character state abbreviation, e.g. "NY".
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#
# Returns a character vector of length one: the name of the hospital in
# `state` with the lowest 30-day mortality rate for `outcome`. Ties are
# broken alphabetically by hospital name. NA is returned when no hospital in
# the state has a numeric rate for the outcome.
#
# Stops with "invalid outcome" or "invalid state" on bad input.
best <- function(state, outcome) {
  ## read file
  outcome_data <- read.csv("outcome-of-care-measures.csv",
                           colClasses = "character")

  ## Column positions of the 30-day death (mortality) rate per outcome
  outcome_columns <- c(
    "heart attack"  = 11, # Rates from Heart Attack
    "heart failure" = 17, # Rates from Heart Failure
    "pneumonia"     = 23  # Rates from Pneumonia
  )

  ## Check outcome
  if (!outcome %in% names(outcome_columns)) {
    stop("invalid outcome")
  }
  ## Check state against the State column (column 7)
  if (!state %in% outcome_data[, 7]) {
    stop("invalid state")
  }
  rate_column <- outcome_columns[[outcome]]

  in_state <- outcome_data[outcome_data$State == state, ]
  hospital_names <- in_state[["Hospital.Name"]]
  ## Entries such as "Not Available" become NA; the coercion warning
  ## ("NAs introduced by coercion") is expected and therefore silenced.
  rates <- suppressWarnings(as.numeric(in_state[, rate_column]))

  ## Lowest rate first, ties resolved by name; NA rates are dropped
  ranking <- order(rates, hospital_names, na.last = NA)
  hospital_names[ranking][1]
}
# Example: best hospital in NY for heart attack
best(state = "NY", outcome = "heart attack")
## [1] "NYU HOSPITALS CENTER"
3.3 Ranking hospitals by outcome in a state
Write a function called rankhospital that takes three arguments: the 2-character abbreviated name of a state (state), an outcome (outcome), and the ranking of a hospital in that state for that outcome (num).
The function reads the outcome-of-care-measures.csv file and returns a character vector with the name of the hospital that has the ranking specified by the num argument.
For example, the call rankhospital("MD", "heart failure", 5)
# Return the hospital holding a given rank in a state for an outcome.
#
# Arguments:
#   state   - 2-character state abbreviation, e.g. "MD".
#   outcome - one of "heart attack", "heart failure" or "pneumonia".
#   num     - "best", "worst", or a positive number giving the rank
#             (1 = lowest 30-day mortality rate).
#
# Returns a character vector of length one with the hospital name. Hospitals
# are ranked by ascending rate, ties broken alphabetically by name, and
# hospitals without a numeric rate are excluded. NA is returned when `num`
# is not a usable rank (larger than the number of ranked hospitals, below 1,
# or not numeric) or when no hospital in the state has a numeric rate.
#
# Stops with "invalid outcome" or "invalid state" on bad input.
rankhospital <- function(state, outcome, num = "best") {
  ## read file
  outcome_data <- read.csv("outcome-of-care-measures.csv",
                           colClasses = "character")

  ## Column positions of the 30-day death (mortality) rate per outcome
  outcome_columns <- c(
    "heart attack"  = 11, # Rates from Heart Attack
    "heart failure" = 17, # Rates from Heart Failure
    "pneumonia"     = 23  # Rates from Pneumonia
  )

  ## Check outcome
  if (!outcome %in% names(outcome_columns)) {
    stop("invalid outcome")
  }
  ## Check state against the State column (column 7)
  if (!state %in% outcome_data[, 7]) {
    stop("invalid state")
  }
  rate_column <- outcome_columns[[outcome]]

  in_state <- outcome_data[outcome_data$State == state, ]
  hospital_names <- in_state[["Hospital.Name"]]
  ## Entries such as "Not Available" become NA; the coercion warning
  ## ("NAs introduced by coercion") is expected and therefore silenced.
  rates <- suppressWarnings(as.numeric(in_state[, rate_column]))

  ## The tie-break key must be a plain character vector (`[[`), not a
  ## one-column data frame (`[`), for order() to sort it by name.
  ranking <- order(rates, hospital_names, decreasing = FALSE, na.last = NA)
  ranked_names <- hospital_names[ranking]

  ## Translate the symbolic ranks into positions
  if (identical(num, "best")) {
    position <- 1
  } else if (identical(num, "worst")) {
    position <- length(ranked_names)
  } else {
    position <- suppressWarnings(as.numeric(num))
  }

  ## Any rank outside 1..length(ranked_names) has no hospital
  if (length(position) != 1 || is.na(position) ||
      position < 1 || position > length(ranked_names)) {
    return(NA_character_)
  }
  ranked_names[[as.integer(position)]]
}
# Example: the 5th-ranked hospital in MD for heart failure
rankhospital(state = "MD", outcome = "heart failure", num = 5)
## [1] "SAINT AGNES HOSPITAL"
Conclusion
We processed the data to rank hospitals within each state by 30-day mortality and to identify the best hospital in every state for heart attack treatment.