Loading required packages
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.2 v dplyr 1.0.6
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(datasets)
Data summary
# Load the hospital outcome measures, reading every column as text, and
# preview the first six rows.
outcome <- read.csv(
  file = "outcome-of-care-measures.csv",
  colClasses = "character"
)
head(x = outcome, n = 6L)
## Provider.Number Hospital.Name Address.1
## 1 010001 SOUTHEAST ALABAMA MEDICAL CENTER 1108 ROSS CLARK CIRCLE
## 2 010005 MARSHALL MEDICAL CENTER SOUTH 2505 U S HIGHWAY 431 NORTH
## 3 010006 ELIZA COFFEE MEMORIAL HOSPITAL 205 MARENGO STREET
## 4 010007 MIZELL MEMORIAL HOSPITAL 702 N MAIN ST
## 5 010008 CRENSHAW COMMUNITY HOSPITAL 101 HOSPITAL CIRCLE
## 6 010010 MARSHALL MEDICAL CENTER NORTH 8000 ALABAMA HIGHWAY 69
## Address.2 Address.3 City State ZIP.Code County.Name Phone.Number
## 1 DOTHAN AL 36301 HOUSTON 3347938701
## 2 BOAZ AL 35957 MARSHALL 2565938310
## 3 FLORENCE AL 35631 LAUDERDALE 2567688400
## 4 OPP AL 36467 COVINGTON 3344933541
## 5 LUVERNE AL 36049 CRENSHAW 3343353374
## 6 GUNTERSVILLE AL 35976 MARSHALL 2565718000
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 14.3
## 2 18.5
## 3 18.1
## 4 Not Available
## 5 Not Available
## 6 Not Available
## Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 No Different than U.S. National Rate
## 2 No Different than U.S. National Rate
## 3 No Different than U.S. National Rate
## 4 Number of Cases Too Small
## 5 Number of Cases Too Small
## 6 Number of Cases Too Small
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 12.1
## 2 14.7
## 3 14.8
## 4 Not Available
## 5 Not Available
## 6 Not Available
## Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 17.0
## 2 23.0
## 3 21.8
## 4 Not Available
## 5 Not Available
## 6 Not Available
## Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 666
## 2 44
## 3 329
## 4 14
## 5 9
## 6 22
## Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1
## 2
## 3
## 4 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## 5 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## 6 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 11.4
## 2 15.2
## 3 11.3
## 4 13.6
## 5 13.8
## 6 12.5
## Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 No Different than U.S. National Rate
## 2 Worse than U.S. National Rate
## 3 No Different than U.S. National Rate
## 4 No Different than U.S. National Rate
## 5 No Different than U.S. National Rate
## 6 No Different than U.S. National Rate
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 9.5
## 2 12.2
## 3 9.1
## 4 10.0
## 5 9.9
## 6 9.9
## Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 13.7
## 2 18.8
## 3 13.9
## 4 18.2
## 5 18.7
## 6 15.6
## Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 741
## 2 234
## 3 523
## 4 113
## 5 53
## 6 163
## Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1
## 2
## 3
## 4
## 5
## 6
## Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 10.9
## 2 13.9
## 3 13.4
## 4 14.9
## 5 15.8
## 6 8.7
## Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 No Different than U.S. National Rate
## 2 No Different than U.S. National Rate
## 3 No Different than U.S. National Rate
## 4 No Different than U.S. National Rate
## 5 No Different than U.S. National Rate
## 6 Better than U.S. National Rate
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 8.6
## 2 11.3
## 3 11.2
## 4 11.6
## 5 11.4
## 6 6.8
## Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 13.7
## 2 17.0
## 3 15.8
## 4 19.0
## 5 21.5
## 6 11.0
## Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 371
## 2 372
## 3 836
## 4 239
## 5 61
## 6 315
## Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1
## 2
## 3
## 4
## 5
## 6
## Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 19.0
## 2 Not Available
## 3 17.8
## 4 Not Available
## 5 Not Available
## 6 Not Available
## Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 No Different than U.S. National Rate
## 2 Number of Cases Too Small
## 3 No Different than U.S. National Rate
## 4 Number of Cases Too Small
## 5 Number of Cases Too Small
## 6 Number of Cases Too Small
## Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 16.6
## 2 Not Available
## 3 14.9
## 4 Not Available
## 5 Not Available
## 6 Not Available
## Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 21.7
## 2 Not Available
## 3 21.5
## 4 Not Available
## 5 Not Available
## 6 Not Available
## Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 728
## 2 21
## 3 342
## 4 1
## 5 4
## 6 13
## Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1
## 2 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## 3
## 4 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## 5 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## 6 number of cases is too small (fewer than 25) to reliably tell how well the hospital is performing
## Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 23.7
## 2 22.5
## 3 19.8
## 4 27.1
## 5 24.7
## 6 23.9
## Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 No Different than U.S. National Rate
## 2 No Different than U.S. National Rate
## 3 Better than U.S. National Rate
## 4 No Different than U.S. National Rate
## 5 No Different than U.S. National Rate
## 6 No Different than U.S. National Rate
## Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 21.3
## 2 19.2
## 3 17.2
## 4 22.4
## 5 19.9
## 6 20.1
## Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 26.5
## 2 26.1
## 3 22.9
## 4 31.9
## 5 30.2
## 6 28.2
## Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 891
## 2 264
## 3 614
## 4 135
## 5 59
## 6 173
## Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1
## 2
## 3
## 4
## 5
## 6
## Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 17.1
## 2 17.6
## 3 16.9
## 4 19.4
## 5 18.0
## 6 18.7
## Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 No Different than U.S. National Rate
## 2 No Different than U.S. National Rate
## 3 No Different than U.S. National Rate
## 4 No Different than U.S. National Rate
## 5 No Different than U.S. National Rate
## 6 No Different than U.S. National Rate
## Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 14.4
## 2 15.0
## 3 14.7
## 4 15.9
## 5 14.0
## 6 15.7
## Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 20.4
## 2 20.6
## 3 19.5
## 4 23.2
## 5 22.8
## 6 22.2
## Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 400
## 2 374
## 3 842
## 4 254
## 5 56
## 6 326
## Footnote...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1
## 2
## 3
## 4
## 5
## 6
tibble(outcome)
## # A tibble: 4,706 x 46
## Provider.Number Hospital.Name Address.1 Address.2 Address.3 City State
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 010001 SOUTHEAST ALABA~ 1108 ROSS C~ "" "" DOTH~ AL
## 2 010005 MARSHALL MEDICA~ 2505 U S HI~ "" "" BOAZ AL
## 3 010006 ELIZA COFFEE ME~ 205 MARENGO~ "" "" FLOR~ AL
## 4 010007 MIZELL MEMORIAL~ 702 N MAIN ~ "" "" OPP AL
## 5 010008 CRENSHAW COMMUN~ 101 HOSPITA~ "" "" LUVE~ AL
## 6 010010 MARSHALL MEDICA~ 8000 ALABAM~ "" "" GUNT~ AL
## 7 010011 ST VINCENT'S EA~ 50 MEDICAL ~ "" "" BIRM~ AL
## 8 010012 DEKALB REGIONAL~ 200 MED CEN~ "" "" FORT~ AL
## 9 010016 SHELBY BAPTIST ~ 1000 FIRST ~ "" "" ALAB~ AL
## 10 010018 CALLAHAN EYE FO~ 1720 UNIVER~ "" "" BIRM~ AL
## # ... with 4,696 more rows, and 39 more variables: ZIP.Code <chr>,
## # County.Name <chr>, Phone.Number <chr>,
## # Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack <chr>,
## # Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack <chr>,
## # Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack <chr>,
## # Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack <chr>,
## # Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack <chr>,
## # Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack <chr>,
## # Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure <chr>,
## # Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure <chr>,
## # Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure <chr>,
## # Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure <chr>,
## # Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure <chr>,
## # Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure <chr>,
## # Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia <chr>,
## # Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia <chr>,
## # Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia <chr>,
## # Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia <chr>,
## # Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia <chr>,
## # Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia <chr>,
## # Hospital.30.Day.Readmission.Rates.from.Heart.Attack <chr>,
## # Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack <chr>,
## # Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack <chr>,
## # Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack <chr>,
## # Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Attack <chr>,
## # Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Attack <chr>,
## # Hospital.30.Day.Readmission.Rates.from.Heart.Failure <chr>,
## # Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure <chr>,
## # Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure <chr>,
## # Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure <chr>,
## # Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Failure <chr>,
## # Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Failure <chr>,
## # Hospital.30.Day.Readmission.Rates.from.Pneumonia <chr>,
## # Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Pneumonia <chr>,
## # Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia <chr>,
## # Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia <chr>,
## # Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Pneumonia <chr>,
## # Footnote...Hospital.30.Day.Readmission.Rates.from.Pneumonia <chr>
str(outcome[,c(2,7,11,17,19,23)])
## 'data.frame': 4706 obs. of 6 variables:
## $ Hospital.Name : chr "SOUTHEAST ALABAMA MEDICAL CENTER" "MARSHALL MEDICAL CENTER SOUTH" "ELIZA COFFEE MEMORIAL HOSPITAL" "MIZELL MEMORIAL HOSPITAL" ...
## $ State : chr "AL" "AL" "AL" "AL" ...
## $ Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack : chr "14.3" "18.5" "18.1" "Not Available" ...
## $ Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure : chr "11.4" "15.2" "11.3" "13.6" ...
## $ Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure: chr "9.5" "12.2" "9.1" "10.0" ...
## $ Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia : chr "10.9" "13.9" "13.4" "14.9" ...
summary(outcome[,c(2,7, 11,17,19,23)])
## Hospital.Name State
## Length:4706 Length:4706
## Class :character Class :character
## Mode :character Mode :character
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## Length:4706
## Class :character
## Mode :character
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## Length:4706
## Class :character
## Mode :character
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## Length:4706
## Class :character
## Mode :character
## Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## Length:4706
## Class :character
## Mode :character
30-day mortality rates plot for heart attack
# Column 11 holds the 30-day heart attack mortality rate as text. Convert it
# to numeric; entries that are not numbers (e.g. "Not Available") become NA,
# which is what produces the coercion warning recorded below.
outcome[, 11] <- as.numeric(outcome[, 11])
## Warning: NAs introduced by coercion
# Histogram of the converted rates.
hist(outcome[, 11], col = "aquamarine4")
Finding the best hospital in the state
#' Find the hospital with the lowest 30-day mortality rate in a state
#'
#' Reads "outcome-of-care-measures.csv" from the working directory and uses
#' columns 2 (hospital name), 7 (state) and 11, 17, 23 (30-day mortality
#' rates for heart attack, heart failure and pneumonia).
#'
#' @param state State abbreviation as it appears in the file (e.g. "AL").
#' @param outcome One of "heart attack", "heart failure" or "pneumonia".
#' @return The name of the hospital in `state` with the lowest rate for
#'   `outcome`. Ties are broken alphabetically by hospital name. Returns
#'   `character(0)` when no hospital in the state reports a rate.
#' @details Stops with "invalid outcome" or "invalid state" when the
#'   corresponding argument is not recognised.
best <- function(state, outcome) {
  ## Validate the outcome string
  outcomes <- c("heart attack", "heart failure", "pneumonia")
  if (length(outcome) != 1 || !(outcome %in% outcomes)) {
    stop("invalid outcome")
  }

  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## Filter and simplify the column names
  data <- data[c(2, 7, 11, 17, 23)]
  names(data) <- c("name", "state", outcomes)

  ## Validate the state string
  if (length(state) != 1 || !(state %in% data$state)) {
    stop("invalid state")
  }

  ## Grab only rows with our state value
  in_state <- data[data$state %in% state, ]

  ## Turn the rate into numbers; "Not Available" marks a missing rate and is
  ## mapped to NA explicitly so it does not raise a coercion warning
  rate <- in_state[[outcome]]
  rate[rate %in% "Not Available"] <- NA
  rate <- as.numeric(rate)

  ## Drop hospitals without a rate
  has_rate <- !is.na(rate)
  in_state <- in_state[has_rate, ]
  rate <- rate[has_rate]

  ## Rank by rate, breaking ties by hospital name so the result does not
  ## depend on the row order of the file
  ranking <- order(rate, in_state$name)

  ## Return hospital name in that state with lowest 30-day death rate
  in_state$name[head(ranking, 1)]
}
best("SC", "heart attack")
## [1] "MUSC MEDICAL CENTER"
best("NY", "pneumonia")
## [1] "MAIMONIDES MEDICAL CENTER"
best("AK", "pneumonia")
## [1] "YUKON KUSKOKWIM DELTA REG HOSPITAL"
#' Find the hospital holding a given rank for an outcome within a state
#'
#' Reads "outcome-of-care-measures.csv" from the working directory and uses
#' columns 2 (hospital name), 7 (state) and 11, 17, 23 (30-day mortality
#' rates for heart attack, heart failure and pneumonia). Hospitals are
#' ranked by ascending rate, with ties broken alphabetically by name.
#'
#' @param state State abbreviation as it appears in the file (e.g. "AL").
#' @param outcome One of "heart attack", "heart failure" or "pneumonia".
#' @param num Either "best" (first in the ranking), "worst" (last in the
#'   ranking) or a positive whole number giving the rank.
#' @return The hospital name at the requested rank; `NA` when a numeric
#'   `num` exceeds the number of ranked hospitals; `character(0)` when
#'   "best"/"worst" is requested and no hospital in the state reports a rate.
#' @details Stops with "invalid outcome", "invalid state" or "invalid num"
#'   when the corresponding argument is not acceptable.
rankhospital <- function(state, outcome, num) {
  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
  outcomes <- c("heart attack", "heart failure", "pneumonia")
  data <- data[c(2, 7, 11, 17, 23)]
  names(data) <- c("name", "state", outcomes)

  ## Validate the outcome string
  if (length(outcome) != 1 || !(outcome %in% outcomes)) {
    stop("invalid outcome")
  }

  ## Validate the state string
  if (length(state) != 1 || !(state %in% data$state)) {
    stop("invalid state")
  }

  ## Validate the num value: a keyword or a positive whole number. The type
  ## is checked before any arithmetic so an unknown string is reported as
  ## "invalid num" instead of failing inside %%
  num_ok <- length(num) == 1 && !is.na(num) &&
    (num %in% c("best", "worst") ||
       (is.numeric(num) && is.finite(num) && num >= 1 && num %% 1 == 0))
  if (!num_ok) {
    stop("invalid num")
  }

  ## Grab only rows with our state value
  in_state <- data[data$state %in% state, ]

  ## Turn the rate into numbers; "Not Available" marks a missing rate
  rate <- in_state[[outcome]]
  rate[rate %in% "Not Available"] <- NA
  rate <- as.numeric(rate)
  has_rate <- !is.na(rate)
  in_state <- in_state[has_rate, ]
  rate <- rate[has_rate]

  ## Order the data: by rate, then by hospital name for equal rates
  ranking <- order(rate, in_state$name)

  ## Process the num argument. "worst" is the last entry of the ranking, so
  ## hospitals tied on the highest rate are resolved by the same name-based
  ## tie-break as every other rank
  pick <- if (num == "best") {
    head(ranking, 1)
  } else if (num == "worst") {
    tail(ranking, 1)
  } else {
    ranking[num]
  }

  ## Return the hospital name at the requested rank
  in_state$name[pick]
}
rankhospital("NC", "heart attack", "worst")
## [1] "WAYNE MEMORIAL HOSPITAL"
rankhospital("WA", "heart attack", 7)
## [1] "YAKIMA VALLEY MEMORIAL HOSPITAL"
rankhospital("TX", "pneumonia", 10)
## [1] "SETON SMITHVILLE REGIONAL HOSPITAL"
rankhospital("NY", "heart attack", 7)
## [1] "BELLEVUE HOSPITAL CENTER"
#' Find, for every state, the hospital holding a given rank for an outcome
#'
#' Reads "outcome-of-care-measures.csv" from the working directory and uses
#' columns 2 (hospital name), 7 (state) and 11, 17, 23 (30-day mortality
#' rates for heart attack, heart failure and pneumonia). Within each state,
#' hospitals are ranked by ascending rate, with ties broken alphabetically
#' by name.
#'
#' @param outcome One of "heart attack", "heart failure" or "pneumonia".
#' @param num Either "best" (first in the ranking), "worst" (last in the
#'   ranking) or a positive whole number giving the rank. Defaults to "best".
#' @return A data frame with columns `hospital` and `state`, one row per
#'   state found in the file, sorted by state. `hospital` is `NA` for states
#'   that have no hospital at the requested rank, including states where no
#'   hospital reports a rate for `outcome`.
#' @details Stops with "invalid outcome" or "invalid num" when the
#'   corresponding argument is not acceptable.
rankall <- function(outcome, num = "best") {
  ## Read outcome data
  data <- read.csv("outcome-of-care-measures.csv", colClasses = "character")
  outcomes <- c("heart attack", "heart failure", "pneumonia")
  data <- data[c(2, 7, 11, 17, 23)]
  names(data) <- c("name", "state", outcomes)

  ## Validate the outcome string
  if (length(outcome) != 1 || !(outcome %in% outcomes)) {
    stop("invalid outcome")
  }

  ## Validate the num value: a keyword or a positive whole number. The type
  ## is checked before any arithmetic so an unknown string is reported as
  ## "invalid num" instead of failing inside %%
  num_ok <- length(num) == 1 && !is.na(num) &&
    (num %in% c("best", "worst") ||
       (is.numeric(num) && is.finite(num) && num >= 1 && num %% 1 == 0))
  if (!num_ok) {
    stop("invalid num")
  }

  ## Remember every state before filtering, so states without any reported
  ## rate still get a row (with NA) in the result
  states <- sort(unique(data$state))

  ## Grab only rows with data in our outcome; "Not Available" marks a
  ## missing rate
  rate <- data[[outcome]]
  rate[rate %in% "Not Available"] <- NA
  rate <- as.numeric(rate)
  has_rate <- !is.na(rate)
  data <- data[has_rate, ]
  rate <- rate[has_rate]

  ## Order the data once: by state, then rate, then hospital name
  ranked <- data[order(data$state, rate, data$name), ]

  ## Hospital names per state, already in rank order
  by_state <- split(ranked$name, factor(ranked$state, levels = states))

  ## Helper function to process the num argument for one state's ranking
  pick_rank <- function(hospitals) {
    if (length(hospitals) == 0) {
      return(NA_character_)
    }
    if (num == "best") {
      hospitals[1]
    } else if (num == "worst") {
      ## Last entry of the ranking, not the first of the tied maxima
      hospitals[length(hospitals)]
    } else {
      ## Indexing past the end yields NA
      hospitals[num]
    }
  }

  ## For each state, find the hospital of the given rank
  hospital <- vapply(by_state, pick_rank, character(1), USE.NAMES = FALSE)

  ## Return a data frame with the hospital names and the (abbreviated) state name
  data.frame(hospital = hospital, state = states, stringsAsFactors = FALSE)
}
r <- rankall("pneumonia", "worst")
as.character(subset(r, state == "NJ")$hospital)
## [1] "BERGEN REGIONAL MEDICAL CENTER"
r <- rankall("heart failure", 10)
as.character(subset(r, state == "NV")$hospital)
## [1] "RENOWN SOUTH MEADOWS MEDICAL CENTER"
*End