# Load the outcome data; every column is read as character
outcome <- read.csv(
  file = "outcome-of-care-measures.csv",
  colClasses = "character"
)
# Print the first record (its console output is reproduced below)
head(outcome, n = 1L)
## Provider.Number Hospital.Name Address.1
## 1 010001 SOUTHEAST ALABAMA MEDICAL CENTER 1108 ROSS CLARK CIRCLE
## Address.2 Address.3 City State ZIP.Code County.Name Phone.Number
## 1 DOTHAN AL 36301 HOUSTON 3347938701
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 14.3
## Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 No Different than U.S. National Rate
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 12.1
## Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 17.0
## Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1 666
## Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack
## 1
## Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 11.4
## Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 No Different than U.S. National Rate
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 9.5
## Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 13.7
## Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1 741
## Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Heart.Failure
## 1
## Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 10.9
## Comparison.to.U.S..Rate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 No Different than U.S. National Rate
## Lower.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 8.6
## Upper.Mortality.Estimate...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 13.7
## Number.of.Patients...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1 371
## Footnote...Hospital.30.Day.Death..Mortality..Rates.from.Pneumonia
## 1
## Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 19.0
## Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 No Different than U.S. National Rate
## Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 16.6
## Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 21.7
## Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1 728
## Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Attack
## 1
## Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 23.7
## Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 No Different than U.S. National Rate
## Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 21.3
## Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 26.5
## Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1 891
## Footnote...Hospital.30.Day.Readmission.Rates.from.Heart.Failure
## 1
## Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 17.1
## Comparison.to.U.S..Rate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 No Different than U.S. National Rate
## Lower.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 14.4
## Upper.Readmission.Estimate...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 20.4
## Number.of.Patients...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1 400
## Footnote...Hospital.30.Day.Readmission.Rates.from.Pneumonia
## 1
# Show the name of column 11
colnames(outcome)[11]
## [1] "Hospital.30.Day.Death..Mortality..Rates.from.Heart.Attack"
# Show the first values of column 11; they are all character strings
head(outcome[[11]])
## [1] "14.3" "18.5" "18.1" "Not Available"
## [5] "Not Available" "Not Available"
# Convert column 11 to numeric in place
outcome[[11]] <- as.numeric(outcome[[11]])
## You may get a warning about NAs being introduced; that is okay
# The argument is spelled outcome[, 11] because hist() builds its default
# title from the expression it is given
hist(outcome[, 11])
library(stringr)
## Find the hospital with the lowest 30-day mortality rate in one state.
##
## Reads "outcome-of-care-measures.csv" from the working directory on every
## call.
##
## Arguments:
##   state        - state abbreviation, compared with the file's State column
##   outcome_item - "heart attack", "heart failure" or "pneumonia"
##
## Returns the hospital name as a character string. Hospitals with the same
## rate are ordered alphabetically by name. NA is returned when no hospital
## in the state reports a numeric rate. For an unknown outcome or state a
## message is emitted and NULL is returned.
best <- function(state = "TX", outcome_item = "heart attack") {
  ## Read outcome data
  outcome <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## read.csv() turns the spaces and punctuation of the CSV header into dots,
  ## so each outcome maps to one exact column name. Looking the column up by
  ## its full name avoids picking whichever column happens to match a regex.
  rate_suffix <- c(
    "heart attack" = "Heart.Attack",
    "heart failure" = "Heart.Failure",
    "pneumonia" = "Pneumonia"
  )

  ## Check that state and outcome are valid
  if (!outcome_item %in% names(rate_suffix)) {
    message("invalid outcome")
  } else if (!state %in% outcome$State) {
    message("invalid state")
  } else {
    rate_col <- paste0(
      "Hospital.30.Day.Death..Mortality..Rates.from.",
      rate_suffix[[outcome_item]]
    )
    if (!rate_col %in% names(outcome)) {
      stop("column not found in outcome data: ", rate_col, call. = FALSE)
    }

    ## which() drops rows whose State is NA
    in_state <- which(outcome$State == state)
    hospital <- outcome$Hospital.Name[in_state]

    ## Missing rates are stored as the text "Not Available"; map them to NA
    ## explicitly so as.numeric() only warns about genuinely malformed values
    raw_rate <- outcome[[rate_col]][in_state]
    raw_rate[raw_rate == "Not Available"] <- NA
    rate <- as.numeric(raw_rate)

    ## Keep only rows that have both a rate and a hospital name
    keep <- !is.na(rate) & !is.na(hospital)
    hospital <- hospital[keep]
    rate <- rate[keep]

    ## Return hospital name in that state with lowest 30-day death rate;
    ## the name is the secondary sort key, which resolves ties
    hospital[order(rate, hospital)][1]
  }
}
# data <- best("MD", "pneumonia")
# Example calls; the expected console output follows each call
best(state = "SC", outcome_item = "heart attack")
## [1] "MUSC MEDICAL CENTER"
best(state = "NY", outcome_item = "pneumonia")
## [1] "MAIMONIDES MEDICAL CENTER"
best(state = "AK", outcome_item = "pneumonia")
## [1] "YUKON KUSKOKWIM DELTA REG HOSPITAL"
## Return the hospital holding a given rank for 30-day mortality in a state.
##
## Reads "outcome-of-care-measures.csv" from the working directory on every
## call.
##
## Arguments:
##   state        - state abbreviation, compared with the file's State column
##   outcome_item - "heart attack", "heart failure" or "pneumonia"
##   num          - "best", "worst", or the rank to return (1 = lowest rate)
##
## Returns the hospital name as a character string. Hospitals with the same
## rate are ordered alphabetically by name. NA is returned when the state has
## fewer ranked hospitals than `num`, or no hospital with a numeric rate at
## all. For an unknown outcome or state a message is emitted and NULL is
## returned.
rankhospital <- function(state, outcome_item, num = "best") {
  ## Read outcome data
  outcome <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## read.csv() turns the spaces and punctuation of the CSV header into dots,
  ## so each outcome maps to one exact column name. Looking the column up by
  ## its full name avoids picking whichever column happens to match a regex.
  rate_suffix <- c(
    "heart attack" = "Heart.Attack",
    "heart failure" = "Heart.Failure",
    "pneumonia" = "Pneumonia"
  )

  ## Check that state and outcome are valid
  if (!outcome_item %in% names(rate_suffix)) {
    message("invalid outcome")
  } else if (!state %in% outcome$State) {
    message("invalid state")
  } else {
    rate_col <- paste0(
      "Hospital.30.Day.Death..Mortality..Rates.from.",
      rate_suffix[[outcome_item]]
    )
    if (!rate_col %in% names(outcome)) {
      stop("column not found in outcome data: ", rate_col, call. = FALSE)
    }

    ## which() drops rows whose State is NA
    in_state <- which(outcome$State == state)
    hospital <- outcome$Hospital.Name[in_state]

    ## Missing rates are stored as the text "Not Available"; map them to NA
    ## explicitly so as.numeric() only warns about genuinely malformed values
    raw_rate <- outcome[[rate_col]][in_state]
    raw_rate[raw_rate == "Not Available"] <- NA
    rate <- as.numeric(raw_rate)

    ## Keep only rows that have both a rate and a hospital name
    keep <- !is.na(rate) & !is.na(hospital)
    hospital <- hospital[keep]
    rate <- rate[keep]

    ## Hospitals from lowest to highest rate; the name resolves ties
    ranked <- hospital[order(rate, hospital)]

    if (length(ranked) == 0L) {
      ## Nothing to rank: report NA instead of failing
      NA_character_
    } else if (num == "best") {
      ranked[1]
    } else if (num == "worst") {
      ranked[length(ranked)]
    } else {
      ## Indexing past the end yields NA
      ranked[num]
    }
  }
}
# rankhospital("MN", "heart attack", 5000)
# Example calls; the expected console output follows each call
rankhospital(state = "NC", outcome_item = "heart attack", num = "worst")
## [1] "WAYNE MEMORIAL HOSPITAL"
rankhospital(state = "WA", outcome_item = "heart attack", num = 7)
## [1] "YAKIMA VALLEY MEMORIAL HOSPITAL"
rankhospital(state = "TX", outcome_item = "pneumonia", num = 10)
## [1] "SETON SMITHVILLE REGIONAL HOSPITAL"
rankhospital(state = "NY", outcome_item = "heart attack", num = 7)
## [1] "BELLEVUE HOSPITAL CENTER"
####4 Ranking hospitals in all states
library(stringr)
library(dplyr)
## Return, for every state, the hospital holding a given rank for 30-day
## mortality.
##
## Reads "outcome-of-care-measures.csv" from the working directory on every
## call.
##
## Arguments:
##   outcome_item - "heart attack", "heart failure" or "pneumonia"
##   num          - "best", "worst", or the rank to return (1 = lowest rate)
##
## Returns a data frame with the character columns `hospital` and `state`,
## one row per state found in the file, sorted by state. Within a state,
## hospitals with the same rate are ordered alphabetically by name. A state
## that has no hospital at the requested rank gets NA in `hospital` instead
## of being left out. For an unknown outcome a message is emitted and NULL is
## returned.
rankall <- function(outcome_item, num = "best") {
  ## Read outcome data
  outcome <- read.csv("outcome-of-care-measures.csv", colClasses = "character")

  ## read.csv() turns the spaces and punctuation of the CSV header into dots,
  ## so each outcome maps to one exact column name. Looking the column up by
  ## its full name avoids picking whichever column happens to match a regex.
  rate_suffix <- c(
    "heart attack" = "Heart.Attack",
    "heart failure" = "Heart.Failure",
    "pneumonia" = "Pneumonia"
  )

  ## Check that the outcome is valid
  if (!outcome_item %in% names(rate_suffix)) {
    message("invalid outcome")
  } else {
    rate_col <- paste0(
      "Hospital.30.Day.Death..Mortality..Rates.from.",
      rate_suffix[[outcome_item]]
    )
    if (!rate_col %in% names(outcome)) {
      stop("column not found in outcome data: ", rate_col, call. = FALSE)
    }

    ## Missing rates are stored as the text "Not Available"; map them to NA
    ## explicitly so as.numeric() only warns about genuinely malformed values
    raw_rate <- outcome[[rate_col]]
    raw_rate[raw_rate == "Not Available"] <- NA
    rate <- as.numeric(raw_rate)

    ## Keep only rows that have a rate, a hospital name and a state
    keep <- !is.na(rate) & !is.na(outcome$Hospital.Name) &
      !is.na(outcome$State)
    hospital <- outcome$Hospital.Name[keep]
    state <- outcome$State[keep]
    rate <- rate[keep]

    ## Sort once by rate, then name (the tie-breaker); split() keeps that
    ## order inside each state's vector of hospital names
    by_rank <- order(rate, hospital)
    ranked_by_state <- split(hospital[by_rank], state[by_rank])

    ## Pick the requested rank from one state's ranked hospital names
    pick_rank <- function(ranked) {
      if (length(ranked) == 0L) {
        NA_character_
      } else if (num == "best") {
        ranked[1L]
      } else if (num == "worst") {
        ranked[length(ranked)]
      } else {
        ## match() gives NA for a rank the state does not have, and
        ## indexing with NA yields NA
        ranked[match(num, seq_along(ranked))]
      }
    }

    ## Every state in the file gets a row, including states without any
    ## usable rate (they are absent from ranked_by_state, hence the NULL check)
    all_states <- sort(unique(outcome$State))
    picked <- vapply(
      all_states,
      function(s) {
        ranked <- ranked_by_state[[s]]
        if (is.null(ranked)) NA_character_ else pick_rank(ranked)
      },
      character(1),
      USE.NAMES = FALSE
    )

    ## Return a data frame with the hospital names and the (abbreviated)
    ## state name
    data.frame(hospital = picked, state = all_states, stringsAsFactors = FALSE)
  }
}
# data <- rankall("heart attack", 20)
# Example calls; the expected console output follows each lookup
r <- rankall(outcome_item = "heart attack", num = 4)
as.character(r$hospital[r$state == "HI"])
## [1] "CASTLE MEDICAL CENTER"
r <- rankall(outcome_item = "heart failure", num = 10)
as.character(r$hospital[r$state == "NV"])
## [1] "RENOWN SOUTH MEADOWS MEDICAL CENTER"
r <- rankall(outcome_item = "pneumonia", num = "worst")
# r
as.character(r$hospital[r$state == "NJ"])
## [1] "BERGEN REGIONAL MEDICAL CENTER"