# Coursera - Johns Hopkins Data Science Specialization
# Course 2 - R Programming - Week 4 - Assignment 3 
# https://www.coursera.org/learn/r-programming/supplement/w1c7p/programming-assignment-3-instructions-hospital-quality

# CHANGE WORKING DIRECTORY ACCORDINGLY:
# PLACE outcome-of-care-measures.csv IN WORKING DIRECTORY

# PART 4

# Write a function called rankall that takes two arguments: an outcome name (outcome) and a hospital ranking (num). The function reads the outcome-of-care-measures.csv file and returns a 2-column data frame
# containing the hospital in each state that has the ranking specified in num.

# Load the dplyr package to use the filter function later on

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw outcome-of-care table from the working directory, reading
# every column as character so that no value is converted on import.

data <- read.csv(
  file = "outcome-of-care-measures.csv",
  colClasses = "character"
)

# Keep only the five raw columns needed further down, in this order:
# hospital name (raw col 2), state (7), and the 30-day mortality rates from
# heart attack (11), heart failure (17) and pneumonia (23).

data_new <- data[c(2, 7, 11, 17, 23)]

# After this selection the columns are addressed by their new positions 1-5.


# rankall: find the hospital that holds a given rank in every state.
#
# Arguments:
#   outcome  one of "heart attack", "heart failure" or "pneumonia"; any other
#            value raises the error "invalid outcome".
#   num      "best" (default), "worst", or a single positive whole number
#            giving the rank to pick; any other value raises "invalid num".
#
# Returns a data frame with two character columns, `hospital` and `state`,
# holding one row per distinct state found in `data_new`, sorted by state.
# Within a state, hospitals are ranked by ascending outcome value and ties
# are broken by hospital name. When a state has fewer ranked hospitals than
# `num` (or none at all for this outcome) its `hospital` entry is a real NA.
#
# Reads the file-level data frame `data_new`, whose columns are, in order:
# hospital name, state, then the heart attack, heart failure and pneumonia
# values stored as text.

rankall <- function(outcome, num = "best") {

  # Position of each supported outcome's column inside data_new

  outcome_cols <- c("heart attack" = 3L, "heart failure" = 4L, "pneumonia" = 5L)

  if (!is.character(outcome) || length(outcome) != 1L ||
      !outcome %in% names(outcome_cols)) {
    stop("invalid outcome")
  }

  # Validate the requested rank up front so that bad input fails with a clear
  # message instead of a cryptic indexing error or a silently wrong result

  rank_is_keyword <- is.character(num) && length(num) == 1L &&
    num %in% c("best", "worst")
  rank_is_position <- is.numeric(num) && length(num) == 1L &&
    is.finite(num) && num >= 1 && num == floor(num)

  if (!rank_is_keyword && !rank_is_position) {
    stop("invalid num")
  }

  hospital_names <- data_new[[1]]
  states <- data_new[[2]]

  # Entries that are not numbers become NA here; the coercion warning is
  # expected and therefore suppressed

  rates <- suppressWarnings(as.numeric(data_new[[outcome_cols[[outcome]]]]))

  # Take the state list from the full table so that a state without any
  # usable value for this outcome still gets a row (with an NA hospital)

  state_levels <- sort(unique(states))

  # Drop hospitals without a value, then sort by state, value and name

  has_rate <- !is.na(rates)
  ranked <- order(states[has_rate], rates[has_rate], hospital_names[has_rate])

  # One character vector of ranked hospital names per state; states with no
  # ranked hospital get an empty vector

  names_by_state <- split(
    hospital_names[has_rate][ranked],
    factor(states[has_rate][ranked], levels = state_levels)
  )

  # Return the name at the requested rank, or NA when the rank does not exist

  pick_hospital <- function(candidates) {
    n <- length(candidates)
    position <- if (rank_is_position) {
      num
    } else if (num == "best") {
      1L
    } else {
      n
    }
    if (position < 1L || position > n) {
      NA_character_
    } else {
      candidates[[position]]
    }
  }

  # unname() keeps the default 1..n row names instead of state-named rows

  data.frame(
    hospital = unname(vapply(names_by_state, pick_hospital, character(1))),
    state = state_levels,
    stringsAsFactors = FALSE
  )

}