Data set Overview

  • Name: Cardiac Surgery and Percutaneous Coronary Interventions by Hospital

  • Source: New York State Department of Health

  • Background: The data come from a cardiac profile system developed to assess hospital and provider performance over time

  • Statistical methods were used to predict mortality on the basis of low and high levels of risk factors

  • to determine if the models are reasonably accurate

  • Goal:

  • Apply biostatistical analysis methods to generate further insights

Setup - R code

Load the libraries and read in the data set

# Attach the packages used by the analysis; any warnings raised while
# attaching them are silenced.
# NOTE(review): conflict_prefer_all("tidyverse") appears to prefer only the
# functions exported by the tidyverse meta-package itself, not those of dplyr,
# ggplot2, etc. -- confirm this is the intended conflict resolution.
suppressWarnings({
  library(conflicted)
  library(tidyverse)
  conflict_prefer_all("tidyverse")
  library(httr)
  library(jsonlite)
  library(plotly)
})

# Import the hospital-level data set without printing the column-type summary
df <- read_csv(
  "Cardiac_Surgery_and_Percutaneous_Coronary_Interventions_by_Hospital___Beginning_2008.csv",
  show_col_types = FALSE
)

Verifying Dataframe Integrity

# Sanity-check the import: print the dimensions of df (expected to be
# 2812 rows x 14 columns, matching the recorded output below) and preview the
# first rows together with the parsed column types.
dim(df)      # should be 2812 rows, 14 columns
## [1] 2812   14
head(df)
## # A tibble: 6 × 14
##   `Facility ID` `Hospital Name`        `Detailed Region`       Region  Procedure
##   <chr>         <chr>                  <chr>                   <chr>   <chr>    
## 1 0001          Albany Med. Ctr        Capital District        Capita… All PCI  
## 2 1045          White Plains Hospital  NY Metro - New Rochelle NY Met… Non-Emer…
## 3 1438          Bellevue Hospital Ctr  Manhattan               NY Met… All PCI  
## 4 1439          Beth Israel Med Ctr    Manhattan               NY Met… All PCI  
## 5 1178          Bronx-Lebanon-Cncourse Bronx                   NY Met… All PCI  
## 6 1286          Brookdale Hosp Med Ctr Kings                   NY Met… All PCI  
## # ℹ 9 more variables: `Year of Hospital Discharge` <chr>,
## #   `Number of Cases` <dbl>, `Number of Deaths` <dbl>,
## #   `Observed Mortality Rate` <dbl>, `Expected Mortality Rate` <dbl>,
## #   `Risk-Adjusted Mortality Rate` <dbl>,
## #   `Lower Limit of Confidence Interval` <dbl>,
## #   `Upper Limit of Confidence Interval` <dbl>, `Comparison Results` <chr>

Slide with Plotly Plot

# Draw one horizontal plotly box plot per value column, grouped and coloured
# by a categorical column.
#
# Args:
#   df:    data frame containing the grouping column and the value columns.
#   x_val: single string naming the grouping column (e.g. "Procedure").
#   y_var: character vector of column names; one box plot is drawn per name,
#          titled with that column name.
#
# Returns:
#   Invisibly, a list of plotly figures named after `y_var`. Each figure is
#   also printed as a side effect.
#
# Errors if any requested column is absent from `df`.
#
# NOTE(review): widgets printed from inside a function may not render in a
# knitted document; the returned list can be wrapped in htmltools::tagList()
# there -- confirm against the rendering setup.
mortality_box_plot <- function(df, x_val, y_var) {
  stopifnot(
    is.data.frame(df),
    is.character(x_val), length(x_val) == 1L,
    is.character(y_var)
  )

  missing_cols <- setdiff(c(x_val, y_var), names(df))
  if (length(missing_cols) > 0) {
    stop(
      "Columns missing from `df`: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Use the caller-supplied grouping column rather than a hard-coded name.
  groups <- df[[x_val]]

  figs <- lapply(y_var, function(rate_col) {
    # `[[` extracts a plain vector; `[, i]` on a tibble would pass a
    # one-column tibble to plot_ly instead.
    fig <- plotly::plot_ly(
      x = df[[rate_col]],
      y = groups,
      color = groups,
      type = "box"
    )
    fig <- plotly::layout(fig, title = rate_col)
    print(fig)
    fig
  })
  names(figs) <- y_var

  invisible(figs)
}
# Rate and confidence-limit columns to plot, one box plot per column
mort_rate_types <- c(
  "Observed Mortality Rate",
  "Expected Mortality Rate",
  "Risk-Adjusted Mortality Rate",
  "Lower Limit of Confidence Interval",
  "Upper Limit of Confidence Interval"
)
# Column that groups the observations in every box plot
x_col <- "Procedure"

# Draw the box plots for each of the selected columns
mortality_box_plot(df, x_col, mort_rate_types)

ggplot Slide 1

# Distribution of procedures by region: stacked bars whose height is the sum
# of `Number of Cases` over all rows of a region, filled by region.
# Columns are mapped by bare name inside aes() (not df$col or a quoted string)
# so ggplot resolves them from the data passed to ggplot().
ggplot(df, aes(x = Region, y = `Number of Cases`, fill = Region)) +
  geom_col() +
  labs(title = "Cases by Region", x = "Region", y = "Number of Cases") +
  # The x tick labels are hidden; the fill legend identifies each region.
  theme(axis.text.x = element_blank())

ggplot Slide 2

Math text Slide 1

Math text Slide 2