DATA608Module3

#Libraries required
library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(shiny)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(rsconnect)

## 
## Attaching package: 'rsconnect'

## The following object is masked from 'package:shiny':
## 
##     serverInfo

# Load the CDC mortality data set from the course GitHub repository
df <- read.csv(
  file = "https://raw.githubusercontent.com/charleyferrari/CUNY_DATA608/master/lecture3/data/cleaned-cdc-mortality-1999-2010-2.csv"
)

# Preview the first six rows of the data
head(df, n = 6L)

##                                 ICD.Chapter State Year Deaths Population
## 1 Certain infectious and parasitic diseases    AL 1999   1092    4430141
## 2 Certain infectious and parasitic diseases    AL 2000   1188    4447100
## 3 Certain infectious and parasitic diseases    AL 2001   1211    4467634
## 4 Certain infectious and parasitic diseases    AL 2002   1215    4480089
## 5 Certain infectious and parasitic diseases    AL 2003   1350    4503491
## 6 Certain infectious and parasitic diseases    AL 2004   1251    4530729
##   Crude.Rate
## 1       24.6
## 2       26.7
## 3       27.1
## 4       27.1
## 5       30.0
## 6       27.6

# Number of columns (second element of the data frame's dimensions)
dim(df)[2L]

## [1] 6

# Column names of the data frame
names(df)

## [1] "ICD.Chapter" "State"       "Year"        "Deaths"      "Population" 
## [6] "Crude.Rate"

There are 6 variables in this dataset (ICD.Chapter, State, Year, Deaths, Population, Crude.Rate) and 9961 observations.

Question 1: As a researcher, you frequently compare mortality rates from particular causes across different States. You need a visualization that will let you see (for 2010 only) the crude mortality rate, across all States, from one cause (for example, Neoplasms, which are effectively cancers). Create a visualization that allows you to rank States by crude mortality for each cause of death.

# Subset for 2010 only (Year is numeric, so compare against a number)
df_2010 <- subset(df, Year == 2010)

# 2010 crude rates for "Mental and behavioural disorders", sorted by state.
# Sort on the State column itself: arrange(order(State)) would sort the rows
# by permutation indices, which only yields alphabetical order when the input
# happens to be arranged favourably.
Data <- df_2010 %>%
  filter(ICD.Chapter == "Mental and behavioural disorders") %>%
  arrange(State, Crude.Rate)

head(Data)

##                        ICD.Chapter State Year Deaths Population Crude.Rate
## 1 Mental and behavioural disorders    AK 2010    184     710231       25.9
## 2 Mental and behavioural disorders    AL 2010   2340    4779736       49.0
## 3 Mental and behavioural disorders    AR 2010   1208    2915918       41.4
## 4 Mental and behavioural disorders    AZ 2010   1989    6392017       31.1
## 5 Mental and behavioural disorders    CA 2010   7851   37253956       21.1
## 6 Mental and behavioural disorders    CO 2010   1821    5029196       36.2

# Horizontal bar chart of the 2010 crude mortality rate per state for
# "Mental and behavioural disorders", with states ranked by crude rate.
# Uses `Data` (already filtered to that single cause) rather than `df_2010`,
# which still contains every cause of death.
MBD2 <- Data %>%
  plot_ly(x = ~Crude.Rate, y = ~State, type = "bar", orientation = "h") %>%
  layout(
    title = list(
      text = paste0(
        "Mental and behavioural disorders",
        "\ncause-based 2010 State Crude Mortality Rates"
      ),
      font = list(size = 10)
    ),
    xaxis = list(title = "Crude Rate"),
    yaxis = list(
      title = "States",
      categoryorder = "array",
      # layout() needs a plain vector here, not a formula: states listed from
      # lowest to highest rate, so the highest rate ends up at the top.
      categoryarray = as.character(Data$State[order(Data$Crude.Rate)])
    )
  )

# Print the plot directly; a single plot does not need subplot().
MBD2

# Distinct causes of death (ICD chapters) present in the 2010 subset
Total_causes1 <- unique(df_2010[["ICD.Chapter"]])
length(Total_causes1)

## [1] 17

There are 17 causes of death (ICD chapters) in the 2010 data.

# Distinct states present in the 2010 subset
States <- unique(df_2010[["State"]])
length(States)

## [1] 51

There are 51 states in the 2010 data.

# User interface for Question 1: a cause-of-death selector (choices sorted
# alphabetically) placed above the output plot "plot1".
ui1 <- fluidPage(
  titlePanel(title = "CDC Mortality Rates for 2010"),
  fluidRow(
    selectInput(
      inputId = "cause",
      label = "Cause of Death:",
      choices = sort(Total_causes1)
    )
  ),
  plotOutput(outputId = "plot1", height = 800)
)

# Server logic for Question 1.
# Draws a lollipop chart (red segment from 0 plus a blue point) of the 2010
# crude mortality rate per state for the cause chosen in `input$cause`, with
# states ordered by crude rate.
#
# input  - Shiny input list; reads `input$cause`.
# output - Shiny output list; writes `output$plot1`.
server1 <- function(input, output) {
  # Rows of the 2010 subset for the currently selected cause
  showrates <- reactive({
    req(input$cause)
    subset(df_2010, ICD.Chapter == input$cause)
  })

  output$plot1 <- renderPlot({
    rates <- showrates()
    ggplot(rates, aes(x = Crude.Rate, y = reorder(State, -Crude.Rate))) +
      # The `+ 5` head-room belongs to the upper limit only. Written as
      # c(0, max) + 5 it shifted both limits to c(5, max + 5), which put the
      # segment origin (xend = 0) and every rate below 5 out of bounds.
      scale_x_continuous(
        limits = c(0, max(rates$Crude.Rate) + 5),
        expand = c(0, 0)
      ) +
      geom_segment(aes(yend = State), xend = 0, color = "red") +
      geom_point(size = 3, color = "blue") +
      theme_bw() +
      theme(
        panel.grid.major.y = element_blank(),
        axis.title = element_text(size = 14)
      ) +
      xlab("CDC Mortality Rate for 2010 year") +
      ylab("State") +
      ggtitle(input$cause)
  })
}

# Launch the Question 1 app from its UI definition and server function
shiny::shinyApp(ui = ui1, server = server1)

Shiny applications not supported in static R Markdown documents

ShinyApp Link: https://ivanit.shinyapps.io/DATA_608_Module3/?_ga=2.227650525.1145081870.1677022315-303584501.1677022315 http://127.0.0.1:5039/

Question 2: Often you are asked whether particular States are improving their mortality rates (per cause) faster than, or slower than, the national average. Create a visualization that lets your clients see this for themselves for one cause of death at a time. Keep in mind that the national average should be weighted by the national population.

# User interface for Question 2: state and cause-of-death selectors in a
# sidebar, with the comparison plot "distPlot" in the main panel.
ui <- fluidPage(
  titlePanel("Crude Mortality Rate Across All States Vs National Average"),
  # sidebarPanel()/mainPanel() are meant to be wrapped in sidebarLayout().
  # Their `width` is a number of grid columns, so the previous
  # width = "auto" was not a valid value; the default widths are used instead.
  sidebarLayout(
    sidebarPanel(
      # No `selected` given: the first choice is selected by default
      # (the previous `selected = 1` did not match any choice value).
      selectInput(
        "select1",
        label = strong("State"),
        choices = levels(as.factor(df$State))
      ),
      selectInput(
        "select2",
        label = strong("Cause of Death"),
        choices = levels(as.factor(df$ICD.Chapter))
      )
    ),
    mainPanel(
      plotOutput("distPlot")
    )
  )
)

# Server logic for Question 2.
# For the selected cause (`input$select2`) and state (`input$select1`), draws
# the state's yearly crude mortality rate as bars and the national rate as a
# red line. The national rate is sum(Deaths) / sum(Population) per year over
# the rows present for that cause, i.e. a population-weighted average.
#
# input  - Shiny input list; reads `input$select1` and `input$select2`.
# output - Shiny output list; writes `output$distPlot`.
server <- function(input, output) {
  output$distPlot <- renderPlot({
    req(input$select1, input$select2)

    df %>%
      # Restrict to the chosen cause first so the national aggregate is not
      # recomputed for every cause on each render.
      filter(ICD.Chapter == input$select2) %>%
      group_by(Year) %>%
      mutate(N_Crude_Rate = 10^5 * (sum(Deaths) / sum(Population))) %>%
      group_by(Year, State) %>%
      mutate(S_Crude_Rate = 10^5 * (sum(Deaths) / Population)) %>%
      ungroup() %>%
      # The state filter must come after the national rate is computed.
      filter(State == input$select1) %>%
      select(ICD.Chapter, State, Year, N_Crude_Rate, S_Crude_Rate) %>%
      ggplot() +
      geom_col(aes(x = Year, y = S_Crude_Rate)) +
      # The x axis shows years, not states.
      labs(x = "Year", y = "Crude Mortality Rate") +
      geom_line(
        aes(x = Year, y = N_Crude_Rate, linetype = "National Average"),
        col = "red",
        lwd = 1
      ) +
      scale_linetype(name = NULL) +
      theme_minimal()
  })
}

# Launch the Question 2 app from its UI definition and server function
shiny::shinyApp(ui = ui, server = server)

Shiny applications not supported in static R Markdown documents

ShinyApp Link: https://ivanit.shinyapps.io/DATA608_Module3_Q2/?_ga=2.160147581.1145081870.1677022315-303584501.1677022315 http://127.0.0.1:5039/

DATA608Module3

IvanTikhonov

2023-02-22