I have provided you with data about mortality from all 50 states and the District of Columbia.Please access it at https://github.com/charleyferrari/CUNY_DATA608/tree/master/module3/data

You are invited to gather more data from our provider, the CDC WONDER system, at https://wonder.cdc.gov

This assignment must be done in R. It must be done using the ‘shiny’ package.

It is recommended you use an R package that supports interactive graphing such as plotly, or vegalite, but this is not required.

Your apps must be deployed, I won’t be accepting raw files. Luckily, you can pretty easily deploy apps with a free account at shinyapps.io

Question 2: Often you are asked whether particular States are improving their mortality rates (per cause) faster than, or slower than, the national average. Create a visualization that lets your clients see this for themselves for one cause of death at the time. Keep in mind that the national average should be weighted by the national population.

library(tidyverse)  
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.5     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)    
library(tidyr)    
library(tibble)      
library(reshape2)   
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library(stringr)   

library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(shiny)
## Warning: package 'shiny' was built under R version 4.1.3
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 4.1.3
## 
## Attaching package: 'rsconnect'
## The following object is masked from 'package:shiny':
## 
##     serverInfo
## Importing the data
df <- read.csv("https://raw.githubusercontent.com/charleyferrari/CUNY_DATA_608/master/module3/data/cleaned-cdc-mortality-1999-2010-2.csv", header= TRUE)

## Exploratory data
head(df)
##                                 ICD.Chapter State Year Deaths Population
## 1 Certain infectious and parasitic diseases    AL 1999   1092    4430141
## 2 Certain infectious and parasitic diseases    AL 2000   1188    4447100
## 3 Certain infectious and parasitic diseases    AL 2001   1211    4467634
## 4 Certain infectious and parasitic diseases    AL 2002   1215    4480089
## 5 Certain infectious and parasitic diseases    AL 2003   1350    4503491
## 6 Certain infectious and parasitic diseases    AL 2004   1251    4530729
##   Crude.Rate
## 1       24.6
## 2       26.7
## 3       27.1
## 4       27.1
## 5       30.0
## 6       27.6
dim(df)
## [1] 9961    6
str(df)
## 'data.frame':    9961 obs. of  6 variables:
##  $ ICD.Chapter: chr  "Certain infectious and parasitic diseases" "Certain infectious and parasitic diseases" "Certain infectious and parasitic diseases" "Certain infectious and parasitic diseases" ...
##  $ State      : chr  "AL" "AL" "AL" "AL" ...
##  $ Year       : int  1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 ...
##  $ Deaths     : int  1092 1188 1211 1215 1350 1251 1303 1312 1241 1385 ...
##  $ Population : int  4430141 4447100 4467634 4480089 4503491 4530729 4569805 4628981 4672840 4718206 ...
##  $ Crude.Rate : num  24.6 26.7 27.1 27.1 30 27.6 28.5 28.3 26.6 29.4 ...
count(distinct(df,ICD.Chapter))
##    n
## 1 19
unique(df$ICD.Chapter)
##  [1] "Certain infectious and parasitic diseases"                                                          
##  [2] "Neoplasms"                                                                                          
##  [3] "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism"
##  [4] "Endocrine, nutritional and metabolic diseases"                                                      
##  [5] "Mental and behavioural disorders"                                                                   
##  [6] "Diseases of the nervous system"                                                                     
##  [7] "Diseases of the ear and mastoid process"                                                            
##  [8] "Diseases of the circulatory system"                                                                 
##  [9] "Diseases of the respiratory system"                                                                 
## [10] "Diseases of the digestive system"                                                                   
## [11] "Diseases of the skin and subcutaneous tissue"                                                       
## [12] "Diseases of the musculoskeletal system and connective tissue"                                       
## [13] "Diseases of the genitourinary system"                                                               
## [14] "Pregnancy, childbirth and the puerperium"                                                           
## [15] "Certain conditions originating in the perinatal period"                                             
## [16] "Congenital malformations, deformations and chromosomal abnormalities"                               
## [17] "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified"            
## [18] "Codes for special purposes"                                                                         
## [19] "External causes of morbidity and mortality"
unique(df$Year)
##  [1] 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010

And lets preview this data:

library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.1.3
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
drate<- df %>%
    group_by(State) %>%
    summarise(avg_crude = mean(Crude.Rate), 
              min_crude = min(Crude.Rate),
              max_crude = max(Crude.Rate)
    )

kable(head(drate,5))  %>%
 kable_styling(bootstrap_options = "striped", font_size = 11)
State avg_crude min_crude max_crude
AK 33.19943 1.5 142.2
AL 62.45846 0.2 396.1
AR 62.59231 0.4 424.6
AZ 47.33918 0.2 287.0
CA 37.84737 0.0 288.1
flag = "Neoplasms"
df1 <- df %>%
  filter(., Year == "2010" & ICD.Chapter == "Neoplasms") %>% 
  #arrange(desc(State,ICD.Chapter), Crude.Rate)
 arrange(Crude.Rate)

kable(head(df1,50))  %>%
 kable_styling(bootstrap_options = "striped", font_size = 11)
ICD.Chapter State Year Deaths Population Crude.Rate
Neoplasms UT 2010 2915 2763885 105.5
Neoplasms AK 2010 910 710231 128.1
Neoplasms CO 2010 7220 5029196 143.6
Neoplasms TX 2010 37607 25145561 149.6
Neoplasms CA 2010 57820 37253956 155.2
Neoplasms GA 2010 15788 9687653 163.0
Neoplasms ID 2010 2599 1567582 165.8
Neoplasms NM 2010 3458 2059179 167.9
Neoplasms NV 2010 4605 2700551 170.5
Neoplasms HI 2010 2346 1360301 172.5
Neoplasms AZ 2010 11129 6392017 174.1
Neoplasms DC 2010 1062 601723 176.5
Neoplasms VA 2010 14425 8001024 180.3
Neoplasms WA 2010 12140 6724540 180.5
Neoplasms MD 2010 10513 5773552 182.1
Neoplasms WY 2010 1053 563626 186.8
Neoplasms MN 2010 9915 5303925 186.9
Neoplasms NY 2010 36237 19378102 187.0
Neoplasms IL 2010 24739 12830632 192.8
Neoplasms KS 2010 5521 2853118 193.5
Neoplasms NE 2010 3535 1826341 193.6
Neoplasms NC 2010 18514 9535483 194.2
Neoplasms ND 2010 1311 672591 194.9
Neoplasms NJ 2010 17317 8791894 197.0
Neoplasms NH 2010 2604 1316470 197.8
Neoplasms CT 2010 7170 3574097 200.6
Neoplasms MT 2010 1987 989415 200.8
Neoplasms MA 2010 13315 6547629 203.4
Neoplasms WI 2010 11644 5686986 204.7
Neoplasms OR 2010 7863 3831074 205.2
Neoplasms LA 2010 9390 4533372 207.1
Neoplasms SC 2010 9577 4625364 207.1
Neoplasms IN 2010 13467 6483802 207.7
Neoplasms SD 2010 1699 814180 208.7
Neoplasms MI 2010 21143 9883640 213.9
Neoplasms OK 2010 8029 3751351 214.0
Neoplasms IA 2010 6541 3046355 214.7
Neoplasms MS 2010 6383 2967297 215.1
Neoplasms MO 2010 12943 5988927 216.1
Neoplasms DE 2010 1956 897934 217.8
Neoplasms AL 2010 10429 4779736 218.2
Neoplasms TN 2010 13851 6346105 218.3
Neoplasms RI 2010 2327 1052567 221.1
Neoplasms OH 2010 25747 11536504 223.2
Neoplasms AR 2010 6616 2915918 226.9
Neoplasms FL 2010 42680 18801310 227.0
Neoplasms VT 2010 1431 625741 228.7
Neoplasms KY 2010 10149 4339367 233.9
Neoplasms PA 2010 29900 12702379 235.4
Neoplasms ME 2010 3343 1328361 251.7
# df2 <- df %>%
#   filter(., Year == "2010" ) %>% 
#   #arrange(desc(State,ICD.Chapter), Crude.Rate)
#  arrange(ICD.Chapter,desc(State,Crude.Rate))  
# 
# kable(head(df2,50))  %>%
#  kable_styling(bootstrap_options = "striped", font_size = 11)
# 


fig <- plot_ly( data = df1, x = ~df1$Crude.Rate, y = ~df1$State,type="bar",   #~df1$Crude.Rate
               marker = list(size = 10,
                             color = 'rgba(255, 182, 193, .9)',
                             line = list(color = 'rgba(152, 0, 0, .8)',
                                         width = 2)))
fig <- fig %>% layout(title = 'Styled Scatter',
         yaxis = list(zeroline = FALSE),
         xaxis = list(zeroline = FALSE)
       )

fig
# 
# fig1 <- ggplot(aes( x =  ICD.Chapter, y = Crude.Rate),data = df) + 
#   geom_bar(stat = 'identity') +
#   ylab('Total Deaths') +
#   xlab('') +
#   ggtitle('Causes of Deaths ') +
#   theme(axis.text.x = element_text(angle = 90, hjust = 0.5))
# 
# fig1


fig1 <- ggplot(aes( x =  reorder(State,-Crude.Rate), y = Crude.Rate),data = df1) + 
  geom_bar(stat = 'identity') +
  ylab('Total Deaths') +
  xlab('') +
  ggtitle('Causes of Death only Neoplasms ') +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

fig1

Question 1: As a researcher, you frequently compare mortality rates from particular causes across different States. You need a visualization that will let you see (for 2010 only) the crude mortality rate, across all States, from one cause (for example, Neoplasms, which are effectively cancers). Create a visualization that allows you to rank States by crude mortality for each cause of death.

ANSWER: I have used Shiny Module below to create a dashboard to show the visualiztion for Question 1. The user can select the value of the Cause to view the plot in a TABPANEL layout.

MY SHINY APP URL - https://banuboopalan.shinyapps.io/BanuMortalityQuestion1/

# Define UI for application that draws a histogram
library(shiny)

df3 <- df %>%
  filter(., Year == (2004:2010) ) %>%       #, ICD.Chapter == flag| ICD.Chapter == "External causes of morbidity and mortality"
  arrange(desc(State,ICD.Chapter), Crude.Rate)  

head(df3)
##                                  ICD.Chapter State Year Deaths Population
## 1 External causes of morbidity and mortality    WY 2004    366     509106
## 2 External causes of morbidity and mortality    WY 2009    438     559851
## 3 External causes of morbidity and mortality    WY 2007    431     534876
## 4 External causes of morbidity and mortality    WY 2005    424     514157
## 5 External causes of morbidity and mortality    WY 2006    439     522667
## 6 External causes of morbidity and mortality    WY 2010    497     563626
##   Crude.Rate
## 1       71.9
## 2       78.2
## 3       80.6
## 4       82.5
## 5       84.0
## 6       88.2
ggplot(df3, aes(x=Year,y=Crude.Rate)) + 
  geom_bar(aes(fill = reorder(ICD.Chapter,Crude.Rate)), position ="dodge",
  stat = "identity") +
  ggtitle("Crude Rate by Year") +
  ylab(label = "Crude Rate" )+
  scale_fill_discrete(name = "Cause of Death")

ui <- fluidPage(
    
    # Application title
    titlePanel("Mortality Dashboard"),
    
    sidebarLayout(
        sidebarPanel(
            selectInput(
                        inputId = "react_ICD.Chapter",
                        label = "Select Cause",
                        choices = unique(df3$ICD.Chapter)),
       
            selectInput(inputId = "react_Year",
                         label = "Select Year",
                        choices = unique(df3$Year))
        ),
       
     mainPanel(
      tabsetPanel(
        tabPanel("Summary", dataTableOutput('table')),
        tabPanel("Plot", plotOutput("plot1")),
        tabPanel("Plot", plotOutput("plot2"))
      )
    )
    

    )
)
      

# Define server logic required 
server <- function(input, output) {
    selections = reactive ({
      req(input$react_ICD.Chapter)
      req(input$react_Year)
      filter(df3,ICD.Chapter == input$react_ICD.Chapter) %>%  filter(Year %in% input$react_Year)
    })
    
    output$plot1 <- renderPlot ({
        
        ggplot(data = selections(),aes(y = reorder(State, Crude.Rate), x = Crude.Rate,
                       
            )) +
            geom_col(fill = 'skyblue') + 
            theme_minimal() +
            labs(y = 'State')})
      
      output$plot2 <- renderPlot ({
        
      ggplot(df3, aes(x=Year,y=Crude.Rate)) + 
      geom_bar(aes(fill = reorder(ICD.Chapter,Crude.Rate)), position ="dodge",
        stat = "identity") +
        ggtitle("Crude Rate by Year") +
        ylab(label = "Crude Rate" )+
        scale_fill_discrete(name = "Cause of Death")
        
  })
      
      output$table <- renderDataTable(df3)
}

# Run the application 
shinyApp(ui = ui, server = server)
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.

Shiny applications not supported in static R Markdown documents

Question 2: Often you are asked whether particular States are improving their mortality rates (per cause) faster than, or slower than, the national average. Create a visualization that lets your clients see this for themselves for one cause of death at the time. Keep in mind that the national average should be weighted by the national population.

Reference - https://moderndata.plotly.com/dashboards-in-r-with-shiny-plotly/

ANSWER : Using the above example code, I am having issues to figure out this code for a graphoutput

graphOutput <- function(inputId, width="100%", height="550px") {
    tagList(
        singleton(tags$head(
            tags$script(src="plotlyGraphWidget.js")
        )),
       tags$iframe(id=inputId, src="https://plot.ly/~playground/7.embed",
       class="graphs", style="border:none;", seamless=TRUE, width=width, height=height)
    )
}

renderGraph <- function(expr, env=parent.frame(), quoted=FALSE) {
    ## This gets called when inputs change --
    ## Place data wrangling code in here
    ## and pass the result to the client
    ## to be graphed.

    installExprFunction(expr, "func", env, quoted)

    function(){
        data = func();
       
        return(data)
    }
}

ui2 <- shinyUI(fluidPage(

  # Application title
  titlePanel("Mortality question2"),

  # Sidebar with a slider input for number of bins

    sidebarPanel(
      h3("Mortality question2"),
    
      selectInput(inputId= "name",
                     #inputId = "react_State",
                         label = "Select state",
                        choices = unique(df$State)
                  #multiple = T
                  #selected = "AL"

                  ),
      
      selectInput(inputId = "cause",
                     #inputId = "react_State",
                         label = "Select cause",
                        choices = unique(df$ICD.Chapter)
                #selected = "Neoplasms"

                  ),

      #Term plot
      plotOutput("Plot", height = 200),

    ),

    #Show a plot of the generated distribution
    mainPanel(
      graphOutput("gg")
    )
  
  # mainPanel(
  #     tabsetPanel(
  #      
  #       tabPanel("Plot", plotOutput("trendPlot"))
  #     )
  #   )
  # 
  
  
  
  )
)

library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
s1 <- shinyServer(function(input, output, session) {
  
   selections = reactive ({
      req(input$name)
      req(input$cause)
       filter(df,State %in% input$name) %>% filter(ICD.Chapter %in% input$cause)
    })

  output$trendPlot <- renderGraph({
    if (length(input$name)==0) print("Please select at least one State")

    else {
      df_trend <- df %>%
        filter(State %in% input$name) %>%
        filter(ICD.Chapter%in% input$cause)

   head(df_trend,5)

      ggideal_point <- ggplot(df_trend) +
        geom_line(aes(x=Year, y=Crude.Rate, by=ICD.Chapter, color=ICD.Chapter)) +
        labs(x = "Year") +
        labs(y = "Crude.Rate") +
        labs(title = "Crude.Rate") +
        scale_colour_hue("clarity",l=70, c=150) +
        theme_few()

      # Year range
      min_Year <- min(df_trend$Year)
      max_Year <- max(df_trend$Year)

      # use gg2list() to convert from ggplot->plotly
      gg <- gg2list(ggideal_point)

      # Send this message up to the browser client, which will get fed through to
      # Plotly's javascript graphing library embedded inside the graph
      return(list(
          list(
              id="trendPlot",
              task="newPlot",
              data=gg$data,
              layout=gg$layout
          )
      ))
    
  }

})
  
})

shinyApp(ui = ui2, server = s1)

Shiny applications not supported in static R Markdown documents