I have provided you with data about mortality from all 50 states and the District of Columbia.Please access it at https://github.com/charleyferrari/CUNY_DATA608/tree/master/module3/data
You are invited to gather more data from our provider, the CDC WONDER system, at https://wonder.cdc.gov
This assignment must be done in R. It must be done using the ‘shiny’ package.
It is recommended you use an R package that supports interactive graphing such as plotly, or vegalite, but this is not required.
Your apps must be deployed, I won’t be accepting raw files. Luckily, you can pretty easily deploy apps with a free account at shinyapps.io
Question 2: Often you are asked whether particular States are improving their mortality rates (per cause) faster than, or slower than, the national average. Create a visualization that lets your clients see this for themselves for one cause of death at the time. Keep in mind that the national average should be weighted by the national population.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.2
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## Warning: package 'readr' was built under R version 4.1.2
## Warning: package 'forcats' was built under R version 4.1.2
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(tidyr)
library(tibble)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(stringr)
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(shiny)
## Warning: package 'shiny' was built under R version 4.1.3
library(rsconnect)
## Warning: package 'rsconnect' was built under R version 4.1.3
##
## Attaching package: 'rsconnect'
## The following object is masked from 'package:shiny':
##
## serverInfo
## Importing the data
df <- read.csv("https://raw.githubusercontent.com/charleyferrari/CUNY_DATA_608/master/module3/data/cleaned-cdc-mortality-1999-2010-2.csv", header= TRUE)
## Exploratory data
head(df)
## ICD.Chapter State Year Deaths Population
## 1 Certain infectious and parasitic diseases AL 1999 1092 4430141
## 2 Certain infectious and parasitic diseases AL 2000 1188 4447100
## 3 Certain infectious and parasitic diseases AL 2001 1211 4467634
## 4 Certain infectious and parasitic diseases AL 2002 1215 4480089
## 5 Certain infectious and parasitic diseases AL 2003 1350 4503491
## 6 Certain infectious and parasitic diseases AL 2004 1251 4530729
## Crude.Rate
## 1 24.6
## 2 26.7
## 3 27.1
## 4 27.1
## 5 30.0
## 6 27.6
dim(df)
## [1] 9961 6
str(df)
## 'data.frame': 9961 obs. of 6 variables:
## $ ICD.Chapter: chr "Certain infectious and parasitic diseases" "Certain infectious and parasitic diseases" "Certain infectious and parasitic diseases" "Certain infectious and parasitic diseases" ...
## $ State : chr "AL" "AL" "AL" "AL" ...
## $ Year : int 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 ...
## $ Deaths : int 1092 1188 1211 1215 1350 1251 1303 1312 1241 1385 ...
## $ Population : int 4430141 4447100 4467634 4480089 4503491 4530729 4569805 4628981 4672840 4718206 ...
## $ Crude.Rate : num 24.6 26.7 27.1 27.1 30 27.6 28.5 28.3 26.6 29.4 ...
count(distinct(df,ICD.Chapter))
## n
## 1 19
unique(df$ICD.Chapter)
## [1] "Certain infectious and parasitic diseases"
## [2] "Neoplasms"
## [3] "Diseases of the blood and blood-forming organs and certain disorders involving the immune mechanism"
## [4] "Endocrine, nutritional and metabolic diseases"
## [5] "Mental and behavioural disorders"
## [6] "Diseases of the nervous system"
## [7] "Diseases of the ear and mastoid process"
## [8] "Diseases of the circulatory system"
## [9] "Diseases of the respiratory system"
## [10] "Diseases of the digestive system"
## [11] "Diseases of the skin and subcutaneous tissue"
## [12] "Diseases of the musculoskeletal system and connective tissue"
## [13] "Diseases of the genitourinary system"
## [14] "Pregnancy, childbirth and the puerperium"
## [15] "Certain conditions originating in the perinatal period"
## [16] "Congenital malformations, deformations and chromosomal abnormalities"
## [17] "Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified"
## [18] "Codes for special purposes"
## [19] "External causes of morbidity and mortality"
unique(df$Year)
## [1] 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010
And lets preview this data:
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.1.3
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
drate<- df %>%
group_by(State) %>%
summarise(avg_crude = mean(Crude.Rate),
min_crude = min(Crude.Rate),
max_crude = max(Crude.Rate)
)
kable(head(drate,5)) %>%
kable_styling(bootstrap_options = "striped", font_size = 11)
| State | avg_crude | min_crude | max_crude |
|---|---|---|---|
| AK | 33.19943 | 1.5 | 142.2 |
| AL | 62.45846 | 0.2 | 396.1 |
| AR | 62.59231 | 0.4 | 424.6 |
| AZ | 47.33918 | 0.2 | 287.0 |
| CA | 37.84737 | 0.0 | 288.1 |
flag = "Neoplasms"
df1 <- df %>%
filter(., Year == "2010" & ICD.Chapter == "Neoplasms") %>%
#arrange(desc(State,ICD.Chapter), Crude.Rate)
arrange(Crude.Rate)
kable(head(df1,50)) %>%
kable_styling(bootstrap_options = "striped", font_size = 11)
| ICD.Chapter | State | Year | Deaths | Population | Crude.Rate |
|---|---|---|---|---|---|
| Neoplasms | UT | 2010 | 2915 | 2763885 | 105.5 |
| Neoplasms | AK | 2010 | 910 | 710231 | 128.1 |
| Neoplasms | CO | 2010 | 7220 | 5029196 | 143.6 |
| Neoplasms | TX | 2010 | 37607 | 25145561 | 149.6 |
| Neoplasms | CA | 2010 | 57820 | 37253956 | 155.2 |
| Neoplasms | GA | 2010 | 15788 | 9687653 | 163.0 |
| Neoplasms | ID | 2010 | 2599 | 1567582 | 165.8 |
| Neoplasms | NM | 2010 | 3458 | 2059179 | 167.9 |
| Neoplasms | NV | 2010 | 4605 | 2700551 | 170.5 |
| Neoplasms | HI | 2010 | 2346 | 1360301 | 172.5 |
| Neoplasms | AZ | 2010 | 11129 | 6392017 | 174.1 |
| Neoplasms | DC | 2010 | 1062 | 601723 | 176.5 |
| Neoplasms | VA | 2010 | 14425 | 8001024 | 180.3 |
| Neoplasms | WA | 2010 | 12140 | 6724540 | 180.5 |
| Neoplasms | MD | 2010 | 10513 | 5773552 | 182.1 |
| Neoplasms | WY | 2010 | 1053 | 563626 | 186.8 |
| Neoplasms | MN | 2010 | 9915 | 5303925 | 186.9 |
| Neoplasms | NY | 2010 | 36237 | 19378102 | 187.0 |
| Neoplasms | IL | 2010 | 24739 | 12830632 | 192.8 |
| Neoplasms | KS | 2010 | 5521 | 2853118 | 193.5 |
| Neoplasms | NE | 2010 | 3535 | 1826341 | 193.6 |
| Neoplasms | NC | 2010 | 18514 | 9535483 | 194.2 |
| Neoplasms | ND | 2010 | 1311 | 672591 | 194.9 |
| Neoplasms | NJ | 2010 | 17317 | 8791894 | 197.0 |
| Neoplasms | NH | 2010 | 2604 | 1316470 | 197.8 |
| Neoplasms | CT | 2010 | 7170 | 3574097 | 200.6 |
| Neoplasms | MT | 2010 | 1987 | 989415 | 200.8 |
| Neoplasms | MA | 2010 | 13315 | 6547629 | 203.4 |
| Neoplasms | WI | 2010 | 11644 | 5686986 | 204.7 |
| Neoplasms | OR | 2010 | 7863 | 3831074 | 205.2 |
| Neoplasms | LA | 2010 | 9390 | 4533372 | 207.1 |
| Neoplasms | SC | 2010 | 9577 | 4625364 | 207.1 |
| Neoplasms | IN | 2010 | 13467 | 6483802 | 207.7 |
| Neoplasms | SD | 2010 | 1699 | 814180 | 208.7 |
| Neoplasms | MI | 2010 | 21143 | 9883640 | 213.9 |
| Neoplasms | OK | 2010 | 8029 | 3751351 | 214.0 |
| Neoplasms | IA | 2010 | 6541 | 3046355 | 214.7 |
| Neoplasms | MS | 2010 | 6383 | 2967297 | 215.1 |
| Neoplasms | MO | 2010 | 12943 | 5988927 | 216.1 |
| Neoplasms | DE | 2010 | 1956 | 897934 | 217.8 |
| Neoplasms | AL | 2010 | 10429 | 4779736 | 218.2 |
| Neoplasms | TN | 2010 | 13851 | 6346105 | 218.3 |
| Neoplasms | RI | 2010 | 2327 | 1052567 | 221.1 |
| Neoplasms | OH | 2010 | 25747 | 11536504 | 223.2 |
| Neoplasms | AR | 2010 | 6616 | 2915918 | 226.9 |
| Neoplasms | FL | 2010 | 42680 | 18801310 | 227.0 |
| Neoplasms | VT | 2010 | 1431 | 625741 | 228.7 |
| Neoplasms | KY | 2010 | 10149 | 4339367 | 233.9 |
| Neoplasms | PA | 2010 | 29900 | 12702379 | 235.4 |
| Neoplasms | ME | 2010 | 3343 | 1328361 | 251.7 |
# df2 <- df %>%
# filter(., Year == "2010" ) %>%
# #arrange(desc(State,ICD.Chapter), Crude.Rate)
# arrange(ICD.Chapter,desc(State,Crude.Rate))
#
# kable(head(df2,50)) %>%
# kable_styling(bootstrap_options = "striped", font_size = 11)
#
fig <- plot_ly( data = df1, x = ~df1$Crude.Rate, y = ~df1$State,type="bar", #~df1$Crude.Rate
marker = list(size = 10,
color = 'rgba(255, 182, 193, .9)',
line = list(color = 'rgba(152, 0, 0, .8)',
width = 2)))
fig <- fig %>% layout(title = 'Styled Scatter',
yaxis = list(zeroline = FALSE),
xaxis = list(zeroline = FALSE)
)
fig
#
# fig1 <- ggplot(aes( x = ICD.Chapter, y = Crude.Rate),data = df) +
# geom_bar(stat = 'identity') +
# ylab('Total Deaths') +
# xlab('') +
# ggtitle('Causes of Deaths ') +
# theme(axis.text.x = element_text(angle = 90, hjust = 0.5))
#
# fig1
fig1 <- ggplot(aes( x = reorder(State,-Crude.Rate), y = Crude.Rate),data = df1) +
geom_bar(stat = 'identity') +
ylab('Total Deaths') +
xlab('') +
ggtitle('Causes of Death only Neoplasms ') +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
fig1
Question 1: As a researcher, you frequently compare mortality rates from particular causes across different States. You need a visualization that will let you see (for 2010 only) the crude mortality rate, across all States, from one cause (for example, Neoplasms, which are effectively cancers). Create a visualization that allows you to rank States by crude mortality for each cause of death.
ANSWER: I have used Shiny Module below to create a dashboard to show the visualiztion for Question 1. The user can select the value of the Cause to view the plot in a TABPANEL layout.
MY SHINY APP URL - https://banuboopalan.shinyapps.io/BanuMortalityQuestion1/
# Define UI for application that draws a histogram
library(shiny)
df3 <- df %>%
filter(., Year == (2004:2010) ) %>% #, ICD.Chapter == flag| ICD.Chapter == "External causes of morbidity and mortality"
arrange(desc(State,ICD.Chapter), Crude.Rate)
head(df3)
## ICD.Chapter State Year Deaths Population
## 1 External causes of morbidity and mortality WY 2004 366 509106
## 2 External causes of morbidity and mortality WY 2009 438 559851
## 3 External causes of morbidity and mortality WY 2007 431 534876
## 4 External causes of morbidity and mortality WY 2005 424 514157
## 5 External causes of morbidity and mortality WY 2006 439 522667
## 6 External causes of morbidity and mortality WY 2010 497 563626
## Crude.Rate
## 1 71.9
## 2 78.2
## 3 80.6
## 4 82.5
## 5 84.0
## 6 88.2
ggplot(df3, aes(x=Year,y=Crude.Rate)) +
geom_bar(aes(fill = reorder(ICD.Chapter,Crude.Rate)), position ="dodge",
stat = "identity") +
ggtitle("Crude Rate by Year") +
ylab(label = "Crude Rate" )+
scale_fill_discrete(name = "Cause of Death")
ui <- fluidPage(
# Application title
titlePanel("Mortality Dashboard"),
sidebarLayout(
sidebarPanel(
selectInput(
inputId = "react_ICD.Chapter",
label = "Select Cause",
choices = unique(df3$ICD.Chapter)),
selectInput(inputId = "react_Year",
label = "Select Year",
choices = unique(df3$Year))
),
mainPanel(
tabsetPanel(
tabPanel("Summary", dataTableOutput('table')),
tabPanel("Plot", plotOutput("plot1")),
tabPanel("Plot", plotOutput("plot2"))
)
)
)
)
# Define server logic required
server <- function(input, output) {
selections = reactive ({
req(input$react_ICD.Chapter)
req(input$react_Year)
filter(df3,ICD.Chapter == input$react_ICD.Chapter) %>% filter(Year %in% input$react_Year)
})
output$plot1 <- renderPlot ({
ggplot(data = selections(),aes(y = reorder(State, Crude.Rate), x = Crude.Rate,
)) +
geom_col(fill = 'skyblue') +
theme_minimal() +
labs(y = 'State')})
output$plot2 <- renderPlot ({
ggplot(df3, aes(x=Year,y=Crude.Rate)) +
geom_bar(aes(fill = reorder(ICD.Chapter,Crude.Rate)), position ="dodge",
stat = "identity") +
ggtitle("Crude Rate by Year") +
ylab(label = "Crude Rate" )+
scale_fill_discrete(name = "Cause of Death")
})
output$table <- renderDataTable(df3)
}
# Run the application
shinyApp(ui = ui, server = server)
## PhantomJS not found. You can install it with webshot::install_phantomjs(). If it is installed, please make sure the phantomjs executable can be found via the PATH variable.
Question 2: Often you are asked whether particular States are improving their mortality rates (per cause) faster than, or slower than, the national average. Create a visualization that lets your clients see this for themselves for one cause of death at the time. Keep in mind that the national average should be weighted by the national population.
Reference - https://moderndata.plotly.com/dashboards-in-r-with-shiny-plotly/
ANSWER : Using the above example code, I am having issues to figure out this code for a graphoutput
graphOutput <- function(inputId, width="100%", height="550px") {
tagList(
singleton(tags$head(
tags$script(src="plotlyGraphWidget.js")
)),
tags$iframe(id=inputId, src="https://plot.ly/~playground/7.embed",
class="graphs", style="border:none;", seamless=TRUE, width=width, height=height)
)
}
renderGraph <- function(expr, env=parent.frame(), quoted=FALSE) {
## This gets called when inputs change --
## Place data wrangling code in here
## and pass the result to the client
## to be graphed.
installExprFunction(expr, "func", env, quoted)
function(){
data = func();
return(data)
}
}
ui2 <- shinyUI(fluidPage(
# Application title
titlePanel("Mortality question2"),
# Sidebar with a slider input for number of bins
sidebarPanel(
h3("Mortality question2"),
selectInput(inputId= "name",
#inputId = "react_State",
label = "Select state",
choices = unique(df$State)
#multiple = T
#selected = "AL"
),
selectInput(inputId = "cause",
#inputId = "react_State",
label = "Select cause",
choices = unique(df$ICD.Chapter)
#selected = "Neoplasms"
),
#Term plot
plotOutput("Plot", height = 200),
),
#Show a plot of the generated distribution
mainPanel(
graphOutput("gg")
)
# mainPanel(
# tabsetPanel(
#
# tabPanel("Plot", plotOutput("trendPlot"))
# )
# )
#
)
)
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
s1 <- shinyServer(function(input, output, session) {
selections = reactive ({
req(input$name)
req(input$cause)
filter(df,State %in% input$name) %>% filter(ICD.Chapter %in% input$cause)
})
output$trendPlot <- renderGraph({
if (length(input$name)==0) print("Please select at least one State")
else {
df_trend <- df %>%
filter(State %in% input$name) %>%
filter(ICD.Chapter%in% input$cause)
head(df_trend,5)
ggideal_point <- ggplot(df_trend) +
geom_line(aes(x=Year, y=Crude.Rate, by=ICD.Chapter, color=ICD.Chapter)) +
labs(x = "Year") +
labs(y = "Crude.Rate") +
labs(title = "Crude.Rate") +
scale_colour_hue("clarity",l=70, c=150) +
theme_few()
# Year range
min_Year <- min(df_trend$Year)
max_Year <- max(df_trend$Year)
# use gg2list() to convert from ggplot->plotly
gg <- gg2list(ggideal_point)
# Send this message up to the browser client, which will get fed through to
# Plotly's javascript graphing library embedded inside the graph
return(list(
list(
id="trendPlot",
task="newPlot",
data=gg$data,
layout=gg$layout
)
))
}
})
})
shinyApp(ui = ui2, server = s1)