#Libraries required
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(shiny)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(rsconnect)
##
## Attaching package: 'rsconnect'
## The following object is masked from 'package:shiny':
##
## serverInfo
# Load the CDC mortality data set directly from the course repository
df <- read.csv(
  file = "https://raw.githubusercontent.com/charleyferrari/CUNY_DATA608/master/lecture3/data/cleaned-cdc-mortality-1999-2010-2.csv"
)
# Preview the first six rows of the data
head(df, n = 6L)
## ICD.Chapter State Year Deaths Population
## 1 Certain infectious and parasitic diseases AL 1999 1092 4430141
## 2 Certain infectious and parasitic diseases AL 2000 1188 4447100
## 3 Certain infectious and parasitic diseases AL 2001 1211 4467634
## 4 Certain infectious and parasitic diseases AL 2002 1215 4480089
## 5 Certain infectious and parasitic diseases AL 2003 1350 4503491
## 6 Certain infectious and parasitic diseases AL 2004 1251 4530729
## Crude.Rate
## 1 24.6
## 2 26.7
## 3 27.1
## 4 27.1
## 5 30.0
## 6 27.6
# Number of columns (second element of the data frame's dimensions)
dim(df)[2]
## [1] 6
# Names of the columns
names(df)
## [1] "ICD.Chapter" "State" "Year" "Deaths" "Population"
## [6] "Crude.Rate"
There are 6 variables in this dataset (ICD.Chapter, State, Year, Deaths, Population, Crude.Rate) and 9961 observations.
Question 1: As a researcher, you frequently compare mortality rates from particular causes across different States. You need a visualization that will let you see (for 2010 only) the crude mortality rate, across all States, from one cause (for example, Neoplasms, which are effectively cancers). Create a visualization that allows you to rank States by crude mortality for each cause of death.
# Keep only the rows recorded for the year 2010.
# which() drops NA comparisons, matching how subset() treats them.
rows_2010 <- which(df$Year == 2010)
df_2010 <- df[rows_2010, ]
# Crude rates for 2010 for Mental and behavioural disorders, listed
# alphabetically by state.
# arrange() takes the column itself: passing order(State) would sort the rows
# by permutation indices, which only looks right when the input is already
# sorted by state.
Data <- df %>%
  filter(Year == 2010, ICD.Chapter == "Mental and behavioural disorders") %>%
  arrange(State)
head(Data)
## ICD.Chapter State Year Deaths Population Crude.Rate
## 1 Mental and behavioural disorders AK 2010 184 710231 25.9
## 2 Mental and behavioural disorders AL 2010 2340 4779736 49.0
## 3 Mental and behavioural disorders AR 2010 1208 2915918 41.4
## 4 Mental and behavioural disorders AZ 2010 1989 6392017 31.1
## 5 Mental and behavioural disorders CA 2010 7851 37253956 21.1
## 6 Mental and behavioural disorders CO 2010 1821 5029196 36.2
# Horizontal bar chart of the 2010 crude mortality rate per state for a single
# cause of death, with the states ranked by rate.
mbd_cause <- "Mental and behavioural disorders"
# Restrict to the cause named in the title; plotting all of df_2010 would mix
# every cause of death into each state's bar.
mbd_2010 <- df_2010 %>%
  filter(ICD.Chapter == mbd_cause) %>%
  arrange(Crude.Rate)
MBD2 <- mbd_2010 %>%
  plot_ly(x = ~Crude.Rate, y = ~State, type = "bar", orientation = "h") %>%
  layout(
    title = list(
      text = paste0(mbd_cause, "\ncause-based 2010 State Crude Mortality Rates"),
      font = list(size = 10)
    ),
    xaxis = list(title = "Crude Rate"),
    yaxis = list(
      title = "States",
      # Fix the category order to the rate-sorted states so the bars are ranked
      categoryorder = "array",
      categoryarray = as.character(mbd_2010$State)
    )
  )
# Print the plot directly; wrapping a single plot in subplot() is unnecessary
# and, with its default titleX/titleY settings, drops the axis titles.
MBD2
# Distinct causes of death recorded in the 2010 data, and how many there are
Total_causes1 <- unique(df_2010[["ICD.Chapter"]])
length(Total_causes1)
## [1] 17
There are 17 diseases (causes of death) in the dataset.
# Distinct states recorded in the 2010 data, and how many there are
States <- unique(df_2010[["State"]])
length(States)
## [1] 51
There are 51 states in the dataset.
# Create user interface definition
# A single drop-down selects the cause of death; the plot is rendered below it.
ui1 <- fluidPage(
  titlePanel(title = "CDC Mortality Rates for 2010"),
  fluidRow(
    selectInput(
      inputId = "cause",
      label = "Cause of Death:",
      choices = sort(Total_causes1)
    )
  ),
  plotOutput(outputId = "plot1", height = 800)
)
# Create server logic
# Draws a lollipop chart of the 2010 crude mortality rate per state for the
# cause chosen in the "cause" input, with states ordered by rate.
server1 <- function(input, output) {
  # Rows of the 2010 data for the currently selected cause of death
  showrates <- reactive({
    req(input$cause)
    subset(df_2010, ICD.Chapter == input$cause)
  })

  output$plot1 <- renderPlot({
    rates <- showrates()
    ggplot(rates, aes(x = Crude.Rate, y = reorder(State, -Crude.Rate))) +
      # The axis has to start at 0 because the stems are drawn down to x = 0.
      # The 5 units of headroom belong to the upper limit only; adding them
      # outside c() would shift the lower limit to 5 as well.
      scale_x_continuous(
        limits = c(0, max(rates$Crude.Rate) + 5),
        expand = c(0, 0)
      ) +
      geom_segment(aes(yend = State), xend = 0, color = "red") +
      geom_point(size = 3, color = "blue") +
      theme_bw() +
      theme(
        panel.grid.major.y = element_blank(),
        axis.title = element_text(size = 14)
      ) +
      xlab("CDC Mortality Rate for 2010 year") +
      ylab("State") +
      ggtitle(input$cause)
  })
}
# Launch the Question 1 app from the UI and server defined above
shinyApp(
  ui = ui1,
  server = server1
)
ShinyApp link: https://ivanit.shinyapps.io/DATA_608_Module3/ (local development URL: http://127.0.0.1:5039/)
Question 2: Often you are asked whether particular States are improving their mortality rates (per cause) faster than, or slower than, the national average. Create a visualization that lets your clients see this for themselves for one cause of death at a time. Keep in mind that the national average should be weighted by the national population.
# Create user interface definition
# Two drop-downs (state and cause of death) sit in a sidebar; the comparison
# plot is rendered in the main panel.
ui <- fluidPage(
  titlePanel("Crude Mortality Rate Across All States Vs National Average"),
  # sidebarPanel() and mainPanel() are column elements meant to be placed
  # inside sidebarLayout(), which provides the enclosing row.
  sidebarLayout(
    # The sidebar width is left at its default: the argument expects a column
    # count between 1 and 12, not a CSS value such as "auto".
    sidebarPanel(
      # `selected` is omitted so the first choice is preselected by default.
      selectInput(
        "select1",
        label = strong("State"),
        choices = sort(unique(as.character(df$State)))
      ),
      selectInput(
        "select2",
        label = strong("Cause of Death"),
        choices = sort(unique(as.character(df$ICD.Chapter)))
      )
    ),
    mainPanel(
      plotOutput("distPlot")
    )
  )
)
# Create server logic
# For the selected state and cause of death, draws the state's crude mortality
# rate per year as bars and overlays the national rate as a red line.
server <- function(input, output) {
  # Rates for every state/cause/year, computed once rather than on each render.
  #   N_Crude_Rate: deaths per 100,000 summed over all rows of a year and
  #                 cause, i.e. the average weighted by population.
  #   S_Crude_Rate: deaths per 100,000 for the individual state.
  rates <- df %>%
    group_by(Year, ICD.Chapter) %>%
    mutate(
      # as.numeric() keeps the population total out of integer arithmetic
      N_Population = sum(as.numeric(Population)),
      N_Count = sum(Deaths),
      N_Crude_Rate = 10^5 * (N_Count / N_Population)
    ) %>%
    group_by(Year, ICD.Chapter, State) %>%
    mutate(
      S_Count = sum(Deaths),
      S_Crude_Rate = 10^5 * (S_Count / Population)
    ) %>%
    ungroup() %>%
    select(ICD.Chapter, State, Year, N_Crude_Rate, S_Crude_Rate)

  output$distPlot <- renderPlot({
    selected <- rates %>%
      filter(ICD.Chapter == input$select2, State == input$select1)

    # Show a message instead of an empty plot when the state has no rows for
    # the chosen cause.
    validate(
      need(nrow(selected) > 0, "No data for this state and cause of death.")
    )

    ggplot(selected) +
      geom_col(aes(x = Year, y = S_Crude_Rate)) +
      # The x axis shows years, so it is labelled accordingly
      labs(x = "Year", y = "Crude Mortality Rate") +
      geom_line(
        aes(x = Year, y = N_Crude_Rate, linetype = "National Average"),
        col = "red", lwd = 1
      ) +
      scale_linetype(name = NULL) +
      theme_minimal()
  })
}
# Launch the Question 2 app from the UI and server defined above
shinyApp(
  ui = ui,
  server = server
)
ShinyApp link: https://ivanit.shinyapps.io/DATA608_Module3_Q2/ (local development URL: http://127.0.0.1:5039/)