New York City Leading Causes of Death from https://data.cityofnewyork.us/Health/New-York-City-Leading-Causes-of-Death/jb7j-dtam down load csv file to local computer for data analysis

set environment

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(DT)
options(dplyr.width = Inf)

read database

setwd("C:/Users/tbao/Desktop/CUNY MSDS notes/607/week 6/project 2/New York City Leading cause of Death")
getwd()
## [1] "C:/Users/tbao/Desktop/CUNY MSDS notes/607/week 6/project 2/New York City Leading cause of Death"
df <- read.csv("C:/Users/tbao/Desktop/CUNY MSDS notes/607/week 6/project 2/New York City Leading cause of Death/New_York_City_Leading_Causes_of_Death.csv", header=TRUE, sep=",")
datatable(df)

leading causes of death each year for men and women.

df_death <- df %>% subset(select = c(Sex,Year,Cause.of.Death,Count)) %>% 
  group_by(Sex,Year) %>%
  filter(Count == max(Count)) %>% distinct(Sex,Year,Cause.of.Death,Count)
head(df_death)
## # A tibble: 6 x 4
## # Groups:   Sex, Year [6]
##   Sex     Year Cause.of.Death    Count
##   <fct>  <int> <fct>             <int>
## 1 FEMALE  2010 DISEASES OF HEART  5351
## 2 MALE    2010 DISEASES OF HEART  4495
## 3 FEMALE  2011 DISEASES OF HEART  5016
## 4 MALE    2011 DISEASES OF HEART  4220
## 5 FEMALE  2007 DISEASES OF HEART  7050
## 6 MALE    2007 DISEASES OF HEART  5632

Display the leading causes of death each year for each ethnic group.

df_death_eth <- df %>% subset(select = c(Ethnicity,Year,Cause.of.Death,Count)) %>% 
  group_by(Ethnicity,Year) %>%
  filter(Count == max(Count)) %>% distinct(Ethnicity,Year,Cause.of.Death,Count)

head(df_death_eth)
## # A tibble: 6 x 4
## # Groups:   Ethnicity, Year [6]
##   Ethnicity                 Year Cause.of.Death      Count
##   <fct>                    <int> <fct>               <int>
## 1 NON-HISPANIC WHITE        2010 DISEASES OF HEART    5351
## 2 ASIAN & PACIFIC ISLANDER  2011 MALIGNANT NEOPLASMS   550
## 3 HISPANIC                  2011 DISEASES OF HEART    1348
## 4 NON-HISPANIC BLACK        2011 DISEASES OF HEART    2243
## 5 NON-HISPANIC WHITE        2011 DISEASES OF HEART    5016
## 6 ASIAN & PACIFIC ISLANDER  2007 MALIGNANT NEOPLASMS   528

Calculate which cause of death has reduced the most and which has increased the most in the years given

df_death_trends <- df %>% subset(select = c(Year,Cause.of.Death,Count)) %>%
  group_by(Year, Cause.of.Death, the_min = min(Count), the_max = max(Count), the_diff = (the_max - the_min)) %>% count(Year, Cause.of.Death, max(the_diff))

datatable(df_death_trends)

Calculate which cause of death has remained stable over the years given.

df_death_stable <- df %>% subset(select = c(Year,Cause.of.Death,Count)) %>%
  group_by(Year, Cause.of.Death, the_min = min(Count), the_max = max(Count), the_diff = (the_max - the_min)) %>% count(Year, Cause.of.Death, min(the_diff))

datatable(df_death_stable)