library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.1
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(readr)
library(dplyr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:plotly':
##
## arrange, mutate, rename, summarise
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(RColorBrewer)
# Load the firefighter fatality records from database.csv.
# The path is built with file.path() instead of calling setwd(): changing the
# working directory is a global side effect that affects every later relative
# path in the session, and it is not needed just to read one file.
# NOTE(review): data_dir is still a machine-specific absolute path; point it
# at the project folder (or use a project-relative path) on other machines.
data_dir <- "/Users/tiffanyking/Desktop/Data 110/Final Project Data 110"
firefighters <- read_csv(file.path(data_dir, "database.csv"))
## Warning: Missing column names filled in: 'X14' [14]
## Parsed with column specification:
## cols(
## `First Name` = col_character(),
## `Last Name` = col_character(),
## Age = col_character(),
## Rank = col_character(),
## Classification = col_character(),
## `Date of Incident` = col_character(),
## `Date of Death` = col_character(),
## `Cause Of Death` = col_character(),
## `Nature Of Death` = col_character(),
## Duty = col_character(),
## Activity = col_character(),
## Emergency = col_character(),
## `Property Type` = col_character(),
## X14 = col_character()
## )
# Print the structure of the freshly read data: each column's name, type and
# first few values, plus the readr column specification attached to the tibble.
str(firefighters)
## tibble [2,005 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ First Name : chr [1:2005] "Robert" "Lee" "Ronald" "Allen" ...
## $ Last Name : chr [1:2005] "Pollard" "Purdy" "Osadacz" "Streeter" ...
## $ Age : chr [1:2005] "64" "57" "36" "58" ...
## $ Rank : chr [1:2005] "Firefighter" "Pump Operator/Inspector" "First Assistant Chief" "Firefighter" ...
## $ Classification : chr [1:2005] "Volunteer" "Volunteer" "Volunteer" "Volunteer" ...
## $ Date of Incident: chr [1:2005] "Dec 31, 1999" "Jan 8, 2000" "Jan 11, 2000" "Jan 11, 2000" ...
## $ Date of Death : chr [1:2005] "Jan 1, 2000" "Jan 8, 2000" "Jan 11, 2000" "Jan 11, 2000" ...
## $ Cause Of Death : chr [1:2005] "Stress/Overexertion" "Stress/Overexertion" "Stress/Overexertion" "Stress/Overexertion" ...
## $ Nature Of Death : chr [1:2005] "Stroke" "Heart Attack" "Heart Attack" "Heart Attack" ...
## $ Duty : chr [1:2005] "Response" "On-Scene Fire" "Response" "On-Scene Fire" ...
## $ Activity : chr [1:2005] "Vehicle Passenger" "Advance Hose Lines/Fire Attack" "Advance Hose Lines/Fire Attack" "Advance Hose Lines/Fire Attack" ...
## $ Emergency : chr [1:2005] "Yes" "Yes" "Yes" "Yes" ...
## $ Property Type : chr [1:2005] "Outdoor Property" "Residential" "Street/Road" "Outdoor Property" ...
## $ X14 : chr [1:2005] NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. `First Name` = col_character(),
## .. `Last Name` = col_character(),
## .. Age = col_character(),
## .. Rank = col_character(),
## .. Classification = col_character(),
## .. `Date of Incident` = col_character(),
## .. `Date of Death` = col_character(),
## .. `Cause Of Death` = col_character(),
## .. `Nature Of Death` = col_character(),
## .. Duty = col_character(),
## .. Activity = col_character(),
## .. Emergency = col_character(),
## .. `Property Type` = col_character(),
## .. X14 = col_character()
## .. )
# Normalise the column names in one pass: lower-case them and replace each
# space with an underscore (e.g. "Date of Death" becomes "date_of_death").
names(firefighters) <- gsub(" ", "_", tolower(names(firefighters)))

# Age was read as text; convert it to numeric. Entries that are not valid
# numbers become NA (R emits a coercion warning when that happens).
firefighters$age <- as.numeric(firefighters$age)
## Warning: NAs introduced by coercion
# Keep only the records whose age could be parsed as a number.
fire <- filter(firefighters, !is.na(age))

# Inspect the filtered data: open it in the data viewer, then print
# per-column summaries.
view(fire)
summary(fire)
## first_name last_name age rank
## Length:1645 Length:1645 Min. :14.00 Length:1645
## Class :character Class :character 1st Qu.:36.00 Class :character
## Mode :character Mode :character Median :47.00 Mode :character
## Mean :46.33
## 3rd Qu.:56.00
## Max. :95.00
## classification date_of_incident date_of_death cause_of_death
## Length:1645 Length:1645 Length:1645 Length:1645
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## nature_of_death duty activity emergency
## Length:1645 Length:1645 Length:1645 Length:1645
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## property_type x14
## Length:1645 Length:1645
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Mean age at death for each job classification: one row per classification,
# with the mean stored in the grp.mean column.
mu <- ddply(
  fire,
  .variables = "classification",
  .fun = summarise,
  grp.mean = mean(age)
)
head(mu)
## classification grp.mean
## 1 Career 43.80424
## 2 Industrial 50.25000
## 3 Paid-on-Call 48.15385
## 4 Part-Time (Paid) 43.18750
## 5 Volunteer 49.30289
## 6 Wildland Contract 45.05000
# Histogram of age at death for Career vs. Volunteer firefighters, with the
# two groups drawn side by side (position = "dodge").
# The dashed black line marks the mean age over the plotted records, i.e.
# Career and Volunteer combined.
p1 <- fire %>%
  filter(classification %in% c("Career", "Volunteer")) %>%
  ggplot(aes(x = age, fill = classification, color = classification)) +
  ggtitle("Age of Death by Classification") +
  xlab("Age") +
  ylab("Total Count") +
  geom_histogram(position = "dodge", alpha = 0.4) +
  geom_vline(aes(xintercept = mean(age)),
             color = "black", linetype = "dashed", size = 1) +
  scale_color_brewer(palette = "Dark2") +
  scale_fill_brewer(palette = "Dark2") +
  # theme_linedraw() is a complete theme and replaces every theme setting
  # added before it, so the legend tweak has to come after it to take effect.
  theme_linedraw() +
  theme(legend.position = "top")
p1
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Box plot of age at death for each job classification.
# The classification labels must match the data exactly: the data uses
# "Part-Time (Paid)" (with a space) and "Wildland Contract" (no leading
# space); any label that does not match is silently dropped by the filter.
p2 <- fire %>%
  filter(classification %in% c(
    "Career", "Industrial", "Paid-on-Call", "Part-Time (Paid)",
    "Wildland Contract", "Wildland Full-Time", "Wildland Part-Time",
    "Volunteer"
  )) %>%
  ggplot(aes(x = classification, y = age, fill = classification)) +
  geom_boxplot() +
  labs(
    title = "Age of Death by Job Classification",
    x = "Job Classification",
    y = "Age"
  ) +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal()
# theme_minimal() is already part of p2, so printing p2 is enough.
p2
I chose the firefighter fatality dataset from Kaggle. Before deciding on a topic, I searched solely on Kaggle for datasets and narrowed my choice down to two topics: foster care children in New York City and firefighter fatalities within the US (from FEMA). FEMA, the Federal Emergency Management Agency, is an agency of the United States Department of Homeland Security, initially created under President Jimmy Carter by Presidential Reorganization Plan No. 3 of 1978 and implemented by two Executive Orders on April 1, 1979. I noticed that the firefighter dataset has several categorical and quantitative variables. Personally, I chose this topic because I thought it would be interesting to explore firefighters' jobs/duties, the causes of their deaths, and their ages. At first glance, I would have thought that a lot of firefighters died from inhaling smoke or being trapped in a building. In my research, I found an article that examined mortality among Fire Department of the City of New York rescue and recovery workers exposed to the World Trade Center disaster, 2001–2017. Interestingly enough, Colbeth et al. found that firefighters who experienced 9/11 have consistently been associated with elevated rates of physical and mental health morbidities/death. Over time, exposure to dust/smoke causes long-term airway injury, which is linked to several health issues. In my plots/charts, I explored several variables. My first plot is a histogram, which shows the age of death by classification. The dashed black line represents the mean. My second plot is a box plot, which shows the age of death across various job classifications. Then I decided to use Tableau to make an interactive line chart and stacked bar plot. Please see the link below. In the line chart, you can see that in 2001 there were massive casualties due to the 9/11 terrorist attack; many firefighters died due to trauma, and many others died from heart attacks and heat exhaustion.
My last plot was a stacked bar plot, which shows firefighter fatalities over the years (including property types and cause of death). I thought residential properties would account for a higher number of fatalities in terms of fire location/property type. Overall, I wish I had explored the property type (residential, office building), or where the accidents occurred, and found the average for each.
Tableau Link: https://public.tableau.com/profile/tiffany.king#!/vizhome/Project3_16081364566940/Dashboard1?publish=yes
References Colbeth, Hilary L et al. “Mortality among Fire Department of the City of New York Rescue and Recovery Workers Exposed to the World Trade Center Disaster, 2001-2017.” International journal of environmental research and public health vol. 17,17 6266. 28 Aug. 2020, doi:10.3390/ijerph17176266