knitr::opts_chunk$set(echo = TRUE)
# Load required packages
pacman::p_load(tidyverse, kableExtra)
Synopsis: This document describes effects of storms and severe weather events on public health and economic outcomes. Data were downloaded from a publicly available dataset from NOAA. Public health outcomes were described using the total number of injuries and fatalities per event, while economic outcomes were described using the cost of property and crop damage. Due to the large number of severe weather events, I only depicted the top 15 events that caused the greatest adverse health outcomes (total injuries + fatalities) and greatest economic consequences (total property + crop damage). Missing data were not included in this specific analysis. Although these values may be imputed in a variety of ways, I did not want to risk any confusion during the peer review process. The events with the largest effect on population health include tornadoes and excessive heat, while the events with the largest effect on the economy include flooding and hurricanes.
Data Processing
# Fetch the compressed storm data set, but only when no local copy exists yet,
# so re-running the document does not download the file again
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("StormData.csv.bz2")) {
  download.file(url = url, destfile = "StormData.csv.bz2", mode = "wb")
}
# read_csv() decompresses the .bz2 archive on the fly, so it is read directly
storm.df <- read_csv(file = "StormData.csv.bz2")
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl (1): COUNTYENDN
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Columns holding categorical codes; store each of them as a factor
factor.vars <- c("TIME_ZONE", "STATE", "EVTYPE", "CROPDMGEXP", "PROPDMGEXP")
storm.df <- storm.df %>%
  mutate(across(all_of(factor.vars), as.factor))
############################### Data processing: Question 1 ######################
# Q1: Across the United States, which types of events are most harmful with respect to population health?
# Note to reviewer: Health here is defined as injuries and fatalities. For each
# event type the summary below holds the number of records, the total injuries,
# the total fatalities, and injuries + fatalities as one broad summary measure.
# Missing counts are ignored when summing.
pop.health <- summarise(
  group_by(storm.df, EVTYPE),
  N_Events = n(),
  InjuriesTotal = sum(INJURIES, na.rm = TRUE),
  FatalitiesTotal = sum(FATALITIES, na.rm = TRUE),
  TotalHealthImpact = FatalitiesTotal + InjuriesTotal
)
# Order event types from largest to smallest Total Health Impact
# (fatalities + injuries) and keep the first 15
top.concerns <- pop.health %>%
  arrange(desc(TotalHealthImpact)) %>%
  slice_head(n = 15)
# Reshape to long format for plotting: one row per event type and outcome, with
# the outcome name ("Injuries"/"Fatalities") in Outcome and its count in Total.
# Outcome is a factor so that Injuries is drawn before Fatalities.
top.concerns.l <- top.concerns %>%
  pivot_longer(cols = c(InjuriesTotal, FatalitiesTotal),
               names_to = "Outcome",
               names_pattern = "(Injuries|Fatalities)Total",
               values_to = "Total") %>%
  mutate(Outcome = factor(Outcome, levels = c("Injuries", "Fatalities")))
# Make a concise summary table of injuries and fatalities per event.
# top.concerns has five columns; each one is renamed explicitly by its source
# name so that no header is left undefined (a shorter vector of names leaves
# the last column labelled NA) and the labels do not depend on column order.
top.concerns2 <- top.concerns %>%
  rename("Event" = EVTYPE,
         "Number of Events" = N_Events,
         "Total Number of Injuries" = InjuriesTotal,
         "Total Number of Fatalities" = FatalitiesTotal,
         "Total Injuries + Fatalities" = TotalHealthImpact)
############################### Data processing: Question 2 ######################
# Q2: Across the United States, which types of events have the greatest economic consequences?
# Damage amounts are stored as a number plus a separate exponent code. List the
# distinct codes present in each exponent column before mapping them to numbers.
levels(storm.df[["CROPDMGEXP"]])
## [1] "?" "0" "2" "B" "k" "K" "m" "M"
levels(storm.df[["PROPDMGEXP"]])
## [1] "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K" "m" "M"
# Translate damage exponent codes into numeric multipliers.
#   exp.code: factor or character vector of exponent codes
#   returns:  unnamed numeric vector of the same length
# Mapping (case-insensitive): h = 1e2, k = 1e3, m = 1e6, b = 1e9, and a digit
# d = 10^d (digits are treated as powers of ten, as in the original analysis).
# Any other code ("-", "?", "+") and missing values give NA.
# The codes are converted to character before the lookup because as.numeric()
# on a factor returns the internal level index, not the digit that is shown.
dmgexp.multiplier <- function(exp.code) {
  lookup <- c(h = 1e2, k = 1e3, m = 1e6, b = 1e9,
              setNames(10^(0:9), as.character(0:9)))
  unname(lookup[tolower(as.character(exp.code))])
}

# Attach one multiplier column per damage type; each is derived from its own
# exponent column (crop from CROPDMGEXP, property from PROPDMGEXP)
econ.df <- storm.df %>%
  mutate(crop.mult = dmgexp.multiplier(CROPDMGEXP),
         prop.mult = dmgexp.multiplier(PROPDMGEXP))
# Scale the raw property and crop damage figures by their multipliers to get
# numeric cost columns, then add the two together. tot.dam is NA whenever
# either component is NA.
econ.df <- econ.df %>%
  mutate(prop.cor = PROPDMG * prop.mult,
         crop.cor = CROPDMG * crop.mult,
         tot.dam = prop.cor + crop.cor)
# Per event type: number of records, total property damage, total crop damage
# (missing costs are ignored when summing), and the two totals combined
econ.dmg <- summarise(
  group_by(econ.df, EVTYPE),
  N_Events = n(),
  PropDmgTotal = sum(prop.cor, na.rm = TRUE),
  CropDmgTotal = sum(crop.cor, na.rm = TRUE),
  TotalEconImpact = PropDmgTotal + CropDmgTotal
)
# Order event types from largest to smallest Total Economic Impact and keep
# the first 15
top.econ <- econ.dmg %>%
  arrange(desc(TotalEconImpact)) %>%
  slice_head(n = 15)
# Reshape to long format for plotting: drop the combined total, then stack the
# property and crop totals into one EconomicDamage column labelled by Type.
# Type becomes a factor with levels CropDamage, PropertyDamage.
top.econ.l <- top.econ %>%
  select(-TotalEconImpact) %>%
  pivot_longer(cols = c(PropDmgTotal, CropDmgTotal),
               names_to = "Type",
               values_to = "EconomicDamage") %>%
  mutate(Type = factor(Type,
                       levels = c("CropDmgTotal", "PropDmgTotal"),
                       labels = c("CropDamage", "PropertyDamage")))
# Copy of the top-15 economic summary with reader-friendly headers for the
# kable table (one label per column, in column order)
top.econ2 <- setNames(
  top.econ,
  c("Event", "Number of Events", "Total Property Damage (USD)",
    "Total Crop Damage (USD)", "Total Economic Damage (USD)")
)
Results
################################# Question 1 ######################################
# Figure 1: bar chart of injuries and fatalities for each of the top 15 events.
# One panel per event type, three panels per row; each panel has its own y-axis
# scale.
fig1.bar <- ggplot(data = top.concerns.l,
                   mapping = aes(x = Outcome, y = Total, fill = Outcome)) +
  geom_col(position = "dodge") +
  facet_wrap(vars(EVTYPE), ncol = 3, scales = "free_y") +
  labs(title = "Most Harmful Events: Injuries and Fatalities",
       x = "Outcome",
       y = "Number of Observations") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        strip.background = element_rect(colour = "black", fill = "white"))
fig1.bar
# Figure 1: The top 15 events with the biggest impact on population health. Tornadoes and excessive heat are the most dangerous events.
# Table 1: public health outcomes for the top 15 events as a centred, striped
# table with hover highlighting
health.table <- kable_styling(
  kbl(top.concerns2, align = "c"),
  bootstrap_options = c("striped", "hover"),
  position = "center"
)
health.table
| Event | Number of Events | Total Number of Injuries | Total Number of Fatalities | NA |
|---|---|---|---|---|
| TORNADO | 60652 | 91346 | 5633 | 96979 |
| EXCESSIVE HEAT | 1678 | 6525 | 1903 | 8428 |
| TSTM WIND | 219944 | 6957 | 504 | 7461 |
| FLOOD | 25326 | 6789 | 470 | 7259 |
| LIGHTNING | 15755 | 5230 | 816 | 6046 |
| HEAT | 767 | 2100 | 937 | 3037 |
| FLASH FLOOD | 54278 | 1777 | 978 | 2755 |
| ICE STORM | 2006 | 1975 | 89 | 2064 |
| THUNDERSTORM WIND | 82563 | 1488 | 133 | 1621 |
| WINTER STORM | 11433 | 1321 | 206 | 1527 |
| HIGH WIND | 20212 | 1137 | 248 | 1385 |
| HAIL | 288661 | 1361 | 15 | 1376 |
| HURRICANE/TYPHOON | 88 | 1275 | 64 | 1339 |
| HEAVY SNOW | 15708 | 1021 | 127 | 1148 |
| WILDFIRE | 2761 | 911 | 75 | 986 |
# Table 1: The top 15 events with the biggest impact on population health. Tornadoes and excessive heat are the most dangerous events.
############################### Question 2 ##################################
# Figure 2: horizontal bar chart of the top 15 events with the greatest
# economic consequences. One panel per damage type (crop, property), stacked
# vertically; bars show the total damage ($) per event type. The legend is
# hidden because the event names already label the y-axis.
fig2.bar <- ggplot(top.econ.l,
                   mapping = aes(x = EconomicDamage, y = EVTYPE, fill = EVTYPE)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~Type, ncol = 1, scales = "free_y") +
  labs(x = "Total Economic Consequences ($)", y = "Event Type",
       title = "Events with Greatest Economic Consequences") +
  theme_bw() +
  theme(strip.background = element_rect(colour = "black", fill = "white"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none")
fig2.bar
# Figure 2: The top 15 events with the biggest economic consequences. Flooding and hurricanes caused the most economic damage.
# Table 2: economic damage for the top 15 events as a centred, striped table
# with hover highlighting
econ.table <- kable_styling(
  kbl(top.econ2, align = "c"),
  bootstrap_options = c("striped", "hover"),
  position = "center"
)
econ.table
| Event | Number of Events | Total Property Damage (USD) | Total Crop Damage (USD) | Total Economic Damage (USD) |
|---|---|---|---|---|
| FLOOD | 25326 | 144657709800 | 5661968450 | 150319678250 |
| HURRICANE/TYPHOON | 88 | 69305840000 | 2607872800 | 71913712800 |
| TORNADO | 60652 | 56937160480 | 414969110 | 57352129590 |
| STORM SURGE | 261 | 43323536000 | 5000 | 43323541000 |
| HAIL | 288661 | 15732267220 | 3025956450 | 18758223670 |
| FLASH FLOOD | 54278 | 16140861510 | 1421317100 | 17562178610 |
| DROUGHT | 2488 | 1046106000 | 13972566000 | 15018672000 |
| HURRICANE | 174 | 11868319010 | 2741910000 | 14610229010 |
| RIVER FLOOD | 173 | 5118945500 | 5029459000 | 10148404500 |
| ICE STORM | 2006 | 3944927810 | 5022113500 | 8967041310 |
| TROPICAL STORM | 690 | 7703890550 | 678346000 | 8382236550 |
| WINTER STORM | 11433 | 6688497250 | 26944000 | 6715441250 |
| HIGH WIND | 20212 | 5270046260 | 638571300 | 5908617560 |
| WILDFIRE | 2761 | 4765114000 | 295472800 | 5060586800 |
| TSTM WIND | 219944 | 4493028440 | 554007350 | 5047035790 |
# Table 2: The top 15 events with the biggest economic consequences. Flooding and hurricanes caused the most economic damage.