Synopsis: This document describes the effects of storms and severe weather events on public health and economic outcomes. Data were downloaded from a publicly available NOAA dataset. Public health outcomes were described using the total number of injuries and fatalities per event type, while economic outcomes were described using the cost of property and crop damage. Due to the large number of severe weather event types, I only depicted the top 15 event types that caused the greatest adverse health outcomes (total injuries + fatalities) and the greatest economic consequences (total property + crop damage). Missing data were not included in this specific analysis. Although these values may be imputed in a variety of ways, I did not want to risk any confusion during the peer review process.
Data Processing
# Download the compressed storm data once; an existing local copy is reused
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
local.archive <- "StormData.csv.bz2"
if (!file.exists(local.archive)) {
  download.file(url, destfile = local.archive, mode = "wb")
}
# Import: read_csv() decompresses the .bz2 archive on the fly
storm.df <- read_csv(local.archive)
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl (1): COUNTYENDN
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Store the categorical columns as factors, one column at a time
factor.vars <- c("TIME_ZONE", "STATE", "EVTYPE", "CROPDMGEXP", "PROPDMGEXP")
for (factor.var in factor.vars) {
  storm.df[[factor.var]] <- as.factor(storm.df[[factor.var]])
}
############################### Data processing: Question 1 ######################
# Q1: Across the United States, which types of events are most harmful with respect to population health?
# Note to reviewer: Health here is being defined as injuries and fatalities. I describe these using the total number, mean, and SD of these events. I also calculate the total number of injuries + fatalities as a broad summary measure
# Make a dataframe containing summary statistics.
# Calculated the total sum of injuries/fatalities per event
# Calculated the Mean and SD of the number of injuries/fatalities per event. May not give full picture since these events may not occur in (densely) populated areas.
# One row per event type: number of records, then total / mean / SD of
# injuries and of fatalities (mean and SD rounded to 2 decimals, NAs ignored),
# and finally the combined injuries + fatalities count.
pop.health <- summarise(
  group_by(storm.df, EVTYPE),
  N_Events          = n(),
  InjuriesTotal     = sum(INJURIES, na.rm = TRUE),
  InjuriesMean      = round(mean(INJURIES, na.rm = TRUE), 2),
  InjuriesSD        = round(sd(INJURIES, na.rm = TRUE), 2),
  FatalitiesTotal   = sum(FATALITIES, na.rm = TRUE),
  FatalitiesMean    = round(mean(FATALITIES, na.rm = TRUE), 2),
  FatalitiesSD      = round(sd(FATALITIES, na.rm = TRUE), 2),
  TotalHealthImpact = FatalitiesTotal + InjuriesTotal
)
# Order event types from highest to lowest combined fatalities + injuries and
# keep the first 15 rows
top.concerns <- pop.health %>%
  arrange(desc(TotalHealthImpact)) %>%
  slice_head(n = 15)
# Reshape to long form for plotting: one row per event type and outcome
# (Injuries / Fatalities), with Total, Mean and SD as value columns.
# Outcome is a factor ordered Injuries, then Fatalities.
top.concerns.l <- top.concerns %>%
  pivot_longer(
    cols = InjuriesTotal:FatalitiesSD,
    names_to = c("Outcome", ".value"),
    names_pattern = "(Injuries|Fatalities)(Total|Mean|SD)"
  ) %>%
  mutate(Outcome = factor(Outcome, c("Injuries", "Fatalities")))
# Concise summary table of the total number of injuries and fatalities per
# event type, with reader-friendly column headers
top.concerns2 <- top.concerns %>%
  select(
    `Event` = EVTYPE,
    `Number of Events` = N_Events,
    `Total Number of Injuries` = InjuriesTotal,
    `Total Number of Fatalities` = FatalitiesTotal
  )
############################### Data processing: Question 2 ######################
# Q2: Across the United States, which types of events have the greatest economic consequences?
# The crop and property damage amounts come with an exponent code column.
# List the distinct codes in each column before mapping them to multipliers.
levels(storm.df[["CROPDMGEXP"]])
## [1] "?" "0" "2" "B" "k" "K" "m" "M"
levels(storm.df[["PROPDMGEXP"]])
## [1] "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K" "m" "M"
# Translate the damage exponent codes into numeric multipliers.
#
# exp_multiplier() takes one exponent column (factor or character) and returns
# a numeric vector of the same length:
#   H/h -> 1e2, K/k -> 1e3, M/m -> 1e6, B/b -> 1e9, digit d -> 10^d,
#   any other code ("-", "?", "+") or a missing value -> NA.
#
# Codes are converted with as.character() before any numeric conversion:
# as.numeric() applied directly to a factor returns the internal level index,
# not the digit shown in the label. The same helper is used for both columns so
# the property multiplier is always derived from PROPDMGEXP itself.
# NOTE(review): digit codes are read as powers of ten, as the original code
# intended - confirm against the NOAA storm data documentation.
exp_multiplier <- function(exp_code) {
  code <- toupper(as.character(exp_code))
  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Name lookup yields NA for unknown or missing codes
  mult <- unname(letter_mult[code])
  has_digit <- code %in% as.character(0:9)
  mult[has_digit] <- 10^as.numeric(code[has_digit])
  mult
}
econ.df <- storm.df %>%
  mutate(crop.mult = exp_multiplier(CROPDMGEXP),
         prop.mult = exp_multiplier(PROPDMGEXP))
# Scale each reported damage amount by its multiplier to get numeric costs,
# then add property and crop costs into a per-record total
econ.df <- econ.df %>%
  mutate(prop.cor = PROPDMG * prop.mult,
         crop.cor = CROPDMG * crop.mult,
         tot.dam = prop.cor + crop.cor)
# Per event type: number of records, total property cost, total crop cost
# (NAs ignored in both sums) and their combined total
econ.dmg <- summarise(
  group_by(econ.df, EVTYPE),
  N_Events        = n(),
  PropDmgTotal    = sum(prop.cor, na.rm = TRUE),
  CropDmgTotal    = sum(crop.cor, na.rm = TRUE),
  TotalEconImpact = PropDmgTotal + CropDmgTotal
)
# Order event types from highest to lowest combined economic cost and keep the
# first 15 rows
top.econ <- econ.dmg %>%
  arrange(desc(TotalEconImpact)) %>%
  slice_head(n = 15)
# Reshape to long form for plotting: one row per event type and damage type.
# The combined total is dropped first; Type becomes a factor whose levels are
# relabelled CropDamage / PropertyDamage.
top.econ.l <- top.econ %>%
  select(-c("TotalEconImpact")) %>%
  pivot_longer(cols = c("PropDmgTotal", "CropDmgTotal"),
               names_to = "Type", values_to = "EconomicDamage")
top.econ.l$Type <- factor(top.econ.l$Type,
                          levels = c("CropDmgTotal", "PropDmgTotal"),
                          labels = c("CropDamage", "PropertyDamage"))
# Copy of the top-15 economic summary with descriptive headers for the kable table
top.econ2 <- setNames(
  top.econ,
  c("Event", "Number of Events", "Total Property Damage (USD)",
    "Total Crop Damage (USD)", "Total Economic Damage (USD)")
)
Results
################################### Question 1 ######################################
# Figure 1: one panel per top-15 event type, with a bar for total injuries and
# a bar for total fatalities. Each panel has its own y scale.
fig1.bar <- ggplot(top.concerns.l,
                   mapping = aes(x = Outcome, y = Total, fill = Outcome)) +
  geom_col(position = "dodge") +
  facet_wrap(~EVTYPE, ncol = 3, scales = "free_y") +
  labs(x = "Outcome",
       y = "Number of Observations",
       title = "Most Harmful Events: Injuries and Fatalities") +
  theme_bw() +
  theme(strip.background = element_rect(colour = "black", fill = "white"),
        plot.title = element_text(hjust = 0.5))
fig1.bar
# Table 1: public health outcomes for the top 15 event types as a centred,
# striped, hover-highlighted table
health.table <- kable_styling(
  kbl(top.concerns2, align = "c"),
  bootstrap_options = c("striped", "hover"),
  position = "center"
)
health.table
| Event | Number of Events | Total Number of Injuries | Total Number of Fatalities |
|---|---|---|---|
| TORNADO | 60652 | 91346 | 5633 |
| EXCESSIVE HEAT | 1678 | 6525 | 1903 |
| TSTM WIND | 219944 | 6957 | 504 |
| FLOOD | 25326 | 6789 | 470 |
| LIGHTNING | 15755 | 5230 | 816 |
| HEAT | 767 | 2100 | 937 |
| FLASH FLOOD | 54278 | 1777 | 978 |
| ICE STORM | 2006 | 1975 | 89 |
| THUNDERSTORM WIND | 82563 | 1488 | 133 |
| WINTER STORM | 11433 | 1321 | 206 |
| HIGH WIND | 20212 | 1137 | 248 |
| HAIL | 288661 | 1361 | 15 |
| HURRICANE/TYPHOON | 88 | 1275 | 64 |
| HEAVY SNOW | 15708 | 1021 | 127 |
| WILDFIRE | 2761 | 911 | 75 |
######################################## Question 2 ##########################################
# Figure 2: horizontal bars of total damage ($) for the top 15 event types,
# with one panel for property damage and one for crop damage. Bars are coloured
# by event type; the legend is hidden because the y axis already names each one.
# The x-axis label previously read "Econonomic"; the spelling is corrected here.
fig2.bar <- ggplot(top.econ.l,
                   mapping = aes(x = EconomicDamage, y = EVTYPE, fill = EVTYPE)) +
  geom_col(position = "dodge") +
  facet_wrap(~Type, ncol = 1, scales = "free_y") +
  labs(x = "Total Economic Consequences ($)",
       y = "Event Type",
       title = "Events with Greatest Economic Consequences") +
  theme_bw() +
  theme(strip.background = element_rect(colour = "black", fill = "white"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none")
fig2.bar
# Table 2: economic damage for the top 15 event types as a centred, striped,
# hover-highlighted table
econ.table <- kable_styling(
  kbl(top.econ2, align = "c"),
  bootstrap_options = c("striped", "hover"),
  position = "center"
)
econ.table
| Event | Number of Events | Total Property Damage (USD) | Total Crop Damage (USD) | Total Economic Damage (USD) |
|---|---|---|---|---|
| FLOOD | 25326 | 144657709800 | 5661968450 | 150319678250 |
| HURRICANE/TYPHOON | 88 | 69305840000 | 2607872800 | 71913712800 |
| TORNADO | 60652 | 56937160480 | 414969110 | 57352129590 |
| STORM SURGE | 261 | 43323536000 | 5000 | 43323541000 |
| HAIL | 288661 | 15732267220 | 3025956450 | 18758223670 |
| FLASH FLOOD | 54278 | 16140861510 | 1421317100 | 17562178610 |
| DROUGHT | 2488 | 1046106000 | 13972566000 | 15018672000 |
| HURRICANE | 174 | 11868319010 | 2741910000 | 14610229010 |
| RIVER FLOOD | 173 | 5118945500 | 5029459000 | 10148404500 |
| ICE STORM | 2006 | 3944927810 | 5022113500 | 8967041310 |
| TROPICAL STORM | 690 | 7703890550 | 678346000 | 8382236550 |
| WINTER STORM | 11433 | 6688497250 | 26944000 | 6715441250 |
| HIGH WIND | 20212 | 5270046260 | 638571300 | 5908617560 |
| WILDFIRE | 2761 | 4765114000 | 295472800 | 5060586800 |
| TSTM WIND | 219944 | 4493028440 | 554007350 | 5047035790 |