knitr::opts_chunk$set(echo = TRUE)

# Load required packages
pacman::p_load(tidyverse, kableExtra)

Synopsis: This document describes effects of storms and severe weather events on public health and economic outcomes. Data were downloaded from a publicly available dataset from NOAA. Public health outcomes were described using the total number of injuries and fatalities per event, while economic outcomes were described using the cost of property and crop damage. Due to the large number of severe weather events, I only depicted the top 15 events that caused the greatest adverse health outcomes (total injuries + fatalities) and greatest economic consequences (total property + crop damage). Missing data were not included in this specific analysis. Although these values may be imputed in a variety of ways, I did not want to risk any confusion during the peer review process. The events with the largest effect on population health include tornadoes and excessive heat, while the events with the largest effect on the economy include flooding and hurricanes.

Data Processing

# Fetch the compressed storm data once; the download is skipped when a local
# copy of the archive is already present in the working directory.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!file.exists("StormData.csv.bz2")) {
  download.file(url = url, destfile = "StormData.csv.bz2", mode = "wb")
}

# read_csv() decompresses the .bz2 archive on the fly while importing
storm.df <- read_csv(file = "StormData.csv.bz2")
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl  (1): COUNTYENDN
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Turn the categorical columns (imported as character) into factors
factor.vars <- c("TIME_ZONE", "STATE", "EVTYPE", "CROPDMGEXP", "PROPDMGEXP")
storm.df[factor.vars] <- map(storm.df[factor.vars], as.factor)


###############################  Data processing: Question 1  ######################

# Q1: Across the United States, which types of events are most harmful with respect to population health?

# Note to reviewer: Health here is being defined as injuries and fatalities. I describe these using the total number of injuries and the total number of fatalities per event type. I also calculate the total number of injuries + fatalities as a broad summary measure

# Per-event-type health summary: number of records, total injuries and total
# fatalities (missing values ignored), plus their combined total.
pop.health <- storm.df %>%
  group_by(EVTYPE) %>%
  summarise(
    N_Events = n(),
    InjuriesTotal = sum(INJURIES, na.rm = TRUE),
    FatalitiesTotal = sum(FATALITIES, na.rm = TRUE)
  ) %>%
  mutate(TotalHealthImpact = FatalitiesTotal + InjuriesTotal)

# Sort event types from highest to lowest combined injuries + fatalities and
# keep the first 15 rows
top.concerns <- pop.health %>%
  arrange(desc(TotalHealthImpact)) %>%
  head(n = 15)

# Reshape to long format for plotting: one row per event type and outcome,
# with the outcome label in `Outcome` and its count in `Total`.
top.concerns.l <- top.concerns %>%
  pivot_longer(cols = c("InjuriesTotal", "FatalitiesTotal"),
               names_to = "Outcome", values_to = "Total") %>%
  # Drop the "Total" suffix so the labels read "Injuries" / "Fatalities",
  # and fix the level order so injuries are drawn before fatalities
  mutate(Outcome = factor(str_remove(Outcome, "Total$"),
                          levels = c("Injuries", "Fatalities")))


# Presentation copy of the top-15 health summary with readable column headers.
# Columns are renamed by name rather than by position so that all five
# columns, including the combined injuries + fatalities total, receive a
# header (supplying only four positional names leaves the last header as NA).
top.concerns2 <- top.concerns %>%
  rename(
    "Event" = EVTYPE,
    "Number of Events" = N_Events,
    "Total Number of Injuries" = InjuriesTotal,
    "Total Number of Fatalities" = FatalitiesTotal,
    "Total Injuries + Fatalities" = TotalHealthImpact
  )



###############################  Data processing: Question 2  ######################

# Q2: Across the United States, which types of events have the greatest economic consequences?

# Convert economic damage (crop and property) to a more usable numeric format.
# First list the distinct exponent codes present in CROPDMGEXP and PROPDMGEXP
# (both were converted to factors after import) so that each code can be
# mapped to a numeric multiplier in the step that follows.
levels(storm.df$CROPDMGEXP)
## [1] "?" "0" "2" "B" "k" "K" "m" "M"
levels(storm.df$PROPDMGEXP)
##  [1] "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K" "m" "M"
# Translate a damage exponent code into its numeric multiplier.
#   H/h -> 1e2, K/k -> 1e3, M/m -> 1e6, B/b -> 1e9, digit d -> 10^d
# Any other code ("-", "?", "+") and missing values yield NA.
#
# exp.code: factor or character vector of exponent codes.
# Returns an unnamed double vector of the same length.
#
# The codes are passed through as.character() before the lookup: calling
# as.numeric() directly on a factor returns the internal level index rather
# than the digit printed in the data, which would give the wrong power of ten.
exp.to.mult <- function(exp.code) {
  mult.lookup <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9,
                   setNames(10^(0:9), as.character(0:9)))
  # Indexing by name returns NA for codes that are not in the lookup table
  unname(mult.lookup[toupper(as.character(exp.code))])
}

# Attach the multipliers and the corrected (plain USD) damage columns.
# Each damage figure is multiplied by the multiplier of its OWN exponent
# column (PROPDMGEXP for property, CROPDMGEXP for crops).
econ.df <- storm.df %>%
  mutate(crop.mult = exp.to.mult(CROPDMGEXP),
         prop.mult = exp.to.mult(PROPDMGEXP),
         prop.cor = PROPDMG * prop.mult,
         crop.cor = CROPDMG * crop.mult,
         # Sum of property + crop damage; NA when either component is NA
         tot.dam = prop.cor + crop.cor)


# Per-event-type economic summary: number of records, total corrected
# property damage and crop damage (missing values ignored), and their sum.
econ.dmg <- econ.df %>%
  group_by(EVTYPE) %>%
  summarise(
    N_Events = n(),
    PropDmgTotal = sum(prop.cor, na.rm = TRUE),
    CropDmgTotal = sum(crop.cor, na.rm = TRUE)
  ) %>%
  mutate(TotalEconImpact = PropDmgTotal + CropDmgTotal)

# Sort event types from highest to lowest total economic damage and keep the
# first 15 rows
top.econ <- econ.dmg %>%
  arrange(desc(TotalEconImpact)) %>%
  head(n = 15)

# Reshape to long format for plotting: one row per event type and damage
# type, with the damage amount in `EconomicDamage`. The combined total is
# dropped first because only the two components are plotted.
top.econ.l <- top.econ %>%
  select(-TotalEconImpact) %>%
  pivot_longer(cols = c("PropDmgTotal", "CropDmgTotal"),
               names_to = "Type", values_to = "EconomicDamage") %>%
  # Give the damage types reader-friendly labels
  mutate(Type = fct_recode(as.factor(Type),
                           "PropertyDamage" = "PropDmgTotal",
                           "CropDamage" = "CropDmgTotal"))


# Presentation copy of the top-15 economic summary with descriptive headers
# for the kable table
top.econ2 <- top.econ %>%
  rename(
    "Event" = EVTYPE,
    "Number of Events" = N_Events,
    "Total Property Damage (USD)" = PropDmgTotal,
    "Total Crop Damage (USD)" = CropDmgTotal,
    "Total Economic Damage (USD)" = TotalEconImpact
  )

Results

#################################   Question 1   ######################################
# Figure 1: one panel per event type (top 15 by health impact), each showing
# the total injuries and total fatalities as bars. The y axis is free per
# panel so that event types with small counts remain readable.

fig1.bar <- top.concerns.l %>%
  ggplot(aes(x = Outcome, y = Total, fill = Outcome)) +
  geom_col(position = "dodge") +
  facet_wrap(vars(EVTYPE), ncol = 3, scales = "free_y") +
  labs(title = "Most Harmful Events: Injuries and Fatalities",
       x = "Outcome",
       y = "Number of Observations") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5),
        strip.background = element_rect(colour = "black", fill = "white"))

fig1.bar

# Figure 1: The top 15 events with the biggest impact on population health. Tornadoes and excessive heat are the most dangerous events.


# Table 1: render the public health summary as a centered, striped,
# hover-highlighted table
health.table <- kable_styling(
  kbl(top.concerns2, align = "c"),
  bootstrap_options = c("striped", "hover"),
  position = "center"
)

health.table
Event Number of Events Total Number of Injuries Total Number of Fatalities NA
TORNADO 60652 91346 5633 96979
EXCESSIVE HEAT 1678 6525 1903 8428
TSTM WIND 219944 6957 504 7461
FLOOD 25326 6789 470 7259
LIGHTNING 15755 5230 816 6046
HEAT 767 2100 937 3037
FLASH FLOOD 54278 1777 978 2755
ICE STORM 2006 1975 89 2064
THUNDERSTORM WIND 82563 1488 133 1621
WINTER STORM 11433 1321 206 1527
HIGH WIND 20212 1137 248 1385
HAIL 288661 1361 15 1376
HURRICANE/TYPHOON 88 1275 64 1339
HEAVY SNOW 15708 1021 127 1148
WILDFIRE 2761 911 75 986
# Table 1: The top 15 events with the biggest impact on population health. Tornadoes and excessive heat are the most dangerous events.



###############################    Question 2   ##################################

# Figure 2: horizontal bar chart of the top 15 event types by economic
# damage, with one panel per damage type (property, crop). Bars are coloured
# by event type; the legend is hidden because the y axis already names each
# event type.

fig2.bar <- ggplot(top.econ.l,
                   mapping = aes(x = EconomicDamage, y = EVTYPE, fill = EVTYPE)) +
  geom_col(position = "dodge") +
  facet_wrap(~Type, ncol = 1, scales = "free_y") +
  labs(x = "Total Economic Consequences ($)", y = "Event Type",
       title = "Events with Greatest Economic Consequences") +
  theme_bw() +
  theme(strip.background = element_rect(colour = "black", fill = "white"),
        plot.title = element_text(hjust = 0.5),
        legend.position = "none")

fig2.bar

# Figure 2: The top 15 events with the biggest economic consequences. Flooding and hurricanes caused the most economic damage. 

# Table 2: render the economic damage summary as a centered, striped,
# hover-highlighted table
econ.table <- kable_styling(
  kbl(top.econ2, align = "c"),
  bootstrap_options = c("striped", "hover"),
  position = "center"
)

econ.table
Event Number of Events Total Property Damage (USD) Total Crop Damage (USD) Total Economic Damage (USD)
FLOOD 25326 144657709800 5661968450 150319678250
HURRICANE/TYPHOON 88 69305840000 2607872800 71913712800
TORNADO 60652 56937160480 414969110 57352129590
STORM SURGE 261 43323536000 5000 43323541000
HAIL 288661 15732267220 3025956450 18758223670
FLASH FLOOD 54278 16140861510 1421317100 17562178610
DROUGHT 2488 1046106000 13972566000 15018672000
HURRICANE 174 11868319010 2741910000 14610229010
RIVER FLOOD 173 5118945500 5029459000 10148404500
ICE STORM 2006 3944927810 5022113500 8967041310
TROPICAL STORM 690 7703890550 678346000 8382236550
WINTER STORM 11433 6688497250 26944000 6715441250
HIGH WIND 20212 5270046260 638571300 5908617560
WILDFIRE 2761 4765114000 295472800 5060586800
TSTM WIND 219944 4493028440 554007350 5047035790
# Table 2: The top 15 events with the biggest economic consequences. Flooding and hurricanes caused the most economic damage.