Synopsis

This analysis explores the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Database. The goal is to identify (1) which event types are most harmful to population health and (2) which event types have the greatest economic consequences. In the results below, tornadoes account for by far the most fatalities and injuries, while floods cause the greatest combined property and crop damage. These findings can help prioritize resources for severe weather preparedness.

Data Processing

library(dplyr)
library(ggplot2)
# Remote location of the compressed storm data and the local file it is
# saved to.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
dest <- "StormData.csv.bz2"

# Download only when no local copy exists yet; "wb" writes the archive in
# binary mode.
if (!file.exists(dest)) {
  download.file(url = url, destfile = dest, mode = "wb")
}

# The .bz2 path is handed to read.csv() as-is; text columns stay character.
storm <- read.csv(file = dest, stringsAsFactors = FALSE)

# Report the number of rows and columns that were read.
dim(storm)
## [1] 902297     37
# Keep only the columns used below: the event type, the casualty counts,
# and the damage figures together with their exponent codes.
storm2 <- storm[, c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   x: Vector of exponent codes (matched case-insensitively).
#
# Returns:
#   A numeric vector the same length as `x`:
#     "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9.
#   Any other non-missing code (blank, digits, "+", "-", "?", ...) maps to 1,
#   i.e. the damage figure is taken at face value. Missing codes yield NA.
#
# NOTE(review): "H" (hundreds) was previously lumped in with the unknown
# codes and treated as 1. The meaning of the digit and symbol codes is not
# established here, so they keep the neutral multiplier of 1 -- confirm
# against the data documentation before changing them.
exp_map <- function(x) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  code <- toupper(x)
  out <- unname(multipliers[match(code, names(multipliers))])

  # match() yields NA both for unrecognised codes and for missing input;
  # only the former fall back to 1 so that NA input still propagates as NA.
  out[is.na(out) & !is.na(code)] <- 1
  out
}

# Scale the raw property and crop damage figures by the multiplier that
# exp_map() returns for their exponent codes, then add the two into a single
# economic-damage value per record.
storm2 <- storm2 %>%
  mutate(
    PROPDMG_REAL = PROPDMG * exp_map(PROPDMGEXP),
    CROPDMG_REAL = CROPDMG * exp_map(CROPDMGEXP),
    ECONOMIC_DMG = PROPDMG_REAL + CROPDMG_REAL
  )

# Quick look at the distribution of the per-record totals.
summary(storm2$ECONOMIC_DMG)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.00e+00 0.00e+00 0.00e+00 5.28e+05 1.00e+03 1.15e+11
## Most harmful events to population health
# Per event type: total fatalities, total injuries, and their sum.
# Only the ten event types with the largest combined count are kept,
# ordered from largest to smallest.
health_impact <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(total = fatalities + injuries) %>%
  arrange(desc(total)) %>%
  head(10)

health_impact
## # A tibble: 10 × 4
##    EVTYPE            fatalities injuries total
##    <chr>                  <dbl>    <dbl> <dbl>
##  1 TORNADO                 5633    91346 96979
##  2 EXCESSIVE HEAT          1903     6525  8428
##  3 TSTM WIND                504     6957  7461
##  4 FLOOD                    470     6789  7259
##  5 LIGHTNING                816     5230  6046
##  6 HEAT                     937     2100  3037
##  7 FLASH FLOOD              978     1777  2755
##  8 ICE STORM                 89     1975  2064
##  9 THUNDERSTORM WIND        133     1488  1621
## 10 WINTER STORM             206     1321  1527
# Horizontal bar chart of the combined casualty count per event type;
# reorder() sorts the bars by that count.
ggplot(
  data = health_impact,
  mapping = aes(x = reorder(EVTYPE, total), y = total)
) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 10 Weather Events Most Harmful to Population Health") +
  xlab("Event Type") +
  ylab("Total Fatalities and Injuries")

## Events with greatest economic consequences
# Sum the per-record economic damage within each event type and keep the ten
# event types with the largest totals, ordered from largest to smallest.
economic_impact <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(
    total_damage = sum(ECONOMIC_DMG, na.rm = TRUE)
  ) %>%
  arrange(desc(total_damage)) %>%
  head(10)

economic_impact
## # A tibble: 10 × 2
##    EVTYPE             total_damage
##    <chr>                     <dbl>
##  1 FLOOD             150319678257 
##  2 HURRICANE/TYPHOON  71913712800 
##  3 TORNADO            57352114049.
##  4 STORM SURGE        43323541000 
##  5 HAIL               18758221521.
##  6 FLASH FLOOD        17562129167.
##  7 DROUGHT            15018672000 
##  8 HURRICANE          14610229010 
##  9 RIVER FLOOD        10148404500 
## 10 ICE STORM           8967041360
# Horizontal bar chart of total damage per event type, sorted by total.
# Totals are divided by 1e9 so the axis reads in billions.
ggplot(
  data = economic_impact,
  mapping = aes(
    x = reorder(EVTYPE, total_damage),
    y = total_damage / 1e9
  )
) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 10 Weather Events Causing Greatest Economic Damage") +
  xlab("Event Type") +
  ylab("Total Economic Damage (Billion USD)")