Synopsis

This report analyzes the NOAA Storm Database to determine which weather events are most harmful to population health and which have the greatest economic consequences. Population health impact is measured using total fatalities and injuries, while economic impact is measured using property and crop damage. The data were cleaned and grouped by event type. Results show that tornadoes are the most harmful to population health. Floods and hurricanes cause the greatest economic damage. These findings help prioritize disaster preparedness.

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Load data

# Read the raw storm records into a data frame. The path is handed to
# read.csv() as-is, including its .bz2 extension.
data <- read.csv(file = "repdata_data_StormData.csv.bz2")

Population health damage

# Per event type: total fatalities, total injuries and their sum, keeping
# only the ten event types with the largest combined count.
health <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES),
    injuries = sum(INJURIES)
  ) %>%
  mutate(total = fatalities + injuries) %>%
  arrange(desc(total)) %>%
  head(10)
## `summarise()` ungrouping output (override with `.groups` argument)

# Print the resulting top-ten table.
health
## # A tibble: 10 x 4
##    EVTYPE            fatalities injuries total
##    <chr>                  <dbl>    <dbl> <dbl>
##  1 TORNADO                 5633    91346 96979
##  2 EXCESSIVE HEAT          1903     6525  8428
##  3 TSTM WIND                504     6957  7461
##  4 FLOOD                    470     6789  7259
##  5 LIGHTNING                816     5230  6046
##  6 HEAT                     937     2100  3037
##  7 FLASH FLOOD              978     1777  2755
##  8 ICE STORM                 89     1975  2064
##  9 THUNDERSTORM WIND        133     1488  1621
## 10 WINTER STORM             206     1321  1527

Economic damage

# Convert the damage-exponent codes into numeric multipliers and compute the
# value of property and crop damage for every record.
#
# The earlier version of this step only recognised upper-case "K", "M" and
# "B" and pushed every other code through as.numeric(). That had three
# problems: lower-case letter codes were silently treated as multiplier 1,
# digit codes were used as literal multipliers (so a code of "0" wiped the
# recorded damage out entirely), and the coercion raised warnings.
#
# NOTE: the totals printed later in this report were produced before this
# correction; re-run the analysis to refresh them.

# Map a vector of exponent codes to numeric multipliers.
#
#   code  character (or factor) vector of exponent codes.
#
# Returns an unnamed numeric vector the same length as `code`. "K", "M" and
# "B" are matched case-insensitively and map to 1e3, 1e6 and 1e9. Every other
# value (blank, NA, digits, punctuation, other letters) maps to 1, i.e. the
# recorded amount is taken at face value.
# NOTE(review): the meaning of the remaining codes is not established in this
# report, so they are left unscaled rather than guessed at -- confirm against
# the NOAA storm data documentation.
damage_multiplier <- function(code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  # Indexing by name yields NA for anything not in `known` (including "" and
  # NA), which is then replaced by the neutral multiplier.
  mult <- unname(known[toupper(trimws(as.character(code)))])
  mult[is.na(mult)] <- 1
  mult
}

# As before, the exponent columns are overwritten with their numeric
# multipliers so that later code can keep using them directly.
data$PROPDMGEXP <- damage_multiplier(data$PROPDMGEXP)
data$CROPDMGEXP <- damage_multiplier(data$CROPDMGEXP)

# Damage per record = recorded amount * multiplier.
data$prop <- data$PROPDMG * data$PROPDMGEXP
data$crop <- data$CROPDMG * data$CROPDMGEXP

# Per event type: combined property and crop damage, keeping only the ten
# event types with the largest total.
econ <- data %>%
  group_by(EVTYPE) %>%
  summarise(damage = sum(prop + crop)) %>%
  arrange(desc(damage)) %>%
  head(10)
## `summarise()` ungrouping output (override with `.groups` argument)

# Print the resulting top-ten table.
econ
## # A tibble: 10 x 2
##    EVTYPE                   damage
##    <chr>                     <dbl>
##  1 FLOOD             150319678257 
##  2 HURRICANE/TYPHOON  71913712800 
##  3 TORNADO            57340614176.
##  4 STORM SURGE        43323541000 
##  5 HAIL               18752904807.
##  6 FLASH FLOOD        17562129394.
##  7 DROUGHT            15018672000 
##  8 HURRICANE          14610229010 
##  9 RIVER FLOOD        10148404500 
## 10 ICE STORM           8967041310

Results

Most harmful events to population health

# Horizontal bar chart of the top health-impact event types, ordered by the
# combined fatality and injury count.
ggplot(health, aes(x = reorder(EVTYPE, total), y = total)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top Weather Events Harmful to Population Health",
    x = "Event Type",
    y = "Fatalities + Injuries"
  )

Greatest economic consequences

# Horizontal bar chart of the top economic-impact event types, ordered by
# total damage.
ggplot(econ, aes(x = reorder(EVTYPE, damage), y = damage)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top Weather Events with Greatest Economic Damage",
    x = "Event Type",
    y = "Damage Cost"
  )