Synopsis

This report analyzes the NOAA Storm Database to determine which types of weather events are most harmful to population health and which have the greatest economic consequences across the United States. The analysis uses raw storm data from 1950 to 2011 and summarizes fatalities, injuries, property damage, and crop damage by event type to identify the most harmful weather events.

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Fetch the compressed storm data only when no local copy exists, so repeated
# runs of the report reuse the file already on disk.
if (!file.exists("stormdata.csv.bz2")) {
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata/data/StormData.csv.bz2",
    destfile = "stormdata.csv.bz2",
    # The archive is binary: request a binary transfer explicitly instead of
    # relying on download.file() guessing it from the URL suffix.
    mode = "wb"
  )
}

# Load the full storm table; read.csv() is handed the .bz2 path directly.
storm <- read.csv(file = "stormdata.csv.bz2")

# Keep only the columns this analysis needs: the event type, the two health
# counts, and the damage amounts together with their magnitude codes.
df <- storm[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]

# Lookup table from damage magnitude code to numeric multiplier.
# Only K/M/B in either case are recognised.
exp_map <- c("K" = 1000, "M" = 1e6, "B" = 1e9, "k" = 1000, "m" = 1e6, "b" = 1e9)

# Replace each magnitude code with its multiplier.
# - as.character() forces a lookup by name: if the column were a factor,
#   `[` would index by the factor's integer codes and return wrong values.
# - unname() drops the per-row names the named-vector lookup attaches, which
#   would otherwise be carried into every derived column below.
df$PROPDMGEXP <- unname(exp_map[as.character(df$PROPDMGEXP)])
df$CROPDMGEXP <- unname(exp_map[as.character(df$CROPDMGEXP)])

# Any code that is not in exp_map (for example a blank) looked up as NA;
# treat it as "no scaling" so the raw amount is used unchanged.
df$PROPDMGEXP[is.na(df$PROPDMGEXP)] <- 1
df$CROPDMGEXP[is.na(df$CROPDMGEXP)] <- 1

# Per-record totals: amount times multiplier, then property plus crop.
df$PROP_TOTAL <- df$PROPDMG * df$PROPDMGEXP
df$CROP_TOTAL <- df$CROPDMG * df$CROPDMGEXP
df$ECONOMIC_TOTAL <- df$PROP_TOTAL + df$CROP_TOTAL

# Rank event types by their combined human toll and keep the ten worst.
# EVTYPE is grouped exactly as recorded, so differently spelled labels are
# counted as separate event types.
health <- df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  # Combined measure used for the ranking: deaths plus injuries.
  mutate(total_health = total_fatalities + total_injuries) %>%
  arrange(desc(total_health)) %>%
  head(n = 10)

# Show the resulting top-10 table.
health
## # A tibble: 10 × 4
##    EVTYPE            total_fatalities total_injuries total_health
##    <chr>                        <dbl>          <dbl>        <dbl>
##  1 TORNADO                       5633          91346        96979
##  2 EXCESSIVE HEAT                1903           6525         8428
##  3 TSTM WIND                      504           6957         7461
##  4 FLOOD                          470           6789         7259
##  5 LIGHTNING                      816           5230         6046
##  6 HEAT                           937           2100         3037
##  7 FLASH FLOOD                    978           1777         2755
##  8 ICE STORM                       89           1975         2064
##  9 THUNDERSTORM WIND              133           1488         1621
## 10 WINTER STORM                   206           1321         1527
# Bar chart of the top-10 health table. reorder() sorts the event types by
# their total, and coord_flip() turns the bars horizontal so the labels fit.
ggplot(
  data = health,
  mapping = aes(x = reorder(EVTYPE, total_health), y = total_health)
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 10 Most Harmful Events to Population Health",
    x = "Event Type",
    y = "Fatalities + Injuries"
  )

# Sum property, crop, and combined damage for every event type, then keep the
# ten event types with the largest combined total.
econ <- df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_prop = sum(PROP_TOTAL, na.rm = TRUE),
    total_crop = sum(CROP_TOTAL, na.rm = TRUE),
    total_econ = sum(ECONOMIC_TOTAL, na.rm = TRUE)
  ) %>%
  arrange(desc(total_econ)) %>%
  head(n = 10)

# Show the resulting top-10 table.
econ
## # A tibble: 10 × 4
##    EVTYPE               total_prop  total_crop    total_econ
##    <chr>                     <dbl>       <dbl>         <dbl>
##  1 FLOOD             144657709807   5661968450 150319678257 
##  2 HURRICANE/TYPHOON  69305840000   2607872800  71913712800 
##  3 TORNADO            56937160779.   414953270  57352114049.
##  4 STORM SURGE        43323536000         5000  43323541000 
##  5 HAIL               15732267048.  3025954473  18758221521.
##  6 FLASH FLOOD        16140812067.  1421317100  17562129167.
##  7 DROUGHT             1046106000  13972566000  15018672000 
##  8 HURRICANE          11868319010   2741910000  14610229010 
##  9 RIVER FLOOD         5118945500   5029459000  10148404500 
## 10 ICE STORM           3944927860   5022113500   8967041360
# Bar chart of the top-10 economic table. reorder() sorts the event types by
# total damage, and coord_flip() turns the bars horizontal.
ggplot(
  data = econ,
  mapping = aes(x = reorder(EVTYPE, total_econ), y = total_econ)
) +
  geom_col(fill = "darkblue") +
  coord_flip() +
  labs(
    title = "Top 10 Events with Greatest Economic Consequences",
    x = "Event Type",
    y = "Economic Damage (USD)"
  )