Synopsis

This report analyzes the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database to identify which types of severe weather events are most harmful to population health and which cause the greatest economic damage across the United States. The analysis uses storm data collected from 1950 through November 2011 and evaluates fatalities, injuries, property damage, and crop damage. Results show that tornadoes are responsible for the highest number of fatalities and injuries, while floods, hurricanes/typhoons, and tornadoes cause the greatest economic losses, followed by storm surges. These findings help public officials prioritize resources for preparedness and mitigation.


Data Processing

Load the raw storm data

The analysis begins from the raw NOAA storm data, which is distributed as a compressed file (.csv.bz2).
Make sure the data file named in the code below is in your working directory before knitting.

# Load the raw NOAA storm data from the working directory.
# read.csv() decompresses .bz2 files transparently, so prefer the original
# compressed download and fall back to an already-extracted CSV only when the
# archive is absent.
storm_files <- c("repdata_data_StormData.csv.bz2",
                 "repdata_data_StormData.csv")
storm_path <- storm_files[file.exists(storm_files)][1]
if (is.na(storm_path)) {
  stop("Storm data not found in the working directory; expected one of: ",
       paste(storm_files, collapse = ", "), call. = FALSE)
}
storm <- read.csv(storm_path, stringsAsFactors = FALSE)

# Report the number of rows and columns that were read.
dim(storm)
## [1] 902297     37
# Preview the first six rows of the raw data.
head(storm, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Subset the required variables

We keep only the variables needed for the analysis.

# Keep the event type, the two casualty counts, and the damage value/code
# pairs for property and crops; every other column is dropped.
storm_clean <- storm[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]

Convert property and crop damage to actual dollar amounts

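The NOAA database stores each damage figure as a value (PROPDMG, CROPDMG) plus a magnitude code (PROPDMGEXP, CROPDMGEXP): K for thousands, M for millions, and B for billions. Records with any other code are given a multiplier of zero and therefore contribute no damage to the totals.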

# Dollar multiplier for each recognised magnitude code, in either letter case.
exp_map <- setNames(
  rep(c(1e3, 1e6, 1e9), each = 2),
  c("K", "k", "M", "m", "B", "b")
)

# Collapse every unrecognised (or missing) code to the sentinel "0".
storm_clean$PROPDMGEXP <- ifelse(storm_clean$PROPDMGEXP %in% names(exp_map),
                                 storm_clean$PROPDMGEXP, "0")
storm_clean$CROPDMGEXP <- ifelse(storm_clean$CROPDMGEXP %in% names(exp_map),
                                 storm_clean$CROPDMGEXP, "0")

# Look the multiplier up by code; the sentinel has no entry in exp_map, so its
# lookup yields NA, which is then replaced by a multiplier of zero.
storm_clean$prop_mult <- unname(exp_map[storm_clean$PROPDMGEXP])
storm_clean$prop_mult[is.na(storm_clean$prop_mult)] <- 0
storm_clean$crop_mult <- unname(exp_map[storm_clean$CROPDMGEXP])
storm_clean$crop_mult[is.na(storm_clean$crop_mult)] <- 0

# Damage in dollars = recorded value x multiplier.
storm_clean$prop_damage <- with(storm_clean, PROPDMG * prop_mult)
storm_clean$crop_damage <- with(storm_clean, CROPDMG * crop_mult)

Aggregate population health impacts

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total fatalities and injuries per event type, ordered so that the event
# types with the largest combined casualty count come first.
health <- storm_clean %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(injuries + fatalities))

Top 10 events:

# Show the ten event types with the largest combined casualty counts.
head(health, n = 10L)
## # A tibble: 10 × 3
##    EVTYPE            fatalities injuries
##    <chr>                  <dbl>    <dbl>
##  1 TORNADO                 5633    91346
##  2 EXCESSIVE HEAT          1903     6525
##  3 TSTM WIND                504     6957
##  4 FLOOD                    470     6789
##  5 LIGHTNING                816     5230
##  6 HEAT                     937     2100
##  7 FLASH FLOOD              978     1777
##  8 ICE STORM                 89     1975
##  9 THUNDERSTORM WIND        133     1488
## 10 WINTER STORM             206     1321

Aggregate economic consequences

# Total property and crop damage (in dollars) per event type, plus their sum,
# ordered from the most to the least costly event type.
economic <- storm_clean %>%
  group_by(EVTYPE) %>%
  summarise(
    property = sum(prop_damage, na.rm = TRUE),
    crop = sum(crop_damage, na.rm = TRUE)
  ) %>%
  mutate(total = property + crop) %>%
  arrange(desc(total))

Top 10 events:

# Show the ten event types with the largest total damage.
head(economic, n = 10L)
## # A tibble: 10 × 4
##    EVTYPE                property        crop        total
##    <chr>                    <dbl>       <dbl>        <dbl>
##  1 FLOOD             144657709800  5661968450 150319678250
##  2 HURRICANE/TYPHOON  69305840000  2607872800  71913712800
##  3 TORNADO            56937160480   414953110  57352113590
##  4 STORM SURGE        43323536000        5000  43323541000
##  5 HAIL               15732266720  3025954450  18758221170
##  6 FLASH FLOOD        16140811510  1421317100  17562128610
##  7 DROUGHT             1046106000 13972566000  15018672000
##  8 HURRICANE          11868319010  2741910000  14610229010
##  9 RIVER FLOOD         5118945500  5029459000  10148404500
## 10 ICE STORM           3944927810  5022113500   8967041310

Results

1. Events most harmful to population health

# Ten event types with the largest combined fatality + injury counts; kept so
# that any later chunk relying on `top_health` continues to work.
top_health <- health[1:10, ]

# `health` is ordered by fatalities + injuries, which is a different ranking
# from fatalities alone. Re-rank by fatalities so the chart shows what its
# title says, with bars in descending order.
top_fatalities <- head(
  health[order(health$fatalities, decreasing = TRUE), ],
  10
)

barplot(top_fatalities$fatalities,
        names.arg = top_fatalities$EVTYPE,
        las = 2,
        main = "Top 10 Weather Events by Fatalities",
        ylab = "Fatalities",
        cex.names = 0.7)

Figure 1. Tornadoes cause the highest number of fatalities in the U.S.

# `health` is ordered by fatalities + injuries, which is a different ranking
# from injuries alone. Re-rank by injuries so the chart shows what its title
# says, with bars in descending order.
top_injuries <- head(
  health[order(health$injuries, decreasing = TRUE), ],
  10
)

barplot(top_injuries$injuries,
        names.arg = top_injuries$EVTYPE,
        las = 2,
        main = "Top 10 Weather Events by Injuries",
        ylab = "Injuries",
        cex.names = 0.7)

Figure 2. Tornadoes also cause the most injuries by a significant margin.


2. Events with greatest economic consequences

# First ten rows of `economic`, which is already ordered by total damage.
top_econ <- economic[seq_len(10), ]

# Bar chart of total (property + crop) damage for those event types, with the
# event names drawn perpendicular to the axis in a reduced font size.
barplot(
  height = top_econ[["total"]],
  names.arg = top_econ[["EVTYPE"]],
  main = "Top 10 Weather Events by Economic Damage",
  ylab = "Damage (USD)",
  las = 2,
  cex.names = 0.7
)

Figure 3. Floods, hurricanes/typhoons, and tornadoes are the most economically damaging event types, followed by storm surges.


Conclusion