Synopsis: Since 1950 the National Oceanic and Atmospheric Administration (NOAA) has collected detailed storm data throughout the USA. Storm data from 1950 to 2011 are analyzed here in an attempt to determine (1) which types of storm events are most harmful to population health, and (2) which types of storm events have the greatest economic consequences. Results indicate that tornadoes cause the most fatalities and injuries, followed by heat, floods, and lightning. The highest property damage is caused by floods, hurricanes, tornadoes, and storm surges. Drought is the leading cause of crop damage, followed by floods, ice storms, and hail. Maps are included to show which states had the greatest health and economic impacts.

# Chunk defaults for the report: show the code, hide messages and warnings
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
library(here)
library(tidyverse)
library(gridExtra)
library(maps)
# devtools::install_github("wmurphyrd/fiftystater")
library(fiftystater)
library(ggthemes)
library(mapproj)

Data Processing

NOAA storm data was downloaded from the Coursera website on June 8, 2020. Data is directly loaded into R and saved as an RDS file (which enables faster loading in future runs). If the data exists in an RDS file already then that is loaded instead.

# Load the storm data, caching it as an RDS file so later runs can skip the
# slow read of the compressed CSV. Nothing is loaded when `dat` already exists
# in the session.
# The cache carries an .rds extension because it holds serialized R data, not
# CSV text; the previous 'storm_data.csv' name misdescribed its contents.
if (!exists('dat')) {
    cache_file <- here('Data', 'storm_data.rds')
    if (file.exists(cache_file)) {
        dat <- readRDS(cache_file)
    } else {
        # First run: read the original compressed CSV, then write the cache
        dat <- read.csv(here('Data', 'repdata_data_StormData.csv.bz2'))
        saveRDS(dat, file = cache_file)
    }
}

The following steps are used to process the data:

  1. All records with no fatalities, injuries, property damage, and crop damage are removed. This reduces the number of data records from ~900,000 to ~250,000.

  2. Actual damage is computed based on multipliers in the database. For example, a crop damage multiplier “CROPDMGEXP” of ‘K’, ‘M’, or ‘B’ (upper or lower case) indicates that the CROPDMG value is in thousands, millions, or billions; values with any other multiplier code are left unchanged. The actual damage calculation is done for both property damage and crop damage variables.

  3. Data is grouped by EVTYPE (the parameter that indicates storm type).

  4. The total fatalities, injuries, property damage, and crop damage are summed for each group.

  5. The top 5 storm types for each of the four totals in step 4 are plotted to determine which storms have the greatest impacts on health and the economy.

# Keep only events that caused at least one fatality or injury, or some
# property or crop damage
dat1 <- filter(dat,
               FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)

# Scale each recorded damage value by the factor its exponent code stands for:
# K/k = thousand, M/m = million, B/b = billion.
# Any other code maps to a factor of 1, i.e. the value is left as recorded.
dat1 <- dat1 %>%
    mutate(
        PROPDMG = PROPDMG * case_when(
            PROPDMGEXP %in% c('K', 'k') ~ 1e3,
            PROPDMGEXP %in% c('M', 'm') ~ 1e6,
            PROPDMGEXP %in% c('B', 'b') ~ 1e9,
            TRUE ~ 1
        ),
        CROPDMG = CROPDMG * case_when(
            CROPDMGEXP %in% c('K', 'k') ~ 1e3,
            CROPDMGEXP %in% c('M', 'm') ~ 1e6,
            CROPDMGEXP %in% c('B', 'b') ~ 1e9,
            TRUE ~ 1
        )
    )

# Total fatalities, injuries, property damage and crop damage per storm type
events_by_type <- group_by(dat1, EVTYPE)
dat2 <- summarise(
    events_by_type,
    fat = sum(FATALITIES),
    inj = sum(INJURIES),
    prop = sum(PROPDMG),
    crop = sum(CROPDMG)
)

# Pick the five storm types with the largest total in each impact category.
#
# arrange() sorts ascending by default and has no `desc` argument: the former
# `arrange(x, desc = T)` did not request descending order, it silently added a
# constant TRUE as a second sort key. The bogus argument is dropped and the
# ascending order is kept on purpose, because the horizontal barplots built
# from these tables draw their first row at the bottom, which puts the largest
# total at the top of each panel. Use arrange(desc(x)) if the opposite order is
# ever wanted.

# Storm types with the 5 highest fatality totals
dmg_fat <- dat2 %>%
    slice_max(fat, n = 5) %>%
    arrange(fat)

# Storm types with the 5 highest injury totals
dmg_inj <- dat2 %>%
    slice_max(inj, n = 5) %>%
    arrange(inj)

# Storm types with the 5 highest property damage totals,
# converted from dollars to billions of dollars
dmg_prop <- dat2 %>%
    slice_max(prop, n = 5) %>%
    mutate(prop = prop / 1e9) %>%
    arrange(prop)

# Storm types with the 5 highest crop damage totals,
# converted from dollars to billions of dollars
dmg_crop <- dat2 %>%
    slice_max(crop, n = 5) %>%
    mutate(crop = crop / 1e9) %>%
    arrange(crop)

Results

The top five storm types that cause the most fatalities, injuries, property damage, and crop damage are shown in Figure 1. All damage categories include tornadoes and some type of flood. Tornadoes cause by far the most fatalities (over 6,000) and injuries (nearly 100,000) of all storm types. Floods cause the most property damage, totaling over $140,000,000,000 since 1950, and have been responsible for close to 1,000 fatalities and $6,000,000,000 in crop damage. The major cause of crop damage is drought, with an estimated $14,000,000,000 lost.

# Lay out a 2 x 2 grid of panels; the left margin is widened to leave room
# for the storm-type labels
par(mfrow = c(2, 2),
    mar = c(5, 8, 4, 2))

# Draw one horizontal bar chart of `totals` labelled by `storm_types`,
# then add a plain-weight panel title above it
draw_top5_panel <- function(storm_types, totals, axis_label, panel_title) {
    barplot(height = totals,
            names.arg = storm_types,
            xlab = axis_label,
            cex.names = 0.7,
            horiz = TRUE,
            las = 1)
    title(panel_title, line = 1, font.main = 1)
    invisible(NULL)
}

# One panel per impact category
draw_top5_panel(dmg_fat$EVTYPE, dmg_fat$fat,
                'Number of Fatalities', "Fatalities")
draw_top5_panel(dmg_inj$EVTYPE, dmg_inj$inj,
                'Number of Injuries', "Injuries")
draw_top5_panel(dmg_prop$EVTYPE, dmg_prop$prop,
                'Dollars (billions)', "Property Damage")
draw_top5_panel(dmg_crop$EVTYPE, dmg_crop$crop,
                'Dollars (billions)', "Crop Damage")

# Figure caption written across the top of the whole grid
mtext("Figure 1. Top 5 Most Damaging Storm Types for Each Category",
      side = 3,
      line = -1,
      outer = TRUE,
      font = 4,
      cex = 1.1)

Where does storm damage occur?

Storm damage and health data were grouped by state, then processed to determine which states had the most fatalities, injuries, and economic impacts. Figure 2 displays maps showing the impacts in each state. The highest number of fatalities occur in Texas and Illinois. The most injuries occur in Texas. Property damage is the highest in California, followed closely by Louisiana. The most crop damage occurs in Texas, followed by Mississippi.

# Lookup table pairing each full state name with its two-letter abbreviation.
# The abbreviation is stored twice: as `state_name`, and as `STATE` so it can
# be used as the join key against the storm data.
tbl <- tibble(
    state = rownames(state.x77),
    state_name = state.abb,
    STATE = state.abb
)

# Per-state totals (dollar amounts expressed in billions), joined to the state
# lookup table. Rows without a match in the lookup table end up with NA names
# and are dropped by na.omit().
datloc <- dat1 %>%
    group_by(STATE) %>%
    summarise(fat = sum(FATALITIES),
              inj = sum(INJURIES),
              prop = sum(PROPDMG) / 1e9,
              crop = sum(CROPDMG) / 1e9) %>%
    left_join(tbl, by = "STATE") %>%
    na.omit()

# ggplot2::geom_map matches regions on lowercase state names
datloc[["region"]] <- tolower(datloc[["state"]])

# Build one choropleth of `datloc`: each state is filled by the column given
# as `fill_col`, with `panel_title` centred above the map and an untitled
# legend on the right
draw_state_map <- function(fill_col, panel_title) {
    ggplot(datloc, aes(map_id = region)) +
        geom_map(aes(fill = {{ fill_col }}),
                 map = fifty_states, color = "white", size = 0.1) +
        scale_fill_viridis_c(option = "C") +
        coord_map() +
        labs(title = panel_title,
             fill = '') +
        expand_limits(x = fifty_states$long, y = fifty_states$lat) +
        ggthemes::theme_map() +
        theme(plot.title = element_text(hjust = 0.5),
              legend.position = 'right')
}

# One map per impact category
p1 <- draw_state_map(fat, 'Fatalities')
p2 <- draw_state_map(inj, 'Injuries')
p3 <- draw_state_map(prop, 'Property Damage (billions of $)')
p4 <- draw_state_map(crop, 'Crop Damage (billions of $)')

# Arrange the four maps in a 2 x 2 grid under a shared caption
grid.arrange(p1, p2, p3, p4,
             nrow = 2,
             top = "Figure 2. Impacts to Health and Economy by State")