Reproducible Research Peer Assessment 2: Damages and Injuries caused by tornadoes versus other weather events

Synopsis: Tornadoes cause a disproportionate amount of property damage and injuries compared to other weather events in the United States. This report demonstrates this both in total injuries/fatalities and broken down by US region.

Data Processing

The following libraries were used to create this report.

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
library(tidyr)
library(xts)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last

Get regions from US Census website

A file of US Census regions was downloaded from the census website and cleaned via the following code. This was used to plot the data by region. The US Census is a generally accepted source for population and region reporting.

# Fetch the US Census region/division listing and parse it into one row per
# state.
#
# The listing is downloaded to "reg_div.txt" in the working directory the
# first time it is needed; later calls reuse that local copy, so repeated
# calls do not hit the Census server again and also work offline.
#
# Returns a data frame with the columns
#   Description - the raw line from the listing
#   DIVISION    - text after the colon on the most recent 'Division' line
#   REGION      - text after the colon on the most recent 'REGION' line
#   STATENUM    - integer parsed from the first run of digits on the line
# restricted to lines that carry a number below 100 and are not themselves
# 'Division' or 'REGION' header lines.
getRegions <- function() {
    regionFile <- "reg_div.txt"
    if (!file.exists(regionFile)) {
        download.file("http://www2.census.gov/geo/docs/maps-data/maps/reg_div.txt", regionFile)
    }

    # read.csv treats the first line of the file as a header, so that line is
    # not part of the parsed rows.
    reg_div <- read.csv(regionFile)
    names(reg_div) <- c('Description')

    # For lines mentioning `keyword`, take the text after the colon; then carry
    # that label forward onto the lines that follow (leading NAs are kept).
    labelAfterColon <- function(keyword) {
        label <- ifelse( str_detect( reg_div$Description, keyword ), str_extract( reg_div$Description, ":.+"), NA )
        label <- str_trim( str_replace( label, ':', '') )
        na.locf(label, na.rm = FALSE)
    }

    #Pull out the division
    reg_div$DIVISION <- labelAfterColon('Division')

    #Pull out the regions
    reg_div$REGION <- labelAfterColon('REGION')

    #Pull out state numbers
    reg_div$STATENUM <- as.integer( str_extract( reg_div$Description, "[:digit:]+") )

    # Keep only the state rows: header lines also contain digits, so they are
    # excluded explicitly.
    reg_div %>% filter( !is.na(STATENUM) & STATENUM < 100 & !str_detect(Description, 'Division') & !str_detect(Description, 'REGION') )
}

Read Storm data

The storm data was downloaded from the Coursera website and decompressed with bzip2. The resulting CSV file was read into R via the code below.

# Read the raw storm CSV, keep the columns used in this report, attach the
# Census region/division of each state and scale the damage amounts.
#
# path: location of the decompressed storm data CSV. Defaults to the file
#       name used throughout this report.
#
# Returns a tbl_df with one row per storm record. PROPDMG and CROPDMG are
# expressed in thousands: an exponent code of K leaves the amount as is,
# M multiplies it by 1000 and B by 1000000. Amounts with any other exponent
# code are left unchanged.
readData <- function(path = "repdata-data-StormData.csv") {
    storm <- read.csv(path)
    storm <- storm %>%
        select(STATE__, STATE, EVTYPE, INJURIES, FATALITIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP ) %>%
        rename( STATENUM = STATE__)
    storm <- tbl_df(storm)

    #Clean up EVTYPE
    storm$EVTYPE <- factor( str_to_upper( as.character(storm$EVTYPE) ) )

    #Add Regions
    reg <- getRegions()
    # STATENUM is the only column the two tables share; name it explicitly so
    # the join key cannot change silently if either table gains a column.
    storm <- merge( storm, reg, by = "STATENUM", all.x = TRUE)

    #Calculate Property and Crop damage
    # The multiplier is chosen from the exponent column (not from the amount),
    # and the code is compared case-insensitively so 'm' is treated like 'M'.
    inThousands <- function(amount, exponent) {
        exponent <- toupper( as.character(exponent) )
        multiplier <- ifelse( exponent == 'M', 1000, ifelse( exponent == 'B', 1000000, 1 ) )
        amount * multiplier
    }
    storm$PROPDMG <- inThousands( storm$PROPDMG, storm$PROPDMGEXP )
    storm$CROPDMG <- inThousands( storm$CROPDMG, storm$CROPDMGEXP )

    tbl_df(storm)
}
# Build the cleaned storm table that the grouping and plotting steps below take as input.
storm <- readData()

The following functions were used to group the data before creating plots.

# Total casualties and damage for tornadoes versus all other event types.
#
# storm: table with EVTYPE, INJURIES, FATALITIES, PROPDMG and CROPDMG columns.
#
# Returns a long table with one row per (tornado, injuryType) combination:
# `tornado` is the factor 'Tornado'/'Other', `injuryType` names the measure,
# `val` holds its total, and `property` is 1 for the two damage measures and
# 0 for injuries and fatalities.
tornadoGrouping <- function( storm ) {
    # Totals per event type, ordered by injuries and then fatalities (descending).
    perEvent <- storm %>%
        group_by( EVTYPE ) %>%
        summarize(
            injuries = sum(INJURIES),
            fatalities = sum(FATALITIES),
            propdmg = sum(PROPDMG),
            cropdmg = sum(CROPDMG)
        ) %>%
        arrange( desc(injuries), desc(fatalities) )

    # Only the exact event type 'TORNADO' counts as a tornado.
    isTornado <- perEvent$EVTYPE == 'TORNADO'
    perEvent$tornado <- factor( ifelse( isTornado, 'Tornado', 'Other') )

    # Collapse the per-event totals into the two classes.
    perClass <- summarize(
        group_by( perEvent, tornado ),
        injuries = sum(injuries),
        fatalities = sum(fatalities),
        propdmg = sum(propdmg),
        cropdmg = sum(cropdmg)
    )

    # Stack the four measures into key/value form and flag the damage ones.
    longTotals <- gather( perClass, injuryType, val, c( injuries, fatalities, propdmg, cropdmg) )
    damageMeasures <- c( 'propdmg', 'cropdmg')
    longTotals$property <- ifelse( longTotals$injuryType %in% damageMeasures, 1, 0 )
    longTotals
}

# Total casualties and damage for tornadoes versus all other event types,
# split by REGION.
#
# storm: table with REGION, EVTYPE, INJURIES, FATALITIES, PROPDMG and CROPDMG
#        columns.
#
# Returns a long table with one row per (REGION, tornado, injuryType)
# combination: `val` holds the total and `property` is 1 for the two damage
# measures and 0 for injuries and fatalities.
tornadoByRegion <- function( storm ) {
    # Totals per region and event type, ordered by injuries then fatalities.
    perEvent <- storm %>%
        group_by( REGION, EVTYPE ) %>%
        summarize(
            injuries = sum(INJURIES),
            fatalities = sum(FATALITIES),
            propdmg = sum(PROPDMG),
            cropdmg = sum(CROPDMG)
        ) %>%
        arrange( desc(injuries), desc(fatalities) )

    # Only the exact event type 'TORNADO' counts as a tornado. The label is
    # built on the whole column at once so the factor has a single level set.
    isTornado <- perEvent$EVTYPE == 'TORNADO'
    perEvent$tornado <- factor( ifelse( isTornado, 'Tornado', 'Other') )

    # Collapse the per-event totals into the two classes within each region.
    perClass <- summarize(
        group_by( perEvent, REGION, tornado ),
        injuries = sum(injuries),
        fatalities = sum(fatalities),
        propdmg = sum(propdmg),
        cropdmg = sum(cropdmg)
    )

    # Stack the four measures into key/value form and flag the damage ones.
    longTotals <- gather( perClass, injuryType, val, c( injuries, fatalities, propdmg, cropdmg) )
    damageMeasures <- c( 'propdmg', 'cropdmg')
    longTotals$property <- ifelse( longTotals$injuryType %in% damageMeasures, 1, 0 )
    longTotals
}

The three plots used in this analysis were created by the functions below.

# Bar chart of injury and fatality totals for tornadoes versus other events.
#
# tornadoData: long table with `tornado`, `injuryType`, `val` and `property`
#              columns; only rows with property == 0 are drawn.
#
# Returns a ggplot object with one panel per injuryType.
tornadoBars <- function( tornadoData ) {
    casualtyRows <- filter( tornadoData, property == 0 )
    barChart <- ggplot( casualtyRows, aes( x = tornado, y = val))
    barChart <- barChart + geom_bar( stat = 'identity')
    barChart + facet_wrap( ~ injuryType )
}

# Per-region bar charts of tornado versus other events.
#
# tornadoData: long table with `REGION`, `tornado`, `injuryType`, `val` and
#              `property` columns.
#
# Returns an unnamed list of two ggplot objects: the first drawn from the rows
# with property == 0, the second from the rows with property == 1. Both are
# faceted by injuryType and tornado.
regionBars <- function( tornadoData ) {
    # Same chart recipe for both subsets; only the rows differ.
    facetedBars <- function( rows ) {
        ggplot( rows, aes( x = REGION, y = val)) +
            geom_bar( stat = 'identity') +
            facet_wrap( injuryType ~ tornado )
    }
    casualtyRows <- filter( tornadoData, property == 0)
    damageRows <- filter( tornadoData, property == 1)
    list( facetedBars( casualtyRows ), facetedBars( damageRows ) )
}

The first plot compares injuries/fatalities due to tornadoes versus other weather events. Tornadoes account for more injuries than all other weather events combined.

# The storm table is normally already in memory from the data-processing step;
# only rebuild it when it is missing, so the large CSV is not parsed (and the
# region file not fetched) a second time.
if (!exists("storm")) {
    storm <- readData()
}
# Totals for tornadoes versus everything else, then the injuries/fatalities chart.
tornadoData <- tornadoGrouping( storm )
tornadoBars( tornadoData )

The next plot compares injuries/fatalities due to tornadoes versus other events, but also separates them by region.

# Re-aggregate by region and build both regional charts; the first list
# element is the one drawn from the injuries/fatalities rows.
tornadoData <- tornadoByRegion( storm )
regionPlots <- regionBars( tornadoData )
regionPlots[[1]]

The last plot shows property and crop damage by region. Tornadoes cause a significant proportion of property damage, but not of crop damage. Property and crop damage values are expressed in thousands of US dollars.

# Second list element: the chart drawn from the property/crop damage rows.
regionPlots[[2]]

Results

Tornadoes account for the largest number of injuries, fatalities, and property damage. Even when all other events are combined and compared on injuries, fatalities, and property damage, tornadoes overwhelm them. Tornadoes are the most harmful to population health. Tornadoes also have the greatest economic impact – though their impact is much more significant for property damage than for crop damage.