Synopsis

This analysis examines the impact of storm events on communities in the United States over the period from 1950 through 2011. Two types of impact are considered: public health impacts and economic impacts. The public health impact is the sum of fatalities and injuries, and the economic impact is the sum of property damage and crop damage. Unsurprisingly, tornadoes top both lists, but for the most part the remaining public health impacts lean toward extreme temperature events, while the remaining economic impacts are tilted toward high wind events.

Data Processing

Loading all necessary libraries:

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Reading the document from the supplied URL:

# Download the compressed file once; later runs reuse the local copy
storm_data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_data_file <- "storm_data.csv.bz2"
if (!file.exists(storm_data_file)) {
  download.file(storm_data_url, storm_data_file, method = "curl")
}
# Read the file with an explicit column specification.
# Left to guess, read_csv() types the sparsely populated text columns
# (CROPDMGEXP, WFO, REMARKS, ...) as logical because their first rows are
# empty. Every later text value then becomes NA with a parsing failure,
# which silently discards the crop-damage exponent used in the analysis.
stormdata <- read_csv(
  storm_data_file,
  col_types = cols(
    .default = col_double(),
    BGN_DATE = col_character(),
    BGN_TIME = col_character(),
    TIME_ZONE = col_character(),
    COUNTYNAME = col_character(),
    STATE = col_character(),
    EVTYPE = col_character(),
    BGN_AZI = col_character(),
    BGN_LOCATI = col_character(),
    END_DATE = col_character(),
    END_TIME = col_character(),
    COUNTYENDN = col_character(),
    END_AZI = col_character(),
    END_LOCATI = col_character(),
    PROPDMGEXP = col_character(),
    CROPDMGEXP = col_character(),
    WFO = col_character(),
    STATEOFFIC = col_character(),
    ZONENAMES = col_character(),
    REMARKS = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   BGN_DATE = col_character(),
##   BGN_TIME = col_character(),
##   TIME_ZONE = col_character(),
##   COUNTYNAME = col_character(),
##   STATE = col_character(),
##   EVTYPE = col_character(),
##   BGN_AZI = col_logical(),
##   BGN_LOCATI = col_logical(),
##   END_DATE = col_logical(),
##   END_TIME = col_logical(),
##   COUNTYENDN = col_logical(),
##   END_AZI = col_logical(),
##   END_LOCATI = col_logical(),
##   PROPDMGEXP = col_character(),
##   CROPDMGEXP = col_logical(),
##   WFO = col_logical(),
##   STATEOFFIC = col_logical(),
##   ZONENAMES = col_logical(),
##   REMARKS = col_logical()
## )
## See spec(...) for full column specifications.
## Warning: 5255570 parsing failures.
##  row col           expected actual                 file
## 1671 WFO 1/0/T/F/TRUE/FALSE     NG 'storm_data.csv.bz2'
## 1673 WFO 1/0/T/F/TRUE/FALSE     NG 'storm_data.csv.bz2'
## 1674 WFO 1/0/T/F/TRUE/FALSE     NG 'storm_data.csv.bz2'
## 1675 WFO 1/0/T/F/TRUE/FALSE     NG 'storm_data.csv.bz2'
## 1678 WFO 1/0/T/F/TRUE/FALSE     NG 'storm_data.csv.bz2'
## .... ... .................. ...... ....................
## See problems(...) for more details.

Analysis

Note: This analysis assumes that “population health” covers both fatalities and injuries, so the sum of those two fields is used to quantify the impact on population health. Likewise, “economic damage” is taken to be the sum of property damage and crop damage, with the appropriate magnitude modifiers applied.

# Creating new column with the sum of FATALITIES and INJURIES
stormdata <- mutate(stormdata, PHEALTH_IMPACT = FATALITIES + INJURIES)
# Creating a new column with the sum of PROPDMG and CROPDMG, each scaled by
# its magnitude code: K = thousands, M = millions, B = billions.
# Codes are matched case-insensitively. %in% is used instead of == so that a
# missing code evaluates to FALSE and falls through to the default.
# Any other or missing code keeps the raw figure (multiplier of 1).
stormdata <- mutate(
  stormdata,
  PMOD = case_when(
    PROPDMGEXP %in% c("K", "k") ~ 1e3,
    PROPDMGEXP %in% c("M", "m") ~ 1e6,
    PROPDMGEXP %in% c("B", "b") ~ 1e9,
    TRUE ~ 1
  ),
  PDMG = PMOD * PROPDMG,
  CMOD = case_when(
    CROPDMGEXP %in% c("K", "k") ~ 1e3,
    CROPDMGEXP %in% c("M", "m") ~ 1e6,
    CROPDMGEXP %in% c("B", "b") ~ 1e9,
    TRUE ~ 1
  ),
  CDMG = CMOD * CROPDMG,
  # Total economic damage per record
  DMG = PDMG + CDMG
)
# Group the records by event type (EVTYPE); the grouping is stored back on
# stormdata so the per-event summaries can reuse it
stormdata <- stormdata %>% group_by(EVTYPE)
# Total the health impact per event type, ordered from largest to smallest
EVTYPE_PHEALTH_IMPACT <- stormdata %>%
  summarize(PHEALTH_IMPACT = sum(PHEALTH_IMPACT)) %>%
  arrange(desc(PHEALTH_IMPACT))
# Keep the event types at or above the 99th percentile of health impact,
# ordered from smallest to largest
HEALTH_TOP1 <- EVTYPE_PHEALTH_IMPACT %>%
  filter(PHEALTH_IMPACT >= quantile(PHEALTH_IMPACT, .99)) %>%
  arrange(PHEALTH_IMPACT)
# Display the ten event types with the greatest population health impact
head(EVTYPE_PHEALTH_IMPACT, n = 10)
## # A tibble: 10 x 2
##    EVTYPE            PHEALTH_IMPACT
##    <chr>                      <dbl>
##  1 TORNADO                    96979
##  2 EXCESSIVE HEAT              8428
##  3 TSTM WIND                   7461
##  4 FLOOD                       7259
##  5 LIGHTNING                   6046
##  6 HEAT                        3037
##  7 FLASH FLOOD                 2755
##  8 ICE STORM                   2064
##  9 THUNDERSTORM WIND           1621
## 10 WINTER STORM                1527
# Total the combined damage (DMG) per event type, ordered from largest to
# smallest; stormdata is already grouped by EVTYPE at this point
EVTYPE_ECON_IMPACT <- stormdata %>%
  summarize(ECON_IMPACT = sum(DMG)) %>%
  arrange(desc(ECON_IMPACT))
# Keep the event types at or above the 99th percentile of economic impact,
# ordered from smallest to largest
ECON_TOP1 <- EVTYPE_ECON_IMPACT %>%
  filter(ECON_IMPACT >= quantile(ECON_IMPACT, .99)) %>%
  arrange(ECON_IMPACT)
# Display the ten event types with the greatest economic impact
head(EVTYPE_ECON_IMPACT, n = 10)
## # A tibble: 10 x 2
##    EVTYPE             ECON_IMPACT
##    <chr>                    <dbl>
##  1 TORNADO           51625760814.
##  2 FLOOD             22157877967.
##  3 FLASH FLOOD       15141041269.
##  4 HAIL              13927946651.
##  5 HURRICANE          6168324355.
##  6 TSTM WIND          4493137698.
##  7 HIGH WIND          3970063580.
##  8 ICE STORM          3944929549.
##  9 HURRICANE/TYPHOON  3805844864.
## 10 WILDFIRE           3725118365.

Results

As stated in the synopsis, tornadoes were the top result for both economic impacts and health impacts, but beyond that the ordering and severity of storm events differ between the two criteria. The following figures show the top 1% of event types by each criterion, with the length of each bar indicating the magnitude of the impact.

# One plot per page, with a wide left margin so the horizontal event-type
# labels are not clipped
par(mfcol = c(1, 1), mar = c(6, 12, 4, 4))

# Horizontal bars: total economic damage for the top 1% of event types
barplot(
  ECON_TOP1$ECON_IMPACT,
  names.arg = ECON_TOP1$EVTYPE,
  main = "Economic Impact of Storm Events",
  xlab = "Cost ($)",
  horiz = TRUE,
  las = 2,
  cex.axis = 0.8
)

# Horizontal bars: fatalities plus injuries for the top 1% of event types
barplot(
  HEALTH_TOP1$PHEALTH_IMPACT,
  names.arg = HEALTH_TOP1$EVTYPE,
  main = "Public Health Impact of Storm Events",
  xlab = "Lives Affected",
  horiz = TRUE,
  las = 2,
  cex.axis = 0.8
)