Synopsis

This analysis examines the types of weather events that occur in the United States and, specifically, which events have the largest impact from a financial and a health point of view. The data that this analysis is based on is available from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The data that was used for this analysis is available here.

Documentation that is useful in understanding the data is available from the National Weather Service and the National Climatic Data Center Storm Events FAQ.

In examining the data, the largest single event type which causes death and injury is the tornado. The largest single event type which causes property and crop damage is the flood. A table of the events and their respective fatalities, injuries, and damage is given in the Results section.

Data Processing

The data was read directly from the compressed archive and a lookup table was created with the valid event types per the documentation. The data was then filtered by valid events and limited to those events that had some kind of financial or health impact. It was then grouped by event and sums of impacts were gathered. Finally, the table of event impacts as well as plots of the impacts were created.

library(dplyr, warn.conflicts=FALSE)
library(lubridate)
library(stringr)

# Load the raw storm data. The file is a bzip2-compressed CSV with a header
# row; read.csv reads it straight from the archive path.
rawStormData <- read.csv(
  file = "repdata-data-StormData.csv.bz2",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Lookup table of the event types known to the documentation. It is used to
# filter the raw records down to recognised event types only.
Known_Events <- c(
  "ASTRONOMICAL LOW TIDE", "AVALANCHE", "BLIZZARD",
  "COASTAL FLOOD", "COLD/WIND CHILL",
  "DEBRIS FLOW", "DENSE FOG", "DENSE SMOKE", "DROUGHT",
  "DUST DEVIL", "DUST STORM",
  "EXCESSIVE HEAT", "EXTREME COLD/WIND CHILL",
  "FLASH FLOOD", "FLOOD", "FROST/FREEZE", "FUNNEL CLOUD", "FREEZING FOG",
  "HAIL", "HEAT", "HEAVY RAIN", "HEAVY SNOW", "HIGH SURF", "HIGH WIND",
  "HURRICANE (TYPHOON)",
  "ICE STORM",
  "LAKE-EFFECT SNOW", "LAKESHORE FLOOD", "LIGHTNING",
  "MARINE HAIL", "MARINE HIGH WIND", "MARINE STRONG WIND",
  "MARINE THUNDERSTORM WIND",
  "RIP CURRENT",
  "SEICHE", "SLEET", "STORM SURGE/TIDE", "STRONG WIND",
  "THUNDERSTORM WIND", "TORNADO", "TROPICAL DEPRESSION", "TROPICAL STORM",
  "TSUNAMI",
  "VOLCANIC ASH",
  "WATERSPOUT", "WILDFIRE", "WINTER STORM", "WINTER WEATHER"
)


# Filter the events to those that are known and that have a health or
# financial impact.
#
# EVTYPE is upper-cased before filtering so that mixed-case spellings (e.g.
# "Coastal Flood") are stored in the same form as the entries of Known_Events
# and therefore fall into the same group later on.
#
# Health impact is tested with FATALITIES and INJURIES. The F column is a
# separate integer field (the tornado F-scale rating per the NOAA
# documentation), not a casualty count, so it must not be used here.
# Financial impact covers both property (PROPDMG) and crop (CROPDMG) damage.
filteredSevereEvents <- tbl_df(rawStormData) %>%
  mutate(EVTYPE = toupper(EVTYPE)) %>%
  filter(EVTYPE %in% Known_Events,
         (!is.na(FATALITIES) & FATALITIES > 0) |
           (!is.na(INJURIES) & INJURIES > 0) |
           (!is.na(PROPDMG) & PROPDMG > 0) |
           (!is.na(CROPDMG) & CROPDMG > 0))

# Data summaries: show the structure (column names, types and leading values)
# of the filtered events.
str(filteredSevereEvents)
## Classes 'tbl_df', 'tbl' and 'data.frame':    166845 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Preview the first rows of the filtered events.
head(filteredSevereEvents)
## Source: local data frame [6 x 37]
## 
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
## Variables not shown: EVTYPE (chr), BGN_RANGE (dbl), BGN_AZI (chr),
##   BGN_LOCATI (chr), END_DATE (chr), END_TIME (chr), COUNTY_END (dbl),
##   COUNTYENDN (lgl), END_RANGE (dbl), END_AZI (chr), END_LOCATI (chr),
##   LENGTH (dbl), WIDTH (dbl), F (int), MAG (dbl), FATALITIES (dbl),
##   INJURIES (dbl), PROPDMG (dbl), PROPDMGEXP (chr), CROPDMG (dbl),
##   CROPDMGEXP (chr), WFO (chr), STATEOFFIC (chr), ZONENAMES (chr), LATITUDE
##   (dbl), LONGITUDE (dbl), LATITUDE_E (dbl), LONGITUDE_ (dbl), REMARKS
##   (chr), REFNUM (dbl)
# Translate damage-exponent codes into numeric multipliers.
#
# x: character vector (or a single string) of exponent codes, matched
#    case-insensitively: "B" = 1e9, "M" = 1e6, "K" = 1e3, "H" = 1e2.
# Returns an unnamed numeric vector the same length as x. Any code that is
# not one of the four above (including "" and NA) maps to 1, i.e. the damage
# figure is left unscaled.
#
# The lookup is vectorized, so it can be applied to a whole column at once as
# well as to one code at a time.
multiplier <- function(x) {
  factors <- c(B = 1e9, M = 1e6, K = 1e3, H = 1e2)
  # Indexing by name yields NA for codes that are not in the table.
  scale <- unname(factors[toupper(as.character(x))])
  scale[is.na(scale)] <- 1
  scale
}

# Keep only the columns needed for the impact analysis and derive:
#   eventDate      - the date part of BGN_DATE (the text before the last
#                    space), parsed as month/day/year
#   propertyDamage - PROPDMG scaled by the multiplier for PROPDMGEXP
#   cropDamage     - CROPDMG scaled by the multiplier for CROPDMGEXP
# vapply() is used instead of sapply() so the multipliers are always an
# unnamed numeric vector, even when there are no rows.
restrictedSevereEvents <- filteredSevereEvents %>%
  select(BGN_DATE, EVTYPE, F, FATALITIES, INJURIES,
         PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
  mutate(eventDate = mdy(str_trim(str_extract(BGN_DATE, ".* "))),
         propertyDamage = PROPDMG *
           vapply(PROPDMGEXP, multiplier, numeric(1), USE.NAMES = FALSE),
         cropDamage = CROPDMG *
           vapply(CROPDMGEXP, multiplier, numeric(1), USE.NAMES = FALSE))

# Summarise health and financial impacts per event type: number of records,
# total fatalities, total injuries, and total (property + crop) damage.
eventGroup <- restrictedSevereEvents %>%
  group_by(EVTYPE)

impactByEvent <- eventGroup %>%
  summarize(
    count = n(),
    fatalityTotal = sum(FATALITIES),
    injuryTotal = sum(INJURIES),
    damageTotal = sum(propertyDamage + cropDamage)
  )

Results

As stated before, the largest single event type which causes death and injury is the tornado. The largest single event type which causes property and crop damage is the flood. A table of the events and their respective fatalities, injuries, and damage follows.

# Raw data by event: print the per-event summary table.
impactByEvent
## Source: local data frame [50 x 5]
## 
##                      EVTYPE count fatalityTotal injuryTotal  damageTotal
## 1     ASTRONOMICAL LOW TIDE     2             0           0       320000
## 2                 AVALANCHE    54            24          22      3721800
## 3                  BLIZZARD   213            63         777    771273950
## 4             COASTAL FLOOD   161             0           1    237665560
## 5           COLD/WIND CHILL    14             0          10      1990000
## 6             Coastal Flood     4             0           0     21905000
## 7                 DENSE FOG    66            17         254      9674000
## 8               DENSE SMOKE     1             0           0       100000
## 9                   DROUGHT    54             0           4   2277861000
## 10               DUST DEVIL    81             1          27       700330
## 11               DUST STORM    76             5         181      7149000
## 12               Dust Devil     5             0           0        18300
## 13           EXCESSIVE HEAT    24            56          63    500153700
## 14  EXTREME COLD/WIND CHILL    19             0           0      8648000
## 15              FLASH FLOOD 20568           597        1505  17523102417
## 16                    FLOOD  9710           357        6735 148731153257
## 17             FREEZING FOG     7             0           0      2182000
## 18             FROST/FREEZE    18             0           0    127580000
## 19             FUNNEL CLOUD    12             0           2       194600
## 20             Frost/Freeze     1             0           0      1100000
## 21                     HAIL 23047             3         585  17610836646
## 22                     HEAT     8            30         470      2432000
## 23               HEAVY RAIN   958            43         126    725223890
## 24               HEAVY SNOW  1237            57         734   1064231342
## 25                HIGH SURF    56            14          31     89575000
## 26                HIGH WIND  5332           153         872   5886431395
## 27                High Surf     2             0           0       380000
## 28                ICE STORM   667            40        1842   8967041360
## 29         LAKE-EFFECT SNOW   194             0           0     40115000
## 30          LAKESHORE FLOOD     5             0           0      7540000
## 31                LIGHTNING 10288            49         609    933638597
## 32              MARINE HAIL     2             0           0         4000
## 33         MARINE HIGH WIND    18             0           0      1297010
## 34       MARINE STRONG WIND    34             4           4       418330
## 35 MARINE THUNDERSTORM WIND    23             2           4       436400
## 36              RIP CURRENT     1             0           0         1000
## 37                   SEICHE     9             0           0       980000
## 38         STORM SURGE/TIDE    47            11           5   4642038000
## 39              STRONG WIND  3207            49         176    239447950
## 40              Strong Wind     2             0           0        18000
## 41        THUNDERSTORM WIND 43269            86        1077   3819093834
## 42                  TORNADO 44364          5626       91200  57346747389
## 43      TROPICAL DEPRESSION    35             0           0      1737000
## 44           TROPICAL STORM   396            45         336   8155550550
## 45                  TSUNAMI    14            33         129    144082000
## 46             VOLCANIC ASH     2             0           0       500000
## 47               WATERSPOUT    45             2          28      9353700
## 48                 WILDFIRE   733            65         684   4868037900
## 49             WINTER STORM  1387            80        1004   6711721251
## 50           WINTER WEATHER   373            12         173     20866000
# Create plots showing the overall impact of each event type.
# There are many event types with long names, so the labels are drawn
# perpendicular to the axis (las = 2) in a smaller font and the bottom margin
# is widened to fit them; with the defaults most labels would not be drawn.
# The previous graphical parameters are restored afterwards.
oldPar <- par(mar = c(11, 5, 3, 1))

barplot(impactByEvent$fatalityTotal, names.arg = impactByEvent$EVTYPE,
        las = 2, cex.names = 0.6,
        main = "Total fatalities by event type", ylab = "Fatalities")

barplot(impactByEvent$injuryTotal, names.arg = impactByEvent$EVTYPE,
        las = 2, cex.names = 0.6,
        main = "Total injuries by event type", ylab = "Injuries")

barplot(impactByEvent$damageTotal, names.arg = impactByEvent$EVTYPE,
        las = 2, cex.names = 0.6,
        main = "Total property and crop damage by event type",
        ylab = "Damage")

par(oldPar)