This analysis examines the types of weather events that occur in the United States, and specifically which events have the largest impact from a financial and health point of view. The data that this analysis is based on comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The data that was used for this analysis is available here.
Documentation that is useful in understanding the data is available from the National Weather Service and the National Climatic Data Center Storm Events FAQ.
In examining the data, the single event type that causes the most deaths and injuries is the tornado. The single event type that causes the most property and crop damage is the flood. A table of the events and their respective fatalities, injuries, and damage is given in the results below.
The data was read directly from the compressed archive, and a lookup table was created with the valid event types per the documentation. The data was then filtered to valid events and limited to those events that had some kind of financial or health impact. It was then grouped by event, and the impacts were summed per event. Finally, the table of event impacts and plots of the impacts were created.
library(dplyr, warn.conflicts=FALSE)
library(lubridate)
library(stringr)
# Load the raw storm data from the bzip2-compressed CSV. The file has a header
# row, and character columns are kept as plain strings rather than factors.
rawStormData <- read.csv(
  file = "repdata-data-StormData.csv.bz2",
  stringsAsFactors = FALSE
)
# Lookup table of the event types known from the documentation. Events are
# kept only when their upper-cased EVTYPE is an exact member of this vector.
# NOTE(review): matching is exact, so a raw EVTYPE spelled differently from
# the entry here (different punctuation, plural, abbreviation) is dropped --
# confirm the spellings against the values actually present in the data.
Known_Events <- c(
  "ASTRONOMICAL LOW TIDE", "AVALANCHE", "BLIZZARD", "COASTAL FLOOD",
  "COLD/WIND CHILL", "DEBRIS FLOW", "DENSE FOG", "DENSE SMOKE",
  "DROUGHT", "DUST DEVIL", "DUST STORM", "EXCESSIVE HEAT",
  "EXTREME COLD/WIND CHILL", "FLASH FLOOD", "FLOOD", "FROST/FREEZE",
  "FUNNEL CLOUD", "FREEZING FOG", "HAIL", "HEAT",
  "HEAVY RAIN", "HEAVY SNOW", "HIGH SURF", "HIGH WIND",
  "HURRICANE (TYPHOON)", "ICE STORM", "LAKE-EFFECT SNOW", "LAKESHORE FLOOD",
  "LIGHTNING", "MARINE HAIL", "MARINE HIGH WIND", "MARINE STRONG WIND",
  "MARINE THUNDERSTORM WIND", "RIP CURRENT", "SEICHE", "SLEET",
  "STORM SURGE/TIDE", "STRONG WIND", "THUNDERSTORM WIND", "TORNADO",
  "TROPICAL DEPRESSION", "TROPICAL STORM", "TSUNAMI", "VOLCANIC ASH",
  "WATERSPOUT", "WILDFIRE", "WINTER STORM", "WINTER WEATHER"
)
# Keep only events whose upper-cased type is one of the known event types and
# that record some health or financial impact: at least one fatality or
# injury, or a non-zero property or crop damage figure.
# The health test reads FATALITIES and INJURIES (the columns the later
# summaries total) rather than F, which is a separate integer column
# (presumably the tornado F-scale rating -- confirm) and not a casualty count.
filteredSevereEvents <- tbl_df(filter(
  rawStormData,
  toupper(EVTYPE) %in% Known_Events,
  (!is.na(FATALITIES) & FATALITIES > 0) |
    (!is.na(INJURIES) & INJURIES > 0) |
    (!is.na(PROPDMG) & PROPDMG > 0) |
    (!is.na(CROPDMG) & CROPDMG > 0)
))
# Data summaries: show the structure of the filtered data (row and column
# counts, each column's type, and its first few values).
str(filteredSevereEvents)
## Classes 'tbl_df', 'tbl' and 'data.frame': 166845 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Preview the first rows of the filtered data.
head(filteredSevereEvents)
## Source: local data frame [6 x 37]
##
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## Variables not shown: EVTYPE (chr), BGN_RANGE (dbl), BGN_AZI (chr),
## BGN_LOCATI (chr), END_DATE (chr), END_TIME (chr), COUNTY_END (dbl),
## COUNTYENDN (lgl), END_RANGE (dbl), END_AZI (chr), END_LOCATI (chr),
## LENGTH (dbl), WIDTH (dbl), F (int), MAG (dbl), FATALITIES (dbl),
## INJURIES (dbl), PROPDMG (dbl), PROPDMGEXP (chr), CROPDMG (dbl),
## CROPDMGEXP (chr), WFO (chr), STATEOFFIC (chr), ZONENAMES (chr), LATITUDE
## (dbl), LONGITUDE (dbl), LATITUDE_E (dbl), LONGITUDE_ (dbl), REMARKS
## (chr), REFNUM (dbl)
# Convert damage exponent codes into numeric multipliers.
#
# x: character vector of exponent codes (any length, case-insensitive).
# Returns an unnamed numeric vector the same length as x:
#   "B" -> 1e9, "M" -> 1e6, "K" -> 1e3, "H" -> 1e2, anything else
#   (including "" and NA) -> 1.
# Vectorized, so it can be applied to a whole column directly; a single
# code still yields a single number, so per-element callers keep working.
multiplier <- function(x) {
  factors <- c(B = 1000000000, M = 1000000, K = 1000, H = 100)
  # Name-based lookup gives NA for every code that is not in the table.
  scale <- unname(factors[toupper(x)])
  scale[is.na(scale)] <- 1
  scale
}
# Narrow the filtered events to the columns needed for the impact analysis and
# derive:
#   eventDate      - BGN_DATE parsed as month/day/year; the regex keeps the
#                    text up to the last space, i.e. the date part of values
#                    such as "4/18/1950 0:00:00".
#   propertyDamage - PROPDMG scaled by its exponent code (PROPDMGEXP).
#   cropDamage     - CROPDMG scaled by its exponent code (CROPDMGEXP).
# vapply() is used instead of sapply() so the multipliers are always an
# unnamed numeric vector, even when there are no rows.
restrictedSevereEvents <- filteredSevereEvents %>%
  select(
    BGN_DATE, EVTYPE, F, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    eventDate = mdy(str_trim(str_extract(BGN_DATE, ".* "))),
    propertyDamage = PROPDMG *
      vapply(PROPDMGEXP, multiplier, numeric(1), USE.NAMES = FALSE),
    cropDamage = CROPDMG *
      vapply(CROPDMGEXP, multiplier, numeric(1), USE.NAMES = FALSE)
  )
# Total the health and financial impacts for each event type.
# EVTYPE is upper-cased before grouping because the raw values mix cases
# (for example "Coastal Flood" and "COASTAL FLOOD"); grouping on the raw
# value would report those as separate events.
eventGroup <- restrictedSevereEvents %>%
  mutate(EVTYPE = toupper(EVTYPE)) %>%
  group_by(EVTYPE)
# Missing values are skipped so that one NA cannot blank out a whole total;
# property and crop damage are summed separately for the same reason.
impactByEvent <- summarize(
  eventGroup,
  count = n(),
  fatalityTotal = sum(FATALITIES, na.rm = TRUE),
  injuryTotal = sum(INJURIES, na.rm = TRUE),
  damageTotal = sum(propertyDamage, na.rm = TRUE) +
    sum(cropDamage, na.rm = TRUE)
)
As stated before, the single event type that causes the most deaths and injuries is the tornado. The single event type that causes the most property and crop damage is the flood. A table of the events and their respective fatalities, injuries, and damage is shown below.
# Print the per-event totals: event count, fatalities, injuries, and damage.
impactByEvent
## Source: local data frame [50 x 5]
##
## EVTYPE count fatalityTotal injuryTotal damageTotal
## 1 ASTRONOMICAL LOW TIDE 2 0 0 320000
## 2 AVALANCHE 54 24 22 3721800
## 3 BLIZZARD 213 63 777 771273950
## 4 COASTAL FLOOD 161 0 1 237665560
## 5 COLD/WIND CHILL 14 0 10 1990000
## 6 Coastal Flood 4 0 0 21905000
## 7 DENSE FOG 66 17 254 9674000
## 8 DENSE SMOKE 1 0 0 100000
## 9 DROUGHT 54 0 4 2277861000
## 10 DUST DEVIL 81 1 27 700330
## 11 DUST STORM 76 5 181 7149000
## 12 Dust Devil 5 0 0 18300
## 13 EXCESSIVE HEAT 24 56 63 500153700
## 14 EXTREME COLD/WIND CHILL 19 0 0 8648000
## 15 FLASH FLOOD 20568 597 1505 17523102417
## 16 FLOOD 9710 357 6735 148731153257
## 17 FREEZING FOG 7 0 0 2182000
## 18 FROST/FREEZE 18 0 0 127580000
## 19 FUNNEL CLOUD 12 0 2 194600
## 20 Frost/Freeze 1 0 0 1100000
## 21 HAIL 23047 3 585 17610836646
## 22 HEAT 8 30 470 2432000
## 23 HEAVY RAIN 958 43 126 725223890
## 24 HEAVY SNOW 1237 57 734 1064231342
## 25 HIGH SURF 56 14 31 89575000
## 26 HIGH WIND 5332 153 872 5886431395
## 27 High Surf 2 0 0 380000
## 28 ICE STORM 667 40 1842 8967041360
## 29 LAKE-EFFECT SNOW 194 0 0 40115000
## 30 LAKESHORE FLOOD 5 0 0 7540000
## 31 LIGHTNING 10288 49 609 933638597
## 32 MARINE HAIL 2 0 0 4000
## 33 MARINE HIGH WIND 18 0 0 1297010
## 34 MARINE STRONG WIND 34 4 4 418330
## 35 MARINE THUNDERSTORM WIND 23 2 4 436400
## 36 RIP CURRENT 1 0 0 1000
## 37 SEICHE 9 0 0 980000
## 38 STORM SURGE/TIDE 47 11 5 4642038000
## 39 STRONG WIND 3207 49 176 239447950
## 40 Strong Wind 2 0 0 18000
## 41 THUNDERSTORM WIND 43269 86 1077 3819093834
## 42 TORNADO 44364 5626 91200 57346747389
## 43 TROPICAL DEPRESSION 35 0 0 1737000
## 44 TROPICAL STORM 396 45 336 8155550550
## 45 TSUNAMI 14 33 129 144082000
## 46 VOLCANIC ASH 2 0 0 500000
## 47 WATERSPOUT 45 2 28 9353700
## 48 WILDFIRE 733 65 684 4868037900
## 49 WINTER STORM 1387 80 1004 6711721251
## 50 WINTER WEATHER 373 12 173 20866000
# Plot the overall impact per event type: one bar chart each for fatalities,
# injuries, and combined property and crop damage.
# Event names are long and numerous, so the labels are drawn perpendicular to
# the axis (las = 2) in a smaller font, with a taller bottom margin to fit
# them. The previous graphics settings are restored afterwards.
old_par <- par(mar = c(12, 5, 3, 1))
barplot(
  impactByEvent$fatalityTotal,
  names.arg = impactByEvent$EVTYPE,
  las = 2, cex.names = 0.6,
  main = "Total fatalities by event type"
)
barplot(
  impactByEvent$injuryTotal,
  names.arg = impactByEvent$EVTYPE,
  las = 2, cex.names = 0.6,
  main = "Total injuries by event type"
)
barplot(
  impactByEvent$damageTotal,
  names.arg = impactByEvent$EVTYPE,
  las = 2, cex.names = 0.6,
  main = "Total property and crop damage by event type"
)
par(old_par)