This analysis utilizes the NOAA Storm Data from years 1950 through 2012 to answer two questions: first, over the range of recorded observations, what type of storm event resulted in the greatest loss of human life; and second, what storm event resulted in the greatest amount of property damage.
# Fetch the compressed NOAA Storm Data archive and load it into `stormData`.
# Both steps are skipped when their result already exists (the file on disk,
# the object in the session), so re-running the script repeats neither the
# download nor the parse.
theURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
stormDataFile <- "StormData.csv.bz2"
if (!file.exists(stormDataFile)) {
  # The archive is bzip2-compressed: request a binary transfer explicitly
  # instead of relying on download.file() inferring it from the URL.
  download.file(theURL, destfile = stormDataFile, mode = "wb")
}
if (!exists("stormData")) {
  # read.csv() is given the .bz2 file directly; no separate decompression
  # step is performed by this script.
  stormData <- read.csv(stormDataFile)
}
As with most real-world data sets, the NOAA Storm Data data set suffers from data entry inconsistencies. For example, the EVTYPE (short for “event type”) variable, which will be central to the analysis performed to answer the questions about what type of weather event resulted in the greatest number of fatalities and the most property damage, takes over 900 distinct values (factor levels). To illustrate, the following 257 “event designators” are returned by a search for events involving wind:
# List every distinct EVTYPE value matched by the wind search pattern.
# NOTE(review): read as a regular expression, the trailing `*` makes the final
# [Dd] optional, so values that only contain "WIN" (e.g. "WINTER STORM") are
# listed as well -- confirm this is intended.
with(stormData, unique(EVTYPE[grepl("*[Ww][Ii][Nn][Dd]*", EVTYPE)]))
## [1] TSTM WIND WINTER STORM
## [3] HURRICANE OPAL/HIGH WINDS THUNDERSTORM WINDS
## [5] THUNDERSTORM WIND THUNDERSTORM WINS
## [7] HIGH WINDS THUNDERSTORM WINDS LIGHTNING
## [9] THUNDERSTORM WINDS/HAIL WIND
## [11] THUNDERSTORM WINDS HAIL HIGH WIND
## [13] WIND CHILL HIGH WIND/BLIZZARD
## [15] HIGH WIND AND HIGH TIDES HIGH WIND/BLIZZARD/FREEZING RA
## [17] HIGH WIND AND HEAVY SNOW RECORD COLD AND HIGH WIND
## [19] HIGH WINDS HEAVY RAINS HIGH WIND/ BLIZZARD
## [21] BLIZZARD/HIGH WIND HIGH WIND/LOW WIND CHILL
## [23] HIGH WINDS AND WIND CHILL HEAVY SNOW/HIGH WINDS/FREEZING
## [25] WIND CHILL/HIGH WIND HIGH WIND/WIND CHILL/BLIZZARD
## [27] HIGH WIND/WIND CHILL HIGH WIND/HEAVY SNOW
## [29] HIGH WIND/SEAS HIGH WINDS/HEAVY RAIN
## [31] HEAVY SNOW/WIND WIND DAMAGE
## [33] THUNDERSTORM WINDS/FUNNEL CLOU WINTER STORM/HIGH WIND
## [35] WINTER STORM/HIGH WINDS GUSTY WINDS
## [37] STRONG WINDS SNOW AND WIND
## [39] BLOWING DUST HIGH WINDS DUST STORM
## [41] WINTER STORM HIGH WINDS WINTER STORMS
## [43] SEVERE THUNDERSTORM WINDS THUNDERSTORMS WINDS
## [45] FLOOD/RAIN/WINDS WINDS
## [47] FLASH FLOOD WINDS STRONG WIND
## [49] HIGH WIND DAMAGE FLOOD/RAIN/WIND
## [51] DOWNBURST WINDS DRY MICROBURST WINDS
## [53] DRY MIRCOBURST WINDS MICROBURST WINDS
## [55] HIGH WINDS 57 HIGH WINDS 66
## [57] HIGH WINDS 76 HIGH WINDS 63
## [59] HIGH WINDS 67 HEAVY SNOW/HIGH WINDS
## [61] BLOWING SNOW HIGH WINDS 82
## [63] HIGH WINDS 80 HIGH WINDS 58
## [65] LIGHTNING THUNDERSTORM WINDSS HIGH WINDS 73
## [67] HIGH WINDS 55 THUNDERSTORM WINDS 60
## [69] THUNDERSTORM WINDSS WINTRY MIX
## [71] WINTER WEATHER HIGH WINDS/FLOODING
## [73] TORNADOES, TSTM WIND, HAIL LIGHTNING THUNDERSTORM WINDS
## [75] LIGHTNING AND THUNDERSTORM WIN THUNDERSTORM WINDS53
## [77] THUNDERSTORM WINDS 13 HEAVY SNOW/HIGH WIND
## [79] HIGH WINDS/ EXTREME WIND CHILLS
## [81] HIGH WINDS EXTREME WIND CHILL
## [83] GRADIENT WINDS HEAVY SNOW/BLOWING SNOW
## [85] THUNDERSTORM WINDS URBAN FLOOD THUNDERSTORM WINDS SMALL STREA
## [87] BLOWING SNOW- EXTREME WIND CHI SNOW- HIGH WIND- WIND CHILL
## [89] THUNDERSTORM WINDS 2 TSTM WIND 51
## [91] TSTM WIND 50 TSTM WIND 52
## [93] TSTM WIND 55 THUNDERSTORM WINDS 61
## [95] THUNDERTORM WINDS HAIL/WINDS
## [97] WIND STORM HAIL/WIND
## [99] WIND/HAIL THUNDERSTORMS WIND
## [101] THUNDERSTORM WINDS TUNDERSTORM WIND
## [103] THUNDERTSORM WIND THUNDERSTORM WINDS/ HAIL
## [105] THUNDERSTORM WIND/LIGHTNING THUNDESTORM WINDS
## [107] HIGH WIND 63 HIGH WINDS/COASTAL FLOOD
## [109] THUNDERSTORM WIND G50 THUNDERSTORM WINDS/HEAVY RAIN
## [111] THUNDERSTROM WINDS THUNDERSTORM WINDS LE CEN
## [113] HEAVY SNOW ANDBLOWING SNOW BLIZZARD AND EXTREME WIND CHIL
## [115] LOW WIND CHILL BLOWING SNOW & EXTREME WIND CH
## [117] THUNDERSTORM WINDS G HEAVY SNOW/WINTER STORM
## [119] BLIZZARD/WINTER STORM DUST STORM/HIGH WINDS
## [121] THUNDERSTORM WIND G60 THUNDERSTORM WINDS.
## [123] THUNDERSTORM WIND G55 THUNDERSTORM WINDS G60
## [125] THUNDERSTORM WINDS FUNNEL CLOU THUNDERSTORM WINDS 62
## [127] HEAVY SNOW AND HIGH WINDS HEAVY SNOW/HIGH WINDS & FLOOD
## [129] THUNDERSTORM WINDS/FLASH FLOOD HIGH WIND 70
## [131] THUNDERSTORM WINDS 53 RAIN AND WIND
## [133] THUNDERSTORM WIND 59 THUNDERSTORM WIND 52
## [135] THUNDERSTORM WIND 69 LIGHTNING AND WINDS
## [137] TSTM WIND G58 THUNDERSTORMW WINDS
## [139] THUNDERSTORM WIND 60 MPH THUNDERSTORM WIND 65MPH
## [141] THUNDERSTORM WIND/ TREES THUNDERSTORM WIND/AWNING
## [143] THUNDERSTORM WIND 98 MPH THUNDERSTORM WIND TREES
## [145] THUNDERSTORM WIND 59 MPH THUNDERSTORM WINDS 63 MPH
## [147] THUNDERSTORM WIND/ TREE THUNDERSTORM WIND 65 MPH
## [149] THUNDERSTORM WIND. THUNDERSTORM WIND 59 MPH.
## [151] THUNDERSTORM WINDSHAIL THUDERSTORM WINDS
## [153] STORM FORCE WINDS THUNDERSTORM WINDS AND
## [155] HEAVY RAIN; URBAN FLOOD WINDS; TSTM WIND DAMAGE
## [157] RAIN/WIND THUNDERSTORM WINDS 50
## [159] THUNDERSTORM WIND G52 THUNDERSTORM WINDS 52
## [161] THUNDERSTORM WIND G51 THUNDERSTORM WIND G61
## [163] THUNDERESTORM WINDS THUNDERSTORM WINDS/FLOODING
## [165] THUNDEERSTORM WINDS THUNDERSTORM WIND 50
## [167] THUNERSTORM WINDS HIGH WINDS/COLD
## [169] COLD/WINDS THUNDERSTORM WIND 56
## [171] ICE/STRONG WINDS EXTREME WIND CHILL/BLOWING SNO
## [173] SNOW/HIGH WINDS HIGH WINDS/SNOW
## [175] HEAVY SNOW AND STRONG WINDS BLOWING SNOW/EXTREME WIND CHIL
## [177] SNOW/BLOWING SNOW THUNDERSTORM WIND/HAIL
## [179] TSTM WINDS TSTM WIND 65)
## [181] THUNDERSTORM WINDS/ FLOOD HIGH WIND AND SEAS
## [183] THUNDERSTORMWINDS THUNDERSTORM WINDS HEAVY RAIN
## [185] THUNDERSTROM WIND WINTER MIX
## [187] HIGH WIND 48 EXTREME WINDCHILL
## [189] TSTM WIND/HAIL High Wind
## [191] Tstm Wind Wind
## [193] Wind Damage Strong Wind
## [195] Heavy Rain and Wind Thunderstorm Wind
## [197] Wintry Mix blowing snow
## [199] HEAVY RAIN/WIND Winter Weather
## [201] Strong Winds Strong winds
## [203] Whirlwind Record Winter Snow
## [205] Gusty Wind Gradient wind
## [207] Gusty wind/rain GUSTY WIND/HVY RAIN
## [209] Blowing Snow TSTM WIND (G45)
## [211] Gusty Winds GUSTY WIND
## [213] TSTM WIND 40 TSTM WIND 45
## [215] TSTM WIND (41) TSTM WIND (G40)
## [217] Wintry mix TSTM WIND
## [219] STRONG WIND GUST Gusty winds
## [221] GRADIENT WIND Flood/Strong Wind
## [223] TSTM WIND AND LIGHTNING gradient wind
## [225] Heavy surf and wind WINTERY MIX
## [227] TSTM WIND (G45) TSTM WIND (G45)
## [229] HIGH WIND (G40) TSTM WIND (G35)
## [231] WAKE LOW WIND COLD WIND CHILL TEMPERATURES
## [233] BITTER WIND CHILL BITTER WIND CHILL TEMPERATURES
## [235] WIND ADVISORY GUSTY WIND/HAIL
## [237] EXTREME WINDCHILL TEMPERATURES WIND AND WAVE
## [239] WIND TSTM WIND G45
## [241] NON-SEVERE WIND DAMAGE THUNDERSTORM WIND (G40)
## [243] WIND GUSTS GUSTY LAKE WIND
## [245] WINTER WEATHER MIX NON-TSTM WIND
## [247] NON TSTM WIND GUSTY THUNDERSTORM WINDS
## [249] MARINE TSTM WIND WINTER WEATHER/MIX
## [251] WHIRLWIND EXTREME COLD/WIND CHILL
## [253] GUSTY THUNDERSTORM WIND COLD/WIND CHILL
## [255] MARINE HIGH WIND MARINE THUNDERSTORM WIND
## [257] MARINE STRONG WIND
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
Further, the abbreviation “TSTM” stands for “THUNDERSTORM”. If we look for event designators that describe some variation of the “THUNDERSTORM” designator, we find 93 event designators (some already included in the search for wind, and others not included).
# List every distinct EVTYPE value matched by the thunderstorm search pattern.
# NOTE(review): read as a regular expression, the trailing `*` makes the final
# [Nn] optional, so values that only contain "THU" (e.g. "THUDERSTORM WINDS")
# are listed as well -- confirm this is intended.
with(stormData, unique(EVTYPE[grepl("*[Tt][Hh][Uu][Nn]*", EVTYPE)]))
## [1] THUNDERSTORM WINDS THUNDERSTORM WIND
## [3] THUNDERSTORM WINS THUNDERSTORM WINDS LIGHTNING
## [5] THUNDERSTORM WINDS/HAIL THUNDERSTORM WINDS HAIL
## [7] FLASH FLOODING/THUNDERSTORM WI THUNDERSTORM
## [9] THUNDERSTORM WINDS/FUNNEL CLOU SEVERE THUNDERSTORM
## [11] SEVERE THUNDERSTORMS SEVERE THUNDERSTORM WINDS
## [13] THUNDERSTORMS WINDS THUNDERSTORMS
## [15] LIGHTNING THUNDERSTORM WINDSS THUNDERSTORM WINDS 60
## [17] THUNDERSTORM WINDSS LIGHTNING THUNDERSTORM WINDS
## [19] LIGHTNING AND THUNDERSTORM WIN THUNDERSTORM WINDS53
## [21] THUNDERSTORM WINDS 13 THUNDERSNOW
## [23] THUNDERSTORM WINDS URBAN FLOOD THUNDERSTORM WINDS SMALL STREA
## [25] THUNDERSTORM WINDS 2 THUNDERSTORM WINDS 61
## [27] THUNDERSTORM DAMAGE THUNDERTORM WINDS
## [29] THUNDERSTORMW 50 THUNDERSTORMS WIND
## [31] THUNDERSTORM WINDS THUNDERTSORM WIND
## [33] THUNDERSTORM WINDS/ HAIL THUNDERSTORM WIND/LIGHTNING
## [35] THUNDESTORM WINDS THUNDERSTORM WIND G50
## [37] THUNDERSTORM WINDS/HEAVY RAIN THUNDERSTROM WINDS
## [39] THUNDERSTORM WINDS LE CEN THUNDERSTORM WINDS G
## [41] THUNDERSTORM WIND G60 THUNDERSTORM WINDS.
## [43] THUNDERSTORM WIND G55 THUNDERSTORM WINDS G60
## [45] THUNDERSTORM WINDS FUNNEL CLOU THUNDERSTORM WINDS 62
## [47] THUNDERSTORM WINDS/FLASH FLOOD THUNDERSTORM WINDS 53
## [49] THUNDERSTORM WIND 59 THUNDERSTORM WIND 52
## [51] THUNDERSTORM WIND 69 THUNDERSTORMW WINDS
## [53] THUNDERSTORM WIND 60 MPH THUNDERSTORM WIND 65MPH
## [55] THUNDERSTORM WIND/ TREES THUNDERSTORM WIND/AWNING
## [57] THUNDERSTORM WIND 98 MPH THUNDERSTORM WIND TREES
## [59] THUNDERSTORM WIND 59 MPH THUNDERSTORM WINDS 63 MPH
## [61] THUNDERSTORM WIND/ TREE THUNDERSTORM DAMAGE TO
## [63] THUNDERSTORM WIND 65 MPH THUNDERSTORM WIND.
## [65] THUNDERSTORM WIND 59 MPH. THUNDERSTORM HAIL
## [67] THUNDERSTORM WINDSHAIL THUDERSTORM WINDS
## [69] THUNDERSTORM WINDS AND THUNDERSTORM WINDS 50
## [71] THUNDERSTORM WIND G52 THUNDERSTORM WINDS 52
## [73] THUNDERSTORM WIND G51 THUNDERSTORM WIND G61
## [75] THUNDERESTORM WINDS THUNDERSTORM WINDS/FLOODING
## [77] THUNDEERSTORM WINDS THUNDERSTORM W INDS
## [79] THUNDERSTORM WIND 50 THUNERSTORM WINDS
## [81] THUNDERSTORM WIND 56 THUNDERSTORM WIND/HAIL
## [83] THUNDERSTORMW THUNDERSTORM WINDS/ FLOOD
## [85] THUNDERSTORMWINDS THUNDERSTORM WINDS HEAVY RAIN
## [87] THUNDERSTROM WIND Thunderstorm Wind
## [89] Thundersnow shower THUNDERSTORM WIND (G40)
## [91] GUSTY THUNDERSTORM WINDS GUSTY THUNDERSTORM WIND
## [93] MARINE THUNDERSTORM WIND
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
In this set, “WIND” is misspelled as “WINS” in one entry and as “W IND” in another, and “THUNDERSTORM” itself appears under several misspellings (for example, “THUNDERTORM” and “THUDERSTORM”). The problem with these inconsistencies is that each of these variations is being treated as its own factor level, so that when we attempt to aggregate the fatality or damage amounts on EVTYPE, all of these variations will be treated as distinct categories. However, the number of fatalities and the amount of damage attributed to tornadoes in the dataset are outliers (vastly more than any individual category as the data presently stands), so the question becomes: if we were to aggregate the wind and thunderstorm event variations into a single factor level, would that aggregate be greater than the tornado event type? In order to get a better picture of the fatalities and damage within event types, we will attempt to consolidate some of the variations within the event categories. For example, we will consolidate variations of events including the keywords “wind”, “thun” and “tstm” into the event type “WIND”. We will consolidate any event type containing the keyword “torn” into the event type “TORNADO”, and so on.
# Collapse the free-text EVTYPE labels into a small set of broad categories.
#
# `evtypeRules` maps a keyword (its name) to the category assigned to every
# label containing that keyword. Rules are applied in the order listed and
# each one overwrites the label, so a label containing several keywords ends
# up in the category of the earliest matching rule (e.g. anything mentioning
# wind becomes "WIND" before the tornado or hail rules are tried).
#
# Keywords are matched as plain, case-insensitive substrings. Glob-style `*`
# wildcards must not be used here: in a regular expression a trailing `*`
# makes the preceding letter optional, so a "torn" rule written that way would
# also capture every "...STORM" label, "heat" would capture "HEAVY ...", and
# "ice" would capture "HURRICANE".
#
# NOTE(review): totals computed from `sd2` depend on these rules; any
# previously recorded results should be regenerated after changing them.
sd2 <- stormData
evtypeRules <- c(
  "wind" = "WIND",
  "thun" = "WIND",
  "tstm" = "WIND",
  "torn" = "TORNADO",
  "hail" = "HAIL",
  "flood" = "FLOOD",
  # Tolerates a dropped N ("LIGHTING") without capturing labels that merely
  # start with LIGHT, such as "LIGHT SNOW".
  "lightn?ing" = "LIGHTNING",
  "ice" = "ICE",
  "snow" = "SNOW",
  "rain" = "RAIN",
  "heat" = "HEAT",
  "warm" = "HEAT",
  "hot" = "HEAT",
  "cold" = "COLD",
  "cool" = "COLD",
  "summ" = "SUMMARY",
  "mud" = "MUD",
  "hurr" = "HURRICANE",
  "typh" = "TYPHOON",
  "surf" = "SURF",
  "dust" = "DUST",
  "fog" = "FOG"
)
# Work on a character copy so labels can be overwritten freely, then convert
# back to a factor once all rules have been applied.
evtypeLabels <- as.character(sd2$EVTYPE)
for (keyword in names(evtypeRules)) {
  matched <- grepl(keyword, evtypeLabels, ignore.case = TRUE)
  evtypeLabels[matched] <- evtypeRules[[keyword]]
}
sd2$EVTYPE <- as.factor(evtypeLabels)
# Total fatalities per consolidated event type (one row per EVTYPE value).
sd2Deaths <- summarize(group_by(sd2, EVTYPE), total_deaths = sum(FATALITIES))

# Total property damage per consolidated event type.
# NOTE(review): this adds up the raw PROPDMG figures as stored; if the data
# set keeps the magnitude/unit of each figure in a separate column, it is not
# applied here -- confirm the units before interpreting the totals.
sd2Prop <- summarize(group_by(sd2, EVTYPE), total_damage = sum(PROPDMG))
# Show the event type(s) with the highest fatality total (ties are all kept).
with(sd2Deaths, sd2Deaths[total_deaths == max(total_deaths), ])
## # A tibble: 1 x 2
## EVTYPE total_deaths
## <fct> <dbl>
## 1 TORNADO 5841
# Default plot of the two-column summary: total_deaths by EVTYPE.
plot(sd2Deaths)
# Show the event type(s) with the highest property-damage total (ties kept).
with(sd2Prop, sd2Prop[total_damage == max(total_damage), ])
## # A tibble: 1 x 2
## EVTYPE total_damage
## <fct> <dbl>
## 1 TORNADO 3364343.
# Default plot of the two-column summary: total_damage by EVTYPE.
plot(sd2Prop)