Synopsis

Using data from the NOAA Storm Database, I analyzed storm events over the period 1996-2011. Among the 48 event types, tornadoes are the most harmful to population health, and floods have the greatest economic consequences.

Data Processing

Set up the working directory.

This is my working environment.

# Print the R version, platform, locale and loaded packages so the analysis
# environment is recorded alongside the results.
sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
## [1] digest_0.6.8    evaluate_0.5.5  formatR_1.0     htmltools_0.2.6
## [5] knitr_1.9       rmarkdown_0.4.2 stringr_0.6.2   tools_3.1.2    
## [9] yaml_2.1.13

Download the file.

# Fetch the compressed storm data only when it is not already on disk, so
# re-running the report does not download the archive again.
raw_file <- "raw.csv.bz2"
if (!file.exists(raw_file)) {
  # download.file() returns an integer status code (0 means success); check it
  # instead of continuing with a missing or truncated archive.
  status <- download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", raw_file, method = "curl", mode = "wb")
  if (status != 0) {
    # Remove any partial file so the next run retries the download.
    unlink(raw_file)
    stop("Download of StormData.csv.bz2 failed with status ", status,
         call. = FALSE)
  }
}

Load the data

# Read the storm data straight from the bzip2-compressed CSV; text columns are
# kept as character vectors (not factors) so they can be edited as strings.
eventwork <- read.csv("raw.csv.bz2", stringsAsFactors = FALSE)

According to this NOAA webpage (http://www.ncdc.noaa.gov/stormevents/details.jsp), the 48 event types have been in use since 1996, so I keep only the data from the period 1996-2011.

library("dplyr")
library("lubridate")

# Clean the begin date: drop a trailing " 0:00:00" and parse the remaining
# month/day/year text into a date class.
eventwork$BGN_DATE <- mdy(sub(pattern = " 0:00:00$", replacement = "",
                              x = eventwork$BGN_DATE))

# Keep the complete data since 1996-01-01. The comparison is inclusive (>=) so
# that events dated exactly 1996-01-01 are not dropped.
eventwork <- filter(eventwork, BGN_DATE >= ymd("1996-01-01"))

# Remove rows whose EVTYPE is not a real event type: summary records and
# "No Severe Weather" entries (matched case-insensitively).
eventwork <- filter(eventwork,
                    !grepl(pattern = "summary|No Severe Weather", EVTYPE,
                           ignore.case = TRUE))

Most of the event types are recoded into the 48 official categories.

#library("dplyr")

# Ordered recoding rules for EVTYPE. Each rule names a canonical label (`to`)
# and either `exact` (raw EVTYPE strings matched literally) or `regex` (a
# pattern matched case-insensitively). Rules are applied top to bottom, so the
# order matters: an earlier rule claims a value before a later one sees it.
# The numbers mirror the category numbering used in this report.
# NOTE(review): there are no rules for #6 or for "Debris Flow"-style values;
# confirm whether that category should be added.
evtype_rules <- list(
  #1
  list(to = "Astronomical_Low_Tide",
       exact = c("ASTRONOMICAL LOW TIDE", "BLOW-OUT TIDE", "BLOW-OUT TIDES")),
  #2
  list(to = "Avalanche", exact = "AVALANCHE"),
  #3
  list(to = "Blizzard",
       exact = c("Blizzard Summary", "blowing snow", "Blowing Snow")),
  list(to = "Blizzard", regex = "SNOW SQUALL|blizzard"),
  #4
  list(to = "Coastal_Flood",
       exact = c("ASTRONOMICAL HIGH TIDE", "Beach Erosion", "BEACH EROSION",
                 "COASTAL EROSION", "Coastal Flood", "COASTALFLOOD",
                 " COASTAL FLOOD", "COASTAL FLOOD", "coastal flooding",
                 "Coastal Flooding", "COASTAL FLOODING",
                 "COASTAL  FLOODING/EROSION", "COASTAL FLOODING/EROSION",
                 "CSTL FLOODING/EROSION", "Erosion/Cstl Flood",
                 "Tidal Flooding", "TIDAL FLOODING")),
  #5
  list(to = "Cold_Wind_Chill",
       exact = c("Cold", "COLD", "Cold and Frost", "COLD AND FROST",
                 "COLD AND SNOW", "Cold Temperature", "COLD TEMPERATURES",
                 "COLD WEATHER", "COLD/WIND CHILL",
                 "COLD WIND CHILL TEMPERATURES", "COOL SPELL", "Extended Cold",
                 "WIND CHILL", "Prolong Cold", "PROLONG COLD")),
  #6
  #7
  list(to = "Dense_Fog", regex = "^fog|dense fog"),
  #8
  list(to = "Dense_Smoke", regex = "Smoke"),
  #9 -- the plain "DROUGHT" value is listed too, so it lands in the same group
  # as its synonyms instead of staying a separate upper-case category.
  list(to = "Drought",
       exact = c("DROUGHT", "ABNORMALLY DRY", "DRIEST MONTH", "DRY",
                 "DRY CONDITIONS", "DRY MICROBURST", "DRYNESS", "DRY SPELL",
                 "DRY WEATHER", "EXCESSIVELY DRY", "RECORD LOW RAINFALL",
                 "Record dry month", "RECORD DRYNESS", "UNSEASONABLY DRY",
                 "VERY DRY")),
  #10
  list(to = "Dust_Devil", regex = "devil"),
  #11
  list(to = "Dust_Storm", exact = c("BLOWING DUST", "DUST STORM")),
  #12
  list(to = "Excessive_Heat",
       exact = c("EXCESSIVE HEAT", "EXCESSIVE HEAT/DROUGHT", "HEAT WAVE")),
  #13
  list(to = "Extreme_Cold_Wind_Chill",
       exact = c("Excessive Cold", "BITTER WIND CHILL",
                 "BITTER WIND CHILL TEMPERATURES", "EXTREME WIND CHILL",
                 "EXTREME WINDCHILL", "Extreme Cold", "EXTREME COLD",
                 "EXTREME COLD/WIND CHILL", "EXTREME WINDCHILL TEMPERATURES")),
  list(to = "Extreme_Cold_Wind_Chill",
       regex = "cool$|cold$|UNSEASONABLY COOL & WET"),
  #14
  list(to = "Flash_Flood", exact = c("DAM BREAK", "DROWNING")),
  list(to = "Flash_Flood", regex = "flash flood"),
  #15
  list(to = "Flood",
       regex = "^flood$|flood/|Minor Flood|RIVER FLOOD|SNOWMELT FLOOD|STREET FLOOD|Urban flood"),
  #16
  list(to = "Frost_Freeze", exact = c("Early Frost", "FIRST FROST")),
  list(to = "Frost_Freeze",
       regex = "^Freeze|/freeze| freeze|damaging freeze"),
  #17
  list(to = "Funnel_Cloud", regex = "funnel cloud"),
  #18
  list(to = "Freezing_Fog", regex = "freezing fog|ice fog"),
  #19
  list(to = "Hail", regex = "^hail|small hail|LATE SEASON HAIL"),
  #20 -- "hot|heat" would also match the label "Excessive_Heat"; the
  # final-label guard in normalize_evtype() keeps category #12 intact.
  list(to = "Heat", exact = "ABNORMAL WARMTH"),
  list(to = "Heat", regex = "hot|heat"),
  #21 -- parentheses are escaped so the literal text "RAIN (HEAVY)" matches.
  list(to = "Heavy_Rain",
       exact = c("ABNORMALLY WET", "EARLY RAIN", "EXCESSIVE RAIN",
                 "EXCESSIVE RAINFALL", "EXTREMELY WET", "RAIN")),
  list(to = "Heavy_Rain",
       regex = "heavy rain|RECORD RAINFALL|RAIN \\(HEAVY\\)|TSTM HEAVY RAIN"),
  #22
  list(to = "Heavy_Snow",
       exact = c("HEAVY SNOW", "Heavy snow shower", "EXCESSIVE SNOW",
                 "RECORD SNOWFALL", "RECORD SNOW", "Record Winter Snow")),
  #23
  list(to = "High_Surf", regex = "high surf"),
  #24
  list(to = "High_Wind", regex = "^wind$|^winds$|^high wind"),
  #25
  list(to = "Hurricane_Typhoon", regex = "Hurricane|typhoon"),
  #26
  list(to = "Ice_Storm", regex = "Ice Storm"),
  #27
  list(to = "Lake_Effect_Snow", regex = "Lake Effect Snow|Lake-Effect Snow"),
  #28
  list(to = "Lakeshore_Flood", regex = "LAKESHORE FLOOD"),
  #29
  list(to = "Lightning", regex = "Lightning"),
  #30
  list(to = "Marine_Hail", regex = "Marine Hail"),
  #31
  list(to = "Marine_High_Wind", regex = "Marine High Wind"),
  #32
  list(to = "Marine_Strong_Wind", regex = "Marine Strong Wind"),
  #33
  list(to = "Marine_Thunderstorm_Wind",
       regex = "Marine Thunderstorm Wind|marine tstm"),
  #34
  list(to = "Rip_Current", regex = "rip"),
  #35
  list(to = "Seiche", regex = "Seiche"),
  #36
  list(to = "Sleet", regex = "sleet"),
  #37
  list(to = "Storm_Surge_Tide", regex = "Surge"),
  #38
  list(to = "Strong_Wind", regex = "gusty wind|^strong wind|WIND GUSTS"),
  #39
  list(to = "Thunderstorm_Wind",
       regex = "^thunder|gusty thunder|severe thunder|TSTM WND|^tstm wind|^ tstm wind"),
  #40
  list(to = "Tornado", regex = "Tornado"),
  #41
  list(to = "Tropical_Depression", regex = "Tropical Depression"),
  #42
  list(to = "Tropical_Storm", regex = "Tropical Storm"),
  #43
  list(to = "Tsunami", regex = "Tsunami"),
  #44
  list(to = "Volcanic_Ash", regex = "ash$|ashfall$|plume$"),
  #45
  list(to = "Waterspout", regex = "Waterspout"),
  #46
  list(to = "Wildfire", regex = "fire$"),
  #47 -- "snow squall" values are already claimed by rule #3 above.
  list(to = "Winter_Storm", regex = "snow squall|Winter Storm"),
  #48 -- "AGRICULTURAL FREEZE" and "FREEZING RAIN/SLEET" are already claimed
  # by the #16 and #36 patterns above; they are kept for reference.
  list(to = "Winter_Weather",
       exact = c("ACCUMULATED SNOWFALL", "AGRICULTURAL FREEZE", "Black Ice",
                 "BLACK ICE", "Drifting Snow", "FALLING SNOW/ICE",
                 "FIRST SNOW", "FREEZING RAIN/SLEET", "LIGHT FREEZING RAIN",
                 "SNOW/FREEZING RAIN")),
  list(to = "Winter_Weather", regex = "Winter weather")
)

# Recode a character vector of raw event types with an ordered rule list.
#
# evtype: character vector of raw EVTYPE values.
# rules:  list of rules shaped like `evtype_rules`.
# Returns a character vector of the same length as `evtype`; values matched by
# no rule are returned unchanged.
normalize_evtype <- function(evtype, rules = evtype_rules) {
  labels <- unique(vapply(rules, function(rule) rule[["to"]], character(1)))
  # The mapping depends only on the string, so work on the distinct values and
  # expand back at the end instead of rescanning every row for every rule.
  raw <- unique(evtype)
  mapped <- raw
  for (rule in rules) {
    hit <- if (is.null(rule[["regex"]])) {
      mapped %in% rule[["exact"]]
    } else {
      grepl(rule[["regex"]], mapped, ignore.case = TRUE)
    }
    # A value that already carries a canonical label is final: a later, looser
    # pattern must not move it into a different category.
    mapped[hit & !(mapped %in% labels)] <- rule[["to"]]
  }
  mapped[match(evtype, raw)]
}

eventwork <- mutate(eventwork, EVTYPE = normalize_evtype(EVTYPE))

#sort(unique(eventwork$EVTYPE))

Find the most harmful event type by adding up the number of deaths and injuries.

library("dplyr")
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Total casualties per event type: fatalities plus injuries within each EVTYPE.
by_event <- eventwork %>% group_by(EVTYPE)
harm <- by_event %>%
  summarize(total_harm = sum(FATALITIES) + sum(INJURIES))
# Sort from most to least harmful; row 1, column 1 is the top event type.
most_harmful <- (harm %>% arrange(desc(total_harm)))[[1, 1]]

Find the event type that causes the greatest economic impact.

# Translate a damage-exponent code into a dollar multiplier.
#
# code: vector of PROPDMGEXP / CROPDMGEXP values.
# Returns a numeric vector of the same length:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9 (matched case-insensitively);
#   ""  -> 1, so an amount recorded without a code is kept rather than zeroed;
#   any other value is coerced with as.numeric(), giving NA when it is not a
#   number.
# NOTE(review): the meaning of purely numeric codes (e.g. "0") is not
# established here; they are used as the multiplier itself -- confirm.
damage_multiplier <- function(code) {
  code <- toupper(as.character(code))
  known <- c("K" = 1e3, "M" = 1e6, "B" = 1e9)
  mult <- unname(known[code])
  mult[code %in% ""] <- 1
  other <- is.na(mult)
  mult[other] <- suppressWarnings(as.numeric(code[other]))
  mult
}

# Replace the exponent codes by their numeric multipliers and make sure the
# damage amounts are numeric.
eventwork <- mutate(eventwork,
                    PROPDMG = as.numeric(PROPDMG),
                    PROPDMGEXP = damage_multiplier(PROPDMGEXP),
                    CROPDMG = as.numeric(CROPDMG),
                    CROPDMGEXP = damage_multiplier(CROPDMGEXP))

# Real dollar amounts for property and crop damage, and their sum per event.
eventwork <- mutate(eventwork,
                    pro_real = PROPDMG * PROPDMGEXP,
                    cro_real = CROPDMG * CROPDMGEXP,
                    total_dmg = pro_real + cro_real)

by_event <- group_by(eventwork, EVTYPE)

# na.rm = TRUE: a single row with an unreadable exponent must not turn the
# total of its whole event type into NA.
tdmg <- summarize(by_event, total_event_dmg = sum(total_dmg, na.rm = TRUE))

# Row 1, column 1 of the descending ranking is the costliest event type.
evemost_damage <- arrange(tdmg, desc(total_event_dmg))[[1, 1]]

Results

library(ggplot2)
# Ten event types with the most casualties. head() is used instead of
# [1:10, ] so that fewer than ten event types does not add all-NA rows.
most_10_harmful <- head(arrange(harm, desc(total_harm)), 10)
# Re-sort ascending so that, after coord_flip(), the largest bar is on top.
most_10_harmful <- arrange(most_10_harmful, total_harm)

# Turn EVTYPE into a factor whose level order follows the current row order;
# ggplot draws bars in level order rather than alphabetically.
most_10_harmful$EVTYPE <- factor(most_10_harmful$EVTYPE,
                                 levels = unique(most_10_harmful$EVTYPE))

ggplot(most_10_harmful, aes(x = EVTYPE, y = total_harm)) +
  geom_bar(stat = "identity") +
  xlab("event type") +
  ylab("total number of casualties") +
  coord_flip() +
  ggtitle("The Top 10 Most Harmful Events")

The most harmful event is Tornado.

#library(ggplot2)
# Ten event types with the largest total damage. head() is used instead of
# [1:10, ] so that fewer than ten event types does not add all-NA rows.
most_10_ecodmg <- head(arrange(tdmg, desc(total_event_dmg)), 10)
# Re-sort ascending so that, after coord_flip(), the largest bar is on top.
most_10_ecodmg <- arrange(most_10_ecodmg, total_event_dmg)

# Turn most_10_ecodmg$EVTYPE into a factor whose level order follows the
# current row order; ggplot draws bars in level order rather than
# alphabetically.
most_10_ecodmg$EVTYPE <- factor(most_10_ecodmg$EVTYPE,
                                levels = unique(most_10_ecodmg$EVTYPE))

ggplot(most_10_ecodmg, aes(x = EVTYPE, y = total_event_dmg)) +
  geom_bar(stat = "identity") +
  xlab("event type") +
  ylab("economic loss (US dollars)") +
  coord_flip() +
  ggtitle("The Top 10 Events Causing the Most Economic Impact")

Across the United States, floods have the greatest economic consequences.