Using data from the NOAA Storm Database for the period 1996-2011, I analyzed the 48 official event types and found that Tornado is the most harmful to population health, while Flood has the greatest economic consequences.
Set up the working directory.
This is my working environment.
# Record the R version, platform, locale and loaded packages used for this analysis.
utils::sessionInfo()
## R version 3.1.2 (2014-10-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.8 evaluate_0.5.5 formatR_1.0 htmltools_0.2.6
## [5] knitr_1.9 rmarkdown_0.4.2 stringr_0.6.2 tools_3.1.2
## [9] yaml_2.1.13
Download the file.
# Fetch the bzip2-compressed storm data only when no local copy exists yet,
# so re-running the analysis does not download the archive again.
# NOTE(review): a partially downloaded "raw.csv.bz2" left over from an
# interrupted run would be reused; delete the file to force a fresh download.
if (!file.exists("raw.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    "raw.csv.bz2",
    method = "curl",
    mode = "wb"
  )
}
Load the data
# Read the compressed CSV straight from the .bz2 file, keeping text columns
# as character vectors instead of factors.
eventwork <- read.csv(file = "raw.csv.bz2", stringsAsFactors = FALSE)
According to this NOAA webpage: http://www.ncdc.noaa.gov/stormevents/details.jsp
the 48 event types have been in use since 1996, so I keep only the data from the period 1996-2011.
library("dplyr")
library("lubridate")

# Clean and parse the event start date: strip the trailing " 0:00:00"
# time-of-day suffix, then parse the remaining month/day/year text into a date.
eventwork$BGN_DATE <- mdy(
  gsub(pattern = " 0:00:00$", replacement = "", x = eventwork$BGN_DATE)
)

# Keep the complete data since 1996-01-01. The comparison is inclusive so that
# events that began on 1 January 1996 itself are not dropped.
eventwork <- filter(eventwork, BGN_DATE >= ymd("1996-01-01"))

# Remove rows that are not real events: periodic "summary" records and
# "No Severe Weather" entries (both matched case-insensitively).
eventwork <- filter(
  eventwork,
  !grepl("summary|No Severe Weather", EVTYPE, ignore.case = TRUE)
)
Most of the event types are recategorized into the 48 official kinds.
#library("dplyr")
# Recode the raw EVTYPE spellings into the official event categories.
# The rules run top to bottom and every rule sees the output of the rules
# before it, so their order matters and must be preserved.

# Give `label` to every element of `evtype` equal to one of `originals`.
recode_exact <- function(evtype, originals, label) {
  evtype[evtype %in% originals] <- label
  evtype
}

# Give `label` to every element of `evtype` that matches the regular
# expression `pattern`, ignoring case.
recode_regex <- function(evtype, pattern, label) {
  evtype[grepl(pattern, evtype, ignore.case = TRUE)] <- label
  evtype
}

evtype <- eventwork$EVTYPE

#1
evtype <- recode_exact(
  evtype,
  c("ASTRONOMICAL LOW TIDE", "BLOW-OUT TIDE", "BLOW-OUT TIDES"),
  "Astronomical_Low_Tide"
)
#2
evtype <- recode_exact(evtype, "AVALANCHE", "Avalanche")
#3
evtype <- recode_exact(
  evtype,
  c("Blizzard Summary", "blowing snow", "Blowing Snow"),
  "Blizzard"
)
evtype <- recode_regex(evtype, "SNOW SQUALL|blizzard", "Blizzard")
#4
evtype <- recode_exact(
  evtype,
  c(
    "ASTRONOMICAL HIGH TIDE", "Beach Erosion", "BEACH EROSION",
    "COASTAL EROSION", "Coastal Flood", "COASTALFLOOD", " COASTAL FLOOD",
    "COASTAL FLOOD", "coastal flooding", "Coastal Flooding",
    "COASTAL FLOODING", "COASTAL FLOODING/EROSION", "CSTL FLOODING/EROSION",
    "Erosion/Cstl Flood", "Tidal Flooding", "TIDAL FLOODING"
  ),
  "Coastal_Flood"
)
#5
evtype <- recode_exact(
  evtype,
  c(
    "Cold", "COLD", "Cold and Frost", "COLD AND FROST", "COLD AND SNOW",
    "Cold Temperature", "COLD TEMPERATURES", "COLD WEATHER",
    "COLD/WIND CHILL", "COLD WIND CHILL TEMPERATURES", "COOL SPELL",
    "Extended Cold", "WIND CHILL", "Prolong Cold", "PROLONG COLD"
  ),
  "Cold_Wind_Chill"
)
#6
#7
evtype <- recode_regex(evtype, "^fog|dense fog", "DENSE_FOG")
#8
evtype <- recode_regex(evtype, "Smoke", "Dense_Smoke")
#9
# NOTE(review): "DRY MICROBURST" looks like a wind event rather than a
# drought; the original mapping is kept here - confirm the intended category.
evtype <- recode_exact(
  evtype,
  c(
    "ABNORMALLY DRY", "DRIEST MONTH", "DRY", "DRY CONDITIONS",
    "DRY MICROBURST", "DRYNESS", "DRY SPELL", "DRY WEATHER",
    "EXCESSIVELY DRY", "RECORD LOW RAINFALL", "Record dry month",
    "RECORD DRYNESS", "UNSEASONABLY DRY", "VERY DRY"
  ),
  "Drought"
)
#10
evtype <- recode_regex(evtype, "devil", "Dust_Devil")
#11
evtype <- recode_exact(evtype, c("BLOWING DUST", "DUST STORM"), "Dust_Storm")
#12
evtype <- recode_exact(
  evtype,
  c("EXCESSIVE HEAT", "EXCESSIVE HEAT/DROUGHT", "HEAT WAVE"),
  "Excessive_Heat"
)
#13
evtype <- recode_exact(
  evtype,
  c(
    "Excessive Cold", "BITTER WIND CHILL", "BITTER WIND CHILL TEMPERATURES",
    "EXTREME WIND CHILL", "EXTREME WINDCHILL", "Extreme Cold",
    "EXTREME COLD", "EXTREME COLD/WIND CHILL",
    "EXTREME WINDCHILL TEMPERATURES"
  ),
  "Extreme_Cold_Wind_Chill"
)
evtype <- recode_regex(
  evtype,
  "cool$|cold$|UNSEASONABLY COOL & WET",
  "Extreme_Cold_Wind_Chill"
)
#14
evtype <- recode_exact(evtype, c("DAM BREAK", "DROWNING"), "Flash_Flood")
evtype <- recode_regex(evtype, "flash flood", "Flash_Flood")
#15
evtype <- recode_regex(
  evtype,
  "^flood$|flood/|Minor Flood|RIVER FLOOD|SNOWMELT FLOOD|STREET FLOOD|Urban flood",
  "Flood"
)
#16
evtype <- recode_exact(evtype, c("Early Frost", "FIRST FROST"), "Frost_Freeze")
evtype <- recode_regex(
  evtype,
  "^Freeze|/freeze| freeze|damaging freeze",
  "Frost_Freeze"
)
#17
evtype <- recode_regex(evtype, "funnel cloud", "Funnel Cloud")
#18
evtype <- recode_regex(evtype, "freezing fog|ice fog", "Freezing_Fog")
#19
evtype <- recode_regex(evtype, "^hail|small hail|LATE SEASON HAIL", "Hail")
#20
evtype <- recode_exact(evtype, "ABNORMAL WARMTH", "HEAT")
# The case-insensitive "heat" pattern would also match the "Excessive_Heat"
# label assigned in rule 12 and merge that category into "Heat", so rows
# already labelled "Excessive_Heat" are explicitly left alone.
is_plain_heat <- grepl("hot|heat", evtype, ignore.case = TRUE) &
  !(evtype %in% "Excessive_Heat")
evtype[is_plain_heat] <- "Heat"
#21
evtype <- recode_exact(
  evtype,
  c(
    "ABNORMALLY WET", "EARLY RAIN", "EXCESSIVE RAIN", "EXCESSIVE RAINFALL",
    "EXTREMELY WET", "RAIN"
  ),
  "Heavy_Rain"
)
# The parentheses are escaped so the literal text "RAIN (HEAVY)" is matched;
# unescaped they form a regex group that matches "RAIN HEAVY" instead.
evtype <- recode_regex(
  evtype,
  "heavy rain|RECORD RAINFALL|RAIN \\(HEAVY\\)|TSTM HEAVY RAIN",
  "Heavy_Rain"
)
#22
evtype <- recode_exact(
  evtype,
  c(
    "HEAVY SNOW", "Heavy snow shower", "EXCESSIVE SNOW", "RECORD SNOWFALL",
    "RECORD SNOW", "Record Winter Snow"
  ),
  "Heavy_Snow"
)
#23
evtype <- recode_regex(evtype, "high surf", "High Surf")
#24
evtype <- recode_regex(evtype, "^wind$|^winds$|^high wind", "High_Wind")
#25
evtype <- recode_regex(evtype, "Hurricane|typhoon", "Hurricane_Typhoon")
#26
evtype <- recode_regex(evtype, "Ice Storm", "Ice_Storm")
#27
evtype <- recode_regex(
  evtype,
  "Lake Effect Snow|Lake-Effect Snow",
  "Lake_Effect_Snow"
)
#28
evtype <- recode_regex(evtype, "LAKESHORE FLOOD", "Lakeshore_Flood")
#29
evtype <- recode_regex(evtype, "Lightning", "Lightning")
#30
evtype <- recode_regex(evtype, "Marine Hail", "Marine_Hail")
#31
evtype <- recode_regex(evtype, "Marine High Wind", "Marine_High_Wind")
#32
evtype <- recode_regex(evtype, "Marine Strong Wind", "Marine_Strong_Wind")
#33
evtype <- recode_regex(
  evtype,
  "Marine Thunderstorm Wind|marine tstm",
  "Marine_Thunderstorm_Wind"
)
#34
evtype <- recode_regex(evtype, "rip", "Rip_Current")
#35
evtype <- recode_regex(evtype, "Seiche", "Seiche")
#36
evtype <- recode_regex(evtype, "sleet", "Sleet")
#37
evtype <- recode_regex(evtype, "Surge", "Storm_Surge_Tide")
#38
evtype <- recode_regex(
  evtype,
  "gusty wind|^strong wind|WIND GUSTS",
  "Strong_Wind"
)
#39
evtype <- recode_regex(
  evtype,
  "^thunder|gusty thunder|severe thunder|TSTM WND|^tstm wind|^ tstm wind",
  "Thunderstorm_Wind"
)
#40
evtype <- recode_regex(evtype, "Tornado", "Tornado")
#41
evtype <- recode_regex(evtype, "Tropical Depression", "Tropical_Depression")
#42
evtype <- recode_regex(evtype, "Tropical Storm", "Tropical_Storm")
#43
evtype <- recode_regex(evtype, "Tsunami", "Tsunami")
#44
evtype <- recode_regex(evtype, "ash$|ashfall$|plume$", "Volcanic_Ash")
#45
evtype <- recode_regex(evtype, "Waterspout", "Waterspout")
#46
evtype <- recode_regex(evtype, "fire$", "Wildfire")
#47
evtype <- recode_regex(evtype, "snow squall|Winter Storm", "Winter_Storm")
#48
evtype <- recode_exact(
  evtype,
  c(
    "ACCUMULATED SNOWFALL", "AGRICULTURAL FREEZE", "Black Ice", "BLACK ICE",
    "Drifting Snow", "FALLING SNOW/ICE", "FIRST SNOW", "FREEZING RAIN/SLEET",
    "LIGHT FREEZING RAIN", "SNOW/FREEZING RAIN"
  ),
  "Winter_Weather"
)
evtype <- recode_regex(evtype, "Winter weather", "Winter_Weather")

# Write the recoded event types back into the data set.
eventwork$EVTYPE <- evtype
#sort(unique(eventwork$EVTYPE))
Find the most harmful event type by adding up the numbers of deaths and injuries.
library("dplyr")
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total casualties per event type: fatalities plus injuries, summed within
# each EVTYPE group.
by_event <- eventwork %>% group_by(EVTYPE)
harm <- by_event %>%
  summarize(total_harm = sum(FATALITIES) + sum(INJURIES))

# Rank event types from most to least casualties; the first cell of the
# ranked table is the name of the most harmful event type.
harm_ranked <- arrange(harm, desc(total_harm))
most_harmful <- harm_ranked[[1, 1]]
Find the event type that causes the greatest economic impact.
# Damage-exponent codes and the multiplier text each one stands for
# ("K" thousand, "M" million, "B" billion; a blank code becomes 0).
exp_codes <- c("K", "", "M", "B")
exp_factors <- c("1000", "0", "1000000", "1000000000")

# Swap every recognised exponent code for its multiplier text; values that
# are not in `exp_codes` are passed through unchanged.
decode_exp <- function(codes) {
  hit <- match(codes, exp_codes)
  known <- !is.na(hit)
  codes[known] <- exp_factors[hit[known]]
  codes
}

# Make the damage figures and their multipliers numeric.
eventwork$PROPDMGEXP <- as.numeric(decode_exp(eventwork$PROPDMGEXP))
eventwork$CROPDMGEXP <- as.numeric(decode_exp(eventwork$CROPDMGEXP))
eventwork$PROPDMG <- as.numeric(eventwork$PROPDMG)
eventwork$CROPDMG <- as.numeric(eventwork$CROPDMG)

# Property and crop damage in dollars, and their sum, per event record.
eventwork$pro_real <- eventwork$PROPDMG * eventwork$PROPDMGEXP
eventwork$cro_real <- eventwork$CROPDMG * eventwork$CROPDMGEXP
eventwork$total_dmg <- eventwork$pro_real + eventwork$cro_real

# Total damage per event type, then the name of the costliest type (first
# cell of the table once sorted by descending damage).
by_event <- eventwork %>% group_by(EVTYPE)
tdmg <- by_event %>% summarize(total_event_dmg = sum(total_dmg))
dmg_ranked <- arrange(tdmg, desc(total_event_dmg))
evemost_damage <- dmg_ranked[[1, 1]]
library(ggplot2)
# Take the ten event types with the most casualties, then sort those ten in
# ascending order so the largest bar ends up at the top of the flipped chart.
harm_top <- arrange(harm, desc(total_harm))[1:10, ]
most_10_harmful <- arrange(harm_top, total_harm)

# Turn EVTYPE into a factor whose levels follow the current row order;
# ggplot draws the bars in level order.
most_10_harmful$EVTYPE <- factor(
  most_10_harmful$EVTYPE,
  levels = unique(most_10_harmful$EVTYPE)
)

# Horizontal bar chart of total casualties per event type.
ggplot(most_10_harmful, aes(x = EVTYPE, y = total_harm)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "event type",
    y = "total number of casualty",
    title = "The Top 10 Most Harmful Events"
  )
The most harmful event is Tornado.
#library(ggplot2)
# Take the ten event types with the largest total damage, then sort those ten
# in ascending order so the largest bar ends up at the top of the flipped chart.
dmg_top <- arrange(tdmg, desc(total_event_dmg))[1:10, ]
most_10_ecodmg <- arrange(dmg_top, total_event_dmg)

# Turn most_10_ecodmg$EVTYPE into a factor whose levels follow the current
# row order; ggplot draws the bars in level order.
most_10_ecodmg$EVTYPE <- factor(
  most_10_ecodmg$EVTYPE,
  levels = unique(most_10_ecodmg$EVTYPE)
)

# Horizontal bar chart of total economic damage per event type.
ggplot(most_10_ecodmg, aes(x = EVTYPE, y = total_event_dmg)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "event type",
    y = "economic lost (US dollar)",
    title = "The Top 10 Events Cause Most Economic Impact"
  )
Across the United States, Flood has the greatest economic consequences.