The purpose of this analysis is to figure out, which types of events are most harmful with respect to population health and which types of events have the greatest economic consequences ? These questions are limited to the United States.
The data comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
First, the maximum of event type are mapped to the “Storm Data Event Table”“, that counts 48 entries.
After, the fatalities and injuries are grouped by event type, sorted and plotted.
And the property and crop damage amounts are summed by event type, sorted and plotted.
library(R.utils)
library(dplyr)
library(ggplot2)
library(reshape2)
Load the NOAA Storm Database from repdata-data-StormData.csv.bz2 file
Available here
The variables kept for this analysis are :
filename <- "repdata-data-StormData.csv"
fileevent <- "event.csv"
if(!file.exists(filename)) {
URL<-"https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(URL,destfile=paste0(filename,".bz2"),method="curl")
bunzip2(paste0(filename,".bz2"), filename, remove = FALSE, skip = TRUE)
}
storm <- read.csv(filename)
event <- read.csv(fileevent)
stormTbl <- select(tbl_df(storm), BGN_DATE, EVTYPE, FATALITIES, INJURIES,PROPDMG,PROPDMGEXP,CROPDMG,CROPDMGEXP)
Cleaning the data
# put the event type to lower case
event$EVENT <- tolower(event$EVENT)
stormTbl$EVTYPE <- tolower(stormTbl$EVTYPE)
stormTbl$EVENT <- NA
# Get the year of the event
stormTbl$BGN_DATE <- as.POSIXct(strptime(stormTbl$BGN_DATE, "%m/%d/%Y %H:%M:%S"))
stormTbl$year <- format(stormTbl$BGN_DATE,"%Y")
# convert the property damage amount - Put 0 if there is no value
stormTbl$DAMAGE_PROPERTY <- NA
stormTbl[stormTbl$PROPDMGEXP == "K",]$DAMAGE_PROPERTY <- stormTbl[stormTbl$PROPDMGEXP == "K",]$PROPDMG*1000
stormTbl[stormTbl$PROPDMGEXP == "M",]$DAMAGE_PROPERTY <- stormTbl[stormTbl$PROPDMGEXP == "M",]$PROPDMG*1000000
stormTbl[stormTbl$PROPDMGEXP == "B",]$DAMAGE_PROPERTY <- stormTbl[stormTbl$PROPDMGEXP == "B",]$PROPDMG*1000000000
stormTbl[which(is.na(stormTbl$DAMAGE_PROPERTY)),]$DAMAGE_PROPERTY <- 0
# convert the crop damage amount - Put 0 if there is no value
stormTbl$DAMAGE_CROP <- NA
stormTbl[stormTbl$CROPDMGEXP == "K",]$DAMAGE_CROP <- stormTbl[stormTbl$CROPDMGEXP == "K",]$CROPDMG*1000
stormTbl[stormTbl$CROPDMGEXP == "M",]$DAMAGE_CROP <- stormTbl[stormTbl$CROPDMGEXP == "M",]$CROPDMG*1000000
stormTbl[stormTbl$CROPDMGEXP == "B",]$DAMAGE_CROP <- stormTbl[stormTbl$CROPDMGEXP == "B",]$CROPDMG*1000000000
stormTbl[which(is.na(stormTbl$DAMAGE_CROP)),]$DAMAGE_CROP <- 0
Group the event type with the 48 standard event types
for (var in event$EVENT){
test <- grepl(var, stormTbl$EVTYPE,ignore.case = TRUE)
if (sum(test)>0) stormTbl[test,]$EVENT <- var
}
# Custom correction
stormTbl[grepl("TSTM WIND", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "thunderstorm wind"
stormTbl[grepl("FUNNEL|WATERSPOUT", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "thunderstorm wind"
stormTbl[grepl("COLD", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "cold"
stormTbl[grepl("FLD|FLOOD", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "flood"
stormTbl[grepl("FIRE", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "fire"
stormTbl[grepl("SNOW", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "snow"
stormTbl[grepl("Storm Surge|Storm Tide", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "storm surge/tide"
stormTbl[grepl("hurricane|typhoon", stormTbl$EVTYPE,ignore.case = TRUE),]$EVENT <- "hurricane"
stormTbl[which(is.na(stormTbl$EVENT)),]$EVENT <- stormTbl[which(is.na(stormTbl$EVENT)),]$EVTYPE
result <- stormTbl %>%
group_by(EVENT) %>%
summarize(count=n(),
fatalities=sum(FATALITIES),
injuries=sum(INJURIES)) %>%
select(EVENT, fatalities,injuries)
result <- arrange(result, -fatalities,-injuries)
result$EVENT <- factor(result$EVENT, levels=unique(result$EVENT))
In USA from 1950 to 2011
melted <- melt(result[1:10,], id.vars=c("EVENT"))
ggplot(melted, aes(x=EVENT, y= value , fill=variable)) + geom_bar(stat="identity",position="dodge") + ylab("Number of fatalities or injuries") + xlab("") + theme(axis.text.x=element_text(angle=90,hjust=0.9, vjust=0.2,size=14))
resultDamage <- stormTbl %>%
group_by(EVENT) %>%
summarize(property=sum(DAMAGE_PROPERTY), crop=sum(DAMAGE_CROP),total = crop+property) %>%
select(EVENT, property,crop,total)
resultDamage <- arrange(resultDamage, -total)
resultDamage$EVENT <- factor(resultDamage$EVENT, levels=unique(resultDamage$EVENT))
In USA from 1950 to 2011
melted <- melt(resultDamage[1:10,1:3], id.vars=c("EVENT"))
ggplot(melted, aes(x=EVENT, y= value , fill=variable)) + geom_bar(stat="identity",position="dodge")+ ylab("Amount $") + xlab("") + theme(axis.text.x=element_text(angle=90,hjust=0.9, vjust=.2,size=14)) + ggtitle("Damage")