The basic goal of this analysis is to explore the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database and help answer the following questions:
The NOAA Storm Database has records of events from 1950 to 2011. Records from more recent years should be more complete.
There should be a section titled Data Processing which describes (in words and code) how the data were loaded into R and processed for analysis. In particular, your analysis must start from the raw CSV file containing the data. You cannot do any preprocessing outside the document. If preprocessing is time-consuming you may consider using the cache = TRUE option for certain code chunks.
Load necessary libraries:
library(R.utils)
library(car)
library(ggplot2)
Get data set from the link given in the assignment:
# Download and decompress the raw storm data once; later runs reuse data.csv.
if (!file.exists("data.csv")) {
  fileurl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(fileurl, destfile = "data.bz2", method = "curl")
  bunzip2("data.bz2", destname = "data.csv")
}

# Read text columns as plain character vectors regardless of the R version.
# With the pre-4.0 default (stringsAsFactors = TRUE) EVTYPE would be a factor,
# and assigning a new group label into a factor produces NA instead of the
# label.
data <- read.csv("data.csv", stringsAsFactors = FALSE)
Convert the reported property and crop damage of each record into a numeric dollar amount by multiplying the damage figure by the multiplier encoded in its exponent column.
# Convert the reported damage figures into dollar amounts.
#
# PROPDMGEXP / CROPDMGEXP hold a code that scales the PROPDMG / CROPDMG figure.
# The table below maps each recognised code to its multiplier: digits are
# powers of ten, H/K/M/B are hundred/thousand/million/billion, and the
# symbols "-", "?" and "+" zero the amount out.
expMultiplier <- c(
  "0" = 1, "1" = 10, "2" = 100, "3" = 1e3, "4" = 1e4, "5" = 1e5,
  "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "H" = 100, "K" = 1e3, "M" = 1e6, "B" = 1e9,
  "-" = 0, "?" = 0, "+" = 0
)

# Dollar value of `amount` given its exponent code `expCode`.
# Codes are matched case-insensitively, so "k" and "b" are handled as well as
# "h" and "m". A blank or unrecognised code yields NA, which keeps such
# records out of any total computed with NA removal.
damageDollars <- function(amount, expCode) {
  code <- toupper(as.character(expCode))
  as.numeric(amount) * unname(expMultiplier[code])
}

data$PROPDMGDOL <- damageDollars(data$PROPDMG, data$PROPDMGEXP)
data$CROPDMGDOL <- damageDollars(data$CROPDMG, data$CROPDMGEXP)
## Warning: NAs introduced by coercion
Clean up event types by grouping them together
# Collapse the raw event types into a handful of broad groups.
#
# Each group has a label and a set of keywords. A record joins a group when
# its EVTYPE matches any of the group's keywords (case-insensitive regular
# expressions). Groups are applied in the order listed and overwrite EVTYPE in
# place, so a record is claimed by the first group it matches; none of the
# keywords match the label of an earlier group.

# Group labels
windGroupLabel <- "Tornado/Wind/Storm"
heatGroupLabel <- "Heat"
lightningGroupLabel <- "Lightning"
floodGroupLabel <- "Flood"
fireGroupLabel <- "Fire"
snowGroupLabel <- "Snow"
rcGroupLabel <- "Rip currents/Surf/Sea"
fogGroupLabel <- "Fog"
coldGroupLabel <- "Cold"
landslideGroupLabel <- "Landslide"

eventGroups <- list(
  list(label = windGroupLabel,
       keywords = c("torn", "dust", "wind", "storm", "rain", "hurricane",
                    "hail", "precip")),
  list(label = heatGroupLabel,
       keywords = "heat"),
  # "lighting" and "lignt" catch misspellings of "lightning". A bare "lig"
  # would also claim every "light ..." type (e.g. light snow).
  list(label = lightningGroupLabel,
       keywords = c("lightn", "lighting", "lignt")),
  list(label = floodGroupLabel,
       keywords = c("flood", "tsunami", "fld", "drizzle")),
  list(label = fireGroupLabel,
       keywords = "fire"),
  list(label = snowGroupLabel,
       keywords = c("snow", "avalanche", "blizzard")),
  # "sea" is matched as a whole word ("sea"/"seas") so that "unseasonably ..."
  # types are not pulled into this group.
  list(label = rcGroupLabel,
       keywords = c("current", "astro", "surf", "\\bseas?\\b", "marine",
                    "water", "high s")),
  list(label = fogGroupLabel,
       keywords = "fog"),
  # "wint" covers both "winter" and "wintry".
  list(label = coldGroupLabel,
       keywords = c("cold", "wint", "glaze", "icy", "ice", "frost", "freez")),
  list(label = landslideGroupLabel,
       keywords = c("land", "mud"))
)

# Work on character data: assigning a label that is not an existing level
# into a factor column would produce NA.
data$EVTYPE <- as.character(data$EVTYPE)

for (group in eventGroups) {
  pattern <- paste(group$keywords, collapse = "|")
  isMatch <- grepl(pattern, data$EVTYPE, ignore.case = TRUE)
  data$EVTYPE[isMatch] <- group$label
}
Compare the damage caused by each event type
# Total property damage in dollars for each event type.
# Only PROPDMGDOL is summed here. aggregate()'s default na.action drops
# records whose dollar amount is NA before summing.
dmgdata <- aggregate(as.numeric(PROPDMGDOL) ~ EVTYPE, data = data, FUN = sum)
colnames(dmgdata) <- c("Type", "Cost")

# The same table ordered from least to most costly. The column is referenced
# explicitly instead of via attach(), so a variable named Cost in the global
# environment cannot shadow it.
mostcost <- dmgdata[order(dmgdata$Cost), ]
Plot the economic cost of each event type. The costs are shown on a logarithmic scale.
# Bar chart of total damage per event type, on a natural-log scale.
# Types with no recorded damage are left out first: log(0) is -Inf, which is
# not a drawable bar height.
plotdata <- dmgdata[dmgdata$Cost > 0, ]

ggplot(data = plotdata, aes(x = Type, y = log(Cost), fill = Cost)) +
  geom_bar(stat = "identity") +
  # Rotate the event-type labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Most costly disasters", x = "", y = "log(USD) in damage")
## Warning: Stacking not well defined when ymin != 0