This is an exploratory analysis of the social and economic impacts of severe weather events in terms of fatalities, injuries, property damage, and crop damage.
The analysis results show the top 10 event types for each of the consequences listed above.
# ---- Setup: working directory, packages, raw data ----
# Switch to the author's project folder only when it exists, so the script
# falls back to the current working directory on other machines instead of
# aborting with "cannot change working directory".
data_dir <- "/Users/Nan/Dropbox/DatSci_coursera/rep_research/"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# Install 'car' (package for function 'recode') only when it is missing, then
# attach it with library(), which stops with an error if attaching fails.
if (!requireNamespace("car", quietly = TRUE)) {
  install.packages("car")
}
library("car")
# The steps below call levels() on the *EXP columns, so string columns must be
# read as factors. That was read.csv()'s default before R 4.0.0; request it
# explicitly so the result is the same on current R versions.
storm <- read.csv("repdata-data-StormData.csv.bz2", stringsAsFactors = TRUE)
# Translate damage-exponent codes into numeric powers of ten.
#
# codes: factor or character vector of exponent codes.
# Returns a numeric vector of the same length as `codes`:
#   "", "-", "?", "+"   -> 0
#   digits "0".."9"     -> the digit itself
#   "H" -> 2, "K" -> 3, "M" -> 6, "B" -> 9 (letters matched case-insensitively)
# Any other code yields NA.
exponent_power <- function(codes) {
  # as.character() makes the helper work for factor and character columns
  codes <- toupper(as.character(codes))
  lookup <- c("-" = 0, "?" = 0, "+" = 0, "H" = 2, "K" = 3, "M" = 6, "B" = 9)
  lookup[as.character(0:9)] <- 0:9
  power <- unname(lookup[codes])
  # An empty string can never match an element name, so handle it separately
  power[codes %in% ""] <- 0
  power
}

# factor() makes levels() work whether the column was read as factor or character
levels(factor(storm$PROPDMGEXP))
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
prop_dmg_exp <- exponent_power(storm$PROPDMGEXP) # recode the exponentials
storm$prop_dmg_exp <- prop_dmg_exp
# Damage value = reported figure scaled by ten to the decoded power
storm$prop_dmg <- storm$PROPDMG * 10^storm$prop_dmg_exp
levels(factor(storm$CROPDMGEXP))
## [1] "" "?" "0" "2" "B" "k" "K" "m" "M"
crop_dmg_exp <- exponent_power(storm$CROPDMGEXP)
storm$crop_dmg_exp <- crop_dmg_exp
storm$crop_dmg <- storm$CROPDMG * 10^storm$crop_dmg_exp
Data pre-processing converts the character-coded damage exponents (for example "K", "M" and "B") into the corresponding numeric powers of ten, so that each damage value can be calculated as the reported figure multiplied by ten raised to that power.
library("ggplot2")
library("gridExtra")
## Loading required package: grid
# Fatalities by type of event
# Total fatalities for every event type
fatal <- tapply(storm$FATALITIES, storm$EVTYPE, sum)
# Top 10 by total; seq_len(min(...)) avoids NA padding when fewer than ten
# event types are present
fatal <- fatal[order(fatal, decreasing = TRUE)][seq_len(min(10, length(fatal)))]
# Levels in ranked order so the bars are drawn largest-first, not alphabetically
fatal_x <- factor(names(fatal), levels = names(fatal))
# Plain data frame for ggplot rather than the named 1-d array tapply() returns
fatal_df <- data.frame(event = fatal_x, total = as.vector(fatal))
p1 <- ggplot(fatal_df, aes(x = event, y = total)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event Type") +
  ylab("Count") +
  ggtitle("Fatalities by type of event")
# Injuries by type of event
# Same steps as above, applied to the injury counts
inj <- tapply(storm$INJURIES, storm$EVTYPE, sum)
inj <- inj[order(inj, decreasing = TRUE)][seq_len(min(10, length(inj)))]
inj_x <- factor(names(inj), levels = names(inj))
inj_df <- data.frame(event = inj_x, total = as.vector(inj))
p2 <- ggplot(inj_df, aes(x = event, y = total)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event Type") +
  ylab("Count") +
  ggtitle("Injuries by type of event")
# plot: both charts side by side in a single row
grid.arrange(p1, p2, nrow = 1)
The results show that tornadoes are the leading cause of both fatalities and injuries, and that their impact is far more severe than that of any other weather event.
# Property damage by type of event
# Total property damage for every event type
prop <- tapply(storm$prop_dmg, storm$EVTYPE, sum)
# Top 10 by total; seq_len(min(...)) avoids NA padding when fewer than ten
# event types are present
prop <- prop[order(prop, decreasing = TRUE)][seq_len(min(10, length(prop)))]
# Levels in ranked order so the bars are drawn largest-first, not alphabetically
prop_x <- factor(names(prop), levels = names(prop))
# Plain data frame for ggplot rather than the named 1-d array tapply() returns
prop_df <- data.frame(event = prop_x, total = as.vector(prop))
# The x-axis carries the event types and the y-axis the damage totals, so the
# labels say exactly that.
# NOTE(review): the damage figures are presumably US dollars - confirm before
# adding a unit to the axis label.
e1 <- ggplot(prop_df, aes(x = event, y = total)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event Type") +
  ylab("Damage") +
  ggtitle("Property damage by type of event")
# Crop damage by type of event
# Same steps as above, applied to the crop damage values
crop <- tapply(storm$crop_dmg, storm$EVTYPE, sum)
crop <- crop[order(crop, decreasing = TRUE)][seq_len(min(10, length(crop)))]
crop_x <- factor(names(crop), levels = names(crop))
crop_df <- data.frame(event = crop_x, total = as.vector(crop))
e2 <- ggplot(crop_df, aes(x = event, y = total)) +
  geom_bar(stat = "identity", fill = "red") +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event Type") +
  ylab("Damage") +
  ggtitle("Crop damage by type of event")
# plot: both charts side by side in a single row
grid.arrange(e1, e2, nrow = 1)
The results show that floods are the leading cause of property damage, while drought is the leading cause of crop damage.