# Turn on knitr's chunk cache option for all chunks in this document
knitr::opts_chunk$set(cache = TRUE)
Author: Konstantin Mingoulin
Impact of Natural Disasters on Population and Economy in the United States
The report examines the Storm Data for the US from 1950 to 2011. The main purpose of the report is to show which types of natural disaster events have the highest human and economic costs.
Human costs are represented by the total of injuries and fatalities.
Economic costs are represented by the sum of property and crop damages.
Data Processing
# Attach dplyr first; the summary pipelines further down rely on it
library(dplyr)

# Read the storm data from the bzip2-compressed CSV.
# The downloaded file must be present in the project folder.
raw <- read.csv(
  file = bzfile("repdata_data_StormData.csv.bz2"),
  stringsAsFactors = FALSE
)

# Store the event type as a factor
raw[["EVTYPE"]] <- factor(raw[["EVTYPE"]])
Property and crop damages need to be converted into proper units. The original data reports the values and their units (millions, thousands, etc.) in separate columns:
# convertUnits maps damage-exponent codes to their numeric multipliers.
#
# Args:
#   x: Exponent code(s): a character vector of any length, or anything that
#      as.character() can convert (e.g. a factor).
#
# Returns:
#   An unnamed numeric vector with one multiplier per element of x:
#     "b", "B"       -> 1e9
#     "m", "M", "6"  -> 1e6
#     "k", "K", "3"  -> 1e3
#     "h", "H", "2"  -> 100
#     anything else (including "" and NA) -> 1
convertUnits <- function(x) {
  multipliers <- c(
    "b" = 1e9, "B" = 1e9,
    "m" = 1e6, "M" = 1e6, "6" = 1e6,
    "k" = 1e3, "K" = 1e3, "3" = 1e3,
    "h" = 1e2, "H" = 1e2, "2" = 1e2
  )
  # Indexing by name is vectorised, so a whole column can be converted in one
  # call; codes that are not in the table come back as NA.
  result <- unname(multipliers[as.character(x)])
  # Unknown codes mean "no scaling"
  result[is.na(result)] <- 1
  # Return the value visibly (the if/else-assignment form returned invisibly)
  result
}
# Add two extra columns holding the conversion value for each damage figure:
# PropertyUnits (derived from PROPDMGEXP) and CropUnits (from CROPDMGEXP).
# vapply() is used rather than sapply() so each column is guaranteed to be a
# plain, unnamed numeric vector with one value per row, even for empty input.
raw <- cbind(
  raw,
  PropertyUnits = vapply(
    raw$PROPDMGEXP, convertUnits, numeric(1),
    USE.NAMES = FALSE
  ),
  CropUnits = vapply(
    raw$CROPDMGEXP, convertUnits, numeric(1),
    USE.NAMES = FALSE
  )
)
Generate summary datasets for each question
1.1 Health
# Health impact is the sum of INJURIES and FATALITIES; no weighting is
# assumed, so both count equally.
# HealthImpact is summed per EVTYPE and sorted in descending order.
# A plain summarise() replaces the deprecated summarise_each(funs(sum)).
health <- raw %>%
  group_by(EVTYPE) %>%
  summarise(HealthImpact = sum(INJURIES + FATALITIES)) %>%
  arrange(desc(HealthImpact))
2.1 Damages
# Economic impact is property damage plus crop damage, each multiplied by
# its unit conversion value (PropertyUnits / CropUnits).
# EconomicImpact is summed per EVTYPE and sorted in descending order.
# A plain summarise() replaces the deprecated summarise_each(funs(sum)).
damages <- raw %>%
  group_by(EVTYPE) %>%
  summarise(
    EconomicImpact = sum(PROPDMG * PropertyUnits + CROPDMG * CropUnits)
  ) %>%
  arrange(desc(EconomicImpact))
Results
1.2 Health
# [health] was sorted in the data processing part, so its first 7 rows are
# the 7 event types with the largest health impact
TopHealth <- head(health, n = 7)

# Bar chart of the top 7 event types.
# Bar labels are lower-cased first, then the first letter of every word is
# upper-cased by the perl-style \U replacement.
# Note: `cex` partially matches barplot's `cex.names` argument, so it is the
# bar labels that get scaled to 0.7.
barplot(
  height = TopHealth[["HealthImpact"]],
  names.arg = gsub(
    pattern = "(^|[[:space:]])([[:alpha:]])",
    replacement = "\\1\\U\\2",
    x = tolower(TopHealth[["EVTYPE"]]),
    perl = TRUE
  ),
  main = "Total Health Impact",
  xlab = "Event Type",
  ylab = "Persons affected",
  col = "blue",
  axis.lty = 1,
  cex.main = 1,
  cex.sub = 1,
  cex.lab = 1,
  cex.axis = 1,
  cex = 0.7
)

2.2 Damages
# [damages] is already sorted in descending order, so its first 7 rows are
# the 7 event types with the largest economic impact
TopEcon <- head(damages, n = 7)

# Bar chart of the top 7 event types.
# Bar labels are lower-cased first, then the first letter of every word is
# upper-cased by the perl-style \U replacement.
# Note: `cex` partially matches barplot's `cex.names` argument, so it is the
# bar labels that get scaled to 0.7.
barplot(
  height = TopEcon[["EconomicImpact"]],
  names.arg = gsub(
    pattern = "(^|[[:space:]])([[:alpha:]])",
    replacement = "\\1\\U\\2",
    x = tolower(TopEcon[["EVTYPE"]]),
    perl = TRUE
  ),
  main = "Total Economic Impact",
  xlab = "Event Type",
  ylab = "Damages $$",
  col = "green",
  axis.lty = 1,
  cex.main = 1,
  cex.sub = 1,
  cex.lab = 1,
  cex.axis = 1,
  cex = 0.7
)
