Every year severe weather events cause great economic damage and devastating loss of human life in the U.S. After analyzing the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database, it is evidently clear that the most deadly natural disaster is Tornado, which claims 91 lives and injures over 1450 people annually; and Flood causes the greatest economic loss totaling $9.5 Billion annually in combined property and crop damage.
This study aims to measure devastation caused by severe weather events in the U.S. by investigating the storm database maintained by the U.S. National Oceanic and Atmospheric Administration (NOAA). It seeks to answer two main questions:
Data processing details, analysis and final results and discussion are presented in this report.
knitrrequire(knitr)
require(data.table)
require(reshape2)
require(lattice)
require(plyr)
require(RColorBrewer)
require(googleVis)
options(width = 100)
opts_chunk$set(message = F, error = F, warning = F, comment = NA,
fig.align = 'center', dpi = 100, tidy = F,
cache.path = '.cache/', fig.path = 'fig/')
options(xtable.type = 'html')
knit_hooks$set(inline = function(x) {
if(is.numeric(x)) {
round(x, getOption('digits'))
} else {
paste(as.character(x), collapse = ', ')
}
})
knit_hooks$set(plot = knitr:::hook_plot_html)
Download StormData.csv.bz2 to newly created directory ./data.
ifelse(!file.exists("data"),dir.create("data"),"dir exists")
file<-"http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
ifelse(!file.exists("./data/StormData.csv.bz2"),download.file(file,"./data/StormData.csv.bz2",mode='wb'), "Skip! File downloaded")
Regroup of common event names: e.g. Flash Flood to Flood
## Read in data using 'read.csv'.
storm <-read.csv("./data/StormData.csv.bz2",stringsAsFactors=FALSE)
## Convert Date to Year variable
storm$year<-as.numeric(format(strptime(storm$BGN_DATE,format="%m/%d/%Y"),format="%Y"))
## Clean up Event Type names
storm$EVTYPE<-gsub(".*FLOOD[A-Z]|.*FLOODING$|.*FLOOD$|FLOODS",'FLOOD',toupper(storm$EVTYPE))
storm$EVTYPE<-gsub("(^ )*","",storm$EVTYPE)
storm$EVTYPE<-gsub("HURRICANE/TYPHOON","HURRICANE",storm$EVTYPE)
storm$EVTYPE<-gsub("STORM SURGE/TIDE","STORM SURGE",storm$EVTYPE)
To calculate property and crop damage correctly, raw variables 'PROPDMGEXP' and 'CROPDMGEXP' need to be recorded.
I will use mapvalues function from plyr package to map them to valid numeric exponential value. e.g. 250 B = 250 Billion. 1.5 K = 1,500.
Final damage values ($) are calculated as
Note: inflation adjustment is not done since expected impact of inflation adjustment would be small as most of damage occurred after 1995.
## replaing
# c("H","K","M","B","","+","-","?",0) by
## c(2,3,6,9,-Inf,-Inf,-Inf,-Inf,-Inf)
#c(unique(toupper(unique(storm$PROPDMGEXP))))
#c(unique(toupper(unique(storm$CROPDMGEXP))))
storm$propvalue<-storm$PROPDMG*10^c(as.numeric(mapvalues(toupper(storm$PROPDMGEXP),
from=c("H","K","M","B","","+","-","?",0),
to=c(2,3,6,9,-Inf,-Inf,-Inf,-Inf,-Inf))))
storm$cropvalue<-storm$CROPDMG*10^c(as.numeric(mapvalues(toupper(storm$CROPDMGEXP),
from=c("H","K","M","B","","+","-","?",0),
to=c(2,3,6,9,-Inf,-Inf,-Inf,-Inf,-Inf))))
## Checking for missing value
## After Calculation,No Missing value for both PropertyDamageValue and CropDamageValue
sum(is.na((storm$propvalue)))+sum(is.na((storm$cropvalue)))
## Combining Property Damage and Crop Damage to create Total Damage (Totaldmg)
storm$Totaldmg<-storm$propvalue + storm$cropvalue
dt<-data.table(storm)
Calculating nation-wide Annual Total Fatality and Annual Total Injury by Event (EVTYPE) and by Year.
Totalinjury<-dt[FATALITIES>0 | INJURIES>0,list(T_FATALITIES=sum(FATALITIES,na.rm=TRUE),
T_INJURIES=sum(INJURIES,na.rm=TRUE)),
by=list(EVTYPE,year)][order(EVTYPE,year)]
Summarize Weather Event:
history<-Totalinjury[,
list(N_years=length(unique(year)),
First_year=min(unique(year)),
Last_year=max(unique(year)),
Avgdeath=mean(T_FATALITIES,na.rm=TRUE),
Avginjury=mean(T_INJURIES,na.rm=TRUE),
Totaldeath=sum(T_FATALITIES,na.rm=TRUE),
Totalinjury=sum(T_INJURIES,na.rm=TRUE)
),
by=EVTYPE][N_years>=1][order(-Totaldeath)]
history_gvis<-data.frame(format(history,digits=0))
T <- gvisTable(history_gvis, options = list(width = 800, height = 300,page = "enable"))
print(T, "chart")
top10eventname<-c(history[as.numeric(rownames(history))<11,EVTYPE])
top10summary<-history[EVTYPE %in% c(top10eventname)]
melt<-melt(top10summary,id=c("EVTYPE"))[variable %in% c("Avgdeath","Avginjury")][order(variable,-value)]
color<-rev(brewer.pal(3,"Paired"))
barchart(value ~ EVTYPE ,data=melt,
groups=variable,origin=0,box.ratio=10,
main='Chart 1. Top 10 Natural Disasters Most Harmful to \nPopulation Health in the U.S.\nby Annually Average of # of Fatalities and Injuries',
ylab="Annual Avg. Fatality/Injury",
auto.key=list(text=c("Injury","Fatality"),space='inside',columns=1,
title="Type of Injury",cex.title=1.1,cex=1,between.columns=5,
rectangles=TRUE,points=F),
scales=list(x=list(rot=45),
y=list(at=c(seq(0,1600,100))),
alternating=1), ,
panel = function(...) {
panel.abline(h=c(100),lty = "dotted", col = "grey8")
panel.barchart(...)},
par.settings=list(superpose.polygon=list(col=color[1:2],border="transparent"))
)
Calculating country-wise Total Property Damage Value in $ and Total Crop Damage Value in $ by Event (EVTYPE) and Year.
value<-data.table(storm)
Totalvalue<-value[propvalue>0 | cropvalue>0,list(T_propvalue=sum(propvalue,na.rm=TRUE),
T_cropvalue=sum(cropvalue,na.rm=TRUE)),
by=list(EVTYPE,year)][order(EVTYPE,year)]
Summarize Weather Events: damage in $ million
damage<-Totalvalue[,
list(N_years=length(unique(year)),
First_year=min(unique(year)),
Last_year=max(unique(year)),
Avgpropdmg=mean(T_propvalue/1e6,na.rm=TRUE),
Avgcropdmg=mean(T_cropvalue/1e6,na.rm=TRUE),
Totalpropdmg=sum(T_propvalue/1e6,na.rm=TRUE),
Totalcropdmg=sum(T_cropvalue/1e6,na.rm=TRUE)
),
by=EVTYPE][N_years>1][order(-Avgpropdmg)]
damage$comb_avgdmg<- damage$Avgpropdmg + damage$Avgcropdmg
damage2<-damage[order(-comb_avgdmg)]
setnames(damage2,c(names(damage2)),c(paste0(names(damage2),c(rep("",4),rep("(Million)",5)))))
damage2$First_year<-as.character(damage2$First_year)
damage2$Last_year<-as.character(damage2$Last_year)
damage2_gvis<-data.frame(format(damage2,scientific = F,digits=0,big.mark=","))
T2 <- gvisTable(damage2_gvis, options = list(width = 1200, height = 300,page = "enable"))
print(T2, "chart")
damage2<-damage[order(-comb_avgdmg)]
top10dmgname<-c(damage2[as.numeric(rownames(damage2))<11,EVTYPE])
top10dmg<-damage2[EVTYPE %in% c(top10dmgname)]
melt<-melt(top10dmg,id=c("EVTYPE"))[variable %in% c("Avgpropdmg","Avgcropdmg")][order(variable,-value)]
color<-rev(brewer.pal(6,"Paired"))
barchart(value ~ EVTYPE ,data=melt,
groups=variable,origin=0,box.ratio=10,
main='Chart 2. Top 10 Most Costly Natural Disasters in the U.S.\nby Annually Average of Total Damage',
ylab="Property/Crop Damage (million $)",
auto.key=list(text=c("Crop Damage","Proporty Damage"),corner = c(1, 1),columns=1,
title="Type of Damage",cex.title=1.1,cex=1,between.columns=5,
rectangles=TRUE,points=F),
scales=list(x=list(rot=45)),
panel = function(...) {
#panel.abline(h=c(seq(100,300,200)),lty = "dotted", col = "grey8")
panel.barchart(...)},
par.settings=list(superpose.polygon=list(col=color[1:2],border="transparent"))
)
sessionInfo()
R version 3.1.0 (2014-04-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale:
[1] LC_COLLATE=English_United States.1252 LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252 LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] googleVis_0.5.2 RColorBrewer_1.0-5 plyr_1.8.1 lattice_0.20-29 reshape2_1.4
[6] data.table_1.9.2 knitr_1.5
loaded via a namespace (and not attached):
[1] codetools_0.2-8 digest_0.6.4 evaluate_0.5.5 formatR_0.10 grid_3.1.0 Rcpp_0.11.1
[7] RJSONIO_1.2-0.2 stringr_0.6.2 tools_3.1.0