Impact of storms and weather events on population health and economy

Synopsis

We analyzed data from the NOAA Storm Database to investigate which kinds of events are most harmful with respect to (1) population health and (2) economic damage. We begin the data processing by computing the dollar value of property and crop losses, tidying up the event names, and then aggregating by event type. In brief, tornadoes cause the most total harm to population health, while heat waves are the most harmful per event. In terms of economic damage, ice storms cause the most total economic damage in the US, while hurricanes are the most economically harmful per event.

Data Processing

We read in the data from the CSV file and select only the columns needed for the analysis: EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, and CROPDMGEXP.

# Load dplyr quietly; its verbs (select, mutate, group_by, summarize, arrange)
# are used throughout the rest of the analysis.
suppressPackageStartupMessages(library(dplyr))

# Read the raw storm records; the file is expected in the working directory.
storm <- read.csv("StormData.csv")
# as_tibble() replaces tbl_df(), which dplyr has deprecated.
storm <- as_tibble(storm)

# Keep only the event type, the casualty counts and the damage figures with
# their magnitude codes. The name `subset` is kept because the rest of the
# file refers to this table by that name.
subset <- select(
  storm,
  EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)

# Store the magnitude codes as character so the string replacements applied
# later work whether read.csv() returned factors or strings.
subset <- mutate(
  subset,
  PROPDMGEXP = as.character(PROPDMGEXP),
  CROPDMGEXP = as.character(CROPDMGEXP)
)

Replace the characters that denote the unit (magnitude) in PROPDMGEXP and CROPDMGEXP with the corresponding numeric multipliers

# Translate each PROPDMGEXP magnitude code into its multiplier (kept as a
# character string in PROPDMGEXP, as before), then compute the property damage
# value as multiplier * PROPDMG.
#
# A lookup table is used instead of chained replacements so that every code is
# translated exactly once from its ORIGINAL value:
#   * H/h is a hundred (100), matching the digit code "2";
#   * a digit n (1-8) means 10^n, so "1" becomes 10 and stays distinct from
#     the blank/zero/symbol codes that become 1.
prop_multipliers <- c(
  H = "100", K = "1000", M = "1000000", B = "1000000000",
  "1" = "10", "2" = "100", "3" = "1000", "4" = "10000", "5" = "100000",
  "6" = "1000000", "7" = "10000000", "8" = "100000000"
)

# Letter codes are matched case-insensitively.
prop_codes <- toupper(subset$PROPDMGEXP)

# Blank, all-zero, or a single non-alphanumeric symbol: multiplier 1.
prop_is_unit <- grepl("^[^0-9A-Za-z]$", prop_codes) | grepl("^0*$", prop_codes)
prop_is_known <- prop_codes %in% names(prop_multipliers)

# Codes that match neither rule (including NA) are left untouched.
subset$PROPDMGEXP[prop_is_unit] <- "1"
subset$PROPDMGEXP[prop_is_known] <-
  unname(prop_multipliers[prop_codes[prop_is_known]])

subset <- mutate(subset, PROPVALUE = as.numeric(PROPDMGEXP) * PROPDMG)

# Rewrite each CROPDMGEXP code as its multiplier string, then derive the crop
# damage value. The rules are whole-string regular expressions (names) paired
# with their replacement (values) and are applied in the order listed.
crop_rules <- c(
  "^[^0-9A-Za-z]$" = "1",          # a single non-alphanumeric symbol
  "^0*$"           = "1",          # blank or all zeros
  "^2$"            = "100",
  "^[Kk]$"         = "1000",
  "^[Mm]$"         = "1000000",
  "^[Bb]$"         = "1000000000"
)

for (pattern in names(crop_rules)) {
  subset$CROPDMGEXP <- gsub(pattern, crop_rules[[pattern]], subset$CROPDMGEXP)
}

# Crop damage value = multiplier * reported crop damage figure.
subset <- mutate(subset, CROPVALUE = as.numeric(CROPDMGEXP) * CROPDMG)

Clean up event names that are duplicated or inconsistently labeled

# Consolidate variant event labels into a common name per event type.
#
# Labels are upper-cased and trimmed first: every pattern below is written in
# upper case, so mixed-case or padded variants would otherwise escape
# consolidation and be counted as separate event types.
subset <- mutate(subset, EVTYPE = trimws(toupper(as.character(EVTYPE))))

# Each row is c(pattern, replacement). Rules are applied top to bottom and the
# order matters: once a label has been rewritten, later patterns are tested
# against the rewritten label (e.g. a label containing both HAIL and TORNADO
# becomes "HAIL" because the HAIL rule runs first).
evtype_rules <- rbind(
  c("^.*DROUGHT.*$",          "DROUGHT"),
  c("^.*FLASH.*FLOOD.*",      "FLASH FLOOD"),
  c("^ICE JAM.*",             "FLASH FLOOD"),
  c("^SNOWMELT FLOODING$",    "FLASH FLOOD"),
  c("^URBAN.*$",              "FLOOD"),
  c("^RIVER.*$",              "FLOOD"),
  c(".*HAIL.*",               "HAIL"),
  c(".*HEAVY.*RAIN.*",        "HEAVY RAIN"),
  c(".*HIGH.*WIND.*",         "HIGH WIND"),
  c(".*HURRICANE.*",          "HURRICANE (TYPHOON)"),
  c(".*SURGE.*",              "STORM SURGE/TIDE"),
  c(".*TORNADO.*",            "TORNADO"),
  c("^.*TROPICAL.*STORM.*$",  "TROPICAL STORM"),
  c("^.*THUNDER.*WIND.*$",    "THUNDERSTORM WIND"),
  c(".*TSTM.*",               "THUNDERSTORM WIND"),
  c("^.*WINTER STORM.*$",     "WINTER STORM"),
  c("^.*FIRE.*$",             "WILDFIRE")
)

for (i in seq_len(nrow(evtype_rules))) {
  subset$EVTYPE <- gsub(evtype_rules[i, 1], evtype_rules[i, 2], subset$EVTYPE)
}

Generating a list of event types sorted by mean fatalities and injuries per event

suppressPackageStartupMessages(library(dplyr))

# Turn the event type into a factor and group the records by it. The grouped
# table is stored back in `subset`, so every later summarize() call on
# `subset` yields one row per event type.
subset <- subset %>%
  mutate(EVTYPE = as.factor(EVTYPE)) %>%
  group_by(EVTYPE)

# Mean number of fatalities per recorded event, by event type.
fatality <- subset %>%
  summarize(mean(FATALITIES)) %>%
  setNames(c("EVTYPE", "FATALITIES"))

# Mean number of injuries per recorded event, by event type.
injuries <- subset %>%
  summarize(mean(INJURIES)) %>%
  setNames(c("EVTYPE", "INJURIES"))

# Both summaries come from the same grouping, so their rows line up and the
# injuries column can be attached directly. Rank by combined mean casualties,
# highest first.
avg_population <- arrange(
  cbind(fatality, INJURIES = injuries[["INJURIES"]]),
  desc(FATALITIES + INJURIES)
)

Generating a list of event types sorted by total fatalities and injuries

# `subset` is grouped by EVTYPE at this point, so each summary has one row per
# event type.

# Total fatalities by event type.
fatality_total <- subset %>%
  summarize(sum(FATALITIES)) %>%
  setNames(c("EVTYPE", "FATALITIES"))

# Total injuries by event type.
injuries_total <- subset %>%
  summarize(sum(INJURIES)) %>%
  setNames(c("EVTYPE", "INJURIES"))

# Rows of both summaries are in the same group order, so the injuries column
# is attached directly. Rank by combined total casualties, highest first.
total_population <- arrange(
  cbind(fatality_total, INJURIES = injuries_total[["INJURIES"]]),
  desc(FATALITIES + INJURIES)
)

Generating lists of event types sorted by mean and by total economic damage

# Economic loss of a record = property damage value + crop damage value.
# `subset` is grouped by EVTYPE, so each summary has one row per event type.

# Mean loss per recorded event, sorted from largest to smallest.
econoloss <- subset %>%
  summarize(mean(PROPVALUE + CROPVALUE)) %>%
  setNames(c("EVTYPE", "ECONOLOSS")) %>%
  arrange(desc(ECONOLOSS))

# Total loss over all recorded events, sorted from largest to smallest.
econoloss_total <- subset %>%
  summarize(sum(PROPVALUE + CROPVALUE)) %>%
  setNames(c("EVTYPE", "ECONOLOSS")) %>%
  arrange(desc(ECONOLOSS))

Results

Q1.Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

We draw bar plots of the five event types most harmful to the population: the left plot shows total casualties by event type, and the right plot shows average casualties per event. We can see that tornadoes cause the most total harm to population health, while heat waves are the most harmful per event. If we focus on fatalities, then cold and snow might be the most harmful per event.

suppressPackageStartupMessages(library(reshape2))
suppressPackageStartupMessages(library(ggplot2))
# library() rather than require(): stop right here if gridExtra is missing
# instead of failing later at grid.arrange().
suppressPackageStartupMessages(library(gridExtra))

# Five event types with the highest total casualties. `fx` records the rank so
# that reorder() keeps the bars in ranked order instead of alphabetical order.
draw_total_population <- total_population[1:5, ]
draw_total_population$fx <- 1:5
draw_total_population$EVTYPE <- reorder(
  draw_total_population$EVTYPE, draw_total_population$fx
)
# Columns 1:3 are EVTYPE, FATALITIES and INJURIES (the rank column is left
# out); melt() stacks the two counts into variable/value pairs.
draw_total_population <- melt(draw_total_population[, 1:3])
## Using EVTYPE as id variables
# Built with ggplot() + geom_bar(stat = "identity") because qplot() and its
# `stat` argument are deprecated in current ggplot2.
plot1 <- ggplot(
  draw_total_population,
  aes(x = EVTYPE, y = value, fill = variable)
) +
  geom_bar(stat = "identity") +
  ylab("count") +
  ggtitle("total casualty by event")

# Same preparation for the five event types with the highest mean casualties.
draw_avg_population <- avg_population[1:5, ]
draw_avg_population$fx <- 1:5
draw_avg_population$EVTYPE <- reorder(
  draw_avg_population$EVTYPE, draw_avg_population$fx
)
draw_avg_population <- melt(draw_avg_population[, 1:3])
## Using EVTYPE as id variables
plot2 <- ggplot(
  draw_avg_population,
  aes(x = EVTYPE, y = value, fill = variable)
) +
  geom_bar(stat = "identity") +
  ylab("count") +
  ggtitle("average casualties per event")

# Show the two plots side by side.
grid.arrange(plot1, plot2, ncol = 2)

plot of chunk unnamed-chunk-7

Q2. Across the United States, which types of events have the greatest economic consequences?

We also draw bar plots of the total economic loss by event type and the average economic loss per event. We can see that ice storms cause the most total economic damage in the US, while hurricanes are the most economically harmful per event.

# library() rather than require(): stop right here if gridExtra is missing
# instead of failing later at grid.arrange().
suppressPackageStartupMessages(library(gridExtra))

# Five event types with the highest total economic loss. `fx` records the rank
# so that reorder() keeps the bars in ranked order.
draw_total_economy <- econoloss_total[1:5, ]
draw_total_economy$fx <- 1:5
draw_total_economy$EVTYPE <- reorder(
  draw_total_economy$EVTYPE, draw_total_economy$fx
)
# Melt ONLY the loss column. The table has just three columns (EVTYPE,
# ECONOLOSS, fx), so selecting columns 1:3 would also melt the rank helper
# `fx`, and its values 1..5 would be stacked on top of each bar.
draw_total_economy <- melt(
  as.data.frame(draw_total_economy[, c("EVTYPE", "ECONOLOSS")]),
  id.vars = "EVTYPE"
)
# Built with ggplot() + geom_bar(stat = "identity") because qplot() and its
# `stat` argument are deprecated in current ggplot2.
plot1 <- ggplot(
  draw_total_economy,
  aes(x = EVTYPE, y = value, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  ylab("US dollar") +
  ggtitle("total econological loss by event")

# Same preparation for the five event types with the highest mean loss.
draw_avg_economy <- econoloss[1:5, ]
draw_avg_economy$fx <- 1:5
draw_avg_economy$EVTYPE <- reorder(
  draw_avg_economy$EVTYPE, draw_avg_economy$fx
)
draw_avg_economy <- melt(
  as.data.frame(draw_avg_economy[, c("EVTYPE", "ECONOLOSS")]),
  id.vars = "EVTYPE"
)
plot2 <- ggplot(
  draw_avg_economy,
  aes(x = EVTYPE, y = value, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  ylab("US dollar") +
  ggtitle("average econological lose per event")

# Show the two plots side by side.
grid.arrange(plot1, plot2, ncol = 2)

plot of chunk unnamed-chunk-8