Synopsis

This report analyses the impact of severe weather events on population health and the economic losses they cause.
Three points should be noted. First, this report groups the 48 standard event types into 11 categories. Second, when calculating the damage to population health, fatalities are weighted 10 to 1 against injuries. Third, the economic loss is calculated by summing the property damage and the crop damage.
According to the analysis, the events most dangerous to population health are those related to severe rain, and drought causes the largest economic loss.

Data Processing

Step1. Load the data into RStudio

# Read the bzip2-compressed storm CSV (path relative to the working directory).
raw.dat <- read.csv(file = bzfile(description = "repdata-data-StormData.csv.bz2"))

Step2. Tidy and clean data

library(plyr)
library(dplyr)
# 1. Keep only the event type, casualty and damage columns.
use.data <- raw.dat %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
# 2. Lower-case the column names, then inspect the structure.
colnames(use.data) <- tolower(colnames(use.data))
str(use.data)
## 'data.frame':    902297 obs. of  7 variables:
##  $ evtype    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ fatalities: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ injuries  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ propdmg   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ propdmgexp: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ cropdmg   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ cropdmgexp: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Convert the event type and both exponent codes to lower-case character
# vectors so later keyword matching does not depend on case or factor levels.
data.lc <- use.data
data.lc$evtype <- tolower(as.character(data.lc$evtype))
data.lc$propdmgexp <- tolower(as.character(data.lc$propdmgexp))
data.lc$cropdmgexp <- tolower(as.character(data.lc$cropdmgexp))

#3. Collapse the raw `evtype` labels into 11 broad groups by keyword and keep
#   only the observations that fall into one of those groups.
#
# The groups are applied in order against the progressively relabelled column.
# No group label contains a keyword of a later group, so the first matching
# group wins (e.g. "thunderstorm wind" is filed under "wind", not "rain").

# Full keyword list, kept for reference; the grouping below does not use it.
keywordmatch <- c("wind", "rain", "flood", "snow", "tornado", "winter", "fire",
                  "water", "volcanic", "tsunami", "storm", "depression",
                  "sleet", "seiche", "current", "hail", "lightning", "typhoon",
                  "hurricane", "surf", "heat", "cold", "frost", "freez", "dust",
                  "drought", "dense", "debris", "chill", "blizzard",
                  "avalanche", "tide")

keyword1 <- c("wind", "typhoon", "tornado") # all events related to wind go into one group
data.lc[grep(paste(keyword1, collapse = "|"), data.lc$evtype), "evtype"] <- "wind"
keyword2 <- c("rain", "storm", "flood", "water", "hurricane", "sleet", "debris") # rain group
data.lc[grep(paste(keyword2, collapse = "|"), data.lc$evtype), "evtype"] <- "rain"
keyword3 <- c("snow", "blizzard", "avalanche") # snow group
data.lc[grep(paste(keyword3, collapse = "|"), data.lc$evtype), "evtype"] <- "snow"
# "cold" was previously misspelled "clod", so cold events never reached this group.
keyword4 <- c("winter", "cold", "frost", "freez", "chill") # freeze group
data.lc[grep(paste(keyword4, collapse = "|"), data.lc$evtype), "evtype"] <- "freeze"
keyword5 <- c("fire", "heat") # fire group
data.lc[grep(paste(keyword5, collapse = "|"), data.lc$evtype), "evtype"] <- "fire"
keyword6 <- c("tsunami", "seiche", "current", "surf", "tide") # ocean event group
data.lc[grep(paste(keyword6, collapse = "|"), data.lc$evtype), "evtype"] <- "ocean"
keyword7 <- c("volcanic") # volcanic group
data.lc[grep(keyword7, data.lc$evtype), "evtype"] <- "volcanic"
keyword8 <- c("hail") # hail group
data.lc[grep(keyword8, data.lc$evtype), "evtype"] <- "hail"
# Lightning group. The old pattern "lighting" does not occur inside the word
# "lightning", so correctly spelled lightning events were never matched and were
# dropped by the filter below. "lighting" stays in the pattern so rows that
# matched before still match.
# NOTE(review): the group label is left as "lighting" so any code that already
# refers to that label keeps working; rename it in a coordinated change.
keyword9 <- c("lightning", "lighting")
data.lc[grep(paste(keyword9, collapse = "|"), data.lc$evtype), "evtype"] <- "lighting"
keyword10 <- c("dust", "dense") # dust/fog group
data.lc[grep(paste(keyword10, collapse = "|"), data.lc$evtype), "evtype"] <- "dust/fog"
keyword11 <- c("drought") # drought group
data.lc[grep(keyword11, data.lc$evtype), "evtype"] <- "drought"

# Keep only rows whose event type was mapped to one of the 11 groups.
data.use <- filter(data.lc, evtype %in% c("drought", "dust/fog", "lighting", "hail",
                                          "volcanic", "ocean", "fire", "freeze",
                                          "snow", "rain", "wind"))
cat("the number of useful observation is",nrow(data.use))
## the number of useful observation is 872615
# NOTE(review): the count echoed above was produced before the "lightning"/"cold"
# keyword fix; re-run the chunk to refresh it.
#4. Normalise `propdmgexp` to a power-of-ten exponent, stored as a digit string.
unique(data.use$propdmgexp)
##  [1] "k" "m" ""  "b" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "-" "1" "8"
data.use[grep("h", data.use$propdmgexp), "propdmgexp"] <- "2" # hundred
data.use[grep("k", data.use$propdmgexp), "propdmgexp"] <- "3" # thousand
data.use[grep("m", data.use$propdmgexp), "propdmgexp"] <- "6" # million
data.use[grep("b", data.use$propdmgexp), "propdmgexp"] <- "9" # billion
# Everything that is not a digit 1-9 ("", "+", "-", "?", "0") means "no
# multiplier" and becomes exponent 0.
# The mask must be negated with `!`. Unary minus on a logical vector yields
# 0/-1 indices, which reset every row except the first (whenever any code is
# valid) instead of only the invalid ones.
data.use[!(data.use$propdmgexp %in% c("1", "2", "3", "4", "5", "6", "7", "8", "9")),
         "propdmgexp"] <- "0"
#5. Normalise `cropdmgexp` the same way.
unique(data.use$cropdmgexp)
## [1] ""  "m" "k" "b" "?" "0" "2"
data.use[grep("k", data.use$cropdmgexp), "cropdmgexp"] <- "3" # thousand
data.use[grep("m", data.use$cropdmgexp), "cropdmgexp"] <- "6" # million
data.use[grep("b", data.use$cropdmgexp), "cropdmgexp"] <- "9" # billion
# Reset unrecognised crop codes to exponent 0. The target column is
# `cropdmgexp`; this line previously overwrote `propdmgexp` by mistake.
data.use[!(data.use$cropdmgexp %in% c("2", "3", "6", "9")), "cropdmgexp"] <- "0"
# Both exponent columns now hold only digit strings, so the conversion is exact.
tidydata <- mutate(data.use,
                   propdmgexp = as.numeric(propdmgexp),
                   cropdmgexp = as.numeric(cropdmgexp))

Step4. Calculate the economic consequences of different type of events.

# Keep the event type plus the four damage columns, in this fixed order.
ecodmg.data <- tidydata %>%
  select(evtype, propdmg, propdmgexp, cropdmg, cropdmgexp)
# Replace missing values with 0 in every column except the first (`evtype`).
ecodmg.data[-1] <- lapply(ecodmg.data[-1], function(v) ifelse(is.na(v), 0, v))
# Split by event group, then take the mean per-observation loss in each group:
# property damage plus crop damage, each scaled by its power-of-ten exponent.
s2 <- split(ecodmg.data, ecodmg.data$evtype)
eco.dag <- sapply(s2, function(grp) {
  prop.loss <- grp[["propdmg"]] * (10^grp[["propdmgexp"]])
  crop.loss <- grp[["cropdmg"]] * (10^grp[["cropdmgexp"]])
  mean(prop.loss + crop.loss)
})
barplot(eco.dag, col = "blue", main = "economic loss")

plot of chunk unnamed-chunk-4

Results

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
    Note: From the figure named “health damage”, we can see that severe weather events related to “rain” damage population health the most.

  2. Across the United States, which types of events have the greatest economic consequences?
    Note: From the figure named “economic loss”, we can see that drought has the greatest economic consequences.