Evaluating the damage of US severe weather events

Synopsis:

In this analysis, the Storm Data set from the NOAA website is analyzed to investigate the damage caused by severe weather in the US. For Q1, we used the fatalities and injuries variables provided in the dataset to evaluate the damage to population health, using the mean value per event type as the indicator. We found that TSUNAMI caused the greatest damage in the US. For Q2, we analyzed property damage and crop damage to identify the type of weather that caused the greatest economic loss. We found that WND is the type that caused the greatest economic loss.

Data source: NOAA website - Storm data - Please see readme file for further details

Data processing

##load data
    # Read the storm CSV (relative path, so it must sit in the working directory).
    data <- read.csv("repdata-data-StormData.csv", header = TRUE, sep = ",")
    # Show the column names that were read.
    names(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
##subset the data: rows where FATALITIES and INJURIES are both 0 are discarded, since they are not useful for the analysis
    # Count, per row, how many of the two casualty columns are non-zero,
    # then keep only the rows where at least one of them is.
    casualty.count <- (data$FATALITIES != 0) + (data$INJURIES != 0)
    data1 <- data[casualty.count != 0, ]
##

Question1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Use FATALITIES and INJURIES to evaluate this question. We will calculate the mean of FATALITIES and of INJURIES per event type.
fatalities.mean <- tapply(data1$FATALITIES, data1$EVTYPE, mean)
injuries.mean <- tapply(data1$INJURIES, data1$EVTYPE, mean)
##sort the mean by descending order
    # Largest average first; sort() drops NA entries by default.
    fatalities.mean <- sort(fatalities.mean, decreasing = TRUE)
    injuries.mean <- sort(injuries.mean, decreasing = TRUE)
##rank the mean values, and get an index for the combined rank of fatalities and injuries
    # Rank every event type on both averages (a larger mean gets a larger rank).
    rank1 <- rank(fatalities.mean)
    rank2 <- rank(injuries.mean)
    rank3.names <- names(rank1)
    # Combined score = fatalities rank + injuries rank of the same event type.
    # match() lines rank2 up with rank1 by name in a single vectorised step,
    # replacing the 1:length() loop that grew rank3.num one element at a time.
    rank3.num <- rank1 + rank2[match(rank3.names, names(rank2))]
    rank3 <- data.frame(rank3.names, rank3.num)
    # Row(s) with the highest combined score; ties return more than one row.
    answer <- which(rank3[, 2] == max(rank3[, 2]))
    answer.1 <- rank3[answer, 1]

##plot the mean number
    # Bar chart of the first 20 entries of fatalities.mean. head() returns
    # fewer bars instead of NA padding when there are under 20 event types.
    top.fatalities <- head(fatalities.mean, 20)
    x <- barplot(top.fatalities, xaxt = "n",
                 main = "Mean of the fatalities by type of weather",
                 ylab = "Number of fatalities")
    # names() yields the event-type labels for a 1-d array and for a plain
    # named vector alike; rownames() is NULL for a plain vector.
    labs <- names(top.fatalities)
    # Rotated labels drawn at y = -5; xpd = TRUE permits drawing outside the
    # plot region.
    text(x = x - 0.1, y = -5, labels = labs, cex = 0.7, srt = 90, xpd = TRUE)

plot of chunk unnamed-chunk-2

    # Bar chart of the first 20 entries of injuries.mean. head() returns
    # fewer bars instead of NA padding when there are under 20 event types.
    top.injuries <- head(injuries.mean, 20)
    x <- barplot(top.injuries, xaxt = "n",
                 main = "Mean of the injuries by type of weather",
                 ylab = "Number of injuries")
    # names() yields the event-type labels for a 1-d array and for a plain
    # named vector alike; rownames() is NULL for a plain vector.
    labs <- names(top.injuries)
    # Rotated labels drawn at y = -5; xpd = TRUE permits drawing outside the
    # plot region.
    text(x = x - 0.1, y = -5, labels = labs, cex = 0.7, srt = 90, xpd = TRUE)

plot of chunk unnamed-chunk-2

Question2: Across the United States, which types of events have the greatest economic consequences?

(drop data before 1996 for this question)
Use PROPDMG and CROPDMG to rank the EVTYPE. We calculate the sum of the economic loss per event type.
###drop the data before 1996
    # Parse the month/day/year part of BGN_DATE as a Date, so the cutoff
    # comparison no longer depends on the local time zone or on coercing a
    # bare string.
    bgn.date <- as.Date(data$BGN_DATE, format = "%m/%d/%Y")
    # Flag rows before 1996; rows whose date cannot be parsed are flagged too,
    # so they are removed rather than turning into all-NA rows.
    drop <- is.na(bgn.date) | bgn.date < as.Date("1996-01-01")
    data2 <- data[!drop, ]
###drop the data if PROPDMG+CROPDMG=0
    # Count, per row, how many of the two damage columns are non-zero,
    # then keep only the rows where at least one of them is.
    damage.count <- (data2$PROPDMG != 0) + (data2$CROPDMG != 0)
    data2 <- data2[damage.count != 0, ]
###get the exp from PROPDMGEXP and CROPDMGEXP
    # Multipliers for the magnitude codes handled here.
    exp.mult <- c(K = 10^3, M = 10^6, B = 10^9)
    # Look up each row's multiplier. toupper() makes the lookup
    # case-insensitive, so lower-case codes are scaled as well (exact tests
    # against "K"/"M"/"B" would leave them unscaled).
    prop.code <- toupper(as.character(data2$PROPDMGEXP))
    crop.code <- toupper(as.character(data2$CROPDMGEXP))
    prop.mult <- unname(exp.mult[match(prop.code, names(exp.mult))])
    crop.mult <- unname(exp.mult[match(crop.code, names(exp.mult))])
    # Any other code (blank or NA included) leaves the amount unscaled.
    prop.mult[is.na(prop.mult)] <- 1
    crop.mult[is.na(crop.mult)] <- 1
    ####
    prop <- data2$PROPDMG * prop.mult
    crop <- data2$CROPDMG * crop.mult
    # Store the scaled amounts as columns "prop" and "crop"; assigning by name
    # overwrites on a re-run instead of appending duplicate columns.
    data2$prop <- prop
    data2$crop <- crop
###
    # Total damage per event type (the *.mean names hold sums, not means).
    prop.mean <- tapply(data2$prop, data2$EVTYPE, sum)
    crop.mean <- tapply(data2$crop, data2$EVTYPE, sum)
###
     # Rank every event type on both totals (a larger total gets a larger rank).
    rank1 <- rank(prop.mean)
    rank2 <- rank(crop.mean)
    rank3.names <- names(rank1)
    # Combined score = property rank + crop rank of the same event type.
    # match() lines rank2 up with rank1 by name in a single vectorised step,
    # replacing the 1:length() loop that grew rank3.num one element at a time.
    rank3.num <- rank1 + rank2[match(rank3.names, names(rank2))]
    rank3 <- data.frame(rank3.names, rank3.num)
    # Row(s) with the highest combined score; ties return more than one row.
    answer <- which(rank3[, 2] == max(rank3[, 2]))
    answer.2 <- rank3[answer, 1]

Results

Q1. The type that is most harmful for public health is TSUNAMI
Q2. The type that has the greatest economic loss is WND