Synopsis

Severe weather events, including storms, can cause damage to public health and the economy. Based on the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, covering 1950 to 2011, this project addresses the following two main questions: 1. Which types of events are most harmful to public health? 2. Which types of events have the greatest economic consequences? The results are shown in the Knit HTML document generated through the .Rmd document and published on RPubs.com.

Data Processing

1. Download and read the .csv raw data

# Fetch the NOAA storm data archive (once) and load it into `rawdata`.
data_dir <- "RR Project"
archive_path <- file.path(".", data_dir, "Storm Data.csv.bz2")
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Create the directory only when it is missing, so re-running the report
# does not emit an "already exists" warning.
if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}

# Skip the download when a copy is already on disk. mode = "wb" writes the
# compressed archive as binary so it is not corrupted on Windows.
if (!file.exists(archive_path)) {
  download.file(URL, archive_path, mode = "wb")
}

# read.csv reads the bzip2-compressed file directly.
rawdata <- read.csv(archive_path)

2. Process data

# Keep only the columns needed for the health and economic analysis.
keep_cols <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
data <- data.frame(rawdata[, keep_cols], stringsAsFactors = FALSE)
data[] <- lapply(data, as.character)

# Translate a damage-exponent code into a numeric multiplier.
#
# code: character vector of exponent codes (case-insensitive).
# Returns a numeric vector of the same length:
#   H = 1e2, K = 1e3, M = 1e6, B = 1e9, a single digit d = 10^d,
#   anything else ("", "+", "-", "?", NA, ...) = 1 (i.e. 10^0).
exp_to_multiplier <- function(code) {
  code <- toupper(trimws(as.character(code)))
  power <- unname(c(H = 2, K = 3, M = 6, B = 9)[code])
  is_digit <- grepl("^[0-9]$", code)
  power[is_digit] <- as.numeric(code[is_digit])
  # Unrecognised symbols carry no scale information; treat them as 10^0
  # rather than letting NA poison the per-event sums.
  power[is.na(power)] <- 0
  10^power
}

# Convert the count and amount columns to numbers.
data$FATALITIES <- as.numeric(data$FATALITIES)
data$INJURIES <- as.numeric(data$INJURIES)

# Scale each damage amount by its own exponent BEFORE aggregating, so that
# PROPDMG and CROPDMG are expressed in dollars for every record.
data$PROPDMG <- as.numeric(data$PROPDMG) * exp_to_multiplier(data$PROPDMGEXP)
data$CROPDMG <- as.numeric(data$CROPDMG) * exp_to_multiplier(data$CROPDMGEXP)

# The exponents are now folded into the amounts. Zero the exponent columns
# (kept so the column layout of `df` is unchanged) so they contribute nothing
# to any downstream total that adds the four damage columns together.
data$PROPDMGEXP <- numeric(nrow(data))
data$CROPDMGEXP <- numeric(nrow(data))

# Total every numeric column per event type; the grouping column is Group.1.
df <- aggregate(data[-1], by = list(data$EVTYPE), sum)

Results

1. Damage rank on health

Assume 1 fatality is equivalent to 10 injuries (an arbitrary weighting used only for the purpose of this assignment).

# Health score per event type. One fatality is weighted as ten injuries,
# so the factor of 10 belongs on FATALITIES, not on INJURIES.
df$health <- 10 * df$FATALITIES + df$INJURIES

# Rank event types from most to least harmful and keep the top five.
# head() avoids NA padding rows when fewer than five event types exist.
healthdamage <- df[order(df$health, decreasing = TRUE), ]
hd5 <- head(healthdamage, 5)
barplot(hd5$health, names.arg = hd5$Group.1)

# Event type with the highest health score.
x <- hd5$Group.1[1]
print(x)
## [1] "TORNADO"

So, the event type most harmful to public health is Tornado.

2. Damage rank on economy

Assume all effects on the economy can be valued equally in money across the four variables PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP.

# Economic score per event type: the four aggregated damage columns added
# together, left to right.
# NOTE(review): adding the summed *EXP columns to the summed damage amounts
# only yields a dollar total if upstream processing has already scaled
# PROPDMG/CROPDMG to dollars -- verify against the data-processing step.
df$economy <- with(df, PROPDMG + PROPDMGEXP + CROPDMG + CROPDMGEXP)

# Sort event types by economic score, largest first, and take rows 1 to 5.
eco_rank <- order(df[["economy"]], decreasing = TRUE)
ecodamage <- df[eco_rank, ]
eco5 <- ecodamage[seq_len(5), ]
barplot(eco5[["economy"]], names.arg = eco5[["Group.1"]])

# Event type with the highest economic score.
y <- eco5[["Group.1"]][1]

So, the event type with the greatest economic consequences is Hurricane/Typhoon.