This analysis sought to explore the NOAA Storm Database and answer some basic questions about severe weather events: (i) across the United States, which types of events are most harmful with respect to population health, and (ii) across the United States, which types of events have the greatest economic consequences?
## Download the compressed NOAA storm data once and load it into `data`.
data_file <- 'data.csv.bz2'
if (!file.exists(data_file)) {
  ## mode = 'wb' keeps the bz2 archive byte-exact on Windows; the default
  ## download method avoids depending on an external `curl` executable.
  download.file('https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2',
                destfile = data_file, mode = 'wb')
}
## bzfile() decompresses on the fly, so the archive is never unpacked to disk.
data <- read.csv(bzfile(data_file), sep = ',', header = TRUE)
library(ggplot2)
## Total fatalities per event type; na.rm = TRUE is forwarded to sum().
data2 <- aggregate(FATALITIES ~ EVTYPE, data, sum, na.rm = TRUE)
## Rank event types from most to fewest fatalities.
data3 <- data2[order(-data2$FATALITIES), ]
## Keep the ten deadliest. head() returns fewer rows rather than NA-filled
## padding when there are under ten event types.
data3 <- head(data3, 10)
## Bar chart; reorder() makes the x axis follow the fatality ranking.
ggplot(data3, aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_bar(stat = 'identity') +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = 'Event type', y = 'Number of fatalities',
       caption = 'Top 10 fatalities by event types')
Conclusion: Tornadoes cause by far the most fatalities of all event types.
## Convert PROPDMG and CROPDMG to dollar amounts using their exponent columns.
## H/K/M/B stand for hundreds, thousands, millions and billions.
damage_multiplier <- function(codes) {
  ## Returns one numeric multiplier per element of `codes`.
  ## Codes are matched case-insensitively so that e.g. 'k' and 'K' scale the
  ## same way. NOTE(review): the raw data is reported to contain lower-case
  ## codes -- confirm with table(data$PROPDMGEXP) and table(data$CROPDMGEXP).
  ## Any other code (blank, NA, digits, symbols) leaves the value unchanged.
  known <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
  idx <- match(toupper(as.character(codes)), names(known))
  multiplier <- unname(known[idx])
  multiplier[is.na(idx)] <- 1
  multiplier
}
## One vectorised multiplication per column; unlike logical row-subset
## assignment this does not fail when an exponent code is NA.
data$PROPDMG <- data$PROPDMG * damage_multiplier(data$PROPDMGEXP)
data$CROPDMG <- data$CROPDMG * damage_multiplier(data$CROPDMGEXP)
## Total economic damage (crop + property) per event type; na.rm = TRUE is
## forwarded to sum().
data4 <- aggregate(CROPDMG + PROPDMG ~ EVTYPE, data, sum, na.rm = TRUE)
## The formula interface names the summed column after the expression, so
## give both columns plain names.
names(data4) <- c('EVTYPE', 'TOTDAMAGE')
## Rank event types from most to least total damage.
data5 <- data4[order(-data4$TOTDAMAGE), ]
## Keep the ten costliest. head() returns fewer rows rather than NA-filled
## padding when there are under ten event types.
data5 <- head(data5, 10)
## Bar chart; reorder() makes the x axis follow the damage ranking.
ggplot(data5, aes(x = reorder(EVTYPE, -TOTDAMAGE), y = TOTDAMAGE)) +
  geom_bar(stat = 'identity') +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = 'Event type', y = 'Economic damage ($)',
       caption = 'Top 10 economic damage by event types')
Conclusion: Floods cause by far the most economic damage of all event types.