Synopsis

This report analyzes how storms and other severe weather events cause both public health and economic problems, using the data published by NOAA. The final results identify which weather event is most harmful with respect to population health and which one has the greatest economic consequences. The following two figures show only the top 10 most harmful weather events.

Data Processing

knitr::opts_chunk$set(fig.width=15, fig.height=10)

Read the data from the csv file

 # Load the raw NOAA storm data from the bzip2-compressed CSV file.
 rawdata <- read.csv(file = "repdata%2Fdata%2FStormData.csv.bz2")

Extract the data related to population health and draw the figure

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(gridExtra)
## Loading required package: gridExtra
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Population health: total the fatalities, the injuries, and their sum per
# event type, then plot the ten worst event types for each measure.
question1a <- aggregate(FATALITIES ~ EVTYPE, rawdata, sum)
question1b <- aggregate(INJURIES ~ EVTYPE, rawdata, sum)
# The left-hand side of the formula becomes the column name
# `INJURIES + FATALITIES`, which is why it is back-quoted below.
question1 <- aggregate(INJURIES + FATALITIES ~ EVTYPE, rawdata, sum)

# Sort each summary from the largest total to the smallest.
question1ay <- question1a %>% arrange(desc(FATALITIES))
question1by <- question1b %>% arrange(desc(INJURIES))
question1y <- question1 %>% arrange(desc(`INJURIES + FATALITIES`))

# Keep the ten largest rows. head() is used instead of [1:10, ] so that no
# all-NA rows are produced if there are fewer than ten event types.
question1atop10 <- head(question1ay, 10)
question1btop10 <- head(question1by, 10)
question1top10 <- head(question1y, 10)

# Flag the TORNADO row so its bar can be drawn in a highlight colour.
question1atop10 <- question1atop10 %>%
  mutate(rank1 = ifelse(EVTYPE == "TORNADO", "yes", "no"))
question1btop10 <- question1btop10 %>%
  mutate(rank1 = ifelse(EVTYPE == "TORNADO", "yes", "no"))
question1top10 <- question1top10 %>%
  mutate(rank1 = ifelse(EVTYPE == "TORNADO", "yes", "no"))

# Horizontal bar charts; guide = "none" hides the legend (the logical form
# guide = FALSE is deprecated in ggplot2).
p11 <- ggplot(question1atop10, aes(EVTYPE, FATALITIES, fill = rank1)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c("yes" = "tomato", "no" = "gray"),
    guide = "none"
  ) +
  coord_flip()
p12 <- ggplot(question1btop10, aes(EVTYPE, INJURIES, fill = rank1)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c("yes" = "tomato", "no" = "gray"),
    guide = "none"
  ) +
  coord_flip()
p13 <- ggplot(
  question1top10,
  aes(EVTYPE, `INJURIES + FATALITIES`, fill = rank1)
) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    values = c("yes" = "tomato", "no" = "gray"),
    guide = "none"
  ) +
  coord_flip()

# Stack the three charts in a single figure.
grid.arrange(p11, p12, p13)

As the figure shows, “TORNADO” is the most harmful event type with respect to population health, causing a combined 95978 injuries and fatalities.

Extract the data related to the economic damage.

# Economic impact: convert the recorded damage figures to dollar amounts,
# total them per event type, and plot the ten costliest event types for
# crop damage, property damage, and both combined.
library(gridExtra)

x <- data.frame(rawdata)

# Exponent codes appear in mixed case; normalise them before mapping.
x$PROPDMGEXP <- toupper(x$PROPDMGEXP)
x$CROPDMGEXP <- toupper(x$CROPDMGEXP)

# Map K / M / B to their multipliers. Any other code is not among the levels
# and therefore becomes NA.
x$PROPDMGEXP <- factor(
  x$PROPDMGEXP,
  levels = c("K", "M", "B"),
  labels = c(10^3, 10^6, 10^9)
)
x$CROPDMGEXP <- factor(
  x$CROPDMGEXP,
  levels = c("K", "M", "B"),
  labels = c(10^3, 10^6, 10^9)
)

# Recover the numeric multiplier from the factor labels and scale the damage
# figures with a single vectorised multiplication.
x$PROPDMGV <- x$PROPDMG * as.numeric(levels(x$PROPDMGEXP))[x$PROPDMGEXP]
x$CROPDMGV <- x$CROPDMG * as.numeric(levels(x$CROPDMGEXP))[x$CROPDMGEXP]

# Rows without a usable multiplier count as zero damage. Indexing the column
# vector directly also works when no value is NA, whereas the row-subset form
# x[is.na(col), ]$col <- 0 raises an error in that case.
x$PROPDMGV[is.na(x$PROPDMGV)] <- 0
x$CROPDMGV[is.na(x$CROPDMGV)] <- 0

# Totals per event type; the combined column is named `PROPDMGV + CROPDMGV`.
question2a <- aggregate(PROPDMGV ~ EVTYPE, x, sum)
question2b <- aggregate(CROPDMGV ~ EVTYPE, x, sum)
question2 <- aggregate(PROPDMGV + CROPDMGV ~ EVTYPE, x, sum)

# Sort each summary from the largest total to the smallest.
question2ay <- question2a %>% arrange(desc(PROPDMGV))
question2by <- question2b %>% arrange(desc(CROPDMGV))
question2y <- question2 %>% arrange(desc(`PROPDMGV + CROPDMGV`))

# Keep the ten largest rows. head() is used instead of [1:10, ] so that no
# all-NA rows are produced if there are fewer than ten event types.
question2atop10 <- head(question2ay, 10)
question2btop10 <- head(question2by, 10)
question2ytop10 <- head(question2y, 10)

# Flag the event type to highlight in each chart.
question2atop10 <- question2atop10 %>%
  mutate(rank1 = ifelse(EVTYPE == "FLOOD", "yes", "no"))
question2btop10 <- question2btop10 %>%
  mutate(rank1 = ifelse(EVTYPE == "DROUGHT", "yes", "no"))
question2ytop10 <- question2ytop10 %>%
  mutate(rank1 = ifelse(EVTYPE == "FLOOD", "yes", "no"))

# Horizontal bar charts on a log10 value axis; guide = "none" hides the
# legend (the logical form guide = FALSE is deprecated in ggplot2).
p1 <- ggplot(question2btop10, aes(EVTYPE, CROPDMGV, fill = rank1)) +
  geom_bar(stat = "identity") +
  scale_y_log10() +
  coord_flip() +
  scale_fill_manual(
    values = c("yes" = "tomato", "no" = "gray"),
    guide = "none"
  )
p2 <- ggplot(question2atop10, aes(EVTYPE, PROPDMGV, fill = rank1)) +
  geom_bar(stat = "identity") +
  scale_y_log10() +
  coord_flip() +
  scale_fill_manual(
    values = c("yes" = "tomato", "no" = "gray"),
    guide = "none"
  )
p3 <- ggplot(
  question2ytop10,
  aes(EVTYPE, `PROPDMGV + CROPDMGV`, fill = rank1)
) +
  geom_bar(stat = "identity") +
  scale_y_log10() +
  scale_fill_manual(
    values = c("yes" = "tomato", "no" = "gray"),
    guide = "none"
  ) +
  coord_flip()

# Stack the three charts in a single figure.
grid.arrange(p1, p2, p3)

The above figure, related to economic consequences, shows only the top 10 most harmful weather events.

Results

For question 1, “TORNADO” is the most harmful event type with respect to population health, causing a combined 95978 injuries and fatalities.
For question 2, “FLOOD” causes the greatest total damage, $1.5e+11.