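The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data analysis must address the following questions: (1) which types of events are most harmful to population health, and (2) which types of events have the greatest economic consequences?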

Data Downloading

# Create the "project" folder on first run and make it the working directory.
# The rest of the script reads "data.bz2" relative to this directory and steps
# back out with setwd("../") at the very end, so the setwd() call is kept.
if (!dir.exists("project")) {
  dir.create("project")
}
setwd("./project")
getwd()
## [1] "C:/Users/Houying/Documents/R/20180915/project"

# Download the data.
# - Skip the transfer when the archive is already present, so re-running the
#   script does not fetch the same file again.
# - mode = "wb" requests a binary transfer explicitly; a text-mode transfer
#   would corrupt the compressed archive on Windows.
# - If the transfer fails, remove any partial file so the next run retries
#   instead of reading a truncated archive.
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("./data.bz2")) {
  tryCatch(
    download.file(URL, destfile = "./data.bz2", mode = "wb"),
    error = function(e) {
      unlink("./data.bz2")
      stop(e)
    }
  )
}

Data Processing

# Load the storm records from the compressed CSV in the working directory
data <- read.csv("data.bz2")

# Health impact: add up FATALITIES and INJURIES per event type, then keep the
# five event types with the largest total for each measure
library(dplyr)
summarybyhealth <- data %>%
  group_by(EVTYPE) %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES))
topfatalities <- summarybyhealth %>%
  arrange(desc(FATALITIES)) %>%
  head(5)
topinjuiries <- summarybyhealth %>%
  arrange(desc(INJURIES)) %>%
  head(5)

# Economic impact: add the PROPDMG total and the CROPDMG total per event type,
# then keep the five event types with the largest combined figure
# NOTE(review): PROPDMG and CROPDMG are summed as raw numbers. If the dataset
# stores their magnitude in separate columns (presumably PROPDMGEXP and
# CROPDMGEXP -- confirm against the NOAA documentation), these totals mix
# units and the ranking should be re-checked.
summarybyeconomic <- data %>%
  group_by(EVTYPE) %>%
  summarise(economicconsequences = sum(PROPDMG) + sum(CROPDMG))
topdmg <- summarybyeconomic %>%
  arrange(desc(economicconsequences)) %>%
  head(5)

Results

# Display the top five events causing the most fatalities and the most
# injuries, and draw one bar chart per ranking.
# The two charts are stacked vertically on one page (2 rows, 1 column).
par(mfrow=c(2,1))
print("The top5 events causing most fatalities and injuries are shown as follows:")
## [1] "The top5 events causing most fatalities and injuries are shown as follows:"
topfatalities
## # A tibble: 5 x 3
##   EVTYPE         FATALITIES INJURIES
##   <fct>               <dbl>    <dbl>
## 1 TORNADO              5633    91346
## 2 EXCESSIVE HEAT       1903     6525
## 3 FLASH FLOOD           978     1777
## 4 HEAT                  937     2100
## 5 LIGHTNING             816     5230
topinjuiries
## # A tibble: 5 x 3
##   EVTYPE         FATALITIES INJURIES
##   <fct>               <dbl>    <dbl>
## 1 TORNADO              5633    91346
## 2 TSTM WIND             504     6957
## 3 FLOOD                 470     6789
## 4 EXCESSIVE HEAT       1903     6525
## 5 LIGHTNING             816     5230
# Columns are read with `$` instead of attach()/detach(): an attached table
# sits behind the global environment on the search path, so a global variable
# with the same name would silently win, and an error inside barplot() would
# leave the table attached.
barplot(
  height = topfatalities$FATALITIES,
  names.arg = topfatalities$EVTYPE,
  main = "Top Events Causing Fatalities",
  xlab = "Events",
  cex.names = 0.7
)
barplot(
  height = topinjuiries$INJURIES,
  names.arg = topinjuiries$EVTYPE,
  main = "Top Events Causing Injuiry",
  xlab = "Events",
  cex.names = 0.7
)

print("It is shown from the plots above that the type of events that are most harmful to population health is TORNADO")
## [1] "It is shown from the plots above that the type of events that are most harmful to population health is TORNADO"
# Display the top five events with the highest economic consequences and draw
# a bar chart of them.
# Return to a single-chart layout for this plot.
par(mfrow=c(1,1))
print("The top5 events having greatest economic consequences are shown as follows:")
## [1] "The top5 events having greatest economic consequences are shown as follows:"
topdmg
## # A tibble: 5 x 2
##   EVTYPE      economicconsequences
##   <fct>                      <dbl>
## 1 TORNADO                 3312277.
## 2 FLASH FLOOD             1599325.
## 3 TSTM WIND               1445168.
## 4 HAIL                    1268290.
## 5 FLOOD                   1067976.
# Columns are read with `$` instead of attach()/detach(), so a global variable
# with the same name cannot shadow them and nothing stays attached if
# barplot() fails.
barplot(
  height = topdmg$economicconsequences,
  names.arg = topdmg$EVTYPE,
  main = "Top Events Causing Economic Consequences",
  xlab = "Events",
  cex.names = 0.7
)

print("It is shown from the plot above that the type of events that have greatest economic consequences is TORNADO")
## [1] "It is shown from the plot above that the type of events that have greatest economic consequences is TORNADO"
# Step back out of the "project" folder entered at the start of the script
setwd("../")