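The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data analysis must address the following questions: (1) which types of events are most harmful with respect to population health, and (2) which types of events have the greatest economic consequences.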
# Create the working directory for this analysis (if needed) and switch to it.
# NOTE: the script changes the working directory here and moves back up with
# setwd("../") on its last line; the two calls must stay paired.
if (!file.exists("project")) {
  dir.create("project")
}
setwd("./project")
getwd()
## [1] "C:/Users/Houying/Documents/R/20180915/project"
# Download the data
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data_file <- "./data.bz2"
# Only download when no local copy exists, so re-running the analysis does not
# fetch the archive again. The download goes to a temporary ".part" name and is
# renamed on success, so an interrupted transfer is never mistaken for a
# complete file on the next run.
if (!file.exists(data_file)) {
  partial_file <- paste0(data_file, ".part")
  # mode = "wb": the archive is binary; a text-mode transfer corrupts it on
  # Windows.
  download.file(URL, destfile = partial_file, mode = "wb")
  file.rename(partial_file, data_file)
}
# Read in the file (read.csv decompresses the bzip2 archive transparently)
data <- read.csv(data_file)
# Total FATALITIES and INJURIES per event type (EVTYPE), followed by the five
# event types with the largest total of each measure.
library(dplyr)
summarybyhealth <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  )
# Top five by total fatalities.
topfatalities <- summarybyhealth %>%
  arrange(desc(FATALITIES)) %>%
  head(5)
# Top five by total injuries. The variable name (sic) is kept because later
# parts of the script refer to it.
topinjuiries <- summarybyhealth %>%
  arrange(desc(INJURIES)) %>%
  head(5)
# Group the data by Event Type and summarize the total economic consequences
# (property damage + crop damage, in US dollars) for each group, and then get
# the top five events.
#
# PROPDMG / CROPDMG hold only the leading figure of each damage estimate; the
# magnitude is stored separately in PROPDMGEXP / CROPDMGEXP ("K" = thousands,
# "M" = millions, "B" = billions). Summing the raw figures without scaling
# treats a 5-billion-dollar loss the same as a 5-thousand-dollar one, so every
# amount is multiplied by its magnitude before it is summed.
#
# NOTE: any previously recorded output or conclusion that was based on the
# unscaled sums should be regenerated after this change.

# Translate a vector of magnitude codes into numeric multipliers.
# Codes are matched case-insensitively. Codes other than K/M/B (blank, digits,
# "+", "-", "?", ...) keep a multiplier of 1, i.e. the raw figure is used
# unchanged -- assumption: their meaning is not documented; confirm against
# the NWS Storm Data documentation before relying on those records.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

summarybyeconomic <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    economicconsequences =
      sum(PROPDMG * damage_multiplier(PROPDMGEXP)) +
      sum(CROPDMG * damage_multiplier(CROPDMGEXP))
  )
topdmg <- head(arrange(summarybyeconomic, desc(economicconsequences)), 5)
# Display the top five events causing most fatalities and injuries and generate a bar chart as well
par(mfrow = c(2, 1)) # two stacked panels: fatalities on top, injuries below
print("The top5 events causing most fatalities and injuries are shown as follows:")
## [1] "The top5 events causing most fatalities and injuries are shown as follows:"
# Explicit print() so the tables are also shown when the script is source()d.
print(topfatalities)
## # A tibble: 5 x 3
## EVTYPE FATALITIES INJURIES
## <fct> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
print(topinjuiries)
## # A tibble: 5 x 3
## EVTYPE FATALITIES INJURIES
## <fct> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 TSTM WIND 504 6957
## 3 FLOOD 470 6789
## 4 EXCESSIVE HEAT 1903 6525
## 5 LIGHTNING 816 5230
# Columns are referenced explicitly instead of attach()/detach(): attach()
# puts the columns on the search path, where they can mask or be masked by
# other objects, and leaves them there if the script stops before detach().
barplot(
  height = topfatalities$FATALITIES,
  names.arg = topfatalities$EVTYPE,
  main = "Top Events Causing Fatalities",
  xlab = "Events",
  cex.names = 0.7
)
barplot(
  height = topinjuiries$INJURIES,
  names.arg = topinjuiries$EVTYPE,
  main = "Top Events Causing Injuries",
  xlab = "Events",
  cex.names = 0.7
)
# Derive the conclusion from the data instead of hard-coding it: take the
# leading event of each ranking (converted to character first, because
# combining factors directly can yield integer codes) and drop the duplicate
# when both rankings agree.
most_harmful <- unique(c(
  as.character(topfatalities$EVTYPE[1]),
  as.character(topinjuiries$EVTYPE[1])
))
print(paste0(
  "It is shown from the plots above that the type of events that are most harmful to population health is ",
  paste(most_harmful, collapse = " and ")
))
## [1] "It is shown from the plots above that the type of events that are most harmful to population health is TORNADO"
# Display the top five events causing highest economic consequences and generate a bar chart as well
par(mfrow = c(1, 1)) # back to a single plot panel
print("The top5 events having greatest economic consequences are shown as follows:")
## [1] "The top5 events having greatest economic consequences are shown as follows:"
# Explicit print() so the table is also shown when the script is source()d.
print(topdmg)
## # A tibble: 5 x 2
## EVTYPE economicconsequences
## <fct> <dbl>
## 1 TORNADO 3312277.
## 2 FLASH FLOOD 1599325.
## 3 TSTM WIND 1445168.
## 4 HAIL 1268290.
## 5 FLOOD 1067976.
# Columns are referenced explicitly instead of attach()/detach(): attach()
# puts the columns on the search path, where they can mask or be masked by
# other objects, and leaves them there if the script stops before detach().
barplot(
  height = topdmg$economicconsequences,
  names.arg = topdmg$EVTYPE,
  main = "Top Events Causing Economic Consequences",
  xlab = "Events",
  cex.names = 0.7
)
# Report the leading event from the ranking itself rather than a hard-coded
# name, so the printed conclusion always agrees with the table and plot above.
top_economic_event <- as.character(topdmg$EVTYPE[1])
print(paste0(
  "It is shown from the plot above that the type of events that have greatest economic consequences is ",
  top_economic_event
))
## [1] "It is shown from the plot above that the type of events that have greatest economic consequences is TORNADO"
# Leave the "project" directory entered at the start of the script.
setwd("../")