The purpose of this brief study is to determine which types of weather events 1) are most harmful to population health and 2) have the greatest economic consequences. The data used in the study come from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The database records major storms and weather events, including their time, location, estimated fatalities, injuries, and property damage. The study answers the following two questions:
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
Load the necessary libraries
library(dplyr); library(ggplot2); library(knitr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'ggplot2' was built under R version 3.2.2
## Warning: package 'knitr' was built under R version 3.2.2
Load the data
# Path to the raw storm data CSV, relative to the current working directory.
file <- "./Coursera_5. Reproducible Research/repdata-data-StormData.csv/repdata-data-StormData.csv"
# Read the CSV with its header row; text columns stay character vectors
# rather than being converted to factors.
df <- read.csv(
  file,
  header = TRUE,
  stringsAsFactors = FALSE
)
Process the data for question #1.
# Keep only the event type and the two casualty counts, and drop any row
# that has a missing value in one of them.
df1 <- na.omit(select(df, EVTYPE, FATALITIES, INJURIES))
# Sum fatalities plus injuries within each event type, label the total
# DMGTOPOP, and sort so the most harmful event types come first.
dmgpop <- aggregate(FATALITIES + INJURIES ~ EVTYPE, data = df1, FUN = sum)
dmgpop <- setNames(dmgpop, c("EVTYPE", "DMGTOPOP"))
dmgpop <- arrange(dmgpop, desc(DMGTOPOP))
Process the data for question #2.
# Extract the event type, the property damage figure and its magnitude code,
# dropping rows with missing values.
df2 <- select(df, EVTYPE, PROPDMG, PROPDMGEXP)
df2 <- na.omit(df2)
# Convert PROPDMGEXP into an integer power of ten:
#   H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9, a digit string -> that number,
#   anything else (empty, "+", "-", "?", ...) -> 0.
# An explicit case-insensitive lookup is used instead of regex ranges such as
# "[M-m]": a range spans every character between its two endpoints, so
# "[M-m]" also matches "h", "k" and "b" and would turn them all into 10^6.
exp_code <- toupper(trimws(df2$PROPDMGEXP))
letter_power <- c(H = 2L, K = 3L, M = 6L, B = 9L)
# Lookup by name; codes that are not one of the four letters yield NA here.
exp_power <- unname(letter_power[exp_code])
# Purely numeric codes are taken literally as the exponent.
is_digit <- grepl("^[0-9]+$", exp_code)
exp_power[is_digit] <- as.integer(exp_code[is_digit])
# Unrecognised codes leave PROPDMG unscaled (10^0 = 1).
exp_power[is.na(exp_power)] <- 0L
df2$PROPDMGEXP <- exp_power
# Scale a value by a power of ten.
#   x: numeric value(s) to scale.
#   y: exponent(s); each x is multiplied by 10^y (vectorised, with recycling).
# Returns x * 10^y.
combineEXP <- function(x, y) {
  x * 10^y
}
# Scale each damage figure by its power-of-ten exponent, then drop the
# exponent column, which is no longer needed.
df2[["PROPDMG"]] <- combineEXP(df2[["PROPDMG"]], df2[["PROPDMGEXP"]])
df2[["PROPDMGEXP"]] <- NULL
# Total the property damage per event type and sort so the costliest
# event types come first.
propdmg <- arrange(
  aggregate(PROPDMG ~ EVTYPE, data = df2, FUN = sum),
  desc(PROPDMG)
)
For Question #1: The top 10 events that cause the most damage to population health, and the corresponding bar plot, are shown below.
# Show the ten most harmful event types; head() returns fewer rows instead of
# padding with NA rows when the table has fewer than ten entries.
head(dmgpop, 10)
## EVTYPE DMGTOPOP
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
# Bar plot of the ten most harmful event types. reorder() sorts the bars from
# largest to smallest total instead of alphabetically, so the ranking is
# visible; labs() keeps the axis titles as the plain column names.
ggplot(head(dmgpop, 10), aes(x = reorder(EVTYPE, -DMGTOPOP), y = DMGTOPOP)) +
  geom_bar(stat = "identity") +
  labs(x = "EVTYPE", y = "DMGTOPOP") +
  theme(axis.text.x = element_text(angle = 90))
For Question #2: The top 10 events that cause the most property damage, and the corresponding bar plot, are shown below.
# Show the ten costliest event types; head() returns fewer rows instead of
# padding with NA rows when the table has fewer than ten entries.
head(propdmg, 10)
## EVTYPE PROPDMG
## 1 FLOOD 144657709807
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56947380677
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16822673979
## 6 HAIL 15735267513
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
## 9 WINTER STORM 6688497251
## 10 HIGH WIND 5270046295
# Bar plot of the ten costliest event types. reorder() sorts the bars from
# largest to smallest total instead of alphabetically, so the ranking is
# visible; labs() keeps the axis titles as the plain column names.
ggplot(head(propdmg, 10), aes(x = reorder(EVTYPE, -PROPDMG), y = PROPDMG)) +
  geom_bar(stat = "identity") +
  labs(x = "EVTYPE", y = "PROPDMG") +
  theme(axis.text.x = element_text(angle = 90))