Synopsis

The purpose of this brief study is to determine which types of weather events cause the most damage to 1) population health and 2) the economy. The data used in the study can be obtained from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The database provides facts about major storms and weather events, including the time, location, estimated fatalities, injuries, and property damage. The study answers the following two questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

  2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

Load the necessary libraries

library(dplyr); library(ggplot2); library(knitr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Warning: package 'ggplot2' was built under R version 3.2.2
## Warning: package 'knitr' was built under R version 3.2.2

Load the data

# Location of the storm data CSV, given relative to the current working directory.
file <- "./Coursera_5. Reproducible Research/repdata-data-StormData.csv/repdata-data-StormData.csv"
# First row holds the column names; keep text columns as character, not factor.
df <- read.csv(file, header = TRUE, stringsAsFactors = FALSE)

Process the data for question #1.

# Keep the event type plus the two casualty columns; discard incomplete rows.
df1 <- na.omit(df[, c("EVTYPE", "FATALITIES", "INJURIES")])

# Total casualties (fatalities + injuries) per event type, largest total first.
dmgpop <- setNames(
    aggregate(FATALITIES + INJURIES ~ EVTYPE, data = df1, FUN = sum),
    c("EVTYPE", "DMGTOPOP")
)
dmgpop <- arrange(dmgpop, desc(DMGTOPOP))

Process the data for question #2.

# Keep the event type, the damage figure and its exponent code; drop incomplete rows.
df2 <- na.omit(df[, c("EVTYPE", "PROPDMG", "PROPDMGEXP")])

# Combine PROPDMG and PROPDMGEXP.
# Translate the letter codes in PROPDMGEXP (H, K, M, B in either case) into the
# power of ten they stand for; digit codes pass through unchanged.
# Each class lists both cases explicitly. A range such as "[M-m]" matches every
# character between "M" and "m" (in ASCII order that includes "h", "k" and "b"),
# so those codes would be rewritten to 6 before their own rule could apply.
df2$PROPDMGEXP <- gsub("[Mm]", "6", df2$PROPDMGEXP)
df2$PROPDMGEXP <- gsub("[Kk]", "3", df2$PROPDMGEXP)
df2$PROPDMGEXP <- gsub("[Hh]", "2", df2$PROPDMGEXP)
df2$PROPDMGEXP <- gsub("[Bb]", "9", df2$PROPDMGEXP)
# Anything that is still not a number (blank or symbol codes) becomes NA here ...
df2$PROPDMGEXP <- as.integer(df2$PROPDMGEXP)
## Warning: NAs introduced by coercion
# ... and is then given exponent 0, so PROPDMG is taken at face value (x 10^0).
df2$PROPDMGEXP[is.na(df2$PROPDMGEXP)] <- 0

# Scale each amount in `x` by ten raised to the matching exponent in `y`.
# Both arguments are used element-wise, so vectors of equal length pair up.
combineEXP <- function(x, y) {
    x * 10^y
}
# Replace the raw figure with its full value, then drop the spent exponent column.
df2[["PROPDMG"]] <- combineEXP(df2[["PROPDMG"]], df2[["PROPDMGEXP"]])
df2[["PROPDMGEXP"]] <- NULL

# Sum property damage within each event type and rank from largest to smallest.
propdmg <- arrange(
    aggregate(PROPDMG ~ EVTYPE, data = df2, FUN = sum),
    desc(PROPDMG)
)

Results

For Question #1: The top 10 events that cause the most damage to population health, and a bar plot of them, are shown below

# head() returns at most 10 rows; `[1:10, ]` would pad with NA rows if fewer exist.
head(dmgpop, 10)
##               EVTYPE DMGTOPOP
## 1            TORNADO    96979
## 2     EXCESSIVE HEAT     8428
## 3          TSTM WIND     7461
## 4              FLOOD     7259
## 5          LIGHTNING     6046
## 6               HEAT     3037
## 7        FLASH FLOOD     2755
## 8          ICE STORM     2064
## 9  THUNDERSTORM WIND     1621
## 10      WINTER STORM     1527
# Order the bars by total casualties (largest first) instead of alphabetically,
# so the chart reads in the same order as the ranking printed above it.
# reorder() changes the default axis title, hence the explicit labels.
ggplot(head(dmgpop, 10), aes(x = reorder(EVTYPE, -DMGTOPOP), y = DMGTOPOP)) +
    geom_bar(stat = "identity") +
    labs(x = "Event type", y = "Fatalities + injuries") +
    theme(axis.text.x = element_text(angle = 90))

For Question #2: The top 10 events that cause the most property damage, and a bar plot of them, are shown below

# head() returns at most 10 rows; `[1:10, ]` would pad with NA rows if fewer exist.
head(propdmg, 10)
##               EVTYPE      PROPDMG
## 1              FLOOD 144657709807
## 2  HURRICANE/TYPHOON  69305840000
## 3            TORNADO  56947380677
## 4        STORM SURGE  43323536000
## 5        FLASH FLOOD  16822673979
## 6               HAIL  15735267513
## 7          HURRICANE  11868319010
## 8     TROPICAL STORM   7703890550
## 9       WINTER STORM   6688497251
## 10         HIGH WIND   5270046295
# Order the bars by total property damage (largest first) instead of
# alphabetically, so the chart reads in the same order as the ranking above it.
# reorder() changes the default axis title, hence the explicit labels.
ggplot(head(propdmg, 10), aes(x = reorder(EVTYPE, -PROPDMG), y = PROPDMG)) +
    geom_bar(stat = "identity") +
    labs(x = "Event type", y = "Property damage") +
    theme(axis.text.x = element_text(angle = 90))