Synopsis

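This analysis is based on the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The purpose is to find out, across the United States, which types of events are most harmful to population health and which cause the greatest economic damage.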

Data Processing

# Fetch the compressed NOAA storm data only when no local copy exists, so
# re-running the analysis does not download the archive again.
if (!file.exists("StormData.csv.bz2")) {
    # mode = "wb" requests a binary transfer explicitly, since the target
    # is a bz2 archive rather than a text file.
    download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                  destfile = "StormData.csv.bz2",
                  mode = "wb")
}
# read.csv() is given the compressed file path directly.
storm <- read.csv("StormData.csv.bz2")

Results

1. Event types most harmful to population health

library(dplyr)
library(scales)
# Casualties (fatalities plus injuries) are computed per record, totalled
# for each event type, and sorted from the most to the least harmful.
sum_HARM <- storm %>%
    mutate(harm = FATALITIES + INJURIES) %>%
    group_by(EVTYPE) %>%
    summarise(sum_HARM = sum(harm)) %>%
    arrange(desc(sum_HARM))

# Share of all casualties attributable to the ten most harmful event types.
# Inside with(), `sum_HARM` refers to the column of the summary table.
top10ratio_HARM <- with(sum_HARM, sum(sum_HARM[1:10]) / sum(sum_HARM))
top10ratio_HARM
## [1] 0.8811868

The top 10 event types account for 88.1% of all health harm (fatalities plus injuries).

The list and graph of the top 10 event types:

# Show the ten event types with the largest casualty totals.
sum_HARM %>% head(n = 10)
## # A tibble: 10 × 2
##               EVTYPE sum_HARM
##               <fctr>    <dbl>
## 1            TORNADO    96979
## 2     EXCESSIVE HEAT     8428
## 3          TSTM WIND     7461
## 4              FLOOD     7259
## 5          LIGHTNING     6046
## 6               HEAT     3037
## 7        FLASH FLOOD     2755
## 8          ICE STORM     2064
## 9  THUNDERSTORM WIND     1621
## 10      WINTER STORM     1527
library(ggplot2)
# Bar chart of the ten event types with the largest casualty totals.
# reorder() with the negated total places the tallest bar first.
head(sum_HARM, 10) %>%
    ggplot(aes(x = reorder(EVTYPE, -sum_HARM),
               y = sum_HARM,
               fill = EVTYPE)) +
    geom_bar(stat = "identity") +
    # Totals are shown in scientific notation on the y axis.
    scale_y_continuous(labels = scales::scientific) +
    ggtitle("Health Harms by Event Types") +
    xlab("Event Type") +
    ylab("Health Harm") +
    # The x axis already names each bar, so the fill legend is hidden.
    theme(legend.position = "none",
          axis.text.x = element_text(size = 8, angle = 45))

2. Event types with the greatest economic damage

# Property plus crop damage is computed per record, totalled for each event
# type, and sorted from the most to the least damaging.
# NOTE(review): the NOAA storm data normally carries PROPDMGEXP / CROPDMGEXP
# magnitude columns (e.g. K, M, B) that scale PROPDMG / CROPDMG. These sums
# use the raw values only -- confirm whether the multipliers should be
# applied before aggregating.
sum_DAMAGE <- storm %>%
    mutate(damage = PROPDMG + CROPDMG) %>%
    group_by(EVTYPE) %>%
    summarise(sum_DAMAGE = sum(damage)) %>%
    arrange(desc(sum_DAMAGE))

# Share of all recorded damage attributable to the ten most damaging event
# types. Inside with(), `sum_DAMAGE` refers to the column of the summary table.
top10ratio_DAMAGE <- with(sum_DAMAGE, sum(sum_DAMAGE[1:10]) / sum(sum_DAMAGE))
top10ratio_DAMAGE
## [1] 0.9121675

The top 10 event types account for 91.2% of all economic damage (property plus crop damage).

The list and graph of the top 10 event types:

# Show the ten event types with the largest damage totals.
sum_DAMAGE %>% head(n = 10)
## # A tibble: 10 × 2
##                EVTYPE sum_DAMAGE
##                <fctr>      <dbl>
## 1             TORNADO  3312276.7
## 2         FLASH FLOOD  1599325.1
## 3           TSTM WIND  1445168.2
## 4                HAIL  1268289.7
## 5               FLOOD  1067976.4
## 6   THUNDERSTORM WIND   943635.6
## 7           LIGHTNING   606932.4
## 8  THUNDERSTORM WINDS   464978.1
## 9           HIGH WIND   342014.8
## 10       WINTER STORM   134699.6
# Bar chart of the ten event types with the largest damage totals.
# reorder() with the negated total places the tallest bar first.
head(sum_DAMAGE, 10) %>%
    ggplot(aes(x = reorder(EVTYPE, -sum_DAMAGE),
               y = sum_DAMAGE,
               fill = EVTYPE)) +
    geom_bar(stat = "identity") +
    # Totals are shown in scientific notation on the y axis.
    scale_y_continuous(labels = scales::scientific) +
    ggtitle("Economic Damages by Event Types") +
    xlab("Event Type") +
    ylab("Economic Damage") +
    # The x axis already names each bar, so the fill legend is hidden.
    theme(legend.position = "none",
          axis.text.x = element_text(size = 8, angle = 45))