This analysis is based on the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. Its purpose is to determine, across the United States,
which types of events are most harmful with respect to population health (fatalities and injuries), and
which types of events have the greatest economic consequences (property damage and crop damage).
# Fetch the NOAA storm data archive and load it into `storm`.
# The download is skipped when the archive is already on disk, so re-running
# the analysis does not transfer the whole file again.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "StormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb": the archive is binary, and a text-mode transfer would
  # corrupt it on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
# The compressed archive is passed straight to read.csv(), as before.
storm <- read.csv(storm_file)
library(dplyr)
library(scales)
# Health harm per event type: fatalities and injuries are added together for
# every record, totalled within each EVTYPE, and ordered from the largest
# total to the smallest.
sum_HARM <- group_by(storm, EVTYPE) %>%
  summarise(sum_HARM = sum(FATALITIES + INJURIES)) %>%
  arrange(-sum_HARM)

# Share of the overall harm accounted for by the first ten rows, i.e. the ten
# highest-ranked event types (the table is already sorted).
top10ratio_HARM <- sum(sum_HARM[["sum_HARM"]][seq_len(10)]) /
  sum(sum_HARM[["sum_HARM"]])
top10ratio_HARM
## [1] 0.8811868
The top 10 event types caused 88.1% of the total health harm.
The list and graph of the top 10 event types:
# Show the ten event types with the largest health-harm totals.
sum_HARM %>% head(n = 10)
## # A tibble: 10 × 2
## EVTYPE sum_HARM
## <fctr> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
library(ggplot2)
# Bar chart of the ten most harmful event types. Bars are ordered from the
# largest total to the smallest and filled with one colour per event type.
ggplot(
  data = head(sum_HARM, 10),
  mapping = aes(x = reorder(EVTYPE, -sum_HARM), y = sum_HARM, fill = EVTYPE)
) +
  geom_col() +
  scale_y_continuous(labels = scientific) +
  ggtitle("Health Harms by Event Types") +
  xlab("Event Type") +
  ylab("Health Harm") +
  theme(
    # The x axis already names every event type, so the fill legend is hidden.
    legend.position = "none",
    axis.text.x = element_text(size = 8, angle = 45)
  )
# Economic damage per event type, in US dollars, sorted from the largest
# total to the smallest.
#
# PROPDMG and CROPDMG hold only the significand of each damage estimate; the
# magnitude is recorded separately in PROPDMGEXP / CROPDMGEXP ("K" =
# thousands, "M" = millions, "B" = billions). Adding the raw significands
# treats thousands and billions as the same unit, so every value is scaled to
# dollars before it is aggregated.
#
# NOTE: any echoed output that was rendered before this scaling was added
# (ratio and top-10 table) shows unscaled sums; re-knit the report to
# refresh it.

# Convert a vector of magnitude codes into numeric multipliers.
# Matching is case-insensitive. Codes other than K/M/B (including blanks) are
# not defined by the NOAA documentation and are left unscaled (multiplier 1).
# NOTE(review): confirm this treatment of undocumented codes.
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

sum_DAMAGE <- storm %>%
  mutate(
    damage_usd = PROPDMG * damage_multiplier(PROPDMGEXP) +
      CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(sum_DAMAGE = sum(damage_usd)) %>%
  arrange(desc(sum_DAMAGE))

# Share of the total damage caused by the ten costliest event types.
top10ratio_DAMAGE <- sum(sum_DAMAGE$sum_DAMAGE[1:10]) /
  sum(sum_DAMAGE$sum_DAMAGE)
top10ratio_DAMAGE
## [1] 0.9121675
The top 10 event types caused 91.2% of the total economic damage.
The list and graph of the top 10 event types:
# Show the ten event types with the largest economic-damage totals.
sum_DAMAGE %>% head(n = 10)
## # A tibble: 10 × 2
## EVTYPE sum_DAMAGE
## <fctr> <dbl>
## 1 TORNADO 3312276.7
## 2 FLASH FLOOD 1599325.1
## 3 TSTM WIND 1445168.2
## 4 HAIL 1268289.7
## 5 FLOOD 1067976.4
## 6 THUNDERSTORM WIND 943635.6
## 7 LIGHTNING 606932.4
## 8 THUNDERSTORM WINDS 464978.1
## 9 HIGH WIND 342014.8
## 10 WINTER STORM 134699.6
# Bar chart of the ten costliest event types. Bars are ordered from the
# largest total to the smallest and filled with one colour per event type.
ggplot(
  data = head(sum_DAMAGE, 10),
  mapping = aes(
    x = reorder(EVTYPE, -sum_DAMAGE),
    y = sum_DAMAGE,
    fill = EVTYPE
  )
) +
  geom_col() +
  scale_y_continuous(labels = scientific) +
  ggtitle("Economic Damages by Event Types") +
  xlab("Event Type") +
  ylab("Economic Damage") +
  theme(
    # The x axis already names every event type, so the fill legend is hidden.
    legend.position = "none",
    axis.text.x = element_text(size = 8, angle = 45)
  )