This project involves exploring the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The main goal is to answer the following two questions:
Across the United States, which types of events are most harmful with respect to population health?
Across the United States, which types of events have the greatest economic consequences?
Load the required library:
library(ggplot2)
Processing data
# Fetch the compressed NOAA storm data once; later runs reuse the local copy.
storm_file <- "Storm_Data.csv.bz2"
# Assembled from pieces so the long URL stays free of embedded whitespace or
# line breaks, which would make the request point at the wrong resource.
storm_url <- paste0(
  "https://d396qusza40orc.cloudfront.net/",
  "repdata%2Fdata%2FStormData.csv.bz2"
)
if (!file.exists(storm_file)) {
  download.file(storm_url, storm_file, method = "curl")
}

# read.csv() is handed the .bz2 path directly. The variable name `table` is
# kept for compatibility with the rest of the script, although it shares its
# name with base::table().
table <- read.csv(storm_file)

# Keep only the event type and the three impact measures used below.
columns <- c("EVTYPE", "INJURIES", "FATALITIES", "PROPDMG")
filter_table <- table[, columns]

# Total each measure per event type.
# NOTE(review): PROPDMG is summed exactly as recorded. If the data set stores
# a separate magnitude/exponent column for damage, these totals are not
# comparable dollar amounts -- confirm before interpreting them.
# NOTE(review): EVTYPE is grouped verbatim, so spelling variants of the same
# event (e.g. "TSTM WIND" vs "THUNDERSTORM WINDS") are counted separately.
result <- aggregate(
  cbind(INJURIES, FATALITIES, PROPDMG) ~ EVTYPE,
  data = filter_table,
  FUN = sum
)
# Ensure EVTYPE is a plain character vector (it may have been read as factor).
result$EVTYPE <- as.character(result$EVTYPE)
Get the total number of different event types:
# Count the distinct event-type labels present in the aggregated totals.
length(unique(result[["EVTYPE"]]))
## [1] 977
Get the 10 most harmful event types for each measure:
# Return the `n` rows of `totals` with the largest values in column `measure`,
# keeping only the event type and that measure.
#
# totals:  data frame with an EVTYPE column and a numeric `measure` column.
# measure: name (string) of the column to rank by.
# n:       maximum number of rows to return; head() returns fewer rows when
#          `totals` is shorter, instead of padding with NA rows.
top_events <- function(totals, measure, n = 10L) {
  ranked <- totals[
    order(totals[[measure]], decreasing = TRUE),
    c("EVTYPE", measure)
  ]
  head(ranked, n)
}

# Ten highest-ranked event types for each impact measure.
injuries_top10 <- top_events(result, "INJURIES")
fatalities_top10 <- top_events(result, "FATALITIES")
dmg_top10 <- top_events(result, "PROPDMG")
Top 10 events causing injuries:
# Bar chart of the top-10 injury totals, with event types ordered along the
# x axis by their injury count. Columns are referenced by bare name inside
# aes() so ggplot2 resolves them in `injuries_top10` through its data mask
# rather than through `$` lookups on the outer object.
plot1 <- ggplot(
  injuries_top10,
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = -90)) +
  labs(x = "Events", title = "Top 10 events cause injuries")
print(plot1)

# Label the rows with the event type, then display the table.
# NOTE(review): the fatalities and property-damage tables reset row names to
# NULL instead; kept as-is here so the printed table is unchanged.
rownames(injuries_top10) <- injuries_top10$EVTYPE
injuries_top10
## EVTYPE INJURIES
## TORNADO TORNADO 80690
## TSTM WIND TSTM WIND 6683
## FLOOD FLOOD 6606
## EXCESSIVE HEAT EXCESSIVE HEAT 4652
## LIGHTNING LIGHTNING 4022
## ICE STORM ICE STORM 1963
## FLASH FLOOD FLASH FLOOD 1442
## HURRICANE/TYPHOON HURRICANE/TYPHOON 1272
## WINTER STORM WINTER STORM 1255
## HAIL HAIL 1162
Top 10 events causing fatalities:
# Bar chart of the top-10 fatality totals, with event types ordered along the
# x axis by their fatality count. Columns are referenced by bare name inside
# aes() so ggplot2 resolves them in `fatalities_top10` through its data mask
# rather than through `$` lookups on the outer object.
plot2 <- ggplot(
  fatalities_top10,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = -90)) +
  labs(x = "Events", title = "Top 10 events cause fatalities")
print(plot2)

# Drop the original row indices so the table prints with ranks 1..n.
rownames(fatalities_top10) <- NULL
fatalities_top10
## EVTYPE FATALITIES
## 1 TORNADO 4700
## 2 EXCESSIVE HEAT 1527
## 3 HEAT 708
## 4 FLASH FLOOD 622
## 5 LIGHTNING 608
## 6 TSTM WIND 488
## 7 FLOOD 287
## 8 RIP CURRENTS 204
## 9 HIGH WIND 199
## 10 WINTER STORM 182
Top 10 events causing property damage:
# Bar chart of the top-10 PROPDMG totals, with event types ordered along the
# x axis by their PROPDMG total. Columns are referenced by bare name inside
# aes() so ggplot2 resolves them in `dmg_top10` through its data mask rather
# than through `$` lookups on the outer object.
plot3 <- ggplot(
  dmg_top10,
  aes(x = reorder(EVTYPE, PROPDMG), y = PROPDMG)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = -90)) +
  labs(x = "Events", title = "Top 10 events cause lossing properties")
print(plot3)

# Drop the original row indices so the table prints with ranks 1..n.
rownames(dmg_top10) <- NULL
dmg_top10
## EVTYPE PROPDMG
## 1 TORNADO 2653286
## 2 TSTM WIND 1195630
## 3 FLASH FLOOD 845838
## 4 THUNDERSTORM WINDS 446293
## 5 HAIL 441428
## 6 FLOOD 409389
## 7 LIGHTNING 376062
## 8 HIGH WIND 186278
## 9 HEAVY SNOW 99542
## 10 WINTER STORM 71162