This analysis explores the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Database. The goal is to identify (1) which event types are most harmful to population health and (2) which event types have the greatest economic consequences. The findings can help prioritize resources for severe weather preparedness.
library(dplyr)
library(ggplot2)
# Remote location of the compressed NOAA storm data and its local file name.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
dest <- "StormData.csv.bz2"

# Download only when the archive is not already on disk; binary mode keeps
# the bz2 file intact.
if (!file.exists(dest)) {
  download.file(url, dest, mode = "wb")
}

# read.csv() reads the bz2-compressed file directly from its path.
storm <- read.csv(dest, stringsAsFactors = FALSE)
dim(storm)
## [1] 902297 37

# Keep only the event type plus the health and damage columns used below.
storm2 <- select(
  storm,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Convert damage exponent codes into numeric multipliers.
#
# x: vector of exponent codes; matching is case-insensitive.
#
# Returns a double vector the same length as `x`:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, any other non-missing code -> 1,
#   and NA stays NA.
#
# A lookup table replaces the nested ifelse() chain so the result is always
# of type double, including for zero-length or all-NA input (ifelse() returns
# a logical vector in those cases).
#
# NOTE(review): codes other than K/M/B (e.g. "H", digits, symbols) are
# deliberately left at a multiplier of 1, as in the original mapping --
# confirm this is the intended treatment for the raw data.
exp_map <- function(x) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(x)

  # Unrecognised or missing codes come back from match() as NA.
  out <- unname(multipliers[match(code, names(multipliers))])

  # Unrecognised (but non-missing) codes leave the amount unscaled.
  out[is.na(out) & !is.na(code)] <- 1
  out
}
# Scale the recorded property and crop amounts by their exponent codes, then
# add the two to get the total economic damage of each record.
storm2 <- storm2 %>%
  mutate(
    PROPDMG_REAL = PROPDMG * exp_map(PROPDMGEXP),
    CROPDMG_REAL = CROPDMG * exp_map(CROPDMGEXP),
    ECONOMIC_DMG = PROPDMG_REAL + CROPDMG_REAL
  )
summary(storm2$ECONOMIC_DMG)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00e+00 0.00e+00 0.00e+00 5.28e+05 1.00e+03 1.15e+11
## Most harmful events to population health
# Sum fatalities and injuries per event type and keep the ten event types
# with the largest combined count.
health_impact <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE),
    total = fatalities + injuries
  ) %>%
  arrange(desc(total)) %>%
  slice_head(n = 10)
health_impact
## # A tibble: 10 × 4
## EVTYPE fatalities injuries total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Horizontal bar chart; reorder() sorts the bars by the combined count.
health_impact %>%
  ggplot(aes(x = reorder(EVTYPE, total), y = total)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Most Harmful to Population Health",
    x = "Event Type",
    y = "Total Fatalities and Injuries"
  )
## Events with greatest economic consequences
# Sum the combined property and crop damage per event type and keep the ten
# event types with the largest totals.
economic_impact <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(total_damage = sum(ECONOMIC_DMG, na.rm = TRUE)) %>%
  arrange(desc(total_damage)) %>%
  slice_head(n = 10)
economic_impact
## # A tibble: 10 × 2
## EVTYPE total_damage
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352114049.
## 4 STORM SURGE 43323541000
## 5 HAIL 18758221521.
## 6 FLASH FLOOD 17562129167.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
# Horizontal bar chart; totals are divided by 1e9 so the axis reads in
# billions, matching the axis label.
economic_impact %>%
  ggplot(aes(x = reorder(EVTYPE, total_damage), y = total_damage / 1e9)) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Causing Greatest Economic Damage",
    x = "Event Type",
    y = "Total Economic Damage (Billion USD)"
  )