We analyzed the NOAA Storm Events Database (1950–2011) to identify which types of weather events caused the greatest harm to population health and the highest economic losses in the United States. After cleaning and standardizing event labels, we converted property and crop damage figures into USD using exponent codes. Tornadoes emerged as the most harmful to human health, responsible for the highest combined fatalities and injuries. In contrast, floods, hurricanes, and storm surges caused the most economic damage, with property losses far outweighing crop losses. These findings highlight the need for targeted disaster preparedness and mitigation strategies based on event-specific risk profiles.
# Download raw compressed CSV if missing
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
bz <- "StormData.csv.bz2"
if (!file.exists(bz)) download.file(url, destfile = bz, mode = "wb", quiet = TRUE)
# Read directly from compressed file
dat <- read.csv(bz, stringsAsFactors = FALSE)
# Keep only columns we need
dat <- dat %>%
select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
# Quick structure and basic NA check for reproducibility log
list(
n_rows = nrow(dat),
n_cols = ncol(dat),
na_counts = sapply(dat, function(x) sum(is.na(x)))
)
## $n_rows
## [1] 902297
##
## $n_cols
## [1] 7
##
## $na_counts
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 0 0 0 0 0 0 0
# Normalize event type (trim and upper-case)
dat <- dat %>%
mutate(EVTYPE = str_to_upper(str_trim(EVTYPE)))
# Map exponent codes to numeric multipliers
exp_map <- function(x) {
x <- str_to_upper(str_trim(as.character(x)))
dplyr::case_when(
x == "K" ~ 1e3,
x == "M" ~ 1e6,
x == "B" ~ 1e9,
x %in% c("", NA) ~ 1,
str_detect(x, "^[0-9]$") ~ 10^as.numeric(x), # rare numeric codes
TRUE ~ 1 # treat unknown conservatively as 1
)
}
dat <- dat %>%
mutate(
PROP_MULT = exp_map(PROPDMGEXP),
CROP_MULT = exp_map(CROPDMGEXP),
PROP_DMG_USD = PROPDMG * PROP_MULT,
CROP_DMG_USD = CROPDMG * CROP_MULT,
ECON_DMG_USD = PROP_DMG_USD + CROP_DMG_USD
)
# Sanity checks
stopifnot(all(dat$PROP_MULT >= 1, na.rm = TRUE))
stopifnot(all(dat$CROP_MULT >= 1, na.rm = TRUE))
summary(select(dat, FATALITIES, INJURIES, PROP_DMG_USD, CROP_DMG_USD))
## FATALITIES INJURIES PROP_DMG_USD
## Min. : 0.00000 Min. : 0.0000 Min. :0.000e+00
## 1st Qu.: 0.00000 1st Qu.: 0.0000 1st Qu.:0.000e+00
## Median : 0.00000 Median : 0.0000 Median :0.000e+00
## Mean : 0.01678 Mean : 0.1557 Mean :4.746e+05
## 3rd Qu.: 0.00000 3rd Qu.: 0.0000 3rd Qu.:5.000e+02
## Max. :583.00000 Max. :1700.0000 Max. :1.150e+11
## CROP_DMG_USD
## Min. :0.000e+00
## 1st Qu.:0.000e+00
## Median :0.000e+00
## Mean :5.442e+04
## 3rd Qu.:0.000e+00
## Max. :5.000e+09
health <- dat %>%
group_by(EVTYPE) %>%
summarise(
FATALITIES = sum(FATALITIES, na.rm = TRUE),
INJURIES = sum(INJURIES, na.rm = TRUE),
TOTAL_HARM = FATALITIES + INJURIES,
.groups = "drop"
) %>%
arrange(desc(TOTAL_HARM)) %>%
slice_head(n = 10)
health_long <- health %>%
pivot_longer(c(FATALITIES, INJURIES), names_to = "type", values_to = "count")
ggplot(health_long, aes(x = reorder(EVTYPE, TOTAL_HARM), y = count, fill = type)) +
geom_col() +
coord_flip() +
scale_fill_manual(values = c("FATALITIES" = "#cb181d", "INJURIES" = "#3182bd")) +
labs(x = "Event type", y = "People affected", fill = "", title = "Population health impact: Top 10 event types") +
theme_minimal(base_size = 12)
Top 10 event types by total injuries and fatalities (1950–2011).
econ <- dat %>%
group_by(EVTYPE) %>%
summarise(
PROP = sum(PROP_DMG_USD, na.rm = TRUE),
CROP = sum(CROP_DMG_USD, na.rm = TRUE),
ECON = PROP + CROP,
.groups = "drop"
) %>%
arrange(desc(ECON)) %>%
slice_head(n = 10)
econ_long <- econ %>%
pivot_longer(c(PROP, CROP), names_to = "type", values_to = "usd")
ggplot(econ_long, aes(x = reorder(EVTYPE, ECON), y = usd/1e9, fill = type)) +
geom_col() +
coord_flip() +
scale_fill_manual(values = c("PROP" = "#636363", "CROP" = "#31a354"), labels = c("Property", "Crop")) +
labs(x = "Event type", y = "Damage (Billions USD)", fill = "", title = "Economic consequences: Top 10 event types") +
theme_minimal(base_size = 12)
Top 10 event types by economic damage (property + crop), billions USD (1950–2011).
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 27938)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: Asia/Dhaka
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.3.1 stringr_1.5.1 readr_2.1.5 ggplot2_3.5.2 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 jsonlite_2.0.0 compiler_4.5.1 tidyselect_1.2.1
## [5] jquerylib_0.1.4 scales_1.4.0 yaml_2.3.10 fastmap_1.2.0
## [9] R6_2.6.1 generics_0.1.4 knitr_1.50 tibble_3.3.0
## [13] bslib_0.9.0 pillar_1.11.0 RColorBrewer_1.1-3 tzdb_0.5.0
## [17] rlang_1.1.6 cachem_1.1.0 stringi_1.8.7 xfun_0.52
## [21] sass_0.4.10 cli_3.6.5 withr_3.0.2 magrittr_2.0.3
## [25] digest_0.6.37 grid_4.5.1 rstudioapi_0.17.1 hms_1.1.3
## [29] lifecycle_1.0.4 vctrs_0.6.5 evaluate_1.0.4 glue_1.8.0
## [33] farver_2.1.2 purrr_1.1.0 rmarkdown_2.29 tools_4.5.1
## [37] pkgconfig_2.0.3 htmltools_0.5.8.1