Synopsis

We analyzed the NOAA Storm Events Database (1950–2011) to identify which types of weather events caused the greatest harm to population health and the highest economic losses in the United States. After cleaning and standardizing event labels, we converted property and crop damage figures into USD using exponent codes. Tornadoes emerged as the most harmful to human health, responsible for the highest combined fatalities and injuries. In contrast, floods, hurricanes, and storm surges caused the most economic damage, with property losses far outweighing crop losses. These findings highlight the need for targeted disaster preparedness and mitigation strategies based on event-specific risk profiles.

Data processing

# Download raw compressed CSV if missing
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
bz <- "StormData.csv.bz2"
if (!file.exists(bz)) download.file(url, destfile = bz, mode = "wb", quiet = TRUE)

# Read directly from compressed file
dat <- read.csv(bz, stringsAsFactors = FALSE)

# Keep only columns we need
dat <- dat %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Quick structure and basic NA check for reproducibility log
list(
  n_rows = nrow(dat),
  n_cols = ncol(dat),
  na_counts = sapply(dat, function(x) sum(is.na(x)))
)
## $n_rows
## [1] 902297
## 
## $n_cols
## [1] 7
## 
## $na_counts
##     EVTYPE FATALITIES   INJURIES    PROPDMG PROPDMGEXP    CROPDMG CROPDMGEXP 
##          0          0          0          0          0          0          0
# Normalize event type (trim and upper-case)
dat <- dat %>%
  mutate(EVTYPE = str_to_upper(str_trim(EVTYPE)))

# Map exponent codes to numeric multipliers
exp_map <- function(x) {
  x <- str_to_upper(str_trim(as.character(x)))
  dplyr::case_when(
    x == "K" ~ 1e3,
    x == "M" ~ 1e6,
    x == "B" ~ 1e9,
    x %in% c("", NA) ~ 1,
    str_detect(x, "^[0-9]$") ~ 10^as.numeric(x), # rare numeric codes
    TRUE ~ 1 # treat unknown conservatively as 1
  )
}

dat <- dat %>%
  mutate(
    PROP_MULT = exp_map(PROPDMGEXP),
    CROP_MULT = exp_map(CROPDMGEXP),
    PROP_DMG_USD = PROPDMG * PROP_MULT,
    CROP_DMG_USD = CROPDMG * CROP_MULT,
    ECON_DMG_USD = PROP_DMG_USD + CROP_DMG_USD
  )

# Sanity checks
stopifnot(all(dat$PROP_MULT >= 1, na.rm = TRUE))
stopifnot(all(dat$CROP_MULT >= 1, na.rm = TRUE))

summary(select(dat, FATALITIES, INJURIES, PROP_DMG_USD, CROP_DMG_USD))
##    FATALITIES           INJURIES          PROP_DMG_USD      
##  Min.   :  0.00000   Min.   :   0.0000   Min.   :0.000e+00  
##  1st Qu.:  0.00000   1st Qu.:   0.0000   1st Qu.:0.000e+00  
##  Median :  0.00000   Median :   0.0000   Median :0.000e+00  
##  Mean   :  0.01678   Mean   :   0.1557   Mean   :4.746e+05  
##  3rd Qu.:  0.00000   3rd Qu.:   0.0000   3rd Qu.:5.000e+02  
##  Max.   :583.00000   Max.   :1700.0000   Max.   :1.150e+11  
##   CROP_DMG_USD      
##  Min.   :0.000e+00  
##  1st Qu.:0.000e+00  
##  Median :0.000e+00  
##  Mean   :5.442e+04  
##  3rd Qu.:0.000e+00  
##  Max.   :5.000e+09
health <- dat %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES   = sum(INJURIES,   na.rm = TRUE),
    TOTAL_HARM = FATALITIES + INJURIES,
    .groups = "drop"
  ) %>%
  arrange(desc(TOTAL_HARM)) %>%
  slice_head(n = 10)

health_long <- health %>%
  pivot_longer(c(FATALITIES, INJURIES), names_to = "type", values_to = "count")

ggplot(health_long, aes(x = reorder(EVTYPE, TOTAL_HARM), y = count, fill = type)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("FATALITIES" = "#cb181d", "INJURIES" = "#3182bd")) +
  labs(x = "Event type", y = "People affected", fill = "", title = "Population health impact: Top 10 event types") +
  theme_minimal(base_size = 12)
Top 10 event types by total injuries and fatalities (1950–2011).

Top 10 event types by total injuries and fatalities (1950–2011).

econ <- dat %>%
  group_by(EVTYPE) %>%
  summarise(
    PROP = sum(PROP_DMG_USD, na.rm = TRUE),
    CROP = sum(CROP_DMG_USD, na.rm = TRUE),
    ECON = PROP + CROP,
    .groups = "drop"
  ) %>%
  arrange(desc(ECON)) %>%
  slice_head(n = 10)

econ_long <- econ %>%
  pivot_longer(c(PROP, CROP), names_to = "type", values_to = "usd")

ggplot(econ_long, aes(x = reorder(EVTYPE, ECON), y = usd/1e9, fill = type)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("PROP" = "#636363", "CROP" = "#31a354"), labels = c("Property", "Crop")) +
  labs(x = "Event type", y = "Damage (Billions USD)", fill = "", title = "Economic consequences: Top 10 event types") +
  theme_minimal(base_size = 12)
Top 10 event types by economic damage (property + crop), billions USD (1950–2011).

Top 10 event types by economic damage (property + crop), billions USD (1950–2011).

sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 27938)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Asia/Dhaka
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] tidyr_1.3.1   stringr_1.5.1 readr_2.1.5   ggplot2_3.5.2 dplyr_1.1.4  
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6       jsonlite_2.0.0     compiler_4.5.1     tidyselect_1.2.1  
##  [5] jquerylib_0.1.4    scales_1.4.0       yaml_2.3.10        fastmap_1.2.0     
##  [9] R6_2.6.1           generics_0.1.4     knitr_1.50         tibble_3.3.0      
## [13] bslib_0.9.0        pillar_1.11.0      RColorBrewer_1.1-3 tzdb_0.5.0        
## [17] rlang_1.1.6        cachem_1.1.0       stringi_1.8.7      xfun_0.52         
## [21] sass_0.4.10        cli_3.6.5          withr_3.0.2        magrittr_2.0.3    
## [25] digest_0.6.37      grid_4.5.1         rstudioapi_0.17.1  hms_1.1.3         
## [29] lifecycle_1.0.4    vctrs_0.6.5        evaluate_1.0.4     glue_1.8.0        
## [33] farver_2.1.2       purrr_1.1.0        rmarkdown_2.29     tools_4.5.1       
## [37] pkgconfig_2.0.3    htmltools_0.5.8.1