Synopsis: This document analyzes the U.S. NOAA Storm Database, covering the period from 1950 to November 2011, to identify the most impactful severe weather events in the United States. The analysis addresses two primary concerns for municipal managers: public health damage and overall economic consequences. The raw data, loaded directly from the compressed .csv.bz2 file, were processed to standardize economic damage figures using their respective alphanumeric multipliers (K, M, B). The results reveal that tornadoes are by far the most damaging events to public health. Conversely, floods and hurricanes/typhoons are responsible for the greatest overall economic losses. This report provides a quantitative basis for prioritizing disaster preparedness resources.

Load Libraries and Data

library(dplyr)     
library(stringr)  
library(ggplot2)  
library(knitr)     
library(scales)    


# Record the R version, platform, locale, and package versions used for this
# run so the analysis environment can be reproduced.
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_Canada.utf8  LC_CTYPE=English_Canada.utf8   
## [3] LC_MONETARY=English_Canada.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=English_Canada.utf8    
## 
## time zone: America/Guatemala
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] scales_1.4.0  knitr_1.50    ggplot2_4.0.0 stringr_1.5.2 dplyr_1.1.4  
## 
## loaded via a namespace (and not attached):
##  [1] vctrs_0.6.5        cli_3.6.5          rlang_1.1.6        xfun_0.54         
##  [5] stringi_1.8.7      generics_0.1.4     S7_0.2.0           jsonlite_2.0.0    
##  [9] glue_1.8.0         htmltools_0.5.8.1  sass_0.4.10        rmarkdown_2.30    
## [13] grid_4.5.1         evaluate_1.0.5     jquerylib_0.1.4    tibble_3.3.0      
## [17] fastmap_1.2.0      yaml_2.3.10        lifecycle_1.0.4    compiler_4.5.1    
## [21] RColorBrewer_1.1-3 pkgconfig_2.0.3    rstudioapi_0.17.1  farver_2.1.2      
## [25] digest_0.6.37      R6_2.6.1           tidyselect_1.2.1   pillar_1.11.1     
## [29] magrittr_2.0.4     bslib_0.9.0        withr_3.0.2        gtable_0.3.6      
## [33] tools_4.5.1        cachem_1.1.0
# Name of the compressed raw data file; it is read as-is, with no separate
# decompression step.
data_file <- "repdata_data_StormData.csv.bz2"

# Keep text columns as character vectors rather than factors.
storm <- read.csv(file = data_file, stringsAsFactors = FALSE)

dim(storm)   # rows, then columns
## [1] 902297     37
# Inspect the structure of a subset of the columns.
str(storm[c("BGN_DATE", "STATE", "EVTYPE", "FATALITIES", "INJURIES",
            "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")])
## 'data.frame':    902297 obs. of  9 variables:
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
# Translate damage exponent codes into numeric multipliers.
#
# exp: vector of exponent codes. Codes are trimmed and upper-cased before
#      being interpreted, so " k " and "K" are treated alike.
# Returns a numeric vector of multipliers, one per element of `exp`:
#   H -> 1e2, K -> 1e3, M -> 1e6, B -> 1e9, a single digit d -> 10^d,
#   and anything else (blank, NA, other symbols) -> 1.
exp_to_num <- function(exp) {
  code <- toupper(trimws(exp))

  # Letter codes and their multipliers, aligned by position.
  letter_codes <- c("H", "K", "M", "B")
  letter_mult  <- c(1e2, 1e3, 1e6, 1e9)
  mult <- letter_mult[match(code, letter_codes)]

  # A single digit is read as a power of ten.
  digit_pos <- which(grepl("^[0-9]$", code))
  mult[digit_pos] <- 10 ^ as.numeric(code[digit_pos])

  # Every code that is still unresolved leaves the amount unscaled.
  mult[is.na(mult)] <- 1
  mult
}

# Convert the damage columns to dollar amounts: look up each record's
# multiplier, scale the raw figures, and add property and crop damage together.
storm$PROP_MULT    <- exp_to_num(storm$PROPDMGEXP)
storm$CROP_MULT    <- exp_to_num(storm$CROPDMGEXP)
storm$PROPDMG_USD  <- storm$PROPDMG * storm$PROP_MULT
storm$CROPDMG_USD  <- storm$CROPDMG * storm$CROP_MULT
storm$TOTALDMG_USD <- storm$PROPDMG_USD + storm$CROPDMG_USD

# Quick look at the distribution of per-record total damage.
summary(storm[["TOTALDMG_USD"]])
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.00e+00 0.00e+00 0.00e+00 5.29e+05 1.00e+03 1.15e+11
# Summary of health impact: total fatalities and injuries per event type,
# sorted from the largest combined count to the smallest.
health_by_event <- storm |>
  dplyr::group_by(EVTYPE) |>
  dplyr::summarise(
    Fatalities   = sum(FATALITIES, na.rm = TRUE),
    Injuries     = sum(INJURIES, na.rm = TRUE),
    Health_Total = Fatalities + Injuries,
    .groups      = "drop"
  ) |>
  dplyr::arrange(dplyr::desc(Health_Total))

# Keep the ten event types with the highest combined fatalities and injuries
# and render them as a captioned table.
top_health <- head(health_by_event, n = 10)
top_health |>
  knitr::kable(
    caption = "Top 10 event types by total population health impact (fatalities + injuries)."
  )
Top 10 event types by total population health impact (fatalities + injuries).
EVTYPE Fatalities Injuries Health_Total
TORNADO 5633 91346 96979
EXCESSIVE HEAT 1903 6525 8428
TSTM WIND 504 6957 7461
FLOOD 470 6789 7259
LIGHTNING 816 5230 6046
HEAT 937 2100 3037
FLASH FLOOD 978 1777 2755
ICE STORM 89 1975 2064
THUNDERSTORM WIND 133 1488 1621
WINTER STORM 206 1321 1527
# Figure 1: horizontal bar chart of the top 10 event types by health impact.
# Event types are reordered by the combined count so the bars appear sorted.
top_health |>
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = reorder(EVTYPE, Health_Total),
      y = Health_Total
    )
  ) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::labs(
    title = "Population Health Impact by Event Type (Top 10)",
    x = "Event Type (EVTYPE)",
    y = "Fatalities + Injuries"
  ) +
  ggplot2::theme_minimal(base_size = 12)

# If plyr is attached, detach and unload it before the dplyr pipelines below.
if (is.element("package:plyr", search())) {
  detach("package:plyr", unload = TRUE)
}


# Fallback: define the exponent-to-multiplier helper only when no object named
# `exp_to_num` is already visible.
if (!exists("exp_to_num")) {
  # Maps exponent codes to multipliers: H -> 1e2, K -> 1e3, M -> 1e6,
  # B -> 1e9, a single digit d -> 10^d, anything else -> 1.
  exp_to_num <- function(exp) {
    code <- toupper(trimws(exp))

    # Start from the default multiplier, then overwrite recognised codes.
    mult <- rep(1, length(code))
    mult[code %in% "H"] <- 1e2
    mult[code %in% "K"] <- 1e3
    mult[code %in% "M"] <- 1e6
    mult[code %in% "B"] <- 1e9

    single_digit <- grepl("^[0-9]$", code)
    mult[single_digit] <- 10 ^ as.numeric(code[single_digit])
    mult
  }
}


# Working copy for the economic analysis: attach the multiplier for each
# exponent code and the scaled property and crop damage amounts.
storm_econ <- storm |>
  dplyr::mutate(
    PROP_MULT   = exp_to_num(PROPDMGEXP),
    CROP_MULT   = exp_to_num(CROPDMGEXP),
    PROPDMG_USD = PROP_MULT * as.numeric(PROPDMG),
    CROPDMG_USD = CROP_MULT * as.numeric(CROPDMG)
  )


# Total property and crop damage per event type, plus their sum, sorted from
# the costliest event type down.
# sum() over as.numeric() input already yields doubles and .groups = "drop"
# already returns an ungrouped table, so no further coercion or ungrouping
# is needed afterwards.
econ_by_event <- storm_econ |>
  dplyr::group_by(EVTYPE) |>
  dplyr::summarise(
    Property_USD = sum(as.numeric(PROPDMG_USD), na.rm = TRUE),
    Crop_USD     = sum(as.numeric(CROPDMG_USD), na.rm = TRUE),
    .groups      = "drop"
  ) |>
  dplyr::mutate(Econ_Total = Property_USD + Crop_USD) |>
  dplyr::arrange(dplyr::desc(Econ_Total))

# Sanity checks: show the column names and the distribution of the totals.
print(names(econ_by_event))
## [1] "EVTYPE"       "Property_USD" "Crop_USD"     "Econ_Total"
print(summary(econ_by_event[["Econ_Total"]]))
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 0.000e+00 0.000e+00 4.846e+08 8.500e+04 1.503e+11
# Keep the ten costliest event types. The numeric table is retained for
# plotting; only the copy passed to kable() has its amounts formatted as
# dollar strings.
top_econ <- head(econ_by_event, n = 10)

top_econ |>
  dplyr::mutate(
    dplyr::across(
      c(Property_USD, Crop_USD, Econ_Total),
      scales::label_dollar()
    )
  ) |>
  knitr::kable(
    caption = "Top 10 event types by total economic impact (USD)."
  )
Top 10 event types by total economic impact (USD).
EVTYPE Property_USD Crop_USD Econ_Total
FLOOD $144,657,709,807 $5,661,968,450 $150,319,678,257
HURRICANE/TYPHOON $69,305,840,000 $2,607,872,800 $71,913,712,800
TORNADO $56,947,380,676 $414,953,270 $57,362,333,946
STORM SURGE $43,323,536,000 $5,000 $43,323,541,000
HAIL $15,735,267,513 $3,025,954,473 $18,761,221,986
FLASH FLOOD $16,822,673,978 $1,421,317,100 $18,243,991,078
DROUGHT $1,046,106,000 $13,972,566,000 $15,018,672,000
HURRICANE $11,868,319,010 $2,741,910,000 $14,610,229,010
RIVER FLOOD $5,118,945,500 $5,029,459,000 $10,148,404,500
ICE STORM $3,944,927,860 $5,022,113,500 $8,967,041,360
# Horizontal bar chart of the top 10 event types by economic impact, with the
# value axis labelled in dollars and bars ordered by total damage.
top_econ |>
  ggplot2::ggplot(
    mapping = ggplot2::aes(
      x = reorder(EVTYPE, Econ_Total),
      y = Econ_Total
    )
  ) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::scale_y_continuous(labels = scales::label_dollar()) +
  ggplot2::labs(
    title = "Economic Impact by Event Type (Top 10)",
    x = "Event Type (EVTYPE)",
    y = "USD (Property + Crop Damages)"
  ) +
  ggplot2::theme_minimal(base_size = 12)

# Fallback: rebuild the per-event health summary only when no object named
# `health_by_event` is already visible.
if (!exists("health_by_event")) {
  health_by_event <- storm |>
    dplyr::group_by(EVTYPE) |>
    dplyr::summarise(
      Fatalities   = sum(FATALITIES, na.rm = TRUE),
      Injuries     = sum(INJURIES, na.rm = TRUE),
      Health_Total = Fatalities + Injuries,
      .groups      = "drop"
    ) |>
    dplyr::arrange(dplyr::desc(Health_Total))
}
# Fallback: rebuild the per-event economic summary only when no object named
# `econ_by_event` is already visible. Inside the guard the multiplier helper
# and the intermediate `storm_econ` table are (re)defined as well.
if (!exists("econ_by_event")) {
  # Safe economic build: exponent code -> multiplier
  # (H, K, M, B, single digit d -> 10^d, anything else -> 1).
  exp_to_num <- function(exp) {
    code <- toupper(trimws(exp))
    mult <- c(1e2, 1e3, 1e6, 1e9)[match(code, c("H", "K", "M", "B"))]
    digit_pos <- which(grepl("^[0-9]$", code))
    mult[digit_pos] <- 10 ^ as.numeric(code[digit_pos])
    mult[is.na(mult)] <- 1
    mult
  }

  # Scaled property and crop damage for every record.
  storm_econ <- storm |>
    dplyr::mutate(
      PROP_MULT   = exp_to_num(PROPDMGEXP),
      CROP_MULT   = exp_to_num(CROPDMGEXP),
      PROPDMG_USD = PROP_MULT * as.numeric(PROPDMG),
      CROPDMG_USD = CROP_MULT * as.numeric(CROPDMG)
    )

  # Totals per event type, costliest first.
  econ_by_event <- storm_econ |>
    dplyr::group_by(EVTYPE) |>
    dplyr::summarise(
      Property_USD = sum(PROPDMG_USD, na.rm = TRUE),
      Crop_USD     = sum(CROPDMG_USD, na.rm = TRUE),
      .groups      = "drop"
    ) |>
    dplyr::mutate(
      Econ_Total = as.numeric(Property_USD) + as.numeric(Crop_USD)
    ) |>
    dplyr::arrange(dplyr::desc(Econ_Total))
}

# Top 3 event types for health and for economic impact; both tables are
# already sorted in descending order, so the first three names suffice.
top3_health <- head(health_by_event[["EVTYPE"]], n = 3)
top3_econ   <- head(econ_by_event[["EVTYPE"]], n = 3)

# Print the conclusions: a heading followed by one bullet line per finding,
# each built from the comma-separated top-three event names.
cat("### Conclusions\n\n")
## ### Conclusions
health_names <- paste(top3_health, collapse = ", ")
cat("- Most harmful to population health  ", health_names, ".\n", sep = "")
## - Most harmful to population health  TORNADO, EXCESSIVE HEAT, TSTM WIND.
econ_names <- paste(top3_econ, collapse = ", ")
cat("- Greatest economic consequences  ", econ_names, ".\n", sep = "")
## - Greatest economic consequences  FLOOD, HURRICANE/TYPHOON, TORNADO.
cat("- Results are derived from the original NOAA Storm Database (1950–2011) .\n")
## - Results are derived from the original NOAA Storm Database (1950–2011) .