Synopsis

Using the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Database (1950–Nov 2011), this analysis identifies which event types are most harmful to population health and which have the greatest economic impact. After cleaning event labels and converting damage estimates using the provided exponent fields, tornadoes account for the most fatalities and injuries, while floods and hurricanes/typhoons dominate total economic losses. Results are reproducible from the original CSV. Plots summarize the top event types and magnitudes.

Data Processing

# Echo the code of every chunk in the rendered report and cache chunk results.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

# Packages the analysis depends on. Any that cannot be loaded are installed
# from CRAN before they are attached with library().
req_pkgs <- c("dplyr", "ggplot2", "readr", "stringr", "forcats", "tidyr", "scales", "lubridate")
# requireNamespace() is the documented way to test whether a package is
# usable; installed.packages() has to scan every installed package and its
# help page advises against using it for this purpose.
pkg_usable <- vapply(req_pkgs, requireNamespace, logical(1), quietly = TRUE)
new_pkgs <- req_pkgs[!pkg_usable]
if (length(new_pkgs) > 0) {
  install.packages(new_pkgs, repos = "https://cloud.r-project.org")
}

library(dplyr)
library(ggplot2)
library(readr)
library(stringr)
library(forcats)
library(tidyr)
library(scales)
library(lubridate)
# Download and Load Data
# The bzip2-compressed CSV is fetched once and reused on later runs.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
zip_path <- "StormData.csv.bz2"
if (!file.exists(zip_path)) {
  # Download to a side file and rename only on success, so an interrupted
  # transfer never leaves a truncated archive that the file.exists() check
  # above would accept on the next run.
  part_path <- paste0(zip_path, ".part")
  status <- download.file(url, destfile = part_path, mode = "wb")
  if (status != 0 || !file.rename(part_path, zip_path)) {
    unlink(part_path)
    stop("Could not download ", url, call. = FALSE)
  }
}

# read_csv() reads the .bz2 file directly. The columns used by the analysis
# get explicit types so they do not depend on readr's type guessing; every
# other column is still guessed.
storm <- read_csv(
  zip_path,
  col_types = cols(
    BGN_DATE   = col_character(),
    EVTYPE     = col_character(),
    FATALITIES = col_double(),
    INJURIES   = col_double(),
    PROPDMG    = col_double(),
    PROPDMGEXP = col_character(),
    CROPDMG    = col_double(),
    CROPDMGEXP = col_character()
  ),
  show_col_types = FALSE
)
head(storm)
## # A tibble: 6 × 37
##   STATE__ BGN_DATE   BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
##     <dbl> <chr>      <chr>    <chr>      <dbl> <chr>      <chr> <chr>      <dbl>
## 1       1 4/18/1950… 0130     CST           97 MOBILE     AL    TORNA…         0
## 2       1 4/18/1950… 0145     CST            3 BALDWIN    AL    TORNA…         0
## 3       1 2/20/1951… 1600     CST           57 FAYETTE    AL    TORNA…         0
## 4       1 6/8/1951 … 0900     CST           89 MADISON    AL    TORNA…         0
## 5       1 11/15/195… 1500     CST           43 CULLMAN    AL    TORNA…         0
## 6       1 11/15/195… 2000     CST           77 LAUDERDALE AL    TORNA…         0
## # ℹ 28 more variables: BGN_AZI <chr>, BGN_LOCATI <chr>, END_DATE <chr>,
## #   END_TIME <chr>, COUNTY_END <dbl>, COUNTYENDN <lgl>, END_RANGE <dbl>,
## #   END_AZI <chr>, END_LOCATI <chr>, LENGTH <dbl>, WIDTH <dbl>, F <dbl>,
## #   MAG <dbl>, FATALITIES <dbl>, INJURIES <dbl>, PROPDMG <dbl>,
## #   PROPDMGEXP <chr>, CROPDMG <dbl>, CROPDMGEXP <chr>, WFO <chr>,
## #   STATEOFFIC <chr>, ZONENAMES <chr>, LATITUDE <dbl>, LONGITUDE <dbl>,
## #   LATITUDE_E <dbl>, LONGITUDE_ <dbl>, REMARKS <chr>, REFNUM <dbl>
# Clean Data
# Convert damage exponent codes (PROPDMGEXP / CROPDMGEXP) to numeric
# multipliers.
#
# x: vector of exponent codes; coerced to character, trimmed and upper-cased.
# Returns an unnamed numeric vector the same length as x:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#   "0".."8" -> 10^digit,
#   anything else (blank, NA, "+", "-", "?", ...) -> 1.
#
# NOTE(review): "H" is read as "hundreds", the conventional interpretation of
# this code -- confirm against the data documentation.
exp_to_mult <- function(x) {
  x <- toupper(trimws(as.character(x)))

  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  # Start from the fallback multiplier and overwrite only recognised codes.
  mult <- rep(1, length(x))

  is_letter <- x %in% names(letter_mult)
  mult[is_letter] <- unname(letter_mult[x[is_letter]])

  # Convert only the entries that really are digits, so as.numeric() never
  # sees letter codes and no "NAs introduced by coercion" warning is raised.
  is_digit <- x %in% as.character(0:8)
  mult[is_digit] <- 10^as.numeric(x[is_digit])

  mult
}

# Build the analysis table: normalise EVTYPE into a small set of categories,
# convert damage figures to dollars, and add per-record totals and the year.
storm_clean <- storm %>%
  mutate(
    # Upper-case and collapse every whitespace run (including embedded
    # newlines) to a single space, then expand the TSTM abbreviation so one
    # pattern below covers both spellings.
    EVTYPE = str_squish(toupper(EVTYPE)),
    EVTYPE = str_replace_all(EVTYPE, "TSTM", "THUNDERSTORM"),
    # First matching rule wins, so order matters: TORNADO before THUNDERSTORM,
    # FLASH FLOOD before FLOOD, and the temperature rules before the marine
    # rule (so "HEAT WAVE" / "COLD WAVE" are not claimed by WAVE).
    EVTYPE = case_when(
      str_detect(EVTYPE, "HURRICANE|TYPHOON") ~ "HURRICANE/TYPHOON",
      str_detect(EVTYPE, "TORNADO") ~ "TORNADO",
      str_detect(EVTYPE, "THUNDERSTORM") ~ "THUNDERSTORM WIND",
      str_detect(EVTYPE, "FLASH FLOOD") ~ "FLASH FLOOD",
      str_detect(EVTYPE, "FLOOD") ~ "FLOOD",
      str_detect(EVTYPE, "HEAT|WARM") ~ "EXCESSIVE HEAT",
      str_detect(EVTYPE, "COLD|CHILL|HYPOTHERMIA|LOW TEMP") ~ "EXTREME COLD/WIND CHILL",
      str_detect(EVTYPE, "WINTER|SNOW|BLIZZARD|ICE STORM") ~ "WINTER WEATHER",
      str_detect(EVTYPE, "HAIL") ~ "HAIL",
      str_detect(EVTYPE, "DROUGHT") ~ "DROUGHT",
      str_detect(EVTYPE, "FIRE") ~ "WILDFIRE",
      str_detect(EVTYPE, "LIGHTNING") ~ "LIGHTNING",
      # SEAS is matched as a whole word only; unanchored it also matches
      # inside labels such as "UNSEASONABLY DRY" and files them as marine.
      str_detect(EVTYPE, "SURF|RIP CURRENT|\\bSEAS\\b|SWELL|WAVE") ~ "MARINE/COASTAL HAZARDS",
      # Anything unmatched keeps its cleaned original label.
      TRUE ~ EVTYPE
    ),
    # Dollar amounts = reported figure x multiplier from the exponent code.
    # NOTE(review): a single record with a mistaken "B" exponent can dominate
    # a category total -- TODO check the largest PROP_DMG_USD rows by hand.
    PROP_MULT = exp_to_mult(PROPDMGEXP),
    CROP_MULT = exp_to_mult(CROPDMGEXP),
    PROP_DMG_USD = PROPDMG * PROP_MULT,
    CROP_DMG_USD = CROPDMG * CROP_MULT,
    ECON_DMG_USD = PROP_DMG_USD + CROP_DMG_USD,
    HEALTH_HARM = FATALITIES + INJURIES,
    # BGN_DATE is text in month/day/year hour:minute:second form.
    YEAR = as.integer(lubridate::year(lubridate::mdy_hms(BGN_DATE)))
  )

head(storm_clean)
## # A tibble: 6 × 44
##   STATE__ BGN_DATE   BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE BGN_RANGE
##     <dbl> <chr>      <chr>    <chr>      <dbl> <chr>      <chr> <chr>      <dbl>
## 1       1 4/18/1950… 0130     CST           97 MOBILE     AL    TORNA…         0
## 2       1 4/18/1950… 0145     CST            3 BALDWIN    AL    TORNA…         0
## 3       1 2/20/1951… 1600     CST           57 FAYETTE    AL    TORNA…         0
## 4       1 6/8/1951 … 0900     CST           89 MADISON    AL    TORNA…         0
## 5       1 11/15/195… 1500     CST           43 CULLMAN    AL    TORNA…         0
## 6       1 11/15/195… 2000     CST           77 LAUDERDALE AL    TORNA…         0
## # ℹ 35 more variables: BGN_AZI <chr>, BGN_LOCATI <chr>, END_DATE <chr>,
## #   END_TIME <chr>, COUNTY_END <dbl>, COUNTYENDN <lgl>, END_RANGE <dbl>,
## #   END_AZI <chr>, END_LOCATI <chr>, LENGTH <dbl>, WIDTH <dbl>, F <dbl>,
## #   MAG <dbl>, FATALITIES <dbl>, INJURIES <dbl>, PROPDMG <dbl>,
## #   PROPDMGEXP <chr>, CROPDMG <dbl>, CROPDMGEXP <chr>, WFO <chr>,
## #   STATEOFFIC <chr>, ZONENAMES <chr>, LATITUDE <dbl>, LONGITUDE <dbl>,
## #   LATITUDE_E <dbl>, LONGITUDE_ <dbl>, REMARKS <chr>, REFNUM <dbl>, …
# Most harmful to population health
# One row per event type with summed fatalities, injuries and their total;
# only the ten largest totals are kept, in descending order.
health_top <- storm_clean %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    Total = Fatalities + Injuries
  ) %>%
  arrange(desc(Total)) %>%
  head(n = 10)

health_top
## # A tibble: 10 × 4
##    EVTYPE                 Fatalities Injuries Total
##    <chr>                       <dbl>    <dbl> <dbl>
##  1 TORNADO                      5661    91407 97068
##  2 EXCESSIVE HEAT               3178     9243 12421
##  3 THUNDERSTORM WIND             729     9544 10273
##  4 FLOOD                         490     6802  7292
##  5 WINTER WEATHER                622     5852  6474
##  6 LIGHTNING                     817     5231  6048
##  7 FLASH FLOOD                  1035     1802  2837
##  8 WILDFIRE                       90     1608  1698
##  9 MARINE/COASTAL HAZARDS        763      810  1573
## 10 HURRICANE/TYPHOON             135     1333  1468
# Horizontal bar chart of the ten event types in health_top, with bars
# ordered by combined fatalities and injuries.
health_top %>%
  mutate(EVTYPE = fct_reorder(EVTYPE, Total)) %>%
  ggplot(aes(x = EVTYPE, y = Total)) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Health Impact",
    subtitle = "Fatalities + Injuries (1950–2011)",
    x = "Event Type",
    y = "Total Health Harm"
  )

# Figure 1: Tornadoes cause the highest combined fatalities and injuries, followed by excessive heat and thunderstorm wind.

# Greatest economic consequences
# One row per event type with summed property damage, crop damage and their
# total (in dollars); only the ten largest totals are kept, in descending
# order.
econ_top <- storm_clean %>%
  group_by(EVTYPE) %>%
  summarise(
    Property = sum(PROP_DMG_USD, na.rm = TRUE),
    Crop = sum(CROP_DMG_USD, na.rm = TRUE),
    Total = Property + Crop
  ) %>%
  arrange(desc(Total)) %>%
  head(n = 10)

econ_top
## # A tibble: 10 × 4
##    EVTYPE                 Property        Crop         Total
##    <chr>                     <dbl>       <dbl>         <dbl>
##  1 FLOOD             150623398739  10847881950 161471280689 
##  2 HURRICANE/TYPHOON  85356410010   5516117800  90872527810 
##  3 TORNADO            58603317926.   417461520  59020779446.
##  4 STORM SURGE        43323536000         5000  43323541000 
##  5 FLASH FLOOD        17588292096.  1532197150  19120489246.
##  6 HAIL               15977564018.  3046887623  19024451641.
##  7 WINTER WEATHER     12407006811   5316280600  17723287411 
##  8 DROUGHT             1046106000  13972566000  15018672000 
##  9 THUNDERSTORM WIND  11184751522.  1271708988  12456460510.
## 10 WILDFIRE            8501628500    403281630   8904910130
# Horizontal bar chart of the ten event types in econ_top, with bars ordered
# by total damage; the value axis is rescaled from dollars to billions.
econ_top %>%
  mutate(EVTYPE = fct_reorder(EVTYPE, Total)) %>%
  ggplot(aes(x = EVTYPE, y = Total / 1e9)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Economic Damage",
    subtitle = "Property + Crop Damage (1950–2011)",
    x = "Event Type",
    y = "Total Damage (Billion USD)"
  )

# Figure 2: Floods cause the largest economic losses, followed by hurricanes/typhoons and tornadoes.
## Discussion
# The analysis shows that tornadoes are responsible for the highest combined fatalities and injuries in the U.S. from 1950 to 2011.
# Excessive heat and thunderstorm winds also have a significant impact on population health.
# Economically, floods cause the largest combined property and crop damage, followed by hurricanes/typhoons and tornadoes; for crop damage alone, drought is the costliest event type.
# This suggests that while tornadoes are the most harmful to health, floods and hurricanes/typhoons carry the greatest financial burden.
# These insights can guide policymakers in prioritizing disaster preparedness and resource allocation to reduce both human and economic losses.
## Conclusion

# The analysis shows that tornadoes are the most harmful to U.S. population health, causing the highest combined fatalities and injuries.
# Floods and hurricanes/typhoons lead to the greatest economic losses.
# This can help guide disaster preparedness and resource allocation.



# Reproducibility
# Print the R version, platform, locale and attached/loaded package versions
# used for this run so the results can be reproduced in a matching environment.
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Asia/Qatar
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] lubridate_1.9.4 scales_1.4.0    tidyr_1.3.1     forcats_1.0.0  
## [5] stringr_1.5.1   readr_2.1.5     ggplot2_3.5.2   dplyr_1.1.4    
## 
## loaded via a namespace (and not attached):
##  [1] bit_4.6.0          gtable_0.3.6       jsonlite_2.0.0     crayon_1.5.3      
##  [5] compiler_4.5.1     tidyselect_1.2.1   parallel_4.5.1     jquerylib_0.1.4   
##  [9] yaml_2.3.10        fastmap_1.2.0      R6_2.6.1           labeling_0.4.3    
## [13] generics_0.1.4     knitr_1.50         tibble_3.3.0       bslib_0.9.0       
## [17] pillar_1.11.0      RColorBrewer_1.1-3 tzdb_0.5.0         rlang_1.1.6       
## [21] utf8_1.2.6         cachem_1.1.0       stringi_1.8.7      xfun_0.52         
## [25] sass_0.4.10        bit64_4.6.0-1      timechange_0.3.0   cli_3.6.5         
## [29] withr_3.0.2        magrittr_2.0.3     digest_0.6.37      grid_4.5.1        
## [33] vroom_1.6.5        rstudioapi_0.17.1  hms_1.1.3          lifecycle_1.0.4   
## [37] vctrs_0.6.5        evaluate_1.0.4     glue_1.8.0         farver_2.1.2      
## [41] purrr_1.1.0        rmarkdown_2.29     tools_4.5.1        pkgconfig_2.0.3   
## [45] htmltools_0.5.8.1