Synopsis

This analysis uses the NOAA Storm Events database (1950 through November 2011) to identify which event types cause the greatest harm to population health (fatalities + injuries) and which cause the largest economic damage (property + crop losses). The R Markdown document downloads the raw compressed CSV, performs reproducible processing (including interpreting damage exponent codes), summarizes the data by event type, and presents bar charts of the top contributors for both public-health and economic impact. Code is shown for every step so the analysis can be re-run and published to RPubs.

Data Processing

This section shows the R code used to download, read, clean, and prepare the NOAA storm data for analysis. All code chunks are shown (echo = TRUE).

# Load packages used in the analysis.
# dplyr, readr, ggplot2 and stringr are attached below. tidyr, forcats, scales
# and knitr are only called through `::` further down, but they still have to
# be installed for the document to knit, so the install check covers them too.
required_pkgs <- c("dplyr", "readr", "ggplot2", "stringr",
                   "tidyr", "forcats", "scales", "knitr")
is_installed <- vapply(required_pkgs, requireNamespace, logical(1),
                       quietly = TRUE)
if (!all(is_installed)) {
  install.packages(required_pkgs[!is_installed])
}

library(dplyr)
library(readr)
library(ggplot2)
library(stringr)
# URL for the course dataset (compressed CSV)
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Download the archive to a temporary file (no manual preprocessing).
# `downloader` ends up TRUE only when the download completed successfully.
tempfile_path <- tempfile(fileext = ".bz2")
downloader <- tryCatch({
  # download.file() returns 0 on success; some methods signal failure with a
  # non-zero status and a warning rather than an error, so check the status.
  identical(
    as.integer(download.file(url, destfile = tempfile_path, mode = "wb")),
    0L
  )
}, error = function(e) {
  message("Download failed: ", conditionMessage(e))
  FALSE
})

# Abort with a clear message instead of letting read_csv() fail later on a
# missing or partial file.
if (!isTRUE(downloader)) {
  stop("Could not download the storm data from ", url, call. = FALSE)
}

# Read the CSV straight from the compressed file. A large guess_max lets readr
# inspect many rows before settling on column types.
storm_raw <- read_csv(tempfile_path, guess_max = 100000)

# Inspect the structure
glimpse(storm_raw)
## Rows: 902,297
## Columns: 37
## $ STATE__    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ BGN_DATE   <chr> "4/18/1950 0:00:00", "4/18/1950 0:00:00", "2/20/1951 0:00:0…
## $ BGN_TIME   <chr> "0130", "0145", "1600", "0900", "1500", "2000", "0100", "09…
## $ TIME_ZONE  <chr> "CST", "CST", "CST", "CST", "CST", "CST", "CST", "CST", "CS…
## $ COUNTY     <dbl> 97, 3, 57, 89, 43, 77, 9, 123, 125, 57, 43, 9, 73, 49, 107,…
## $ COUNTYNAME <chr> "MOBILE", "BALDWIN", "FAYETTE", "MADISON", "CULLMAN", "LAUD…
## $ STATE      <chr> "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL", "AL",…
## $ EVTYPE     <chr> "TORNADO", "TORNADO", "TORNADO", "TORNADO", "TORNADO", "TOR…
## $ BGN_RANGE  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ BGN_AZI    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ BGN_LOCATI <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_DATE   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_TIME   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ COUNTY_END <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ COUNTYENDN <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_RANGE  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ END_AZI    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ END_LOCATI <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ LENGTH     <dbl> 14.0, 2.0, 0.1, 0.0, 0.0, 1.5, 1.5, 0.0, 3.3, 2.3, 1.3, 4.7…
## $ WIDTH      <dbl> 100, 150, 123, 100, 150, 177, 33, 33, 100, 100, 400, 400, 2…
## $ F          <dbl> 3, 2, 2, 2, 2, 2, 2, 1, 3, 3, 1, 1, 3, 3, 3, 4, 1, 1, 1, 1,…
## $ MAG        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ FATALITIES <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0,…
## $ INJURIES   <dbl> 15, 0, 2, 2, 2, 6, 1, 0, 14, 0, 3, 3, 26, 12, 6, 50, 2, 0, …
## $ PROPDMG    <dbl> 25.0, 2.5, 25.0, 2.5, 2.5, 2.5, 2.5, 2.5, 25.0, 25.0, 2.5, …
## $ PROPDMGEXP <chr> "K", "K", "K", "K", "K", "K", "K", "K", "K", "K", "M", "M",…
## $ CROPDMG    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ CROPDMGEXP <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ WFO        <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ STATEOFFIC <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ ZONENAMES  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ LATITUDE   <dbl> 3040, 3042, 3340, 3458, 3412, 3450, 3405, 3255, 3334, 3336,…
## $ LONGITUDE  <dbl> 8812, 8755, 8742, 8626, 8642, 8748, 8631, 8558, 8740, 8738,…
## $ LATITUDE_E <dbl> 3051, 0, 0, 0, 0, 0, 0, 0, 3336, 3337, 3402, 3404, 0, 3432,…
## $ LONGITUDE_ <dbl> 8806, 0, 0, 0, 0, 0, 0, 0, 8738, 8737, 8644, 8640, 0, 8540,…
## $ REMARKS    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
## $ REFNUM     <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, …
# Keep only columns we need for this analysis
storm <- storm_raw %>%
  select(BGN_DATE, STATE, EVTYPE,
         FATALITIES, INJURIES,
         PROPDMG, PROPDMGEXP,
         CROPDMG, CROPDMGEXP)


# Clean EVTYPE: upper-case, trim both ends, and collapse internal runs of
# whitespace. str_squish() performs both the trim and the collapse, so labels
# that differ only in spacing fall into the same group.
storm <- storm %>%
  mutate(EVTYPE = str_to_upper(str_squish(EVTYPE)))

# Convert damage exponent codes to numeric multipliers.
#
# Args:
#   exp: vector of exponent codes (character, factor, or NA), such as the
#        PROPDMGEXP / CROPDMGEXP columns.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     - "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9 (case-insensitive);
#     - a string of digits "n" -> 10^n (so "0" -> 1);
#     - NA, blank, and any unrecognised code (e.g. "-", "?", "+") -> 1,
#       so the recorded damage figure is kept as-is rather than zeroed.
exp_to_mult <- function(exp) {
  # Normalise: upper-case text, NA treated as blank, surrounding space removed
  exp <- toupper(as.character(exp))
  exp[is.na(exp)] <- ""
  exp <- trimws(exp, whitespace = "[\\h\\v]")

  # Start from the fallback multiplier of 1 and overwrite recognised codes
  mult <- rep(1, length(exp))

  # Letter codes
  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- exp %in% names(letter_mult)
  mult[is_letter] <- unname(letter_mult[exp[is_letter]])

  # Numeric exponents (e.g., "2" => 10^2) occasionally appear
  is_num <- grepl("^[0-9]+$", exp)
  mult[is_num] <- 10^as.numeric(exp[is_num])

  mult
}

# Translate the exponent codes into multipliers, then derive dollar amounts,
# the combined economic damage, and the combined health impact per record
storm <- storm %>%
  mutate(
    PROP_MULT = exp_to_mult(PROPDMGEXP),
    CROP_MULT = exp_to_mult(CROPDMGEXP)
  ) %>%
  mutate(
    PROP_DMG_USD = PROPDMG * PROP_MULT,
    CROP_DMG_USD = CROPDMG * CROP_MULT,
    TOTAL_ECON_DMG = PROP_DMG_USD + CROP_DMG_USD,
    TOTAL_HEALTH_IMPACT = FATALITIES + INJURIES
  )

# Quick sanity checks on the derived columns
summary(storm[["PROP_DMG_USD"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 0.000e+00 0.000e+00 4.746e+05 5.000e+02 1.150e+11
summary(storm[["CROP_DMG_USD"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 0.000e+00 0.000e+00 5.442e+04 0.000e+00 5.000e+09
summary(storm[["TOTAL_HEALTH_IMPACT"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##    0.0000    0.0000    0.0000    0.1725    0.0000 1742.0000

Results

This section computes summaries by event type and produces figures showing the top event types by health impact and by economic damage.

# Per-event-type casualty totals and event counts, largest combined total first
health_by_event <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    total_health = sum(TOTAL_HEALTH_IMPACT, na.rm = TRUE),
    n_events = n()
  ) %>%
  arrange(desc(total_health))

# The ten event types with the highest combined fatalities + injuries
top10_health <- head(health_by_event, 10)
knitr::kable(
  top10_health,
  digits = 0,
  caption = "Top 10 EVTYPE by total (fatalities + injuries)"
)
Top 10 EVTYPE by total (fatalities + injuries)
EVTYPE total_fatalities total_injuries total_health n_events
TORNADO 5633 91346 96979 60652
EXCESSIVE HEAT 1903 6525 8428 1678
TSTM WIND 504 6957 7461 219946
FLOOD 470 6789 7259 25327
LIGHTNING 816 5230 6046 15755
HEAT 937 2100 3037 767
FLASH FLOOD 978 1777 2755 54278
ICE STORM 89 1975 2064 2006
THUNDERSTORM WIND 133 1488 1621 82564
WINTER STORM 206 1321 1527 11433
# Stacked bar chart of the top 10: each event type's bar is split into
# fatalities and injuries. Reshape to long form first so `type` can drive fill.
plot_health <- tidyr::pivot_longer(
  top10_health,
  cols = c(total_fatalities, total_injuries),
  names_to = "type",
  values_to = "count"
)
# Order the event types by their summed (negated) counts
plot_health <- mutate(
  plot_health,
  EVTYPE = forcats::fct_reorder(EVTYPE, -count, .fun = sum)
)

ggplot(plot_health, aes(x = EVTYPE, y = count, fill = type)) +
  geom_col(position = "stack") +
  labs(
    title = "Top 10 Event Types by Total Health Impact (Fatalities + Injuries)",
    x = "Event Type",
    y = "Count",
    fill = "Measure"
  ) +
  coord_flip() +
  theme_minimal()

# Per-event-type property, crop and combined damage totals (USD) with event
# counts, largest combined total first
econ_by_event <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    total_prop = sum(PROP_DMG_USD, na.rm = TRUE),
    total_crop = sum(CROP_DMG_USD, na.rm = TRUE),
    total_econ = sum(TOTAL_ECON_DMG, na.rm = TRUE),
    n_events = n()
  ) %>%
  arrange(desc(total_econ))

# The ten event types with the highest combined damage; dollar columns are
# rounded to whole dollars for display only
top10_econ <- head(econ_by_event, 10)
knitr::kable(
  mutate(top10_econ, across(c(total_prop, total_crop, total_econ), round)),
  caption = "Top 10 EVTYPE by total economic damage (USD)"
)
Top 10 EVTYPE by total economic damage (USD)
EVTYPE total_prop total_crop total_econ n_events
FLOOD 144657709807 5661968450 150319678257 25327
HURRICANE/TYPHOON 69305840000 2607872800 71913712800 88
TORNADO 56947380676 414953270 57362333946 60652
STORM SURGE 43323536000 5000 43323541000 261
HAIL 15735267513 3025954473 18761221986 288661
FLASH FLOOD 16822723978 1421317100 18244041078 54278
DROUGHT 1046106000 13972566000 15018672000 2488
HURRICANE 11868319010 2741910000 14610229010 174
RIVER FLOOD 5118945500 5029459000 10148404500 173
ICE STORM 3944927860 5022113500 8967041360 2006
# Stacked bar chart of the top 10: each event type's bar is split into
# property and crop damage. Reshape to long form so `type` can drive fill.
plot_econ <- tidyr::pivot_longer(
  top10_econ,
  cols = c(total_prop, total_crop),
  names_to = "type",
  values_to = "amount"
)
# Order the event types by their summed (negated) amounts
plot_econ <- mutate(
  plot_econ,
  EVTYPE = forcats::fct_reorder(EVTYPE, -amount, .fun = sum)
)

ggplot(plot_econ, aes(x = EVTYPE, y = amount, fill = type)) +
  geom_col(position = "stack") +
  scale_y_continuous(labels = scales::dollar_format(prefix = "$", scale = 1)) +
  labs(
    title = "Top 10 Event Types by Total Economic Damage (Property + Crop)",
    x = "Event Type",
    y = "Damage (USD)",
    fill = "Damage Type"
  ) +
  coord_flip() +
  theme_minimal()

Short Discussion

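The tables and figures above identify which event types are most harmful to population health and which cause the largest economic losses across the period covered: tornadoes account for by far the most fatalities and injuries combined, while floods account for the largest total property and crop damage. Event labels are normalized only for letter case and whitespace, so closely related labels (for example, TSTM WIND and THUNDERSTORM WIND, or HURRICANE and HURRICANE/TYPHOON) are counted separately. The code and plots are suitable for inclusion in a short briefing to municipal managers. When allocating resources and setting priorities, managers may consider both frequency (number of events) and severity (per-event damage). The code can be modified to restrict the analysis to more recent years (e.g., 1990 onward) if preferred.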

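Reproducibility notes

All results are computed from the raw file downloaded in the Data Processing section, with no manual preprocessing. The session information printed at the end of the appendix records the R and package versions used to produce this document.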

Appendix: Quick audit of exponent codes (optional)

# Distinct exponent codes present in the raw property and crop columns, sorted
unique_props <- storm_raw$PROPDMGEXP %>% unique() %>% sort()
unique_crops <- storm_raw$CROPDMGEXP %>% unique() %>% sort()
print(unique_props)
##  [1] "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K" "m" "M"
print(unique_crops)
## [1] "?" "0" "2" "B" "k" "K" "m" "M"
# Record the R and package versions used for this run
sessionInfo()
## R version 4.5.0 (2025-04-11 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: Africa/Nairobi
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] stringr_1.5.1 ggplot2_3.5.2 readr_2.1.5   dplyr_1.1.4  
## 
## loaded via a namespace (and not attached):
##  [1] bit_4.6.0         gtable_0.3.6      jsonlite_2.0.0    crayon_1.5.3     
##  [5] compiler_4.5.0    tidyselect_1.2.1  parallel_4.5.0    tidyr_1.3.1      
##  [9] jquerylib_0.1.4   scales_1.3.0      yaml_2.3.10       fastmap_1.2.0    
## [13] R6_2.6.1          labeling_0.4.3    generics_0.1.3    knitr_1.50       
## [17] forcats_1.0.0     tibble_3.2.1      munsell_0.5.1     bslib_0.9.0      
## [21] pillar_1.10.2     tzdb_0.5.0        rlang_1.1.6       cachem_1.1.0     
## [25] stringi_1.8.7     xfun_0.52         sass_0.4.10       bit64_4.6.0-1    
## [29] cli_3.6.4         withr_3.0.2       magrittr_2.0.3    digest_0.6.37    
## [33] grid_4.5.0        vroom_1.6.5       rstudioapi_0.17.1 hms_1.1.3        
## [37] lifecycle_1.0.4   vctrs_0.6.5       evaluate_1.0.3    glue_1.8.0       
## [41] farver_2.1.2      colorspace_2.1-1  purrr_1.0.4       rmarkdown_2.29   
## [45] tools_4.5.0       pkgconfig_2.0.3   htmltools_0.5.8.1