This report analyzes the NOAA Storm Database to identify which weather event types are most harmful to population health and which have the greatest economic impact across the United States. The analysis focuses on fatalities and injuries to represent population health impacts, and on combined property and crop damage to represent economic consequences. The raw compressed dataset is downloaded (if needed), read directly from the .csv.bz2 file, and processed entirely within this document for reproducibility. Several transformations are required: event names are standardized (uppercased and trimmed) to reduce inconsistencies, and damage amounts are converted using official exponent codes to numeric dollar values. Impacts are then aggregated by event type, and the highest-impact event categories are ranked. Results are presented using two figures. These findings can help municipal or emergency management stakeholders understand which event types have historically caused the greatest harm and losses. The conclusions are based on the available database records and should be read with the caveat that reporting is more complete in later years, so earlier events are likely under-represented.
library(dplyr)
library(ggplot2)
library(readr)
library(stringr)
library(tidyr)
library(scales)
# Locate and read the storm data.
# An already-extracted local CSV is used when present (the original
# behaviour). Otherwise fall back to the raw compressed archive, downloading
# it only when it is not on disk, so the analysis can be reproduced from
# scratch. read_csv() decompresses .bz2 files transparently.
storm_csv <- "repdata_data_StormData.csv"
storm_bz2 <- "repdata_data_StormData.csv.bz2"
# NOTE(review): course-provided download location -- confirm it is still live.
storm_url <-
  "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(storm_csv) && !file.exists(storm_bz2)) {
  # mode = "wb" keeps the binary archive intact on Windows.
  download.file(storm_url, destfile = storm_bz2, mode = "wb")
}
storm_raw <- read_csv(if (file.exists(storm_csv)) storm_csv else storm_bz2)
# Keep only the seven columns the analysis needs, then normalise them in place:
#   * EVTYPE      -> upper-cased with surrounding/repeated whitespace squished
#   * count/damage columns -> coerced to numeric
#   * exponent-code columns -> upper-cased
storm <- storm_raw %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    EVTYPE = str_squish(str_to_upper(EVTYPE)),
    across(c(FATALITIES, INJURIES, PROPDMG, CROPDMG), as.numeric),
    across(c(PROPDMGEXP, CROPDMGEXP), str_to_upper)
  )
# Preview the ten most frequent standardized event types.
# EVTYPE is a character column, so summary() only reports Length/Class/Mode
# and indexing that result with [1:10] pads it with NA instead of showing
# event types; count() gives the actual per-type frequencies.
storm %>%
  count(EVTYPE, sort = TRUE) %>%
  head(10)
## (printed output is regenerated when the document is re-knit)
#' Translate damage exponent codes into numeric multipliers
#'
#' Letter codes map to powers of ten (H = 1e2, K = 1e3, M = 1e6, B = 1e9), a
#' single digit d maps to 10^d, and everything else (empty string, NA, or an
#' unrecognised symbol such as "+", "-", "?") maps to 1 so the recorded damage
#' value is kept unchanged.
#'
#' Codes are matched case-insensitively and with surrounding whitespace
#' ignored, so the function does not rely on callers having normalised them.
#' Only the digit codes are passed to as.numeric(), which avoids the
#' "NAs introduced by coercion" warnings that arise when letter or symbol
#' codes are coerced as well.
#'
#' @param exp_code Vector of exponent codes (character, or coercible to it).
#' @return A numeric vector of multipliers, the same length as `exp_code`.
exp_multiplier <- function(exp_code) {
  code <- toupper(trimws(as.character(exp_code)))
  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  # Default of 1 covers "", NA and any unrecognised code.
  mult <- rep(1, length(code))

  is_letter <- code %in% names(letter_mult)
  mult[is_letter] <- unname(letter_mult[code[is_letter]])

  is_digit <- code %in% as.character(0:9)
  mult[is_digit] <- 10^as.numeric(code[is_digit])

  mult
}
# Convert the coded damage figures to dollars in three stages -- multipliers,
# per-category dollar amounts, then the combined impact measures -- and print
# a quick distribution check of the combined economic damage.
storm <- storm %>%
  mutate(
    PROP_MULT = exp_multiplier(PROPDMGEXP),
    CROP_MULT = exp_multiplier(CROPDMGEXP)
  ) %>%
  mutate(
    PROP_DAMAGE = PROPDMG * PROP_MULT,
    CROP_DAMAGE = CROPDMG * CROP_MULT
  ) %>%
  mutate(
    ECON_DAMAGE = PROP_DAMAGE + CROP_DAMAGE,
    HEALTH_IMPACT = FATALITIES + INJURIES
  )
summary(storm[["ECON_DAMAGE"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00e+00 0.00e+00 0.00e+00 5.29e+05 1.00e+03 1.15e+11
# Population-health totals per event type (fatalities, injuries, and their
# sum), sorted so the event type with the largest combined count comes first.
health_by_event <- arrange(
  summarise(
    group_by(storm, EVTYPE),
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE),
    health_total = sum(HEALTH_IMPACT, na.rm = TRUE),
    .groups = "drop"
  ),
  desc(health_total)
)
# Economic totals per event type (property, crop, and combined damage in
# dollars), sorted so the event type with the largest combined loss is first.
econ_by_event <- arrange(
  summarise(
    group_by(storm, EVTYPE),
    prop_damage = sum(PROP_DAMAGE, na.rm = TRUE),
    crop_damage = sum(CROP_DAMAGE, na.rm = TRUE),
    econ_total = sum(ECON_DAMAGE, na.rm = TRUE),
    .groups = "drop"
  ),
  desc(econ_total)
)
# Show the ten event types with the highest combined health impact.
health_by_event %>% head(n = 10)
## # A tibble: 10 × 4
## EVTYPE fatalities injuries health_total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Show the ten event types with the highest combined economic damage.
econ_by_event %>% head(n = 10)
## # A tibble: 10 × 4
## EVTYPE prop_damage crop_damage econ_total
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 5661968450 150319678257
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 56947380676. 414953270 57362333946.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15735267513. 3025954473 18761221986.
## 6 FLASH FLOOD 16822723978. 1421317100 18244041078.
## 7 DROUGHT 1046106000 13972566000 15018672000
## 8 HURRICANE 11868319010 2741910000 14610229010
## 9 RIVER FLOOD 5118945500 5029459000 10148404500
## 10 ICE STORM 3944927860 5022113500 8967041360
Event types are ranked by total harm to population health, defined as fatalities + injuries.
# health_by_event is already sorted in descending order of combined impact,
# so its first ten rows are the ten most harmful event types.
health_top10 <- slice_head(health_by_event, n = 10)
print(health_top10)
## # A tibble: 10 × 4
## EVTYPE fatalities injuries health_total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Reshape the top-10 table to one row per (event type, metric) so fatalities
# and injuries can be drawn as stacked segments of a single bar.
health_long <- tidyr::pivot_longer(
  select(health_top10, EVTYPE, fatalities, injuries),
  cols = c(fatalities, injuries),
  names_to = "metric",
  values_to = "count"
)

# Figure 1: horizontal stacked bars, one per event type, filled by metric.
# reorder() sorts event types by their mean count over the two metric rows,
# which yields the same ordering as sorting by the combined total.
ggplot(
  health_long,
  aes(x = reorder(EVTYPE, count), y = count, fill = metric)
) +
  geom_col() +
  coord_flip() +
  ggtitle(
    "Top 10 Event Types by Population Health Impact",
    subtitle = "Counts of fatalities and injuries aggregated across the U.S. (1950–Nov 2011)"
  ) +
  xlab("Event type (standardized)") +
  ylab("Number of people affected") +
  labs(
    caption = "Figure 1. Bars show injuries and fatalities by the 10 event types with the highest combined impact."
  )
Tornadoes account for by far the largest combined burden of fatalities and injuries over the period covered by the database.
Event types are ranked by total economic impact, defined as property damage + crop damage (USD).
# econ_by_event is already sorted in descending order of combined damage,
# so its first ten rows are the ten costliest event types.
econ_top10 <- slice_head(econ_by_event, n = 10)
print(econ_top10)
## # A tibble: 10 × 4
## EVTYPE prop_damage crop_damage econ_total
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 5661968450 150319678257
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 56947380676. 414953270 57362333946.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15735267513. 3025954473 18761221986.
## 6 FLASH FLOOD 16822723978. 1421317100 18244041078.
## 7 DROUGHT 1046106000 13972566000 15018672000
## 8 HURRICANE 11868319010 2741910000 14610229010
## 9 RIVER FLOOD 5118945500 5029459000 10148404500
## 10 ICE STORM 3944927860 5022113500 8967041360
# Figure 2: horizontal bars of combined property + crop damage for the ten
# costliest event types, ordered by total and labelled in dollars.
ggplot(
  econ_top10,
  aes(x = reorder(EVTYPE, econ_total), y = econ_total)
) +
  geom_col() +
  coord_flip() +
  scale_y_continuous(labels = scales::dollar_format()) +
  ggtitle(
    "Top 10 Event Types by Total Economic Damage",
    subtitle = "Property + crop damage aggregated across the U.S. (1950–Nov 2011)"
  ) +
  xlab("Event type (standardized)") +
  ylab("Total damage (USD)") +
  labs(
    caption = "Figure 2. Bars show combined crop and property damages for the 10 event types with the highest total losses."
  )
Floods account for the largest combined reported property and crop losses, making them a key category for resource prioritization; note that for crop damage alone, drought ranks highest.