This report analyzes the NOAA Storm Database (1950–2011) to identify which event types cause the greatest harm to population health and the greatest economic consequences in the United States. The data are loaded from the raw storm dataset and processed by standardizing event-type labels (trimming surrounding whitespace and converting to upper case) and converting damage exponent codes (K/M/B) into numeric multipliers of 1,000, 1,000,000, and 1,000,000,000; any other exponent code, including a blank, is treated as a multiplier of 1. Health impact is measured as fatalities plus injuries, and economic impact is measured as property damage plus crop damage. Results are summarized by event type (EVTYPE) and presented using two bar charts of the top 10 event types.
# Packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the raw NOAA storm CSV. The path is relative, so the file must sit in
# the current working directory. Text columns stay character vectors.
data_file <- "repdata_data_StormData.csv"
storm <- read.csv(
  file = data_file,
  header = TRUE,
  stringsAsFactors = FALSE
)
# Rows and columns of the freshly loaded table.
dim(storm)
## [1] 902297 37
# Sanity check: table dimensions, then the first ten column names.
dim(storm)
## [1] 902297 37
colnames(storm)[seq_len(10)]
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
# Keep only the seven columns the analysis needs, in a fixed order, then
# normalise them: text codes are trimmed and upper-cased, and the count and
# damage columns are coerced to numeric.
storm2 <- storm %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    across(
      c(EVTYPE, PROPDMGEXP, CROPDMGEXP),
      function(v) toupper(trimws(v))
    ),
    across(c(FATALITIES, INJURIES, PROPDMG, CROPDMG), as.numeric)
  )
# Translate damage-exponent codes into numeric multipliers.
#
# x: vector of exponent codes; matching ignores letter case and surrounding
#    whitespace.
# Returns a numeric vector the same length as x: "K" -> 1e3, "M" -> 1e6,
# "B" -> 1e9, and 1 for everything else (empty strings, NA, any other code).
exp_to_mult <- function(x) {
  code <- toupper(trimws(x))
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(known[match(code, names(known))])
  # Codes missing from the lookup table (including "" and NA) come back as NA
  # from the indexing above; they all fall back to a multiplier of 1.
  mult[is.na(mult)] <- 1
  mult
}
# Add the per-record impact measures. First resolve the exponent codes to
# numeric multipliers, then combine: casualties are fatalities plus injuries,
# and economic damage is scaled property damage plus scaled crop damage.
storm3 <- storm2 %>%
  mutate(
    prop_mult = exp_to_mult(PROPDMGEXP),
    crop_mult = exp_to_mult(CROPDMGEXP)
  ) %>%
  mutate(
    health_impact = FATALITIES + INJURIES,
    econ_damage = PROPDMG * prop_mult + CROPDMG * crop_mult
  )
# Total casualties per event type, sorted from largest to smallest, keeping
# only the first ten rows. The intermediate table lives inside local() so it
# does not add another object to the workspace.
health_top <- local({
  per_event <- summarise(
    group_by(storm3, EVTYPE),
    total_health = sum(health_impact, na.rm = TRUE),
    .groups = "drop"
  )
  head(arrange(per_event, desc(total_health)), n = 10)
})
health_top
## # A tibble: 10 × 2
## EVTYPE total_health
## <chr> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
# Horizontal bar chart of the ten event types in health_top. Bars are ordered
# by total casualties; coord_flip() turns the columns sideways so the event
# type labels read horizontally.
ggplot(
  data = health_top,
  mapping = aes(x = reorder(EVTYPE, total_health), y = total_health)
) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 10 Storm Event Types by Population Health Impact") +
  xlab("Event Type (EVTYPE)") +
  ylab("Fatalities + Injuries")
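Figure 1. This bar chart shows the 10 event types with
the highest combined fatalities and injuries across the U.S. in the NOAA
Storm Database. Event types are counted exactly as labeled (after trimming
and upper-casing), so closely related labels such as TSTM WIND and
THUNDERSTORM WIND, or HEAT and EXCESSIVE HEAT, appear as separate bars.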
# Total economic damage per event type, sorted from largest to smallest,
# keeping only the first ten rows. The intermediate table lives inside
# local() so it does not add another object to the workspace.
econ_top <- local({
  per_event <- summarise(
    group_by(storm3, EVTYPE),
    total_econ = sum(econ_damage, na.rm = TRUE),
    .groups = "drop"
  )
  head(arrange(per_event, desc(total_econ)), n = 10)
})
econ_top
## # A tibble: 10 × 2
## EVTYPE total_econ
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352114049.
## 4 STORM SURGE 43323541000
## 5 HAIL 18758221521.
## 6 FLASH FLOOD 17562179167.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
# Horizontal bar chart of the ten event types in econ_top. Bars are ordered
# by total damage; coord_flip() turns the columns sideways so the event type
# labels read horizontally.
ggplot(
  data = econ_top,
  mapping = aes(x = reorder(EVTYPE, total_econ), y = total_econ)
) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 10 Storm Event Types by Economic Consequences") +
  xlab("Event Type (EVTYPE)") +
  ylab("Property + Crop Damage (USD)")
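Figure 2. This bar chart shows the 10 event types with
the highest total economic damage (property plus crop, in U.S. dollars) after
converting exponent codes (K/M/B) into numeric multipliers; records with any
other exponent code are counted with a multiplier of 1. Event types are
counted exactly as labeled, so related labels such as HURRICANE/TYPHOON and
HURRICANE, or FLOOD, FLASH FLOOD, and RIVER FLOOD, appear as separate bars.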