This report analyzes the U.S. National Oceanic and Atmospheric
Administration (NOAA) Storm Events Database to identify which event
types are most harmful to population health and which have the greatest
economic consequences. We begin from the raw CSV
repdata_data_StormData.csv. We compute health impacts
(fatalities, injuries) and standardized economic losses (property +
crop), showing all R code and caching heavy steps for speed. Results
typically show tornadoes dominating health impacts, while floods,
hurricanes/typhoons, and storm surges dominate economic losses.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
library(readr)
library(forcats)
library(tidyr)
# Load the raw NOAA Storm Events CSV; fail early with a message that names
# the missing file.
csv_path <- "repdata_data_StormData.csv"
if (!file.exists(csv_path)) {
  stop("Input file not found: ", csv_path, call. = FALSE)
}

# Pin the types of the columns the analysis computes on instead of leaving
# them to readr's type guessing. The damage-exponent columns carry codes such
# as "K", "M" and "B"; if a guess ever settled on a non-character type, those
# codes would be read as NA and reported only as parsing problems.
# Every other column is still guessed, so `storm_raw` keeps all columns.
storm_raw <- readr::read_csv(
  csv_path,
  col_types = readr::cols(
    EVTYPE = readr::col_character(),
    FATALITIES = readr::col_double(),
    INJURIES = readr::col_double(),
    PROPDMG = readr::col_double(),
    PROPDMGEXP = readr::col_character(),
    CROPDMG = readr::col_double(),
    CROPDMGEXP = readr::col_character(),
    .default = readr::col_guess()
  ),
  show_col_types = FALSE,
  progress = FALSE
)
# Keep only the fields used downstream (date, state, event type, the two
# health counts, and the damage amount/exponent pairs), then release the
# full raw table.
storm <- dplyr::select(
  storm_raw,
  dplyr::all_of(c(
    "BGN_DATE", "STATE", "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  ))
)
rm(list = "storm_raw")
# Normalise event-type labels: coerce to character, upper-case, then trim
# and collapse whitespace so spelling variants group together.
storm <- dplyr::mutate(
  storm,
  EVTYPE = EVTYPE %>%
    as.character() %>%
    stringr::str_to_upper() %>%
    stringr::str_squish()
)
# Per event type: total fatalities, total injuries, and their sum
# (`health_harm`). Event types with no recorded harm are dropped.
health_by_event <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(health_harm = fatalities + injuries) %>%
  filter(health_harm > 0)
# Ten event types with the largest combined fatalities + injuries,
# sorted from most to least harmful; the bare name prints the table.
top_health <- slice_head(
  arrange(health_by_event, desc(health_harm)),
  n = 10
)
top_health
#' Translate damage-exponent codes into numeric multipliers
#'
#' Codes are compared after coercion to character, trimming of surrounding
#' whitespace and upper-casing. "H", "K", "M" and "B" map to 1e2, 1e3, 1e6
#' and 1e9. A single digit 0-8 maps to 10 raised to that digit. Every other
#' value (including NA and the empty string) maps to 1.
#'
#' @param x Vector of exponent codes.
#' @return Unnamed numeric vector of multipliers, the same length as `x`.
exp_to_multiplier <- function(x) {
  code <- toupper(trimws(as.character(x)))

  # Letter codes via a lookup table; anything not in the table comes back NA.
  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(letter_scale[match(code, names(letter_scale))])

  # Single digits 0-8 are read as powers of ten.
  digit_pos <- which(grepl("^[0-8]$", code))
  mult[digit_pos] <- 10^as.numeric(code[digit_pos])

  # Whatever is still unresolved leaves the damage figure unscaled.
  mult[is.na(mult)] <- 1
  mult
}
# Convert the amount/exponent pairs into dollar losses.
#   prop_mult / crop_mult : multiplier decoded from the exponent code
#   prop_loss / crop_loss : amount * multiplier
#   total_loss            : property + crop loss for the record
storm <- storm %>%
  mutate(
    prop_mult = exp_to_multiplier(PROPDMGEXP),
    crop_mult = exp_to_multiplier(CROPDMGEXP),
    prop_loss = as.numeric(PROPDMG) * prop_mult,
    crop_loss = as.numeric(CROPDMG) * crop_mult,
    # Treat a missing component as 0 when adding. With a plain `+`, one NA
    # component makes the whole total NA, and a later na.rm sum of
    # `total_loss` would then drop the record entirely while the na.rm sums
    # of the individual components still count its valid part.
    total_loss = dplyr::coalesce(prop_loss, 0) + dplyr::coalesce(crop_loss, 0)
  )
# Per event type: summed property, crop and total losses (missing values
# ignored). Event types with no recorded economic loss are dropped.
econ_by_event <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    across(
      c(prop_loss, crop_loss, total_loss),
      function(v) sum(v, na.rm = TRUE)
    ),
    .groups = "drop"
  ) %>%
  rename(
    property_damage = prop_loss,
    crop_damage = crop_loss,
    economic_loss = total_loss
  ) %>%
  filter(economic_loss > 0)
# Ten event types with the largest total economic loss, sorted from most
# to least costly; the bare name prints the table.
top_econ <- slice_head(
  arrange(econ_by_event, desc(economic_loss)),
  n = 10
)
top_econ
# Long-format table for plotting: one row per (event type, metric) for the
# top-10 event types. EVTYPE becomes a factor ordered by its summed count
# so the bars are sorted in the chart.
health_long <- health_by_event %>%
  filter(EVTYPE %in% top_health$EVTYPE) %>%
  select(all_of(c("EVTYPE", "fatalities", "injuries"))) %>%
  tidyr::pivot_longer(
    cols = -EVTYPE,
    names_to = "metric",
    values_to = "count"
  ) %>%
  mutate(EVTYPE = forcats::fct_reorder(EVTYPE, count, .fun = sum))
# Horizontal stacked bars: fatalities and injuries for each top-10 event type.
ggplot(data = health_long, mapping = aes(x = EVTYPE, y = count, fill = metric)) +
  geom_col() +
  coord_flip() +
  theme_minimal(base_size = 12) +
  xlab("Event Type (EVTYPE)") +
  ylab("People Affected") +
  labs(fill = "Health Metric") +
  ggtitle("Top 10 Event Types by Health Harm (Fatalities + Injuries)")
# Long-format table for plotting: one row per (event type, damage type) for
# the top-10 event types. EVTYPE becomes a factor ordered by its summed
# dollar loss so the bars are sorted in the chart.
econ_long <- econ_by_event %>%
  filter(EVTYPE %in% top_econ$EVTYPE) %>%
  select(all_of(c("EVTYPE", "property_damage", "crop_damage"))) %>%
  tidyr::pivot_longer(
    cols = -EVTYPE,
    names_to = "type",
    values_to = "usd"
  ) %>%
  mutate(EVTYPE = forcats::fct_reorder(EVTYPE, usd, .fun = sum))
# Horizontal stacked bars: property and crop losses for each top-10 event
# type, with dollars rescaled to billions on the value axis.
ggplot(data = econ_long, mapping = aes(x = EVTYPE, y = usd / 1e9, fill = type)) +
  geom_col() +
  coord_flip() +
  theme_minimal(base_size = 12) +
  xlab("Event Type (EVTYPE)") +
  ylab("Economic Loss (Billions of USD)") +
  labs(fill = "Damage Type") +
  ggtitle("Top 10 Event Types by Economic Loss (Property + Crop)")
- The analysis starts from the raw file repdata_data_StormData.csv (no external
  pre-processing).
- cache=TRUE is set on heavy chunks to speed up re-runs.

sessionInfo()
## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26100)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=Spanish_Mexico.utf8 LC_CTYPE=Spanish_Mexico.utf8
## [3] LC_MONETARY=Spanish_Mexico.utf8 LC_NUMERIC=C
## [5] LC_TIME=Spanish_Mexico.utf8
##
## time zone: America/Guayaquil
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.3.1 forcats_1.0.0 readr_2.1.5 stringr_1.5.1 ggplot2_3.5.1
## [6] dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] bit_4.0.5 gtable_0.3.5 jsonlite_1.8.8 highr_0.11
## [5] crayon_1.5.3 compiler_4.4.1 tidyselect_1.2.1 parallel_4.4.1
## [9] jquerylib_0.1.4 scales_1.3.0 yaml_2.3.9 fastmap_1.2.0
## [13] R6_2.5.1 labeling_0.4.3 generics_0.1.3 knitr_1.48
## [17] tibble_3.2.1 munsell_0.5.1 bslib_0.7.0 pillar_1.9.0
## [21] tzdb_0.4.0 rlang_1.1.4 utf8_1.2.4 cachem_1.1.0
## [25] stringi_1.8.4 xfun_0.45 sass_0.4.9 bit64_4.0.5
## [29] cli_3.6.3 withr_3.0.0 magrittr_2.0.3 digest_0.6.36
## [33] grid_4.4.1 vroom_1.6.5 rstudioapi_0.16.0 hms_1.1.3
## [37] lifecycle_1.0.4 vctrs_0.6.5 evaluate_0.24.0 glue_1.7.0
## [41] farver_2.1.2 codetools_0.2-20 fansi_1.0.6 colorspace_2.1-0
## [45] purrr_1.0.2 rmarkdown_2.27 tools_4.4.1 pkgconfig_2.0.3
## [49] htmltools_0.5.8.1