Load Libraries and Data
library(dplyr)
library(stringr)
library(ggplot2)
library(knitr)
library(scales)
sessionInfo()
## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_Canada.utf8 LC_CTYPE=English_Canada.utf8
## [3] LC_MONETARY=English_Canada.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_Canada.utf8
##
## time zone: America/Guatemala
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] scales_1.4.0 knitr_1.50 ggplot2_4.0.0 stringr_1.5.2 dplyr_1.1.4
##
## loaded via a namespace (and not attached):
## [1] vctrs_0.6.5 cli_3.6.5 rlang_1.1.6 xfun_0.54
## [5] stringi_1.8.7 generics_0.1.4 S7_0.2.0 jsonlite_2.0.0
## [9] glue_1.8.0 htmltools_0.5.8.1 sass_0.4.10 rmarkdown_2.30
## [13] grid_4.5.1 evaluate_1.0.5 jquerylib_0.1.4 tibble_3.3.0
## [17] fastmap_1.2.0 yaml_2.3.10 lifecycle_1.0.4 compiler_4.5.1
## [21] RColorBrewer_1.1-3 pkgconfig_2.0.3 rstudioapi_0.17.1 farver_2.1.2
## [25] digest_0.6.37 R6_2.6.1 tidyselect_1.2.1 pillar_1.11.1
## [29] magrittr_2.0.4 bslib_0.9.0 withr_3.0.2 gtable_0.3.6
## [33] tools_4.5.1 cachem_1.1.0
# Load the raw storm data from the bzip2-compressed CSV, keeping text
# columns as character vectors rather than factors.
data_file <- "repdata_data_StormData.csv.bz2"
storm <- read.csv(data_file, stringsAsFactors = FALSE)
dim(storm) # number of rows, then number of columns
## [1] 902297 37
# Inspect the structure of the date, state, event-type, casualty and
# damage columns.
str(storm[, c("BGN_DATE", "STATE", "EVTYPE", "FATALITIES", "INJURIES",
"PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")])
## 'data.frame': 902297 obs. of 9 variables:
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
#' Convert damage-exponent codes to numeric multipliers.
#'
#' Codes are trimmed and upper-cased before matching. "H", "K", "M" and "B"
#' map to 1e2, 1e3, 1e6 and 1e9; a single digit d maps to 10^d; every other
#' value (including "" and NA) maps to 1.
#'
#' @param exp Vector of exponent codes.
#' @return Numeric vector of multipliers, the same length as `exp`.
exp_to_num <- function(exp) {
  code <- toupper(trimws(exp)) # clean up and convert to upper case
  letter_codes <- c("H", "K", "M", "B") # hundred, thousand, million, billion
  letter_values <- c(1e2, 1e3, 1e6, 1e9)
  # match() yields NA for any code that is not one of the four letters
  mult <- letter_values[match(code, letter_codes)]
  names(mult) <- names(code) # keep any names carried by the input
  single_digit <- grepl("^[0-9]$", code)
  mult[single_digit] <- 10^as.numeric(code[single_digit])
  # Everything still unrecognised falls back to a multiplier of 1
  mult[is.na(mult)] <- 1
  mult
}
# Convert the raw damage figures to dollar amounts: look up the multiplier
# encoded in each exponent column, scale the damage values by it, and add
# property and crop damage together.
storm <- storm %>%
  dplyr::mutate(
    PROP_MULT = exp_to_num(PROPDMGEXP),
    CROP_MULT = exp_to_num(CROPDMGEXP)
  ) %>%
  dplyr::mutate(
    PROPDMG_USD = PROP_MULT * PROPDMG,
    CROPDMG_USD = CROP_MULT * CROPDMG,
    TOTALDMG_USD = PROPDMG_USD + CROPDMG_USD
  )
summary(storm[["TOTALDMG_USD"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00e+00 0.00e+00 0.00e+00 5.29e+05 1.00e+03 1.15e+11
# Summarise the population health impact (fatalities and injuries) of each
# event type, ordered from the highest to the lowest combined count.
health_by_event <- storm %>%
  dplyr::group_by(EVTYPE) %>%
  dplyr::summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::mutate(Health_Total = Fatalities + Injuries) %>%
  dplyr::arrange(dplyr::desc(Health_Total))
# Keep the ten event types with the highest combined count
top_health <- head(health_by_event, n = 10)
knitr::kable(
  x = top_health,
  caption = "Top 10 event types by total population health impact (fatalities + injuries)."
)
Top 10 event types by total population health impact
(fatalities + injuries).

| EVTYPE | Fatalities | Injuries | Health_Total |
|:------------------|-----------:|---------:|-------------:|
| TORNADO | 5633 | 91346 | 96979 |
| EXCESSIVE HEAT | 1903 | 6525 | 8428 |
| TSTM WIND | 504 | 6957 | 7461 |
| FLOOD | 470 | 6789 | 7259 |
| LIGHTNING | 816 | 5230 | 6046 |
| HEAT | 937 | 2100 | 3037 |
| FLASH FLOOD | 978 | 1777 | 2755 |
| ICE STORM | 89 | 1975 | 2064 |
| THUNDERSTORM WIND | 133 | 1488 | 1621 |
| WINTER STORM | 206 | 1321 | 1527 |
# Fig 1: horizontal bar chart of the top-10 event types, ordered by their
# combined fatalities and injuries.
ggplot2::ggplot(
  data = top_health,
  mapping = ggplot2::aes(x = reorder(EVTYPE, Health_Total), y = Health_Total)
) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::theme_minimal(base_size = 12) +
  ggplot2::labs(
    y = "Fatalities + Injuries",
    x = "Event Type (EVTYPE)",
    title = "Population Health Impact by Event Type (Top 10)"
  )

# Detach plyr when it is on the search path.
# NOTE(review): presumably guards against plyr masking dplyr verbs - confirm.
if ("package:plyr" %in% search()) {
  detach("package:plyr", unload = TRUE)
}
# Define the exponent-to-multiplier helper only when no object with that
# name exists yet.
if (!exists("exp_to_num")) {
  exp_to_num <- function(exp) {
    code <- toupper(trimws(exp))
    # "H", "K", "M", "B" -> 1e2, 1e3, 1e6, 1e9; NA for anything else
    mult <- c(1e2, 1e3, 1e6, 1e9)[match(code, c("H", "K", "M", "B"))]
    names(mult) <- names(code) # keep any names carried by the input
    single_digit <- grepl("^[0-9]$", code)
    mult[single_digit] <- 10^as.numeric(code[single_digit])
    # Unrecognised codes fall back to a multiplier of 1
    mult[is.na(mult)] <- 1
    mult
  }
}
# Build the working table for the economic analysis: multiplier columns for
# property and crop damage, and the damage values scaled by them.
storm_econ <- dplyr::mutate(
  storm,
  PROP_MULT = exp_to_num(PROPDMGEXP),
  CROP_MULT = exp_to_num(CROPDMGEXP),
  PROPDMG_USD = PROP_MULT * as.numeric(PROPDMG),
  CROPDMG_USD = CROP_MULT * as.numeric(CROPDMG)
)
# Total property and crop damage per event type, plus their sum, ordered from
# the most to the least costly event type.
# PROPDMG_USD and CROPDMG_USD are already doubles (numeric * multiplier), so
# no as.numeric() coercion is needed, and `.groups = "drop"` already returns
# an ungrouped table, so no trailing ungroup() is needed either.
econ_by_event <- storm_econ %>%
  dplyr::group_by(EVTYPE) %>%
  dplyr::summarise(
    Property_USD = sum(PROPDMG_USD, na.rm = TRUE),
    Crop_USD = sum(CROPDMG_USD, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  dplyr::mutate(Econ_Total = Property_USD + Crop_USD) %>%
  dplyr::arrange(dplyr::desc(Econ_Total))
# Sanity check: print the column names of the per-event economic summary and
# the distribution of the combined damage totals.
print(colnames(econ_by_event))
## [1] "EVTYPE" "Property_USD" "Crop_USD" "Econ_Total"
print(summary(econ_by_event$Econ_Total))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000e+00 0.000e+00 0.000e+00 4.846e+08 8.500e+04 1.503e+11
# Ten costliest event types; the three dollar columns are formatted as
# currency strings for the displayed table only (top_econ stays numeric).
top_econ <- head(econ_by_event, n = 10)
knitr::kable(
  x = dplyr::mutate(
    top_econ,
    dplyr::across(
      c(Property_USD, Crop_USD, Econ_Total),
      scales::label_dollar()
    )
  ),
  caption = "Top 10 event types by total economic impact (USD)."
)
Top 10 event types by total economic impact (USD).

| EVTYPE | Property_USD | Crop_USD | Econ_Total |
|:------------------|-----------------:|----------------:|-----------------:|
| FLOOD | $144,657,709,807 | $5,661,968,450 | $150,319,678,257 |
| HURRICANE/TYPHOON | $69,305,840,000 | $2,607,872,800 | $71,913,712,800 |
| TORNADO | $56,947,380,676 | $414,953,270 | $57,362,333,946 |
| STORM SURGE | $43,323,536,000 | $5,000 | $43,323,541,000 |
| HAIL | $15,735,267,513 | $3,025,954,473 | $18,761,221,986 |
| FLASH FLOOD | $16,822,673,978 | $1,421,317,100 | $18,243,991,078 |
| DROUGHT | $1,046,106,000 | $13,972,566,000 | $15,018,672,000 |
| HURRICANE | $11,868,319,010 | $2,741,910,000 | $14,610,229,010 |
| RIVER FLOOD | $5,118,945,500 | $5,029,459,000 | $10,148,404,500 |
| ICE STORM | $3,944,927,860 | $5,022,113,500 | $8,967,041,360 |
# Horizontal bar chart of the top-10 event types, ordered by combined
# property and crop damage, with the value axis labelled in dollars.
ggplot2::ggplot(
  data = top_econ,
  mapping = ggplot2::aes(x = reorder(EVTYPE, Econ_Total), y = Econ_Total)
) +
  ggplot2::geom_col() +
  ggplot2::scale_y_continuous(labels = scales::label_dollar()) +
  ggplot2::coord_flip() +
  ggplot2::theme_minimal(base_size = 12) +
  ggplot2::labs(
    y = "USD (Property + Crop Damages)",
    x = "Event Type (EVTYPE)",
    title = "Economic Impact by Event Type (Top 10)"
  )

# Rebuild the per-event health summary only when it is not already defined.
if (!exists("health_by_event")) {
  health_by_event <- storm %>%
    dplyr::group_by(EVTYPE) %>%
    dplyr::summarise(
      Fatalities = sum(FATALITIES, na.rm = TRUE),
      Injuries = sum(INJURIES, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    dplyr::mutate(Health_Total = Fatalities + Injuries) %>%
    dplyr::arrange(dplyr::desc(Health_Total))
}
# Rebuild the per-event economic summary only when it is not already defined.
if (!exists("econ_by_event")) {
  # Safe construction of the economic summary: the helper is (re)defined here
  # so this branch does not rely on an earlier definition.
  exp_to_num <- function(exp) {
    code <- toupper(trimws(exp))
    # "H", "K", "M", "B" -> 1e2, 1e3, 1e6, 1e9; NA for anything else
    mult <- c(1e2, 1e3, 1e6, 1e9)[match(code, c("H", "K", "M", "B"))]
    names(mult) <- names(code) # keep any names carried by the input
    single_digit <- grepl("^[0-9]$", code)
    mult[single_digit] <- 10^as.numeric(code[single_digit])
    # Unrecognised codes fall back to a multiplier of 1
    mult[is.na(mult)] <- 1
    mult
  }
  # Scale the raw damage values by their multipliers
  storm_econ <- dplyr::mutate(
    storm,
    PROP_MULT = exp_to_num(PROPDMGEXP),
    CROP_MULT = exp_to_num(CROPDMGEXP),
    PROPDMG_USD = PROP_MULT * as.numeric(PROPDMG),
    CROPDMG_USD = CROP_MULT * as.numeric(CROPDMG)
  )
  # Sum per event type and order from most to least costly
  damage_totals <- dplyr::summarise(
    dplyr::group_by(storm_econ, EVTYPE),
    Property_USD = sum(PROPDMG_USD, na.rm = TRUE),
    Crop_USD = sum(CROPDMG_USD, na.rm = TRUE),
    .groups = "drop"
  )
  econ_by_event <- damage_totals %>%
    dplyr::mutate(Econ_Total = as.numeric(Property_USD) + as.numeric(Crop_USD)) %>%
    dplyr::arrange(dplyr::desc(Econ_Total))
  rm(damage_totals) # do not leave the intermediate table behind
}
# Top 3 event types for health impact and for economic impact; both summary
# tables are already sorted in descending order.
top3_health <- head(health_by_event[["EVTYPE"]], n = 3)
top3_econ <- head(econ_by_event[["EVTYPE"]], n = 3)
cat("### Conclusions", "\n\n", sep = "")
## ### Conclusions
cat(paste0("- Most harmful to population health ",
           toString(top3_health), ".\n"))
## - Most harmful to population health TORNADO, EXCESSIVE HEAT, TSTM WIND.
cat(paste0("- Greatest economic consequences ",
           toString(top3_econ), ".\n"))
## - Greatest economic consequences FLOOD, HURRICANE/TYPHOON, TORNADO.
cat("- Results are derived from the original NOAA Storm Database (1950–2011) .\n")
## - Results are derived from the original NOAA Storm Database (1950–2011) .