RResearch Peer-graded Assignment: Course Project 2-FELIZ CUMPLEAÑOS PRIMA DANIA

Load Libraries and Data

library(dplyr)     
library(stringr)  
library(ggplot2)  
library(knitr)     
library(scales)    


# Print the R version, platform, locale and package versions in use.
sessionInfo()

## R version 4.5.1 (2025-06-13 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 10 x64 (build 19045)
## 
## Matrix products: default
##   LAPACK version 3.12.1
## 
## locale:
## [1] LC_COLLATE=English_Canada.utf8  LC_CTYPE=English_Canada.utf8   
## [3] LC_MONETARY=English_Canada.utf8 LC_NUMERIC=C                   
## [5] LC_TIME=English_Canada.utf8    
## 
## time zone: America/Guatemala
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] scales_1.4.0  knitr_1.50    ggplot2_4.0.0 stringr_1.5.2 dplyr_1.1.4  
## 
## loaded via a namespace (and not attached):
##  [1] vctrs_0.6.5        cli_3.6.5          rlang_1.1.6        xfun_0.54         
##  [5] stringi_1.8.7      generics_0.1.4     S7_0.2.0           jsonlite_2.0.0    
##  [9] glue_1.8.0         htmltools_0.5.8.1  sass_0.4.10        rmarkdown_2.30    
## [13] grid_4.5.1         evaluate_1.0.5     jquerylib_0.1.4    tibble_3.3.0      
## [17] fastmap_1.2.0      yaml_2.3.10        lifecycle_1.0.4    compiler_4.5.1    
## [21] RColorBrewer_1.1-3 pkgconfig_2.0.3    rstudioapi_0.17.1  farver_2.1.2      
## [25] digest_0.6.37      R6_2.6.1           tidyselect_1.2.1   pillar_1.11.1     
## [29] magrittr_2.0.4     bslib_0.9.0        withr_3.0.2        gtable_0.3.6      
## [33] tools_4.5.1        cachem_1.1.0

# Relative path to the bzip2-compressed NOAA storm data CSV.
data_file <- "repdata_data_StormData.csv.bz2"

# Read the raw file, keeping text columns as character vectors
# rather than converting them to factors.
storm <- read.csv(file = data_file, stringsAsFactors = FALSE)

# Dimensions of the raw data: number of rows, then number of columns.
dim(storm)

## [1] 902297     37

# Inspect the structure of the columns used in the rest of the analysis.
str(
  storm[
    ,
    c(
      "BGN_DATE", "STATE", "EVTYPE", "FATALITIES", "INJURIES",
      "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
    )
  ]
)

## 'data.frame':    902297 obs. of  9 variables:
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...

# Translate damage exponent codes into numeric multipliers.
#
# exp: vector of exponent codes (e.g. "K", "m", "5", "").
#
# Codes are trimmed and upper-cased before matching:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#   a single digit d -> 10^d,
#   anything else (including empty strings and NA) -> 1.
#
# Returns a numeric vector with one multiplier per element of `exp`.
exp_to_num <- function(exp) {
  code <- toupper(trimws(exp))

  # Letter codes are resolved through a lookup table; codes that are
  # not in the table come back as NA for now.
  letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(letter_mult[match(code, names(letter_mult))])

  # A single digit is read as a power of ten.
  one_digit <- grepl("^[0-9]$", code)
  mult[one_digit] <- 10^as.numeric(code[one_digit])

  # Every code still unresolved leaves the damage value unchanged.
  mult[is.na(mult)] <- 1
  mult
}

# Convert the recorded damage figures to US dollars: look up the
# multiplier for each exponent code, apply it to the damage value,
# and add property and crop damage together.
storm <- storm %>%
  mutate(
    PROP_MULT = exp_to_num(PROPDMGEXP),
    CROP_MULT = exp_to_num(CROPDMGEXP)
  ) %>%
  mutate(
    PROPDMG_USD = PROPDMG * PROP_MULT,
    CROPDMG_USD = CROPDMG * CROP_MULT,
    TOTALDMG_USD = PROPDMG_USD + CROPDMG_USD
  )

# Distribution of the total damage per record.
summary(storm$TOTALDMG_USD)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.00e+00 0.00e+00 0.00e+00 5.29e+05 1.00e+03 1.15e+11

# Summary of the population health impact: total fatalities and
# injuries per event type, sorted from most to least harmful.
health_by_event <- storm %>%
  dplyr::group_by(EVTYPE) %>%
  dplyr::summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries   = sum(INJURIES, na.rm = TRUE),
    .groups    = "drop"
  ) %>%
  dplyr::mutate(Health_Total = Fatalities + Injuries) %>%
  dplyr::arrange(dplyr::desc(Health_Total))

# Keep the ten event types with the highest combined count and
# render them as a table.
top_health <- head(health_by_event, n = 10)
knitr::kable(
  x = top_health,
  caption = "Top 10 event types by total population health impact (fatalities + injuries)."
)

Top 10 event types by total population health impact (fatalities + injuries).
EVTYPE	Fatalities	Injuries	Health_Total
TORNADO	5633	91346	96979
EXCESSIVE HEAT	1903	6525	8428
TSTM WIND	504	6957	7461
FLOOD	470	6789	7259
LIGHTNING	816	5230	6046
HEAT	937	2100	3037
FLASH FLOOD	978	1777	2755
ICE STORM	89	1975	2064
THUNDERSTORM WIND	133	1488	1621
WINTER STORM	206	1321	1527

# Figure 1: horizontal bar chart of the top 10 event types by combined
# fatalities and injuries, with the largest total at the top.
ggplot2::ggplot(
  data = top_health,
  mapping = ggplot2::aes(
    x = stats::reorder(EVTYPE, Health_Total),
    y = Health_Total
  )
) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::ggtitle("Population Health Impact by Event Type (Top 10)") +
  ggplot2::xlab("Event Type (EVTYPE)") +
  ggplot2::ylab("Fatalities + Injuries") +
  ggplot2::theme_minimal(base_size = 12)

# Detach and unload plyr when it is attached to the search path.
# NOTE(review): presumably this keeps plyr from masking dplyr verbs; confirm.
if ("package:plyr" %in% search()) {
  detach("package:plyr", unload = TRUE)
}


# Fallback: define exp_to_num() only when no object of that name exists.
# It maps exponent codes to multipliers: H/K/M/B -> 1e2/1e3/1e6/1e9,
# a single digit d -> 10^d, and anything else (including NA) -> 1.
if (!exists("exp_to_num")) {
  exp_to_num <- function(exp) {
    code <- toupper(trimws(exp))

    # Letter codes via lookup table; unknown codes are NA for now.
    letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
    mult <- unname(letter_mult[match(code, names(letter_mult))])

    # A single digit is read as a power of ten.
    one_digit <- grepl("^[0-9]$", code)
    mult[one_digit] <- 10^as.numeric(code[one_digit])

    # Unresolved codes leave the damage value unchanged.
    mult[is.na(mult)] <- 1
    mult
  }
}


# Working copy of the storm data with damage multipliers and the
# corresponding property and crop damage amounts in US dollars.
storm_econ <- dplyr::mutate(
  storm,
  PROP_MULT = exp_to_num(PROPDMGEXP),
  CROP_MULT = exp_to_num(CROPDMGEXP),
  PROPDMG_USD = PROP_MULT * as.numeric(PROPDMG),
  CROPDMG_USD = CROP_MULT * as.numeric(CROPDMG)
)


# Total property and crop damage (USD) per event type, sorted from the
# largest to the smallest combined economic impact.
#
# PROPDMG_USD and CROPDMG_USD are already double (numeric value times a
# numeric multiplier), so no as.numeric() coercion is needed, and
# `.groups = "drop"` already returns an ungrouped tibble, which makes a
# trailing ungroup() unnecessary.
econ_by_event <- storm_econ %>%
  dplyr::group_by(EVTYPE) %>%
  dplyr::summarise(
    Property_USD = sum(PROPDMG_USD, na.rm = TRUE),
    Crop_USD     = sum(CROPDMG_USD, na.rm = TRUE),
    .groups      = "drop"
  ) %>%
  dplyr::mutate(Econ_Total = Property_USD + Crop_USD) %>%
  dplyr::arrange(dplyr::desc(Econ_Total))

# Show the column names of the per-event economic summary.
print(colnames(econ_by_event))

## [1] "EVTYPE"       "Property_USD" "Crop_USD"     "Econ_Total"

# Show the distribution of the combined damage totals across event types.
print(summary(econ_by_event$Econ_Total))

##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.000e+00 0.000e+00 0.000e+00 4.846e+08 8.500e+04 1.503e+11

# Ten event types with the largest combined property and crop damage.
top_econ <- head(econ_by_event, n = 10)

# Render the table with dollar-formatted amounts. The formatting is
# applied to a temporary copy, so top_econ itself stays numeric.
knitr::kable(
  x = dplyr::mutate(
    top_econ,
    dplyr::across(
      c(Property_USD, Crop_USD, Econ_Total),
      scales::label_dollar()
    )
  ),
  caption = "Top 10 event types by total economic impact (USD)."
)

Top 10 event types by total economic impact (USD).
EVTYPE	Property_USD	Crop_USD	Econ_Total
FLOOD	$144,657,709,807	$5,661,968,450	$150,319,678,257
HURRICANE/TYPHOON	$69,305,840,000	$2,607,872,800	$71,913,712,800
TORNADO	$56,947,380,676	$414,953,270	$57,362,333,946
STORM SURGE	$43,323,536,000	$5,000	$43,323,541,000
HAIL	$15,735,267,513	$3,025,954,473	$18,761,221,986
FLASH FLOOD	$16,822,673,978	$1,421,317,100	$18,243,991,078
DROUGHT	$1,046,106,000	$13,972,566,000	$15,018,672,000
HURRICANE	$11,868,319,010	$2,741,910,000	$14,610,229,010
RIVER FLOOD	$5,118,945,500	$5,029,459,000	$10,148,404,500
ICE STORM	$3,944,927,860	$5,022,113,500	$8,967,041,360

# Horizontal bar chart of the top 10 event types by combined property
# and crop damage, with the largest total at the top and a dollar axis.
ggplot2::ggplot(
  data = top_econ,
  mapping = ggplot2::aes(
    x = stats::reorder(EVTYPE, Econ_Total),
    y = Econ_Total
  )
) +
  ggplot2::geom_col() +
  ggplot2::coord_flip() +
  ggplot2::scale_y_continuous(labels = scales::label_dollar()) +
  ggplot2::ggtitle("Economic Impact by Event Type (Top 10)") +
  ggplot2::xlab("Event Type (EVTYPE)") +
  ggplot2::ylab("USD (Property + Crop Damages)") +
  ggplot2::theme_minimal(base_size = 12)

# Fallback: build the per-event health summary only when it does not
# exist yet (total fatalities and injuries per event type, sorted from
# most to least harmful).
if (!exists("health_by_event")) {
  health_by_event <- storm %>%
    dplyr::group_by(EVTYPE) %>%
    dplyr::summarise(
      Fatalities = sum(FATALITIES, na.rm = TRUE),
      Injuries   = sum(INJURIES, na.rm = TRUE),
      .groups    = "drop"
    ) %>%
    dplyr::mutate(Health_Total = Fatalities + Injuries) %>%
    dplyr::arrange(dplyr::desc(Health_Total))
}
# Fallback: build the per-event economic summary only when it does not
# exist yet.
if (!exists("econ_by_event")) {
  # Safe economic construction: exp_to_num() is (re)defined here so this
  # branch does not depend on an earlier definition.
  exp_to_num <- function(exp) {
    code <- toupper(trimws(exp))

    # Letter codes via lookup table; unknown codes are NA for now.
    letter_mult <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
    mult <- unname(letter_mult[match(code, names(letter_mult))])

    # A single digit is read as a power of ten.
    one_digit <- grepl("^[0-9]$", code)
    mult[one_digit] <- 10^as.numeric(code[one_digit])

    # Unresolved codes leave the damage value unchanged.
    mult[is.na(mult)] <- 1
    mult
  }

  # Damage multipliers and USD amounts per record.
  storm_econ <- dplyr::mutate(
    storm,
    PROP_MULT   = exp_to_num(PROPDMGEXP),
    CROP_MULT   = exp_to_num(CROPDMGEXP),
    PROPDMG_USD = PROP_MULT * as.numeric(PROPDMG),
    CROPDMG_USD = CROP_MULT * as.numeric(CROPDMG)
  )

  # Totals per event type, largest combined damage first.
  econ_totals <- dplyr::summarise(
    dplyr::group_by(storm_econ, EVTYPE),
    Property_USD = sum(PROPDMG_USD, na.rm = TRUE),
    Crop_USD     = sum(CROPDMG_USD, na.rm = TRUE),
    .groups      = "drop"
  )
  econ_by_event <- econ_totals %>%
    dplyr::mutate(
      Econ_Total = as.numeric(Property_USD) + as.numeric(Crop_USD)
    ) %>%
    dplyr::arrange(dplyr::desc(Econ_Total))
  rm(econ_totals)
}

# Top 3 event types for health impact and for economic impact.
top3_health <- head(health_by_event[["EVTYPE"]], n = 3)
top3_econ   <- head(econ_by_event[["EVTYPE"]], n = 3)

# Emit a Markdown level-3 heading for the conclusions section.
cat("### Conclusions\n\n")

## ### Conclusions

# Bullet listing the three event types with the highest health impact.
cat(
  "- Most harmful to population health  ",
  toString(top3_health),
  ".\n",
  sep = ""
)

## - Most harmful to population health  TORNADO, EXCESSIVE HEAT, TSTM WIND.

# Bullet listing the three event types with the highest economic impact.
cat(
  "- Greatest economic consequences  ",
  toString(top3_econ),
  ".\n",
  sep = ""
)

## - Greatest economic consequences  FLOOD, HURRICANE/TYPHOON, TORNADO.

# Bullet naming the data source and the period it covers.
cat("- Results are derived from the original NOAA Storm Database (1950–2011) .\n")

## - Results are derived from the original NOAA Storm Database (1950–2011) .

RResearch Peer-graded Assignment: Course Project 2-FELIZ CUMPLEAÑOS PRIMA DANIA

NOAA Storm Database — Health & Economic Impacts (1950–2011)

Oscar Trigueros

2025-11-09

Load Libraries and Data