Synopsis

This report explores the NOAA Storm Events Database to identify which types of weather events are most harmful to population health and which cause the greatest economic losses across the United States. The analysis starts from the raw, bzip2-compressed CSV file (.csv.bz2) and shows all of the code used, so the results are fully reproducible.

Data Processing

library(data.table)
library(tidyverse)
library(R.utils)
library(knitr)
library(cowplot)
library(scales)

library(dplyr)
library(ggplot2)
library(knitr)
library(cowplot)

# Load the raw storm data straight from the compressed CSV file.
storm <- fread(file = "repdata_data_StormData.csv.bz2")
# Number of rows and columns in the raw table.
c(nrow(storm), ncol(storm))
## [1] 902297     37
# Column names available for the analysis.
colnames(storm)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# --- Data cleaning and preparation ---

# Translate damage exponent codes into numeric multipliers.
#
# e: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
#
# Returns a numeric vector the same length as `e`:
#   "H"/"h" -> 1e2, "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9.
# Every other value (including "", NA, digits and symbols) maps to 1, so the
# damage figure is left unscaled.
#
# The result is always numeric, even for zero-length input (a nested ifelse()
# chain would return logical(0) in that case).
exp_values <- function(e) {
  codes <- c("H", "h", "K", "k", "M", "m", "B", "b")
  multipliers <- c(1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9)

  # match() yields NA for unrecognised codes; those fall back to 1.
  out <- multipliers[match(e, codes)]
  out[is.na(out)] <- 1
  out
}

# Convert both exponent columns to numeric multipliers (added by reference).
storm[, PROPDMGEXP2 := exp_values(PROPDMGEXP)]
storm[, CROPDMGEXP2 := exp_values(CROPDMGEXP)]
# Scale the reported damage figures by their multipliers.
storm[, `:=`(
  PROPDMGVAL = PROPDMG * PROPDMGEXP2,
  CROPDMGVAL = CROPDMG * CROPDMGEXP2
)]

# Normalise event type labels: trim whitespace, upper-case, then collapse a few
# spelling variants. The substitutions are applied in the listed order, so
# "TSTM WIND" is expanded before the plural "THUNDERSTORM WINDS" is collapsed.
storm$EVTYPE <- local({
  recodes <- c(
    "TSTM WIND"            = "THUNDERSTORM WIND",
    "THUNDERSTORM WINDS"   = "THUNDERSTORM WIND",
    "HURRICANE/TYPHOON"    = "HURRICANE",
    "WILD/FOREST FIRE"     = "WILDFIRE",
    "URBAN/SML STREAM FLD" = "FLOOD"
  )
  labels <- toupper(trimws(storm$EVTYPE))
  for (old in names(recodes)) {
    labels <- gsub(old, recodes[[old]], labels)
  }
  labels
})


# --- Health impact (fatalities and injuries) ---
# Total fatalities and injuries per event type, ranked by their combined count.
health_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    across(c(FATALITIES, INJURIES), function(x) sum(x, na.rm = TRUE))
  ) %>%
  mutate(TOTAL_HEALTH_IMPACT = FATALITIES + INJURIES) %>%
  arrange(desc(TOTAL_HEALTH_IMPACT))

# Keep the five most harmful event types and render them as a table.
top5_health <- head(health_impact, n = 5)
kable(top5_health, caption = "Top 5 events by fatalities and injuries")
Top 5 events by fatalities and injuries
EVTYPE FATALITIES INJURIES TOTAL_HEALTH_IMPACT
TORNADO 5633 91346 96979
THUNDERSTORM WIND 701 9353 10054
EXCESSIVE HEAT 1903 6525 8428
FLOOD 498 6868 7366
LIGHTNING 816 5230 6046
# --- Economic impact (property and crop damage) ---
# Total property and crop damage per event type, ranked by their combined
# value. The total is computed from the two per-group sums defined just before
# it in the same summarise() call.
economic_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPERTY_DAMAGE = sum(PROPDMGVAL, na.rm = TRUE),
    CROP_DAMAGE = sum(CROPDMGVAL, na.rm = TRUE),
    TOTAL_ECONOMIC_IMPACT = PROPERTY_DAMAGE + CROP_DAMAGE
  ) %>%
  arrange(desc(TOTAL_ECONOMIC_IMPACT))

# Keep the five costliest event types and render them as a table.
top5_economic <- head(economic_impact, n = 5)
kable(top5_economic, caption = "Top 5 events by economic damage")
Top 5 events by economic damage
EVTYPE PROPERTY_DAMAGE CROP_DAMAGE TOTAL_ECONOMIC_IMPACT
FLOOD 144716019457 5670456550 150386476007
HURRICANE 81174159010 5349782800 86523941810
TORNADO 56937160779 414953270 57352114049
STORM SURGE 43323536000 5000 43323541000
HAIL 15732267543 3025954473 18758222016
# --- Visualisation of results ---
# Horizontal bar chart of the five event types with the most casualties.
# Bars are ordered by total impact; counts use thousands separators.
p1 <- ggplot(
  top5_health,
  aes(x = reorder(EVTYPE, TOTAL_HEALTH_IMPACT), y = TOTAL_HEALTH_IMPACT)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  scale_y_continuous(labels = scales::label_comma()) +
  labs(
    title = "Top 5 Events: Health Impact",
    x = "Event Type",
    y = "Fatalities + Injuries"
  )

# Horizontal bar chart of the five costliest event types.
# Damage totals are in the order of 1e10-1e11 dollars, so the axis is shown
# in billions of dollars instead of raw (scientific-notation) values.
p2 <- ggplot(
  top5_economic,
  aes(x = reorder(EVTYPE, TOTAL_ECONOMIC_IMPACT), y = TOTAL_ECONOMIC_IMPACT)
) +
  geom_col(fill = "darkred") +
  coord_flip() +
  scale_y_continuous(
    labels = scales::label_dollar(scale = 1e-9, suffix = "B")
  ) +
  labs(
    title = "Top 5 Events: Economic Damage",
    x = "Event Type",
    y = "Total Damage (USD)"
  )

# Stack both charts in a single column.
plot_grid(p1, p2, ncol = 1)