Title: Analysis of the health and economic impact of severe weather events

Synopsis: This report analyzes NOAA storm data to identify the event types with the greatest impact on population health and the greatest economic consequences. First, the event type categories were tidied up and consolidated into a smaller set of categories. The damage exponent codes were then converted to numeric multipliers. Next, total fatalities, injuries, property damage, and crop damage were summarized by event type. Finally, the results were visualized using bar plots.

Load libraries and set seed

# library load
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
# set seed for reproducibility
# NOTE(review): no random-number calls appear in the visible analysis, so this
# seed may have no effect on the results; confirm before removing it.
set.seed(827)

Read data

# Read the bzip2-compressed storm data CSV from the current working directory.
# No column types are supplied, so readr guesses them from the file contents.
df <- readr::read_csv("./repdata_data_StormData.csv.bz2")
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl  (1): COUNTYENDN
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Data Processing

# Restrict the data to the event type plus the casualty and damage fields
df_selected <- select(
  df,
  EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)

# Keep only events with at least one casualty or a positive damage figure
df_filtered <- filter(
  df_selected,
  FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
)

# Convert damage-exponent codes to numeric multipliers.
#
# Args:
#   exp: vector of exponent codes (character, or anything as.character() can
#        convert). Letter codes are matched case-insensitively.
#
# Returns:
#   A double vector the same length as `exp`:
#     H = 1e2, K = 1e3, M = 1e6, B = 1e9, a digit d = 10^d,
#     blank ("") or missing (NA) = 1, any other code = NA.
convert_damage <- function(exp) {
  codes <- toupper(as.character(exp))

  # readr::read_csv() parses empty fields as NA by default, so a blank
  # exponent arrives here as NA rather than "". Both mean "no scaling";
  # leaving NA in place would turn the damage amount itself into NA.
  codes[is.na(codes)] <- ""

  known <- c(
    "H", "K", "M", "B", "",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9"
  )
  multiplier <- c(
    1e2, 1e3, 1e6, 1e9, 1,
    1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9
  )

  # match() returns NA for unrecognised codes, which indexes to NA_real_.
  multiplier[match(codes, known)]
}

# Scale each raw damage figure by the multiplier derived from its exponent
# code, then discard the exponent columns, which are no longer needed.
df_converted <- df_filtered %>%
  mutate(
    PROPDMG = PROPDMG * convert_damage(PROPDMGEXP),
    CROPDMG = CROPDMG * convert_damage(CROPDMGEXP)
  ) %>%
  select(-c(PROPDMGEXP, CROPDMGEXP))

Clean up EVTYPE

# Normalise event types: strip surrounding whitespace, then upper-case
df_converted <- mutate(df_converted, EVTYPE = toupper(trimws(EVTYPE)))

# Count the distinct event types that remain after normalisation
evtype <- sort(unique(df_converted$EVTYPE))
length(evtype)
## [1] 444
# Group similar event types
library(dplyr)
library(stringr)

# NOAA 48 target labels
# NOTE(review): `noaa48` is not referenced anywhere in the visible code; the
# case_when() mapping that follows uses its own, coarser set of labels.
# Confirm whether this vector is still needed.
noaa48 <- c(
  "Astronomical Low Tide","Avalanche","Blizzard","Coastal Flood","Cold/Wind Chill",
  "Debris Flow","Dense Fog","Dense Smoke","Drought","Dust Devil","Dust Storm",
  "Excessive Heat","Extreme Cold/Wind Chill","Flash Flood","Flood","Frost/Freeze",
  "Funnel Cloud","Freezing Fog","Hail","Heat","Heavy Rain","Heavy Snow","High Surf",
  "High Wind","Hurricane (Typhoon)","Ice Storm","Lake-Effect Snow","Lakeshore Flood",
  "Lightning","Marine Hail","Marine High Wind","Marine Strong Wind",
  "Marine Thunderstorm Wind","Rip Current","Seiche","Sleet","Storm Surge/Tide",
  "Strong Wind","Thunderstorm Wind","Tornado","Tropical Depression","Tropical Storm",
  "Tsunami","Volcanic Ash","Waterspout","Wildfire","Winter Storm","Winter Weather"
)

# Map the free-text EVTYPE values onto a small set of standardized categories
# (EVTYPE_std).
#
# Step 1 normalises the text: upper-case, backslashes -> "/", runs of
# whitespace collapsed to a single space, surrounding whitespace trimmed.
# Step 2 assigns a category with case_when(). case_when() uses the FIRST
# matching rule, so rule order matters: specific rules must come before the
# generic ones that would otherwise swallow them. In particular the marine
# wind and wind-chill rules sit above the generic Wind rule, whose
# `\bWIND\b` / `TSTM` patterns would otherwise claim "MARINE TSTM WIND",
# "COLD/WIND CHILL", "EXTREME WIND CHILL", etc.
df_converted <- df_converted %>%
  mutate(
    EVTYPE = toupper(EVTYPE),
    EVTYPE = str_replace_all(EVTYPE, "\\\\", "/"),
    EVTYPE = str_replace_all(EVTYPE, "[[:space:]]+", " "),
    EVTYPE = str_trim(EVTYPE)
  ) %>%

  mutate(EVTYPE_std = case_when(

    # Floods (storm surge and coastal storms are grouped with floods here)
    str_detect(EVTYPE, "FLOOD|RAPIDLY RISING WATER|URBAN.*SMALL|URBAN/SMALL STREAM|URBAN AND SMALL|URBAN SMALL|URBAN/SML STREAM FLD") ~ "Flood",
    str_detect(EVTYPE, "STORM SURGE|STORM SURGE/TIDE|COASTAL SURGE|COASTAL STORM|COASTALSTORM") ~ "Flood",
    # The next three patterns all contain "FLOOD" and are therefore already
    # covered by the first rule; they are kept to document the intent.
    str_detect(EVTYPE, "COASTAL FLOOD|EROSION/CSTL FLOOD|COASTAL FLOODING") ~ "Flood",
    str_detect(EVTYPE, "LAKESHORE FLOOD") ~ "Flood",
    str_detect(EVTYPE, "FLASH FLOOD") ~ "Flood",

    # Lightning
    str_detect(EVTYPE, "LIGHTNING|LIGNTNING|LIGHTING") ~ "Lightning",

    # Tropical & hurricane
    str_detect(EVTYPE, "HURRICANE|TYPHOON") ~ "Hurricane",
    str_detect(EVTYPE, "TROPICAL STORM") ~ "Tropical",
    str_detect(EVTYPE, "TROPICAL DEPRESSION") ~ "Tropical",

    # Tornado/funnel/waterspout
    str_detect(EVTYPE, "TORNADO|TORNDAO") ~ "Tornado",
    str_detect(EVTYPE, "FUNNEL CLOUD|LANDSPOUT") ~ "Funnel Cloud",
    str_detect(EVTYPE, "WATERSPOUT") ~ "Waterspout",

    # Surf / marine waves
    str_detect(EVTYPE, "RIP CURRENT|COASTAL EROSION") ~ "Marine Wave",
    str_detect(EVTYPE, "ASTRONOMICAL LOW TIDE|ASTRONOMICAL HIGH TIDE") ~ "Marine Wave",
    str_detect(EVTYPE, "HIGH SURF|HEAVY SURF|HAZARDOUS SURF|ROUGH SURF|SWELL|HIGH TIDES|HIGH WAVES") ~ "Marine Wave",
    str_detect(EVTYPE, "STORM TIDE") ~ "Marine Wave",

    # Marine winds/hail/thunderstorm
    # Must precede the generic Wind rule, which also matches these names.
    str_detect(EVTYPE, "MARINE TSTM WIND|MARINE THUNDERSTORM WIND") ~ "Marine Wind",
    str_detect(EVTYPE, "MARINE STRONG WIND") ~ "Marine Wind",
    str_detect(EVTYPE, "MARINE HIGH WIND") ~ "Marine Wind",
    str_detect(EVTYPE, "MARINE HAIL") ~ "Marine Hail",

    # Wind chill is a cold hazard, not a wind hazard; catch it before the
    # generic Wind rule matches the standalone word "WIND".
    str_detect(EVTYPE, "COLD/WIND ?CHILL|EXTREME WIND ?CHILL") ~ "Cold/Freeze",

    # Wind
    str_detect(EVTYPE, "\\bWIND\\b|HIGH WIND|STRONG WIND|WIND STORM|GUST|STRONG WIND|TSTM|THUNDERSTORM|DOWNBURST|MICROBURST") ~ "Wind",

    # Winter
    str_detect(EVTYPE, "BLIZZARD") ~ "Blizzard",
    str_detect(EVTYPE, "WINTER WEATHER|WINTRY MIX") ~ "Winter",
    str_detect(EVTYPE, "WINTER STORM") ~ "Winter",
    str_detect(EVTYPE, "LAKE[- ]?EFFECT SNOW") ~ "Snow",
    str_detect(EVTYPE, "HEAVY SNOW") ~ "Snow",
    str_detect(EVTYPE, "SLEET") ~ "Sleet",
    str_detect(EVTYPE, "ICE STORM") ~ "Ice Storm",

    # Ice/Freeze/Cold
    str_detect(EVTYPE, "FROST/FREEZE|FROST\\\\FREEZE|FROST|FREEZE") ~ "Cold/Freeze",
    str_detect(EVTYPE, "EXTREME COLD|EXTREME WIND ?CHILL") ~ "Cold/Freeze",
    str_detect(EVTYPE, "COLD/WIND ?CHILL|COLD WEATHER|COLD WAVE|LOW TEMPERATURE") ~ "Cold/Freeze",
    str_detect(EVTYPE, "FREEZING DRIZZLE|FREEZING RAIN|FREEZING SPRAY|ICE|ICY ROADS|ICE ON ROAD|ICE ROADS|GLAZE( ICE)?|GLAZE/ICE STORM|HEAVY MIX|MIXED PRECIP") ~ "Cold/Freeze",

    # Heat
    str_detect(EVTYPE, "EXCESSIVE HEAT") ~ "Heat",
    str_detect(EVTYPE, "HEAT WAVE|HEAT") ~ "Heat",

    # Precipitation (rain)
    str_detect(EVTYPE, "HEAVY RAIN|TORRENTIAL RAINFALL|PRECIPITATION|RAINSTORM|RAIN/SNOW|RAIN/WIND|RAIN") ~ "Rain",

    # Dust / Fog / Smoke
    str_detect(EVTYPE, "DUST DEVIL") ~ "Dust",
    str_detect(EVTYPE, "DUST STORM|BLOWING DUST") ~ "Dust",
    str_detect(EVTYPE, "FREEZING FOG") ~ "Fog",
    str_detect(EVTYPE, "\\bFOG\\b|DENSE FOG") ~ "Fog",
    str_detect(EVTYPE, "DENSE SMOKE") ~ "Smoke",

    # Geophysical
    str_detect(EVTYPE, "AVALANCHE|AVALANCE") ~ "Avalanche",
    str_detect(EVTYPE, "DEBRIS FLOW|LANDSLIDE|LANDSLUMP|MUD ?SLIDE|ROCK SLIDE|MUDSLIDE|MUD SLIDES") ~ "Landslide",
    str_detect(EVTYPE, "SEICHE") ~ "Seiche",
    str_detect(EVTYPE, "TSUNAMI") ~ "Tsunami",
    str_detect(EVTYPE, "VOLCANIC ASH|VOLCANIC") ~ "Volcanic",

    # Drought / Wildfire
    str_detect(EVTYPE, "DROUGHT") ~ "Drought",
    str_detect(EVTYPE, "WILDFIRE|WILD/FOREST FIRE|FOREST FIRE|WILD FIRES|WILDFIRES|BRUSH FIRE|GRASS FIRE") ~ "Fire",

    # Hail
    str_detect(EVTYPE, "\\bHAIL\\b|HAILSTORM|HAIL \\d") ~ "Hail",

    # OTHER: anything not matched by a rule above
    TRUE ~ "Other"
  ))

# Re-count the categories after standardization
evtype <- sort(unique(df_converted$EVTYPE_std))
length(evtype)
## [1] 30

Results

Q1. Events most harmful to population health

# Total fatalities and injuries for each standardized event category
health <- summarise(
  group_by(df_converted, EVTYPE_std),
  total_fatalities = sum(FATALITIES, na.rm = TRUE),
  total_injuries = sum(INJURIES, na.rm = TRUE)
)
health <- ungroup(health)

# Long format: one row per (event category, measure) pair, ready for plotting
health_long <- pivot_longer(
  health,
  cols = c(total_fatalities, total_injuries),
  names_to = "type",
  values_to = "count"
)

# Side-by-side bars of fatalities and injuries for every event category

ggplot(health_long, aes(x = reorder(EVTYPE_std, count), y = count, fill = type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Total Fatalities and Injuries by Event Type",
    x = "Event Type",
    y = "Count",
    fill = "Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

message("Most harmful events to population health are Tornado, Wind, Heat, Flood, and Lightning.")
## Most harmful events to population health are Tornado, Wind, Heat, Flood, and Lightning.

Q2. Greatest economic consequences

# Total property and crop damage for each standardized event category
economic <- summarise(
  group_by(df_converted, EVTYPE_std),
  total_property_damage = sum(PROPDMG, na.rm = TRUE),
  total_crop_damage = sum(CROPDMG, na.rm = TRUE)
)
economic <- ungroup(economic)

# Long format: one row per (event category, damage kind) pair, for plotting
economic_long <- pivot_longer(
  economic,
  cols = c(total_property_damage, total_crop_damage),
  names_to = "type",
  values_to = "amount"
)

# Side-by-side bars of property and crop damage for every event category
ggplot(economic_long, aes(x = reorder(EVTYPE_std, amount), y = amount, fill = type)) +
  geom_col(position = "dodge") +
  labs(
    title = "Total Property and Crop Damage by Event Type",
    x = "Event Type",
    y = "Amount (USD)",
    fill = "Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

message("Events with greatest economic consequences are Flood, Hurricane, Tornado, Wind, and Hail.")
## Events with greatest economic consequences are Flood, Hurricane, Tornado, Wind, and Hail.
# Report
# Final summary: emits the two conclusions as messages. The text is
# hard-coded rather than computed from `health` / `economic`, so it must be
# revised by hand if the data or the event-type mapping changes.
message("Most harmful events to population health are Tornado, Wind, Heat, Flood, and Lightning.\n")
## Most harmful events to population health are Tornado, Wind, Heat, Flood, and Lightning.
message("Events with greatest economic consequences are Flood, Hurricane, Tornado, Wind, and Hail.\n")
## Events with greatest economic consequences are Flood, Hurricane, Tornado, Wind, and Hail.