Synopsis

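This analysis uses the NOAA Storm Database to identify which types of weather events are most harmful to population health and which cause the greatest economic damage. Health impact is measured by the total fatalities and injuries recorded for each event type; economic impact is measured by the combined property and crop damage. Tornadoes were found to cause the most fatalities (5,633) and injuries (91,346), while floods caused the greatest economic losses (about 150 billion US dollars in combined damage).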

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load data
# The raw file is looked up relative to the working directory (instead of a
# user-specific absolute path) so the analysis can be reproduced on any
# machine; it is downloaded on the first run if it is not already present.
storm_file <- "repdata_data_StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive intact on Windows.
  download.file(storm_url, destfile = storm_file, mode = "wb")
}
# read.csv() reads the bzip2-compressed file directly.
storm_data <- read.csv(storm_file)

# Data cleaning and transformation
#' Convert a damage exponent code to its numeric multiplier
#'
#' Vectorised: accepts a single code or a whole column of codes.
#'
#' @param exp Exponent code(s): "K"/"k" (thousands), "M"/"m" (millions) or
#'   "B"/"b" (billions). Any other value, including `NA` and the empty
#'   string, is given a multiplier of 1.
#' @return A numeric vector the same length as `exp`.
convert_exp <- function(exp) {
  codes <- c("K", "k", "M", "m", "B", "b")
  multipliers <- c(1e3, 1e3, 1e6, 1e6, 1e9, 1e9)
  # match() yields NA for unrecognised codes; those fall back to 1.
  result <- multipliers[match(exp, codes)]
  result[is.na(result)] <- 1
  result
}

# Turn the exponent codes into numeric multipliers, then compute the property,
# crop and combined damage for every record.
# vapply() guarantees one plain numeric multiplier per row; sapply() would
# additionally attach every exponent code as an element name.
storm_data <- storm_data %>%
  mutate(
    PROPDMG_MULTIPLIER = vapply(
      PROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE
    ),
    CROPDMG_MULTIPLIER = vapply(
      CROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE
    ),
    TOTAL_PROPDMG = PROPDMG * PROPDMG_MULTIPLIER,
    TOTAL_CROPDMG = CROPDMG * CROPDMG_MULTIPLIER,
    TOTAL_DAMAGE = TOTAL_PROPDMG + TOTAL_CROPDMG
  )

Results

Population Health Impact

# Health impact analysis
# Step 1: total fatalities and injuries recorded for each event type.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  )

# Step 2: rank by fatalities, using injuries to order equal fatality counts.
health_impact <- health_impact %>%
  arrange(desc(Total_Fatalities), desc(Total_Injuries))

# Show the ten highest-ranked event types.
head(health_impact, n = 10)
## # A tibble: 10 × 3
##    EVTYPE         Total_Fatalities Total_Injuries
##    <chr>                     <dbl>          <dbl>
##  1 TORNADO                    5633          91346
##  2 EXCESSIVE HEAT             1903           6525
##  3 FLASH FLOOD                 978           1777
##  4 HEAT                        937           2100
##  5 LIGHTNING                   816           5230
##  6 TSTM WIND                   504           6957
##  7 FLOOD                       470           6789
##  8 RIP CURRENT                 368            232
##  9 HIGH WIND                   248           1137
## 10 AVALANCHE                   224            170
# Visualization: bar chart of the ten event types with the most fatalities.
# with_ties = FALSE caps the chart at exactly ten bars, matching its title,
# even when several event types share the tenth-highest fatality count.
health_plot <- health_impact %>%
  slice_max(Total_Fatalities, n = 10, with_ties = FALSE) %>%
  # Reordering on the negated total sorts the bars from highest to lowest.
  ggplot(aes(x = reorder(EVTYPE, -Total_Fatalities), y = Total_Fatalities)) +
  geom_col(fill = "red") +
  labs(title = "Top 10 Most Harmful Events (Fatalities)",
       x = "Event Type", y = "Total Fatalities") +
  # Angle the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(health_plot)

Economic Damage

# Economic damage analysis
# Step 1: total combined (property + crop) damage for each event type.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Economic_Damage = sum(TOTAL_DAMAGE, na.rm = TRUE))

# Step 2: rank the event types from most to least costly.
economic_impact <- economic_impact %>%
  arrange(desc(Total_Economic_Damage))

# Show the ten costliest event types.
head(economic_impact, n = 10)
## # A tibble: 10 × 2
##    EVTYPE            Total_Economic_Damage
##    <chr>                             <dbl>
##  1 FLOOD                     150319678257 
##  2 HURRICANE/TYPHOON          71913712800 
##  3 TORNADO                    57352114049.
##  4 STORM SURGE                43323541000 
##  5 HAIL                       18758221521.
##  6 FLASH FLOOD                17562129167.
##  7 DROUGHT                    15018672000 
##  8 HURRICANE                  14610229010 
##  9 RIVER FLOOD                10148404500 
## 10 ICE STORM                   8967041360
# Visualization: bar chart of the ten event types with the largest total
# damage. with_ties = FALSE caps the chart at exactly ten bars, matching its
# title, even when several event types share the tenth-highest total.
economic_plot <- economic_impact %>%
  slice_max(Total_Economic_Damage, n = 10, with_ties = FALSE) %>%
  # Bars are sorted from highest to lowest; the y value is divided by 1e9 so
  # the axis reads in billions.
  ggplot(aes(x = reorder(EVTYPE, -Total_Economic_Damage), y = Total_Economic_Damage / 1e9)) +
  geom_col(fill = "blue") +
  labs(title = "Top 10 Events with Greatest Economic Damage",
       x = "Event Type", y = "Total Economic Damage (in Billions)") +
  # Angle the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(economic_plot)