Synopsis

This analysis explores the NOAA Storm Database to determine the most harmful types of severe weather events in the United States from 1950 to 2011. We identify the events that caused the most fatalities and injuries, as well as those with the greatest economic consequences. The dataset is processed directly from the original compressed source and analyzed using R. Results are presented in summary tables and bar plots to assist emergency preparedness planning.

Data Processing

# Location of the compressed NOAA storm archive and the local cache name.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
file <- "StormData.csv.bz2"

# Download only when no local copy exists, so re-running the analysis does
# not fetch the archive again. mode = "wb" requests a binary transfer.
if (!file.exists(file)) {
  download.file(url = url, destfile = file, mode = "wb")
}

# The .bz2 archive is passed to read.csv() as-is; no manual unpacking step.
storm <- read.csv(file = file)
library(dplyr)
library(tidyr)
library(ggplot2)

# Normalise the event-type label and both damage exponent codes in one pass:
# strip surrounding whitespace, then upper-case, so that values differing
# only in case or padding are treated as the same key downstream.
storm[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  storm[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")],
  function(x) toupper(trimws(x))
)

# Translate damage exponent codes into numeric multipliers.
#
# e: vector of exponent codes (character or factor), expected to be already
#    trimmed and upper-cased by the caller.
#
# Returns a numeric vector the same length as `e`:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9.
# Every other value (blank, NA, digits, punctuation) maps to 1, i.e. the
# damage figure is taken at face value.
exp_map <- function(e) {
  codes <- c("H", "K", "M", "B")
  multipliers <- c(1e2, 1e3, 1e6, 1e9)

  # match() yields NA for unrecognised or missing codes; those default to 1.
  out <- multipliers[match(e, codes)]
  out[is.na(out)] <- 1
  out
}

# Scale the recorded property and crop damage by their exponent multipliers
# to get dollar amounts, then add the two into one total per record.
storm <- storm %>%
  mutate(
    PROPDMGVAL = PROPDMG * exp_map(PROPDMGEXP),
    CROPDMGVAL = CROPDMG * exp_map(CROPDMGEXP),
    TOTALDMG = PROPDMGVAL + CROPDMGVAL
  )

Results

Events Most Harmful to Population Health

# Per event type: total fatalities, total injuries and their sum, keeping
# only the ten event types with the largest combined count.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    Total = Fatalities + Injuries
  ) %>%
  arrange(-Total) %>%
  head(10)

# Reshape to one row per (event type, casualty kind) so the two counts can
# be drawn as stacked segments of a single bar.
health_long <- health %>%
  pivot_longer(
    cols = c(Fatalities, Injuries),
    names_to = "Type",
    values_to = "Count"
  )

# Stacked horizontal bars of fatalities and injuries per event type.
# reorder() ranks each event type by the mean of its two Count rows, which
# gives the same ordering as ranking by combined casualties.
ggplot(
  data = health_long,
  mapping = aes(x = reorder(EVTYPE, Count), y = Count, fill = Type)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Population Health Impact",
    x = "Event Type",
    y = "Number of People Affected"
  ) +
  theme_minimal()

Events with Greatest Economic Consequences

# Total property + crop damage per event type, restricted to the ten event
# types with the highest totals.
economic <- storm %>%
  group_by(EVTYPE) %>%
  summarise(EconomicDamage = sum(TOTALDMG, na.rm = TRUE)) %>%
  arrange(-EconomicDamage) %>%
  head(10)

# Horizontal bars of total damage per event type, ordered by damage.
ggplot(
  data = economic,
  mapping = aes(x = reorder(EVTYPE, EconomicDamage), y = EconomicDamage)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Economic Damage",
    x = "Event Type",
    y = "Total Damage (USD)"
  ) +
  theme_minimal()

Summary Table

# Combined view: the ten event types with the largest health impact, each
# paired with its total economic damage.
#
# Damage is aggregated over all event types here rather than taken from the
# top-10 `economic` table: joining against that sliced table leaves
# EconomicDamage as NA for every event type outside the economic top ten.
health %>%
  left_join(
    storm %>%
      group_by(EVTYPE) %>%
      summarise(EconomicDamage = sum(TOTALDMG, na.rm = TRUE)),
    by = "EVTYPE"
  ) %>%
  arrange(desc(Total)) %>%
  head(10)
## # A tibble: 10 × 5
##    EVTYPE            Fatalities Injuries Total EconomicDamage
##    <chr>                  <dbl>    <dbl> <dbl>          <dbl>
##  1 TORNADO                 5633    91346 96979   57352114049.
##  2 EXCESSIVE HEAT          1903     6525  8428            NA 
##  3 TSTM WIND                504     6957  7461            NA 
##  4 FLOOD                    470     6789  7259  150319678257 
##  5 LIGHTNING                816     5230  6046            NA 
##  6 HEAT                     937     2100  3037            NA 
##  7 FLASH FLOOD              978     1777  2755   17562179167.
##  8 ICE STORM                 89     1975  2064    8967041360 
##  9 THUNDERSTORM WIND        133     1488  1621            NA 
## 10 WINTER STORM             206     1321  1527            NA