Synopsis

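This report analyzes the NOAA Storm Database to identify which types of weather events are most harmful to population health and which cause the greatest economic damage. Health impact is measured as the total number of fatalities plus injuries per event type, and economic impact as the combined property and crop damage per event type. Tornadoes are the most harmful to population health, while floods and storms cause the highest economic losses.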

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Load the raw storm data. Only the bare file name is given, so the
# compressed CSV must sit in the current working directory.
data <- read.csv("repdata_data_StormData (1).csv.bz2")

# Keep only the columns the analysis uses: event type, casualty counts,
# and the damage figures together with their exponent codes.
data2 <- select(
  data,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)

# Convert damage-exponent codes into numeric multipliers.
#
# Args:
#   exp: One or more exponent codes (character or factor), as found in the
#        PROPDMGEXP / CROPDMGEXP columns. Matching ignores letter case and
#        surrounding whitespace: "K" -> 1e3, "M" -> 1e6, "B" -> 1e9.
#
# Returns:
#   An unnamed numeric vector the same length as `exp`. Any code that is
#   not K/M/B (including "" and NA) maps to 1, i.e. the damage figure is
#   taken at face value.
convert_exp <- function(exp) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)

  # Normalise first so lowercase codes such as "k" or "m" are not silently
  # treated as "no multiplier".
  code <- toupper(trimws(as.character(exp)))

  # Table lookup instead of an if-chain: vectorised, and NA-safe because
  # match() yields NA (rather than an error) for unknown or missing codes.
  result <- unname(multipliers[match(code, names(multipliers))])
  result[is.na(result)] <- 1
  result
}

# Replace each exponent code with its numeric multiplier.
# vapply() is used instead of sapply() so the result is always a plain,
# unnamed numeric vector (sapply() would return a list for zero rows and
# attach the codes as element names). as.character() keeps this working
# whether the columns were read as character or as factor.
data2$PROPDMGEXP <- vapply(
  as.character(data2$PROPDMGEXP), convert_exp, numeric(1),
  USE.NAMES = FALSE
)
data2$CROPDMGEXP <- vapply(
  as.character(data2$CROPDMGEXP), convert_exp, numeric(1),
  USE.NAMES = FALSE
)

# Calculate damage: scale each reported figure by its multiplier.
data2$prop_damage <- data2$PROPDMG * data2$PROPDMGEXP
data2$crop_damage <- data2$CROPDMG * data2$CROPDMGEXP

# Total casualties (fatalities plus injuries) for every event type.
health <- data2 %>%
  group_by(EVTYPE) %>%
  summarise(total = sum(FATALITIES + INJURIES))

# Keep the ten event types with the largest totals, biggest first.
health <- health %>%
  arrange(desc(total)) %>%
  head(n = 10)
## `summarise()` ungrouping output (override with `.groups` argument)
# Horizontal bar chart of the ten event types in `health`, with bars
# ordered by their casualty total.
ggplot(health) +
  geom_col(aes(x = reorder(EVTYPE, total), y = total)) +
  coord_flip() +
  ggtitle("Top 10 Harmful Events (Health Impact)") +
  xlab("Event Type") +
  ylab("Total Harm")

# Total damage (property plus crop) for every event type.
economic <- data2 %>%
  group_by(EVTYPE) %>%
  summarise(total = sum(prop_damage + crop_damage))

# Keep the ten event types with the largest totals, biggest first.
economic <- economic %>%
  arrange(desc(total)) %>%
  head(n = 10)
## `summarise()` ungrouping output (override with `.groups` argument)
# Horizontal bar chart of the ten event types in `economic`, with bars
# ordered by their damage total.
ggplot(economic) +
  geom_col(aes(x = reorder(EVTYPE, total), y = total)) +
  coord_flip() +
  ggtitle("Top 10 Economic Damage Events") +
  xlab("Event Type") +
  ylab("Damage")