This report analyzes the weather events from 1950 to 2011 in the US.

It is based on the NOAA Storm Data dataset (the compressed file repdata_data_StormData.csv.bz2 that is loaded below).

For further information, consult the Storm Data documentation.

Data Processing

# Attach the packages used by the analysis: ggplot2 draws the figures,
# dplyr supplies the pipe and data verbs, scales formats the axis labels
library(ggplot2)
library(dplyr)
library(scales)
# Read the raw storm records straight from the bzip2-compressed CSV
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")

Process data

# Aggregating the amount of fatalities and injuries by weather event.
# One row per event type (EVTYPE); na.rm = TRUE is forwarded to sum().
fatalities_by_event <- aggregate(
  FATALITIES ~ EVTYPE,
  data = storm_data, FUN = sum, na.rm = TRUE
)
injuries_by_event <- aggregate(
  INJURIES ~ EVTYPE,
  data = storm_data, FUN = sum, na.rm = TRUE
)

# Join both totals on EVTYPE. all.x = TRUE keeps every event type that has a
# fatality total, even if it has no matching injury row.
# merge() returns its result sorted by the key column, so any ordering applied
# to the inputs beforehand is discarded; the table is therefore sorted only
# once, after the merge.
damage_population_by_event <- merge(
  fatalities_by_event, injuries_by_event,
  by = "EVTYPE", all.x = TRUE
)

# Most fatal event types first
damage_population_by_event <- damage_population_by_event[
  order(-damage_population_by_event$FATALITIES),
]
# Summarizing all economic costs for each event type
#
# PROPDMG and CROPDMG hold only the leading figures of each damage estimate.
# Their magnitude is stored separately in PROPDMGEXP and CROPDMGEXP
# ("K" = thousands, "M" = millions, "B" = billions), so each figure has to be
# multiplied by its magnitude before property and crop damage are added.
# Without this step a 5 thousand dollar loss and a 5 billion dollar loss
# would both be counted as 5.

# Translate Storm Data magnitude codes into numeric multipliers.
#   exp_code: character or factor vector of magnitude codes.
#   Returns a numeric vector of the same length as exp_code.
# Codes are matched case-insensitively. Anything other than K, M or B
# (blank, NA, digits, symbols) is not defined in the Storm Data documentation
# and leaves the figure unscaled (multiplier 1).
damage_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(trimws(as.character(exp_code)))
  multiplier <- unname(known[code])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Total damage per event type: one row per EVTYPE, total in sum_total
costs_by_event <- storm_data %>%
  mutate(
    total = PROPDMG * damage_multiplier(PROPDMGEXP) +
      CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(sum_total = sum(total, na.rm = TRUE))

Results

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Ten event types with the highest fatality totals
ranked_by_fatalities <- damage_population_by_event[
  order(-damage_population_by_event$FATALITIES),
]
damage_population_event_top10 <- ranked_by_fatalities[seq_len(10), ]

# Display-only rescaling: injury totals are divided by this factor so that
# they can be drawn against the fatalities axis; the secondary axis below
# multiplies by the same factor to show the real injury counts.
scale_factor <- 15

# Plot data: the top-10 table plus the rescaled injury column
event_plot <- mutate(
  damage_population_event_top10,
  INJURIES_scaled = INJURIES / scale_factor
)

# Bars show fatalities (left axis); line and points show injuries (right axis)
ggplot(event_plot, aes(x = reorder(EVTYPE, -FATALITIES))) +
  geom_col(aes(y = FATALITIES, fill = "Fatalities"), alpha = 0.6) +
  geom_line(
    aes(y = INJURIES_scaled, group = 1, color = "Injuries"),
    linewidth = 1.2
  ) +
  geom_point(aes(y = INJURIES_scaled, color = "Injuries"), size = 2) +
  scale_y_continuous(
    name = "Fatalities",
    labels = comma,
    # Undo the rescaling so the right-hand axis reads in injuries
    sec.axis = sec_axis(~ . * scale_factor, name = "Injuries", labels = comma)
  ) +
  # Legend entries: fatalities in red, injuries in blue
  scale_fill_manual(values = c("Fatalities" = "red")) +
  scale_color_manual(values = c("Injuries" = "blue")) +
  labs(
    x = "Event Type",
    title = "Event Impact: Fatalities vs. Injuries"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.title = element_blank(), # no legend title, the keys are enough
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

Across the United States, which types of events have the greatest economic consequences?

# Ten event types with the largest summed cost
ranked_by_cost <- costs_by_event[order(-costs_by_event$sum_total), ]
costs_by_event_top10 <- ranked_by_cost[seq_len(10), ]

# Bar chart of those totals, tallest bar first
ggplot(
  costs_by_event_top10,
  aes(x = reorder(EVTYPE, -sum_total), y = sum_total)
) +
  geom_col(fill = "skyblue") +
  scale_y_continuous(labels = comma) +
  labs(
    x = "Event Type",
    y = "Total Cost",
    title = "Top 10 Events by Economic Damage"
  ) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))