This report analyzes weather events in the United States from 1950 to
2011.
It is based on the Storm Data dataset (file `repdata_data_StormData.csv.bz2`), which you can find here.
Data Processing
# Read the raw Storm Data CSV straight from the bz2-compressed file
storm_data <- read.csv(
  file = "repdata_data_StormData.csv.bz2",
  header = TRUE
)
# Load all libraries
library(ggplot2)
library(dplyr)
library(scales)
Process data
# Total fatalities and total injuries per weather event type
fatalities_by_event <- aggregate(
  FATALITIES ~ EVTYPE,
  data = storm_data, FUN = sum, na.rm = TRUE
)
injuries_by_event <- aggregate(
  INJURIES ~ EVTYPE,
  data = storm_data, FUN = sum, na.rm = TRUE
)
# Join both totals on the event type, keeping every event type that has a
# fatality total (merge() returns the rows sorted by EVTYPE)
damage_population_by_event <- merge(
  fatalities_by_event, injuries_by_event,
  by = "EVTYPE", all.x = TRUE
)
# Rank event types from most to fewest fatalities
damage_population_by_event <- damage_population_by_event[
  order(-damage_population_by_event$FATALITIES),
]
# Summarizing all economic costs (property + crop damage) for each event type.
# PROPDMG and CROPDMG only hold the leading figures of each estimate; the
# order of magnitude is stored separately in PROPDMGEXP / CROPDMGEXP
# ("K" = thousands, "M" = millions, "B" = billions). Each amount is therefore
# scaled to plain dollars before the two are added, otherwise a 2.5K loss and
# a 2.5B loss would count the same.
damage_multipliers <- c(K = 1e3, M = 1e6, B = 1e9)

# Translate a vector of magnitude codes into numeric multipliers.
# Matching is case-insensitive. Blank, missing or unrecognised codes map to 1,
# i.e. the recorded amount is kept unscaled.
# NOTE(review): only K/M/B are handled; confirm how any other codes present in
# the data should be interpreted.
damage_multiplier <- function(exp_code) {
  multiplier <- unname(damage_multipliers[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

costs_by_event <- storm_data %>%
  mutate(
    total = PROPDMG * damage_multiplier(PROPDMGEXP) +
      CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(sum_total = sum(total, na.rm = TRUE))
Results
Across the United States, which types of events (as indicated in the
EVTYPE variable)
are most harmful with respect to population health?
# Top 10 event types by fatalities.
# head() returns at most 10 rows, so the table is never padded with all-NA
# rows when fewer than 10 event types are available (a fixed 1:10 row index
# would do that).
damage_population_event_top10 <- head(
  damage_population_by_event[order(-damage_population_by_event$FATALITIES), ],
  10
)
# Rescale factor to match FATALITIES and INJURIES scales
# Example: 1 fatality = ~15 injuries
scale_factor <- 15
# Prepare the data: injuries are divided by scale_factor so they can be drawn
# on the same axis as the fatalities
event_plot <- damage_population_event_top10 %>%
  mutate(INJURIES_scaled = INJURIES / scale_factor)
# Dual-axis chart: fatalities as bars on the primary y axis, scaled injuries
# as a line with points. The secondary axis multiplies by scale_factor again,
# so it reads in actual injury counts.
ggplot(event_plot, aes(x = reorder(EVTYPE, -FATALITIES))) +
  geom_col(aes(y = FATALITIES, fill = "Fatalities"), alpha = 0.6) +
  geom_line(
    aes(y = INJURIES_scaled, group = 1, color = "Injuries"),
    linewidth = 1.2
  ) +
  geom_point(aes(y = INJURIES_scaled, color = "Injuries"), size = 2) +
  # Legend colours: fatalities in red, injuries in blue
  scale_fill_manual(values = c(Fatalities = "red")) +
  scale_color_manual(values = c(Injuries = "blue")) +
  scale_y_continuous(
    name = "Fatalities",
    labels = comma,
    sec.axis = sec_axis(
      ~ . * scale_factor,
      name = "Injuries",
      labels = comma
    )
  ) +
  ggtitle("Event Impact: Fatalities vs. Injuries") +
  xlab("Event Type") +
  theme_minimal(base_size = 12) +
  theme(
    # Slanted labels keep long event names readable
    axis.text.x = element_text(angle = 45, hjust = 1),
    # No legend title for a cleaner look
    legend.title = element_blank()
  )

Across the United States, which types of events have the greatest
economic consequences?
# Top 10 event types by total cost.
# head() returns at most 10 rows, so no all-NA rows are produced when fewer
# than 10 event types are available.
costs_by_event_top10 <- head(
  costs_by_event[order(-costs_by_event$sum_total), ],
  10
)
# Bar chart of the ten costliest event types, highest total first
ggplot(
  costs_by_event_top10,
  aes(x = reorder(EVTYPE, -sum_total), y = sum_total)
) +
  geom_col(fill = "skyblue") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events by Economic Damage",
    x = "Event Type",
    y = "Total Cost"
  ) +
  theme_minimal(base_size = 12) +
  # Slanted labels keep long event names readable
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
