This analysis explores the NOAA Storm Database to identify which
weather event types have the greatest impact on public health and the
economy in the United States between 1950 and 2011. Using variables such
as fatalities, injuries, property damage, and crop damage, we summarize
and visualize the impact of different event types. Data transformations
include converting the damage magnitude codes (K, M, B) into numeric
multipliers and aggregating the results by event type.
Tornadoes are found to be the most harmful to population health, while
floods and hurricanes cause the highest economic damage. All data
processing and plotting were performed in R, using packages such as
dplyr, ggplot2, and knitr. The
analysis begins with the raw data file and is fully reproducible.
Figures are limited to three and include appropriate captions. This
report aims to inform stakeholders involved in disaster preparedness.
The final figures illustrate the top harmful and costly weather events.
The code and results are included in this document.
# Load required packages again in case chunk order changes
library(dplyr)
library(ggplot2)
library(readr)
library(tidyr)
# Read the raw storm data; abort with an explicit error when the
# compressed CSV is not present in the working directory.
data_file <- "repdata_data_StormData.csv.bz2"
if (file.exists(data_file)) {
  # The .bz2 path is handed to read.csv() directly.
  storm_data <- read.csv(data_file)
} else {
  stop("Data file 'repdata_data_StormData.csv.bz2' not found.")
}
# Convert damage magnitude codes into numeric multipliers.
#
# Args:
#   e: A vector (character, factor, or anything `as.character()` accepts)
#      of magnitude codes. Matching is case-insensitive.
#
# Returns:
#   An unnamed numeric vector the same length as `e`: 1e3 for "K", 1e6 for
#   "M", 1e9 for "B", and 1 for every other value (including "" and NA).
#
# The lookup is vectorised, so the function can be applied to a whole
# column at once as well as element by element.
convert_exp <- function(e) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(as.character(e))
  # match() yields NA for unknown or missing codes; those fall back to 1.
  out <- unname(multipliers[match(code, names(multipliers))])
  out[is.na(out)] <- 1
  out
}
# Clean and process the dataset.
#
# Keeps only the columns used by the health and economic summaries,
# replaces the two *EXP magnitude-code columns with their numeric
# multipliers, and derives PROP_DAMAGE / CROP_DAMAGE as value * multiplier.
#
# vapply() is used instead of sapply() so the multiplier columns are
# always plain, unnamed numeric vectors, even for zero-row input.
storm_df <- storm_data %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(
    PROPDMGEXP = vapply(
      PROPDMGEXP, convert_exp, numeric(1),
      USE.NAMES = FALSE
    ),
    CROPDMGEXP = vapply(
      CROPDMGEXP, convert_exp, numeric(1),
      USE.NAMES = FALSE
    ),
    PROP_DAMAGE = PROPDMG * PROPDMGEXP,
    CROP_DAMAGE = CROPDMG * CROPDMGEXP
  )
# Fatalities and injuries summed per event type, plus their combined
# total; only the ten event types with the largest total are kept.
health_impact <- storm_df %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    Total_Impact = Fatalities + Injuries
  ) %>%
  arrange(desc(Total_Impact)) %>%
  head(10)
# Plot: dodged horizontal bars of fatalities and injuries for the ten
# event types in health_impact.
#
# Event types are ordered by Total_Impact ascending; after coord_flip()
# this places the most harmful event at the top of the chart, the same
# orientation the economic-damage bar chart uses.
health_impact %>%
  pivot_longer(
    cols = c(Fatalities, Injuries),
    names_to = "Type",
    values_to = "Count"
  ) %>%
  ggplot(aes(x = reorder(EVTYPE, Total_Impact), y = Count, fill = Type)) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Impacting Health",
    x = "Event Type",
    y = "Number of People"
  ) +
  theme_minimal()
# Total economic damage (property + crop) per event type; only the ten
# event types with the largest total are kept.
#
# Property and crop damage are summed separately so that a record with
# only one of the two values missing still contributes the value it has.
economic_impact <- storm_df %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Damage = sum(PROP_DAMAGE, na.rm = TRUE) +
      sum(CROP_DAMAGE, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Damage)) %>%
  slice_head(n = 10)
# Plot: horizontal bar chart of total damage for the ten event types in
# economic_impact, with the largest bar at the top.
economic_impact %>%
  ggplot(aes(x = reorder(EVTYPE, Total_Damage), y = Total_Damage)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Economic Damage (USD)"
  )
# Event types that appear in both top-ten tables, with their combined
# health total alongside their total economic damage.
combined_impact <- inner_join(
  select(health_impact, EVTYPE, Total_Impact),
  economic_impact,
  by = "EVTYPE"
)
# Plot: scatter of health impact against economic damage for the event
# types in combined_impact. Each point is labelled with its event type,
# and the label is nudged right by 5% of the largest health total so it
# does not sit on top of the point.
combined_impact %>%
  ggplot(aes(x = Total_Impact, y = Total_Damage, label = EVTYPE)) +
  geom_point(colour = "steelblue", size = 3) +
  geom_text(
    size = 3,
    hjust = 0,
    vjust = 0,
    nudge_x = max(combined_impact[["Total_Impact"]]) * 0.05
  ) +
  theme_minimal() +
  labs(
    title = "Health vs. Economic Impact of Top Weather Events",
    x = "Total Health Impact (Fatalities + Injuries)",
    y = "Total Economic Damage (USD)"
  )