This analysis examines the NOAA Storm Database to identify which weather events are most harmful to population health and have the greatest economic consequences in the United States. The data spans from 1950 to 2011, with more complete records in recent years. After downloading and processing the raw data, we found that tornadoes cause the most injuries and fatalities, while floods and hurricanes/typhoons result in the highest economic damages. The analysis uses R for data processing, aggregation, and visualization, with results presented through tables and plots to inform resource allocation decisions for severe weather preparedness.
# Load libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.5.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(data.table)
## Warning: package 'data.table' was built under R version 4.5.3
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.5.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Source URL and local file name for the compressed storm data
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "StormData.csv.bz2"
# Download the file once. The transfer is written to a temporary ".part" name
# and only renamed after download.file() reports success, so an interrupted
# download is never mistaken for a complete archive by file.exists() on a
# later run.
if (!file.exists(storm_file)) {
  partial_file <- paste0(storm_file, ".part")
  status <- download.file(data_url, destfile = partial_file, method = "curl")
  if (status != 0 || !file.rename(partial_file, storm_file)) {
    unlink(partial_file)
    stop("Download failed for ", data_url, call. = FALSE)
  }
}
# Read the compressed CSV file. An "EOF within quoted string" warning means the
# parser hit the end of the file in the middle of a record, i.e. rows were
# lost; stop instead of silently analysing a partial table. Other warnings are
# left to propagate as usual.
storm_data <- withCallingHandlers(
  read.csv(storm_file, stringsAsFactors = FALSE),
  warning = function(w) {
    if (grepl("EOF within quoted string", conditionMessage(w), fixed = TRUE)) {
      stop(
        storm_file, " appears truncated or corrupt (", conditionMessage(w),
        "). Delete it and re-run to download a fresh copy.",
        call. = FALSE
      )
    }
  }
)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Report the size of the loaded table (rows first, then columns, in dim())
storm_dims <- dim(storm_data)
cat("Dataset dimensions:", storm_dims, "\n")
## Dataset dimensions: 749555 37
cat("Number of columns:", storm_dims[2], "\n")
## Number of columns: 37
cat("Number of rows:", storm_dims[1], "\n")
## Number of rows: 749555
# Normalise event type labels: strip surrounding whitespace, then upper-case
event_labels <- trimws(storm_data$EVTYPE)
storm_data$EVTYPE <- toupper(event_labels)
# Convert damage figures to plain numbers using their exponent codes.
#
# Vectorised: `value` and `exponent` may be single values (as when the
# function is applied through mapply) or equal-length vectors such as whole
# data frame columns.
#
# Args:
#   value:    numeric damage figure(s).
#   exponent: magnitude code(s), matched case-insensitively. "H", "K", "M" and
#             "B" scale by 1e2, 1e3, 1e6 and 1e9; a single digit d scales by
#             10^d; anything else ("", "?", "-", "+", unrecognised codes)
#             scales by 1.
# Returns:
#   Numeric vector of value * multiplier, with 0 wherever either input is NA.
convert_damage <- function(value, exponent) {
  # Record missing inputs up front; those entries are reported as 0.
  missing_input <- is.na(value) | is.na(exponent)
  code <- toupper(as.character(exponent))

  # Default multiplier of 1 covers blank, punctuation and unknown codes.
  multiplier <- rep(1, length(code))

  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- code %in% names(letter_scale)
  multiplier[is_letter] <- letter_scale[code[is_letter]]

  # Every single digit is a power of ten. Previously "1" and "9" fell through
  # to a multiplier of 1, inconsistent with how "2" to "8" were handled
  # ("0" still yields 10^0 = 1).
  is_digit <- grepl("^[0-9]$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  damage <- value * multiplier
  damage[missing_input] <- 0
  damage
}
# Turn the raw property figures and their exponent codes into plain numbers
storm_data[["PROPDMG_NUM"]] <- mapply(
  FUN = convert_damage,
  storm_data[["PROPDMG"]],
  storm_data[["PROPDMGEXP"]]
)
# Same conversion for the crop figures
storm_data[["CROPDMG_NUM"]] <- mapply(
  FUN = convert_damage,
  storm_data[["CROPDMG"]],
  storm_data[["CROPDMGEXP"]]
)
# Per-record totals: combined damage, and combined deaths plus injuries
storm_data[["TOTAL_ECONOMIC_DMG"]] <-
  storm_data[["PROPDMG_NUM"]] + storm_data[["CROPDMG_NUM"]]
storm_data[["TOTAL_HEALTH_IMPACT"]] <-
  storm_data[["FATALITIES"]] + storm_data[["INJURIES"]]
cat("Data cleaning completed.\n")
## Data cleaning completed.
### Results
``` r
# Casualty totals per event type, largest combined toll first
health_by_event <- group_by(storm_data, EVTYPE)
health_impact <- health_by_event %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    total_health_impact = sum(TOTAL_HEALTH_IMPACT, na.rm = TRUE),
    event_count = n()
  ) %>%
  arrange(desc(total_health_impact))
# Keep the ten worst event types
top_health_events <- health_impact %>%
  slice_head(n = 10)
# Show the ranking without the event_count column
cat("Top 10 Events by Health Impact:\n")
## Top 10 Events by Health Impact:
top_health_events %>%
  select(EVTYPE, total_fatalities, total_injuries, total_health_impact) %>%
  print()
## # A tibble: 10 × 4
## EVTYPE total_fatalities total_injuries total_health_impact
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 4989 84141 89130
## 2 EXCESSIVE HEAT 1821 6312 8133
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 350 6642 6992
## 5 LIGHTNING 727 4657 5384
## 6 FLASH FLOOD 812 1543 2355
## 7 HEAT 804 1436 2240
## 8 ICE STORM 89 1974 2063
## 9 WINTER STORM 198 1315 1513
## 10 HURRICANE/TYPHOON 64 1275 1339
# Long format: one row per (event type, impact measure) pair
plot_health_data <- pivot_longer(
  top_health_events,
  cols = c(total_fatalities, total_injuries),
  names_to = "impact_type",
  values_to = "count"
)
# Side-by-side bars of fatalities and injuries for each event type
health_plot <- ggplot(plot_health_data) +
  aes(x = reorder(EVTYPE, -count), y = count, fill = impact_type) +
  geom_col(position = "dodge") +
  scale_fill_manual(
    values = c(
      "total_fatalities" = "#E74C3C",
      "total_injuries" = "#F39C12"
    ),
    labels = c("Fatalities", "Injuries")
  ) +
  ggtitle("Top 10 Weather Events by Population Health Impact") +
  xlab("Event Type") +
  ylab("Number of People Affected") +
  labs(fill = "Impact Type") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
print(health_plot)
# Damage totals per event type, most costly first
damage_by_event <- group_by(storm_data, EVTYPE)
economic_impact <- damage_by_event %>%
  summarise(
    total_property_dmg = sum(PROPDMG_NUM, na.rm = TRUE),
    total_crop_dmg = sum(CROPDMG_NUM, na.rm = TRUE),
    total_economic_dmg = sum(TOTAL_ECONOMIC_DMG, na.rm = TRUE),
    event_count = n()
  ) %>%
  arrange(desc(total_economic_dmg))
# Keep the ten costliest event types and add the total expressed in billions
top_economic_events <- economic_impact %>%
  slice_head(n = 10) %>%
  mutate(total_economic_dmg_billions = total_economic_dmg / 1e9)
# Show the ranking
cat("\nTop 10 Events by Economic Impact (in Billions USD):\n")
##
## Top 10 Events by Economic Impact (in Billions USD):
top_economic_events %>%
  select(
    EVTYPE, total_property_dmg, total_crop_dmg, total_economic_dmg_billions
  ) %>%
  print()
## # A tibble: 10 × 4
## EVTYPE total_property_dmg total_crop_dmg total_economic_dmg_bill…¹
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 133303922657 4372682450 138.
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71.9
## 3 TORNADO 45448334926. 337521270 45.8
## 4 STORM SURGE 43323536000 5000 43.3
## 5 FLASH FLOOD 14159071408. 1171142100 15.3
## 6 DROUGHT 1045942000 13921797000 15.0
## 7 HURRICANE 11857799010 2731400000 14.6
## 8 HAIL 10469407963. 2490646473 13.0
## 9 RIVER FLOOD 5118945500 5029459000 10.1
## 10 ICE STORM 3929835560 5022033500 8.95
## # ℹ abbreviated name: ¹​total_economic_dmg_billions
# Long format: one row per (event type, damage category) pair
plot_economic_data <- top_economic_events %>%
  pivot_longer(
    cols = c(total_property_dmg, total_crop_dmg),
    names_to = "damage_type",
    values_to = "damage_amount"
  )
# Express amounts in billions for the axis
plot_economic_data$damage_amount_billions <-
  plot_economic_data$damage_amount / 1e9
# Stacked bars of property and crop damage for each event type
economic_plot <- ggplot(
  plot_economic_data,
  aes(
    x = reorder(EVTYPE, -damage_amount_billions),
    y = damage_amount_billions,
    fill = damage_type
  )
) +
  geom_bar(stat = "identity", position = "stack") +
  theme_minimal() +
  labs(
    title = "Top 10 Weather Events by Economic Impact",
    x = "Event Type",
    y = "Total Damage (Billions USD)",
    fill = "Damage Type"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  ) +
  # `labels` are matched to `breaks` by position, not to the names of
  # `values`. Without explicit breaks the scale orders the keys
  # alphabetically (crop before property), which attached "Property Damage"
  # to the crop series and vice versa. Spelling out the breaks keeps each
  # label on the right colour.
  scale_fill_manual(
    values = c(
      "total_property_dmg" = "#3498DB",
      "total_crop_dmg" = "#27AE60"
    ),
    breaks = c("total_property_dmg", "total_crop_dmg"),
    labels = c("Property Damage", "Crop Damage")
  )
print(economic_plot)
# Panel versions of the two charts: legend underneath, smaller axis text and
# lettered titles so they fit side by side
health_plot_panel <- health_plot +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  ggtitle("A) Population Health Impact")
economic_plot_panel <- economic_plot +
  theme(
    legend.position = "bottom",
    axis.text.x = element_text(angle = 45, hjust = 1, size = 8)
  ) +
  ggtitle("B) Economic Impact")
# Arrange plots side by side. grid.arrange() draws the figure itself and
# returns the layout object, so no print() call follows: printing that object
# only emits a text listing of its grobs rather than the figure. gridExtra is
# already attached with the other libraries at the top of the script.
panel_plot <- grid.arrange(health_plot_panel, economic_plot_panel, ncol = 2)
# Headline findings: the top two event types in each ranking
cat("\n=== SUMMARY ===\n")
##
## === SUMMARY ===
cat("1. Most harmful events to population health:\n")
## 1. Most harmful events to population health:
for (pos in seq_len(2)) {
  cat(
    " -", top_health_events$EVTYPE[pos], "caused",
    top_health_events$total_health_impact[pos], "total health impacts\n"
  )
}
## - TORNADO caused 89130 total health impacts
## - EXCESSIVE HEAT caused 8133 total health impacts
cat("\n2. Events with greatest economic consequences:\n")
##
## 2. Events with greatest economic consequences:
for (pos in seq_len(2)) {
  damage_billions <- round(top_economic_events$total_economic_dmg_billions[pos], 2)
  cat(
    " -", top_economic_events$EVTYPE[pos], "caused $",
    damage_billions, "billion in damages\n"
  )
}
## - FLOOD caused $ 137.68 billion in damages
## - HURRICANE/TYPHOON caused $ 71.91 billion in damages
```

### Conclusion
This analysis of the NOAA Storm Database reveals clear patterns in weather-related impacts across the United States. Tornadoes emerge as the most harmful event type for population health, causing the highest combined fatalities and injuries. For economic consequences, floods and hurricanes/typhoons result in the greatest damages, with property damage constituting the majority of economic losses. These findings can inform prioritization of resources for severe weather preparedness, suggesting that tornado warning systems and flood mitigation infrastructure should receive significant attention from government and municipal managers responsible for public safety and economic stability.