This report analyzes the NOAA Storm Database to determine which types of severe weather events were most harmful to population health and which had the greatest economic consequences in the United States between 1950 and 2011. The data were read directly from the raw compressed CSV file, cleaned (damage exponent codes were converted to dollar multipliers), and aggregated by event type. The analysis shows that tornadoes caused the most harm to human health (fatalities plus injuries), while floods caused the greatest economic damage (property plus crop damage). The methodology is fully reproducible, and the results are visualized as bar plots.
library(dplyr)
library(ggplot2)
library(tidyr)
# Show what is already in the working directory
list.files()
## [1] "activity.csv" "activity.zip" "storm_cache"
## [4] "storm.Rmd" "StormData.csv.bz2"
# Fetch the compressed archive only when no local copy exists, then read it.
# read.csv() is pointed straight at the .bz2 file; no manual unpacking step.
storm <- local({
  archive <- "StormData.csv.bz2"
  if (!file.exists(archive)) {
    download.file(
      url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
      destfile = archive,
      mode = "wb"
    )
  }
  read.csv(archive, stringsAsFactors = FALSE)
})
# Distribution of the two health-outcome columns
summary(storm[["FATALITIES"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.0168 0.0000 583.0000
summary(storm[["INJURIES"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1557 0.0000 1700.0000
# Peek at the first 20 distinct event-type labels
storm[["EVTYPE"]] %>%
  unique() %>%
  head(20)
## [1] "TORNADO" "TSTM WIND"
## [3] "HAIL" "FREEZING RAIN"
## [5] "SNOW" "ICE STORM/FLASH FLOOD"
## [7] "SNOW/ICE" "WINTER STORM"
## [9] "HURRICANE OPAL/HIGH WINDS" "THUNDERSTORM WINDS"
## [11] "RECORD COLD" "HURRICANE ERIN"
## [13] "HURRICANE OPAL" "HEAVY RAIN"
## [15] "LIGHTNING" "THUNDERSTORM WIND"
## [17] "DENSE FOG" "RIP CURRENT"
## [19] "THUNDERSTORM WINS" "FLASH FLOOD"
# Per event type: total fatalities, total injuries, and their sum, ordered
# from most to least harmful.
# NOTE(review): EVTYPE is grouped exactly as recorded, so near-duplicate
# labels (e.g. "TSTM WIND" vs "THUNDERSTORM WIND") stay separate rows --
# confirm this is intended.
health_impact <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(TOTAL_HARM = FATALITIES + INJURIES) %>%
  arrange(desc(TOTAL_HARM))
# Keep the ten event types at the top of that ranking
top_health <- health_impact %>%
  head(10)
#' Convert a damage amount and its exponent code into a plain number
#'
#' Each element of `damage` is multiplied by the multiplier that its
#' exponent code maps to. Codes are matched case-insensitively. Any code
#' that is not in `multipliers` -- including the empty string and `NA` --
#' uses `default`.
#'
#' NOTE(review): with the default table, every code other than K, M, B and H
#' (for example digits or symbols, if the exponent columns contain them) is
#' treated as a multiplier of 1 -- confirm that is the intended reading of
#' those codes.
#'
#' @param damage Numeric vector of damage amounts.
#' @param exp Vector of exponent codes, one per element of `damage` (a
#'   length-one value is recycled).
#' @param multipliers Named numeric vector mapping exponent codes to
#'   multipliers. Defaults to K = 1e3, M = 1e6, B = 1e9, H = 1e2.
#' @param default Multiplier used for codes not found in `multipliers`.
#' @return Numeric vector of `damage * multiplier`.
convert_damage <- function(damage, exp,
                           multipliers = c(K = 1e3, M = 1e6, B = 1e9, H = 1e2),
                           default = 1) {
  codes <- toupper(exp)
  # match() yields NA for unknown or missing codes; those take `default`.
  position <- match(codes, toupper(names(multipliers)))
  multiplier <- unname(multipliers[position])
  multiplier[is.na(multiplier)] <- default
  damage * multiplier
}
# Per event type: property plus crop damage in dollars, ordered from the
# most to the least costly. Each row's two damage figures are scaled by
# their exponent codes (via convert_damage) before being added together.
economic_impact <- storm %>%
  transmute(
    EVTYPE,
    TOTAL_DAMAGE = convert_damage(PROPDMG, PROPDMGEXP) +
      convert_damage(CROPDMG, CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(TOTAL_ECONOMIC = sum(TOTAL_DAMAGE, na.rm = TRUE)) %>%
  arrange(desc(TOTAL_ECONOMIC))
# Keep the ten event types at the top of that ranking
top_economic <- economic_impact %>%
  head(10)
# Print the ten event types with the highest combined fatalities and injuries
top_health
## # A tibble: 10 x 4
## EVTYPE FATALITIES INJURIES TOTAL_HARM
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Bar chart of the same table; reorder() on the negated total places the
# most harmful event type first on the x axis.
ggplot(
  top_health,
  aes(x = reorder(EVTYPE, -TOTAL_HARM), y = TOTAL_HARM)
) +
  geom_col(fill = "firebrick") +
  labs(
    title = "Top 10 Most Harmful Weather Events to Population Health",
    x = "Event Type",
    y = "Total Harm"
  ) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Print the ten event types with the highest total economic damage
top_economic
## # A tibble: 10 x 2
## EVTYPE TOTAL_ECONOMIC
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352114049.
## 4 STORM SURGE 43323541000
## 5 HAIL 18758222016.
## 6 FLASH FLOOD 17562129167.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
# Bar chart of the same table, with damage rescaled to billions of dollars;
# reorder() on the negated total places the costliest event type first.
ggplot(
  top_economic,
  aes(x = reorder(EVTYPE, -TOTAL_ECONOMIC), y = TOTAL_ECONOMIC / 1e9)
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Damage (Billions USD)"
  ) +
  theme_minimal() +
  # Must come after theme_minimal(), which would otherwise reset it
  theme(axis.text.x = element_text(angle = 45, hjust = 1))