This analysis explores the NOAA Storm Database to determine the most harmful types of severe weather events in the United States from 1950 to 2011. We identify the events that caused the most fatalities and injuries, as well as those with the greatest economic consequences. The dataset is processed directly from the original compressed source and analyzed using R. Results are presented in summary tables and bar plots to assist emergency preparedness planning.
# Remote location of the compressed NOAA storm data and the name of the local
# copy kept next to this script.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
file <- "StormData.csv.bz2"

# Download only when no local copy exists yet; the archive is written in
# binary mode ("wb").
if (!file.exists(file)) {
  download.file(url = url, destfile = file, mode = "wb")
}

# The .bz2 path is handed to read.csv() as-is, without a separate unpack step.
storm <- read.csv(file = file)
library(dplyr)
library(tidyr)
library(ggplot2)
# Strip surrounding whitespace and upper-case the event type and both damage
# exponent columns, so that spellings differing only in case or padding
# collapse into one value.
storm[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  storm[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")],
  function(column) toupper(trimws(column))
)
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   e: vector of exponent codes (character, or coercible to character).
#      Surrounding whitespace and letter case are ignored.
#
# Returns:
#   A numeric vector the same length as `e`: 1e2 for "H", 1e3 for "K",
#   1e6 for "M", 1e9 for "B", and 1 for everything else (blank, NA, digits,
#   punctuation), i.e. the reported amount is then taken at face value.
exp_map <- function(e) {
  # "H" (hundreds) is listed alongside K/M/B; without it those records would
  # silently fall through to the default multiplier of 1.
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  # Normalise here as well, so the result does not depend on the caller having
  # cleaned the column first.
  codes <- toupper(trimws(as.character(e)))

  out <- unname(multipliers[match(codes, names(multipliers))])
  out[is.na(out)] <- 1 # unrecognised or missing code
  out
}
# Turn the reported damage figures into dollar amounts: each magnitude is
# scaled by the multiplier exp_map() returns for its exponent code, and the
# property and crop amounts are then added into one total per record.
storm <- storm %>%
  mutate(
    PROPDMGVAL = PROPDMG * exp_map(PROPDMGEXP),
    CROPDMGVAL = CROPDMG * exp_map(CROPDMGEXP),
    TOTALDMG = PROPDMGVAL + CROPDMGVAL
  )
# Fatalities and injuries summed per event type; only the ten event types
# with the largest combined count are kept, largest first.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    Total = Fatalities + Injuries
  ) %>%
  arrange(desc(Total)) %>%
  slice_head(n = 10)
# One row per (event type, casualty kind) so the two counts can be drawn as
# stacked segments of the same bar.
health_long <- health %>%
  pivot_longer(
    cols = c(Fatalities, Injuries),
    names_to = "Type",
    values_to = "Count"
  )

# Horizontal stacked bars, one per event type, split into fatalities and
# injuries.
ggplot(
  data = health_long,
  mapping = aes(x = reorder(EVTYPE, Count), y = Count, fill = Type)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Population Health Impact",
    x = "Event Type",
    y = "Number of People Affected"
  ) +
  theme_minimal()
# Combined property and crop damage summed per event type; only the ten
# costliest event types are kept, costliest first.
economic <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    EconomicDamage = sum(TOTALDMG, na.rm = TRUE)
  ) %>%
  arrange(desc(EconomicDamage)) %>%
  slice_head(n = 10)
# Horizontal bars of total damage for the ten costliest event types, ordered
# by damage.
ggplot(
  data = economic,
  mapping = aes(x = reorder(EVTYPE, EconomicDamage), y = EconomicDamage)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Event Types by Economic Damage",
    x = "Event Type",
    y = "Total Damage (USD)"
  ) +
  theme_minimal()
# Health and economic impact side by side for the ten event types with the
# highest casualty totals.
#
# Damage is summed over every record in `storm` rather than looked up in the
# ten-row `economic` table: joining two independently truncated top-10 lists
# leaves EconomicDamage as NA for any event type outside the economic top 10,
# even though damage was recorded for it.
# NOTE: output rendered before this change still shows those NA values and
# has to be regenerated by re-running the report.
health %>%
  left_join(
    storm %>%
      group_by(EVTYPE) %>%
      summarise(EconomicDamage = sum(TOTALDMG, na.rm = TRUE)),
    by = "EVTYPE"
  ) %>%
  arrange(desc(Total)) %>%
  head(10)
## # A tibble: 10 × 5
## EVTYPE Fatalities Injuries Total EconomicDamage
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979 57352114049.
## 2 EXCESSIVE HEAT 1903 6525 8428 NA
## 3 TSTM WIND 504 6957 7461 NA
## 4 FLOOD 470 6789 7259 150319678257
## 5 LIGHTNING 816 5230 6046 NA
## 6 HEAT 937 2100 3037 NA
## 7 FLASH FLOOD 978 1777 2755 17562179167.
## 8 ICE STORM 89 1975 2064 8967041360
## 9 THUNDERSTORM WIND 133 1488 1621 NA
## 10 WINTER STORM 206 1321 1527 NA