This report analyzes the impact of severe weather events on public health and the economy using the NOAA Storm Database. We examine which types of events cause the most fatalities and injuries, and which lead to the greatest property and crop damage. Our findings indicate that tornadoes are the leading threat to public health, while floods result in the highest economic losses.
library(dplyr)
library(ggplot2)
# STEP 1: AUTO-FINDER (This solves the "File Not Found" error)
# Pick the first file in the working directory whose name contains "data"
# (case-insensitive, so "StormData" matches too) and ENDS in .csv or .bz2.
# The extension is anchored with "$" and the dot is escaped, so unrelated
# files that merely mention "csv"/"bz2" somewhere in their name
# (e.g. "data_csv_notes.txt") are no longer picked up by accident.
all_files <- list.files()
target_file <- grep("data.*\\.(csv|bz2)$", all_files,
                    ignore.case = TRUE, value = TRUE)[1]
if (is.na(target_file)) {
  # Nothing matched: indexing an empty result with [1] yields NA. Report the
  # working directory and its contents so the reader can see what is wrong.
  stop(paste("STILL NOT FOUND. R is looking in:", getwd(),
             ". Files actually in this folder are:",
             paste(all_files, collapse = ", ")))
}
print(paste("Found file:", target_file))
## [1] "Found file: data.csv.bz2.bz2"
# Load the raw storm records into a data frame.
storm_data <- read.csv(target_file)
# STEP 2: Define the conversion function
#' Translate damage-exponent codes into numeric multipliers.
#'
#' Vectorized: accepts a single code or a whole column of codes.
#'
#' @param exp Character (or factor) vector of exponent codes. Matching is
#'   case-insensitive: "K" -> 1e3, "M" -> 1e6, "B" -> 1e9.
#' @return A plain numeric vector the same length as `exp`. Any other code,
#'   including "" and NA, maps to 1 (the amount is used as-is).
get_multiplier <- function(exp) {
  codes <- toupper(as.character(exp))
  lookup <- c(K = 1e3, M = 1e6, B = 1e9)
  # Name-based indexing yields NA for codes that are not in the table
  # (and for NA/"" inputs); those fall back to the default multiplier of 1.
  mult <- unname(lookup[codes])
  mult[is.na(mult)] <- 1
  mult
}
# STEP 3: Apply transformations
# Turn each exponent code into its numeric multiplier, one value per row.
# vapply() is used instead of sapply() so the result is always a plain numeric
# vector: sapply() would return an empty list for a zero-row table and would
# attach the exponent codes as element names on character input.
storm_data$prop_mult <- vapply(storm_data$PROPDMGEXP, get_multiplier,
                               numeric(1), USE.NAMES = FALSE)
storm_data$crop_mult <- vapply(storm_data$CROPDMGEXP, get_multiplier,
                               numeric(1), USE.NAMES = FALSE)
# Derive per-record totals: scaled property and crop damage, their sum, and
# the combined count of fatalities and injuries.
storm_data <- storm_data %>%
  mutate(
    Prop_Damage_Total = PROPDMG * prop_mult,
    Crop_Damage_Total = CROPDMG * crop_mult,
    Total_Economic_Damage = Prop_Damage_Total + Crop_Damage_Total,
    Total_Health_Impact = FATALITIES + INJURIES
  )
# Sum fatalities + injuries per event type and keep the ten largest totals.
health_results <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Impact = sum(Total_Health_Impact)) %>%
  ungroup() %>%
  arrange(desc(Total_Impact)) %>%
  head(n = 10)

# Bar chart of the ten event types, tallest bar first; x labels are tilted
# 45 degrees so the event names stay readable.
ggplot(
  health_results,
  aes(x = reorder(EVTYPE, -Total_Impact), y = Total_Impact)
) +
  geom_col(fill = "indianred") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 Events by Population Health Impact") +
  xlab("Event Type") +
  ylab("Fatalities + Injuries")
# Sum property + crop damage per event type, expressed in billions of USD,
# and keep the ten largest totals.
econ_results <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Damage_Billions = sum(Total_Economic_Damage) / 1e9) %>%
  ungroup() %>%
  arrange(desc(Total_Damage_Billions)) %>%
  head(n = 10)

# Bar chart of the ten event types, tallest bar first; x labels are tilted
# 45 degrees so the event names stay readable.
ggplot(
  econ_results,
  aes(x = reorder(EVTYPE, -Total_Damage_Billions), y = Total_Damage_Billions)
) +
  geom_col(fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 Events by Economic Impact") +
  xlab("Event Type") +
  ylab("Total Damage (Billions USD)")