This analysis explores the NOAA Storm Database, focusing on the impact of different types of severe weather events in the United States. The analysis aims to identify the types of events that are most harmful to population health and those with the greatest economic consequences. The data spans from 1950 to November 2011, with more complete records in recent years.
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Download the compressed data file if it is not already present locally.
# The archive is not unpacked here; it stays on disk as a .bz2 file.
if (!file.exists("stormdata.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata/data/StormData.csv.bz2",
    destfile = "stormdata.csv.bz2",
    # Request a binary transfer explicitly so the bzip2 archive cannot be
    # altered by text-mode line-ending translation.
    mode = "wb"
  )
}
# Load the storm records from the compressed CSV, keeping text columns as
# plain character vectors instead of factors.
storm_data <- read.csv(file = "stormdata.csv.bz2", stringsAsFactors = FALSE)
# Parse the event start timestamp text into a Date column.
storm_data[["BGN_DATE"]] <- as.Date(
  storm_data[["BGN_DATE"]],
  format = "%m/%d/%Y %H:%M:%S"
)
# Keep only the columns used by the health and economic analyses:
# event type, casualty counts, and damage amounts with their exponent codes.
selected_data <- select(
  storm_data,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
# Convert damage exponents to numeric multipliers
#
# Translates damage-exponent codes into the factor a damage figure must be
# multiplied by. Vectorised: accepts a single code or a whole column.
#
# exp: vector of exponent codes. "B"/"b", "M"/"m", "K"/"k" and "H"/"h" map
#   to 1e9, 1e6, 1e3 and 1e2; a single digit "0"-"9" maps to 10^digit.
# Returns: an unnamed numeric vector the same length as `exp`. Any code that
#   is not listed above (including "" and NA) yields a multiplier of 1.
convert_exponent <- function(exp) {
  # Lookup table keyed by the exponent code.
  multipliers <- c(
    B = 1e9, b = 1e9,
    M = 1e6, m = 1e6,
    K = 1e3, k = 1e3,
    H = 1e2, h = 1e2,
    setNames(10^(0:9), 0:9)
  )
  result <- unname(multipliers[match(exp, names(multipliers))])
  # Codes missing from the table leave the damage figure unscaled.
  result[is.na(result)] <- 1
  result
}
# Scale the raw damage figures to US dollars using their exponent codes,
# then drop the exponent columns, which are no longer needed.
selected_data <- selected_data %>%
  mutate(
    # vapply() guarantees one numeric multiplier per row and, with
    # USE.NAMES = FALSE, avoids attaching a name to every element.
    PROPDMGEXP = vapply(
      PROPDMGEXP, convert_exponent, numeric(1), USE.NAMES = FALSE
    ),
    CROPDMGEXP = vapply(
      CROPDMGEXP, convert_exponent, numeric(1), USE.NAMES = FALSE
    ),
    PROPDMG = PROPDMG * PROPDMGEXP,
    CROPDMG = CROPDMG * CROPDMGEXP
  ) %>%
  select(-PROPDMGEXP, -CROPDMGEXP)
# Total fatalities and injuries per event type, sorted so the deadliest
# events come first (injuries break ties).
health_impact <- arrange(
  summarize(
    group_by(selected_data, EVTYPE),
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ),
  desc(total_fatalities),
  desc(total_injuries)
)
# Top 10 events with the highest number of fatalities.
# health_impact is already sorted by fatalities, so row 1 is the maximum.
top_fatalities <- health_impact %>% top_n(10, wt = total_fatalities)
# Top 10 events with the highest number of injuries.
# top_n() keeps the incoming row order (sorted by fatalities), so sort
# explicitly by injuries to make row 1 the event with the most injuries.
top_injuries <- health_impact %>%
  top_n(10, wt = total_injuries) %>%
  arrange(desc(total_injuries))
# Horizontal bar chart of the ten deadliest event types, with bars ordered
# by their fatality totals.
ggplot(
  data = top_fatalities,
  mapping = aes(
    x = reorder(EVTYPE, total_fatalities),
    y = total_fatalities
  )
) +
  geom_bar(fill = "darkblue", stat = "identity") +
  labs(
    title = "Top 10 Weather Events Causing Fatalities",
    x = "Weather Event",
    y = "Total Fatalities"
  ) +
  coord_flip() +
  theme_classic() +
  # Centre the title; must follow theme_classic(), which sets a full theme.
  theme(plot.title = element_text(hjust = 0.5))
# Property damage, crop damage and their combined total per event type,
# each expressed in billions of USD, sorted by the combined total.
economic_impact <- arrange(
  summarize(
    group_by(selected_data, EVTYPE),
    total_prop_dmg = sum(PROPDMG, na.rm = TRUE) / 1e9,
    total_crop_dmg = sum(CROPDMG, na.rm = TRUE) / 1e9,
    total_economic_loss =
      (sum(PROPDMG, na.rm = TRUE) + sum(CROPDMG, na.rm = TRUE)) / 1e9
  ),
  desc(total_economic_loss)
)
# The ten event types with the largest combined economic loss
top_economic_loss <- top_n(economic_impact, 10, wt = total_economic_loss)
# Horizontal bar chart of the ten costliest event types, with bars ordered
# by their combined economic loss.
ggplot(
  data = top_economic_loss,
  mapping = aes(
    x = reorder(EVTYPE, total_economic_loss),
    y = total_economic_loss
  )
) +
  geom_bar(fill = "darkred", stat = "identity") +
  labs(
    title = "Top 10 Weather Events Causing Economic Loss",
    x = "Weather Event",
    y = "Total Economic Loss (in USD Billions)"
  ) +
  coord_flip() +
  theme_classic() +
  # Centre the title; must follow theme_classic(), which sets a full theme.
  theme(plot.title = element_text(hjust = 0.5))
# Summary of findings
# Select each headline event by its maximum value rather than by row
# position, so the result does not depend on how the top-10 tables are sorted.
top_fatality_event <- top_fatalities$EVTYPE[which.max(top_fatalities$total_fatalities)]
top_fatality_number <- max(top_fatalities$total_fatalities)
top_injury_event <- top_injuries$EVTYPE[which.max(top_injuries$total_injuries)]
top_injury_number <- max(top_injuries$total_injuries)
top_economic_loss_event <- top_economic_loss$EVTYPE[which.max(top_economic_loss$total_economic_loss)]
# total_economic_loss is already expressed in billions of USD.
top_economic_loss_value <- max(top_economic_loss$total_economic_loss)
cat("The most harmful weather event to population health in terms of fatalities is", top_fatality_event, "with", top_fatality_number, "fatalities.\n")
## The most harmful weather event to population health in terms of fatalities is TORNADO with 5633 fatalities.
cat("The most harmful weather event to population health in terms of injuries is", top_injury_event, "with", top_injury_number, "injuries.\n")
## The most harmful weather event to population health in terms of injuries is TORNADO with 91346 injuries.
# The value is already in billions, so it is only rounded here; dividing by
# 1e9 a second time made the reported loss round to 0.
cat("Economically, the weather event with the greatest impact is", top_economic_loss_event, "causing a total economic loss of", round(top_economic_loss_value, 2), "billion USD.\n")
## NOTE(review): the previously recorded output read "FLOOD ... 0 billion USD" because of the double division by 1e9; re-run the report to regenerate this line.
cat("Overall, tornadoes are the leading cause of fatalities and injuries, while floods and hurricanes cause the most significant property and crop damage.")
## Overall, tornadoes are the leading cause of fatalities and injuries, while floods and hurricanes cause the most significant property and crop damage.
This analysis highlights the types of severe weather events that have the most significant impacts on public health and the economy. Tornadoes are the most harmful to human health, while floods have the greatest economic consequences. This information can help guide resource allocation and preparedness strategies to mitigate the effects of these severe weather events.