This analysis explores the impacts of severe weather events across the United States using data from the NOAA Storm Database. The primary focus is to identify the event types most detrimental to public health and those with the greatest economic consequences. Key metrics include fatalities, injuries, property damage, and crop damage. Data preprocessing involved standardizing damage estimates and filtering for relevant columns. Analysis shows that tornadoes are the leading cause of fatalities and injuries, while floods and hurricanes account for the highest combined property and crop damage. Visualizations highlight the disparities in impact across event types and provide insights for prioritizing mitigation efforts. This project emphasizes the importance of clean and reproducible analysis to support informed decision-making. Findings can guide policymakers and emergency response teams in allocating resources effectively. The report uses R for data processing and visualization, ensuring transparency and replicability.
# Download and load the data.
# The archive is fetched only when no local copy exists, so re-running the
# analysis does not repeat the download.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
dest_file <- "StormData.csv.bz2"
if (!file.exists(dest_file)) {
  # Download to a temporary file first so that an interrupted transfer never
  # leaves a truncated archive under the cached name.
  tmp_file <- tempfile(fileext = ".csv.bz2")
  # mode = "wb" requests a binary transfer explicitly.
  status <- download.file(url, tmp_file, mode = "wb")
  if (status != 0 || !file.copy(tmp_file, dest_file)) {
    stop("Failed to download storm data from ", url, call. = FALSE)
  }
  unlink(tmp_file)
}
# read.csv() is given the compressed file directly.
storm_data <- read.csv(dest_file)
# storm_data
# Examine column names
colnames(storm_data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep only the event type plus the health and damage columns used below
storm_data <- storm_data[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
# Convert damage exponent codes to numeric multipliers.
#
# Recognised codes (case-insensitive): K = 1,000, M = 1,000,000,
# B = 1,000,000,000. Any other code (blank, unrecognised symbol, or NA) falls
# back to a multiplier of 1, so a missing code never turns a damage figure
# into NA.
#
# exp: vector of exponent codes (character, or coercible via as.character()).
# Returns a numeric vector of multipliers with the same length as `exp`.
convert_exp <- function(exp) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  # match() yields NA for codes that are not in the table (including NA input)
  idx <- match(toupper(as.character(exp)), names(multipliers))
  out <- unname(multipliers[idx])
  out[is.na(out)] <- 1
  out
}
# Multiply each damage figure by the multiplier its exponent code stands for
storm_data$PROPDMG <- with(storm_data, PROPDMG * convert_exp(PROPDMGEXP))
storm_data$CROPDMG <- with(storm_data, CROPDMG * convert_exp(CROPDMGEXP))
# Total fatalities and injuries per event type, to identify the events with
# the most significant impact on public health
health_impact <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = storm_data,
  FUN = sum
)
# Rank by fatalities (descending); injuries (descending) break ties
health_impact <- health_impact[
  with(health_impact, order(-FATALITIES, -INJURIES)),
]
# Show the ten event types with the highest health impact
head(health_impact, n = 10)
## EVTYPE FATALITIES INJURIES
## 834 TORNADO 5633 91346
## 130 EXCESSIVE HEAT 1903 6525
## 153 FLASH FLOOD 978 1777
## 275 HEAT 937 2100
## 464 LIGHTNING 816 5230
## 856 TSTM WIND 504 6957
## 170 FLOOD 470 6789
## 585 RIP CURRENT 368 232
## 359 HIGH WIND 248 1137
## 19 AVALANCHE 224 170
# Combined property and crop damage per event type, to identify the events
# with the greatest economic cost
econ_impact <- setNames(
  aggregate(PROPDMG + CROPDMG ~ EVTYPE, data = storm_data, FUN = sum),
  c("EVTYPE", "TOTAL_DAMAGE")
)
# Rank from the most to the least costly event type
econ_impact <- econ_impact[with(econ_impact, order(-TOTAL_DAMAGE)), ]
# Show the ten costliest event types
head(econ_impact, n = 10)
## EVTYPE TOTAL_DAMAGE
## 170 FLOOD 150319678257
## 411 HURRICANE/TYPHOON 71913712800
## 834 TORNADO 57352114049
## 670 STORM SURGE 43323541000
## 244 HAIL 18758221521
## 153 FLASH FLOOD 17562129167
## 95 DROUGHT 15018672000
## 402 HURRICANE 14610229010
## 590 RIVER FLOOD 10148404500
## 427 ICE STORM 8967041360
The bar charts below visualize the ten highest-ranked event types for each measure, starting with fatalities:
library(ggplot2)
# Bar chart of the first ten rows of health_impact, with bars ordered from
# the most to the fewest fatalities
ggplot(
  health_impact[seq_len(10), ],
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_col(fill = "red") +
  ggtitle(
    "Capstone Insight: Events Most Harmful to Public Health",
    subtitle = "Tornadoes lead to the highest fatalities across the US"
  ) +
  xlab("Event Type") +
  ylab("Fatalities") +
  # Slant the x-axis labels so long event names do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
library(ggplot2)
# econ_impact is expected to hold the total (property + crop) damage per event
# type, already sorted from highest to lowest
ggplot(
  econ_impact[seq_len(10), ],
  aes(x = reorder(EVTYPE, -TOTAL_DAMAGE), y = TOTAL_DAMAGE)
) +
  geom_col(fill = "blue") +
  ggtitle(
    "Capstone Analysis: Economic Impact of Weather Events",
    subtitle = "Top 10 Event Types by Property and Crop Damage"
  ) +
  xlab("Event Type") +
  ylab("Total Damage ($)") +
  theme(
    # Slant the x-axis labels so long event names do not overlap
    axis.text.x = element_text(angle = 45, hjust = 1),
    # Emphasise the title and keep the subtitle slightly smaller
    plot.title = element_text(face = "bold", size = 16),
    plot.subtitle = element_text(size = 12)
  )
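Note that both plots show only the ten highest-ranked event types, and that event types are used exactly as recorded in the `EVTYPE` column.
Closely related categories (for example `HURRICANE` and `HURRICANE/TYPHOON`) therefore appear as separate bars.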