This analysis explores the NOAA Storm Database to identify which types of severe weather events cause the greatest harm to population health and the largest economic damage in the United States. The results show that tornadoes have by far the largest total health impact, with nearly 100,000 combined fatalities and injuries, followed by excessive heat events with fewer than 10,000. Economically, floods cause the most damage, estimated at around 150 billion US dollars, followed by hurricanes/typhoons at roughly 72 billion and tornadoes at over 57 billion dollars. These findings highlight the significant risks these weather events pose to both human health and the economy.
library(dplyr)
library(ggplot2)
# Read the compressed storm CSV straight from the working directory;
# text columns are kept as character vectors rather than factors.
storm_data <- read.csv(
  file = "repdata-data-StormData.csv.bz2",
  stringsAsFactors = FALSE
)
# Preview the first six records to confirm the data loaded
head(storm_data, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Standardize event labels: strip surrounding whitespace, then upper-case,
# so labels that differ only in case or padding fall into the same group.
storm_data[["EVTYPE"]] <- toupper(trimws(storm_data[["EVTYPE"]]))
# Optional: manual clean-up of common typos or variant spellings could go here
# Convert damage-exponent codes into numeric dollar multipliers.
#
# Args:
#   exp: A single exponent code or a vector of codes (character, or anything
#        coercible with as.character()).
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "H"/"h" -> 1e2, "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9,
#     an all-digit code d -> 10^d,
#     anything else (blank, NA, "+", "-", "?", ...) -> 1.
#
# NOTE(review): only K, M and B are officially documented magnitude codes.
# Digit codes are read here as powers of ten (the column is an *exponent*),
# where previously the digit itself was used as the multiplier, so "0" wiped
# the damage out entirely. "H" is read as hundreds. Confirm both readings
# against the data documentation before relying on exact totals.
convert_exp <- function(exp) {
  code <- toupper(as.character(exp))

  # Start from the fallback multiplier, then overwrite the recognized codes.
  multiplier <- rep(1, length(code))
  multiplier[code %in% "H"] <- 1e2
  multiplier[code %in% "K"] <- 1e3
  multiplier[code %in% "M"] <- 1e6
  multiplier[code %in% "B"] <- 1e9

  # grepl() is FALSE for NA, so missing codes keep the fallback of 1.
  is_digit <- grepl("^[0-9]+$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}
# Translate both exponent columns into numeric multipliers, one code at a
# time. vapply() guarantees a plain numeric vector (sapply() can silently
# change its return type), and USE.NAMES = FALSE avoids attaching the
# exponent strings as names to every element of the column.
storm_data$PROPDMGEXP <- vapply(
  storm_data$PROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
storm_data$CROPDMGEXP <- vapply(
  storm_data$CROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
# Dollar amounts: reported magnitude times its multiplier
storm_data$PROPDMG_TOTAL <- storm_data$PROPDMG * storm_data$PROPDMGEXP
storm_data$CROPDMG_TOTAL <- storm_data$CROPDMG * storm_data$CROPDMGEXP
# Total economic damage = property damage + crop damage
storm_data$TOTAL_DAMAGE <- storm_data$PROPDMG_TOTAL + storm_data$CROPDMG_TOTAL
# Health impact per record: deaths plus injuries
storm_data[["HEALTH_IMPACT"]] <- with(storm_data, FATALITIES + INJURIES)
# Peek at the derived columns
storm_data %>%
  select(EVTYPE, PROPDMG_TOTAL, CROPDMG_TOTAL, TOTAL_DAMAGE, HEALTH_IMPACT) %>%
  head()
## EVTYPE PROPDMG_TOTAL CROPDMG_TOTAL TOTAL_DAMAGE HEALTH_IMPACT
## 1 TORNADO 25000 0 25000 15
## 2 TORNADO 2500 0 2500 0
## 3 TORNADO 25000 0 25000 2
## 4 TORNADO 2500 0 2500 2
## 5 TORNADO 2500 0 2500 2
## 6 TORNADO 2500 0 2500 6
We converted all event types to uppercase and trimmed any whitespace to standardize the data.
We transformed the damage exponents (e.g., 'K', 'M', 'B') into numeric multipliers to calculate actual dollar damages for property and crops.
We then computed the total economic damage by summing property and crop damages.
Additionally, we created a total health impact variable by adding fatalities and injuries.
# Add up fatalities + injuries for every event type and rank the event
# types from most to least harmful
health_summary <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Health_Impact = sum(HEALTH_IMPACT, na.rm = TRUE)) %>%
  arrange(desc(Total_Health_Impact))
# Display the ten most harmful event types
health_summary %>% head(n = 10)
## # A tibble: 10 × 2
## EVTYPE Total_Health_Impact
## <chr> <dbl>
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
# Horizontal bar chart of the ten event types with the highest combined
# fatalities and injuries; bars are ordered by their total
top_health_events <- health_summary %>% head(10)
ggplot(
  data = top_health_events,
  mapping = aes(
    x = reorder(EVTYPE, Total_Health_Impact),
    y = Total_Health_Impact
  )
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 10 Most Harmful Weather Events by Health Impact",
    x = "Event Type",
    y = "Total Fatalities + Injuries"
  ) +
  theme_minimal()
# Add up property + crop damage (in dollars) for every event type and rank
# the event types from most to least costly
economic_summary <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(Total_Economic_Damage = sum(TOTAL_DAMAGE, na.rm = TRUE)) %>%
  arrange(desc(Total_Economic_Damage))
# Display the ten costliest event types
economic_summary %>% head(n = 10)
## # A tibble: 10 × 2
## EVTYPE Total_Economic_Damage
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352114164
## 4 STORM SURGE 43323541000
## 5 HAIL 18758221385
## 6 FLASH FLOOD 17562179394.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041310
# Horizontal bar chart of the ten costliest event types; dollar totals are
# divided by 1e9 so the axis reads in billions of USD
top_economic_events <- economic_summary %>% head(10)
ggplot(
  data = top_economic_events,
  mapping = aes(
    x = reorder(EVTYPE, Total_Economic_Damage),
    y = Total_Economic_Damage / 1e9
  )
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Economic Damage (Billion USD)",
    x = "Event Type",
    y = "Total Damage (Billion USD)"
  ) +
  theme_minimal()