This analysis explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to identify the weather events that are most harmful to public health and that have the greatest economic consequences. The data span 1950 to 2011 and contain records of fatalities, injuries, property damage, and crop damage. We determine which event types (e.g., tornado, flood) cause the most deaths and injuries, and which cause the highest economic losses (property plus crop damage). The dataset is processed entirely within R, and the key findings are visualized with bar plots. Results show that tornadoes are the most dangerous to human life, while floods and hurricanes cause the greatest economic damage.
library(dplyr)
library(ggplot2)
library(readr)
# Read the raw storm records straight from the bzip2-compressed CSV
data <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Print a compact overview of every column and its type
str(object = data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type plus the casualty and damage columns
storm <- select(
  data,
  EVTYPE,
  FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)
# Convert PROPDMGEXP and CROPDMGEXP to multipliers
# Lookup table from exponent code to numeric multiplier. Codes are upper-cased
# before the lookup, so only upper-case letter keys are needed; a digit d is
# read as 10^d.
exp_map <- c(
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9,
  "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 10000,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9
)

# Translate a character vector of upper-cased exponent codes into multipliers.
#   exp_code: character vector of exponent codes
#   map:      named numeric lookup table (defaults to exp_map)
# Returns an unnamed numeric vector the same length as exp_code. Codes that are
# not in the table (including the empty string) get a multiplier of 1, which
# leaves the damage figure unscaled.
exp_to_multiplier <- function(exp_code, map = exp_map) {
  # Indexing a named vector by character attaches one name per element;
  # unname() stops those names being stored on (and propagated from) the column.
  multiplier <- unname(map[exp_code])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Normalise the codes in place so "k"/"K", "m"/"M" and "h"/"H" share one key
storm$PROPDMGEXP <- toupper(as.character(storm$PROPDMGEXP))
storm$CROPDMGEXP <- toupper(as.character(storm$CROPDMGEXP))
storm$PROPDMG_MULTI <- exp_to_multiplier(storm$PROPDMGEXP)
storm$CROPDMG_MULTI <- exp_to_multiplier(storm$CROPDMGEXP)
# Scale the raw damage figures by their multipliers, then add property and
# crop damage into one total per record
storm <- mutate(
  storm,
  property_damage = PROPDMG * PROPDMG_MULTI,
  crop_damage = CROPDMG * CROPDMG_MULTI,
  total_damage = property_damage + crop_damage
)
# Total fatalities and injuries per event type; keep the ten event types with
# the largest combined count.
# NOTE(review): EVTYPE is grouped exactly as recorded; if the raw labels differ
# only by case or whitespace they are counted as separate events -- confirm.
health <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(total_harm = fatalities + injuries) %>%
  arrange(desc(total_harm)) %>%
  head(10)
# Bar chart of the ten most harmful event types, tallest bar first
ggplot(health, aes(x = reorder(EVTYPE, -total_harm), y = total_harm)) +
  geom_col(fill = "tomato") +
  labs(
    title = "Top 10 Weather Events by Total Harm to Population Health",
    x = "Event Type",
    y = "Fatalities + Injuries"
  ) +
  # Tilt the event names so neighbouring labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Sum property + crop damage per event type; keep the ten costliest event types
economic <- storm %>%
  group_by(EVTYPE) %>%
  summarise(total_cost = sum(total_damage, na.rm = TRUE)) %>%
  arrange(desc(total_cost)) %>%
  head(10)
# Bar chart of the ten costliest event types, tallest bar first; the y axis is
# rescaled to billions
ggplot(economic, aes(x = reorder(EVTYPE, -total_cost), y = total_cost / 1e9)) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Damage (Billion USD)"
  ) +
  # Tilt the event names so neighbouring labels do not overlap
  theme(axis.text.x = element_text(angle = 45, hjust = 1))