This analysis explores the NOAA Storm Database to determine which types of weather events are most harmful to population health and which have the greatest economic consequences. The data spans from 1950 to 2011. Tornadoes are the most harmful in terms of injuries and fatalities. Floods and hurricanes cause the highest economic damage. These insights help authorities prioritize disaster preparedness.
# Install packages if not present.
# requireNamespace() only checks that a package is installed; unlike
# require(), it does not attach the package or emit a warning when the
# package is missing, so attaching is left to the library() calls below.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
if (!requireNamespace("tidyr", quietly = TRUE)) {
  install.packages("tidyr")
}
# library() stops with an error if a package is still unavailable.
library(dplyr)
library(ggplot2)
library(tidyr)
# Download dataset if not present; later runs reuse the local copy.
if (!file.exists("stormdata.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    "stormdata.csv.bz2",
    # The archive is a binary file: request a binary transfer explicitly so
    # it cannot be corrupted by a text-mode download.
    mode = "wb"
  )
}
# Load dataset; read.csv() is given the bz2-compressed file directly.
storm_data <- read.csv("stormdata.csv.bz2", stringsAsFactors = FALSE)
# Quick sanity checks: dimensions and the first few rows.
dim(storm_data)
## [1] 902297 37
head(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Health impact per event type: sum fatalities and injuries for each EVTYPE,
# add them into a combined total, and rank event types by that total
# (largest first).
health_data <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE),
    # Refers to the two per-group sums computed just above.
    total = fatalities + injuries
  ) %>%
  arrange(desc(total))
# Keep the ten most harmful event types and print them.
top10_health <- health_data %>%
  head(10)
top10_health
## # A tibble: 10 × 4
## EVTYPE fatalities injuries total
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Convert damage-exponent codes into numeric multipliers.
#
# exp: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
#      Matching is case-insensitive.
# Returns a numeric vector the same length as `exp`:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, anything else -> 1.
#
# Missing (NA) codes are treated like any other unrecognised code and map
# to 1, so a record with a missing exponent keeps its raw damage value
# instead of becoming NA and being dropped by a later na.rm = TRUE sum.
# NOTE(review): other codes present in the raw data (e.g. "H" or digits) are
# still mapped to 1, as before — confirm whether they need their own scale.
get_multiplier <- function(exp) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  # Look up by name; unknown, empty, and NA codes all yield NA here.
  mult <- unname(known[toupper(as.character(exp))])
  mult[is.na(mult)] <- 1
  mult
}
# Economic impact per event type: scale property and crop damage by their
# exponent codes, add the two, then sum per EVTYPE and rank event types by
# total damage (largest first).
economic_data <- storm_data %>%
  mutate(
    total = PROPDMG * get_multiplier(PROPDMGEXP) +
      CROPDMG * get_multiplier(CROPDMGEXP)
  ) %>%
  group_by(EVTYPE) %>%
  summarise(total_damage = sum(total, na.rm = TRUE)) %>%
  arrange(desc(total_damage))
# Keep the ten costliest event types and print them.
top10_econ <- economic_data %>%
  head(10)
top10_econ
## # A tibble: 10 × 2
## EVTYPE total_damage
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352114049.
## 4 STORM SURGE 43323541000
## 5 HAIL 18758221521.
## 6 FLASH FLOOD 17562129167.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
# Horizontal bar chart of the ten most harmful event types, ordered by the
# combined injuries + fatalities count.
ggplot(top10_health, aes(x = reorder(EVTYPE, total), y = total)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col(fill = "red") +
  # Flip the axes so the event-type labels are on the vertical axis.
  coord_flip() +
  ggtitle("Top 10 Harmful Weather Events") +
  xlab("Event Type") +
  ylab("Total Injuries + Fatalities")
# Horizontal bar chart of the ten event types with the largest combined
# property + crop damage.
ggplot(top10_econ, aes(x = reorder(EVTYPE, total_damage), y = total_damage)) +
  # geom_col() draws bars whose heights are the y values themselves.
  geom_col(fill = "blue") +
  # Flip the axes so the event-type labels are on the vertical axis.
  coord_flip() +
  ggtitle("Top 10 Economic Damage Events") +
  xlab("Event Type") +
  ylab("Total Damage")