This analysis explores the influence of weather events on public health and economic outcomes in the U.S. between 1996 and 2011. Utilizing data from the National Oceanic and Atmospheric Administration (NOAA), we assess the most devastating events based on fatalities, injuries, and both property and crop damages. The analysis identifies tornadoes and excessive heat as leading causes of fatalities and injuries, while floods stand out for the most severe economic losses. The results provide critical insights for improving disaster preparedness and mitigation efforts.
We begin by loading the NOAA storm dataset, focusing on records from January 1996 onward, when consistent event tracking began.
# Load the raw NOAA storm data and keep only events that began in 1996 or
# later.
storm <- "H:/My Drive/Data Science Diploma/5Reproducive/project2/StormData.csv/StormData.csv"
olddata <- read.csv(file = storm, header = TRUE, sep = ",")
# Parse BGN_DATE as a Date rather than with strptime(): strptime() returns
# POSIXlt, a list-based class that is awkward to store as a data frame column,
# and only the calendar date is needed for the cutoff below. The format stops
# after the year, so any trailing time-of-day text is ignored.
olddata$BGN_DATE <- as.Date(olddata$BGN_DATE, format = "%m/%d/%Y")
# Compare against an explicit Date instead of relying on implicit coercion of
# a character string.
newdata <- subset(olddata, BGN_DATE >= as.Date("1996-01-01"))
# The full data set is no longer needed once the subset has been taken.
rm(olddata)
We then keep only the columns needed for the analysis and drop records that report no fatalities, no injuries, and no property or crop damage.
# Retain just the event type plus the casualty and damage fields.
newdata <- newdata[, c("EVTYPE", "FATALITIES", "INJURIES",
                       "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
# Upper-case the event names so differently-cased spellings group together.
newdata$EVTYPE <- toupper(newdata$EVTYPE)
# Discard rows in which all four impact measures are zero.
newdata <- newdata[with(newdata, !(FATALITIES == 0 & INJURIES == 0 &
                                     PROPDMG == 0 & CROPDMG == 0)), ]
We calculate the total fatalities and injuries for each event type.
# Sum deaths and injuries separately within each event type. Each result is a
# data frame with one row per EVTYPE.
fatalities <- aggregate(FATALITIES ~ EVTYPE, newdata, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, newdata, FUN = sum)
We then sort and extract the top 10 events by fatalities and injuries.
# Rank event types by total count (ties broken alphabetically by EVTYPE) and
# keep the ten highest. head() is used instead of `[1:10, ]` so that a result
# with fewer than ten event types is returned as-is rather than padded with
# all-NA rows.
fatalities <- head(arrange(fatalities, desc(FATALITIES), EVTYPE), 10)
injuries <- head(arrange(injuries, desc(INJURIES), EVTYPE), 10)
fatalities
## EVTYPE FATALITIES
## 1 EXCESSIVE HEAT 1797
## 2 TORNADO 1511
## 3 FLASH FLOOD 887
## 4 LIGHTNING 651
## 5 FLOOD 414
## 6 RIP CURRENT 340
## 7 TSTM WIND 241
## 8 HEAT 237
## 9 HIGH WIND 235
## 10 AVALANCHE 223
injuries
## EVTYPE INJURIES
## 1 TORNADO 20667
## 2 FLOOD 6758
## 3 EXCESSIVE HEAT 6391
## 4 LIGHTNING 4141
## 5 TSTM WIND 3629
## 6 FLASH FLOOD 1674
## 7 THUNDERSTORM WIND 1400
## 8 WINTER STORM 1292
## 9 HURRICANE/TYPHOON 1275
## 10 HEAT 1222
# Fatalities Plot
# reorder() ranks the bars from the highest to the lowest count; mapping
# EVTYPE directly would lay them out alphabetically and hide the ranking.
ggplot(fatalities, aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_bar(stat = "identity", fill = brewer.pal(9, "Purples")[7]) + # Using a color from RColorBrewer
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 10 Weather Events Leading to the Highest Fatalities (1996–2011)",
    x = "Event Type",
    y = "Number of Fatalities",
    caption = "Figure 1: Excessive heat resulted in the highest fatalities."
  )
# Injuries Plot
# reorder() ranks the bars from the highest to the lowest count; mapping
# EVTYPE directly would lay them out alphabetically and hide the ranking.
ggplot(injuries, aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
  geom_bar(stat = "identity", fill = brewer.pal(9, "Blues")[6]) + # Using a color from RColorBrewer
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Top 10 Weather Events Leading to the Most Injuries (1996–2011)",
    x = "Event Type",
    y = "Number of Injuries",
    caption = "Figure 2: Tornadoes caused the most injuries."
  )
We convert the damage exponent codes for property and crop damage (letters such as H, K, M, and B, plus a few symbols) into numeric powers of ten.
# Convert a vector of damage-exponent codes into numeric powers of ten.
#
# x: vector of exponent codes (as stored in PROPDMGEXP / CROPDMGEXP).
# Returns a numeric vector of the same length as x.
#
# Substitutions are applied in order: H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9,
# "+" -> 1, and "?", "-" or a space -> 0. Digit characters are left untouched.
# Anything that still cannot be parsed as a number afterwards (including the
# empty string) becomes 0.
decode_dmg_exp <- function(x) {
  replacements <- c(
    "[Hh]" = "2",
    "[Kk]" = "3",
    "[Mm]" = "6",
    "[Bb]" = "9",
    "\\+" = "1",
    "\\?|\\-|\\ " = "0"
  )
  for (pattern in names(replacements)) {
    x <- gsub(pattern, replacements[[pattern]], x)
  }
  x <- as.numeric(x)
  x[is.na(x)] <- 0
  x
}

# Property and crop exponents share the same coding, so both columns go
# through the same helper instead of two copies of the substitution chain.
newdata$PROPDMGEXP <- decode_dmg_exp(newdata$PROPDMGEXP)
newdata$CROPDMGEXP <- decode_dmg_exp(newdata$CROPDMGEXP)
We then calculate the total damages.
# Scale each damage amount by its power-of-ten exponent.
newdata <- mutate(
  newdata,
  PROPDMGTOTAL = PROPDMG * 10^PROPDMGEXP,
  CROPDMGTOTAL = CROPDMG * 10^CROPDMGEXP
)
# Total property and crop damage per event type, then their combined loss.
Economy_Loss <- aggregate(cbind(PROPDMGTOTAL, CROPDMGTOTAL) ~ EVTYPE, data = newdata, FUN = sum)
Economy_Loss$ECONOMIC_LOSS <- Economy_Loss$PROPDMGTOTAL + Economy_Loss$CROPDMGTOTAL
# Sort from the largest combined loss to the smallest.
Economy_Loss <- Economy_Loss[order(Economy_Loss$ECONOMIC_LOSS, decreasing = TRUE), ]
# Keep the ten costliest event types. Columns are selected by name rather than
# by position, and head() avoids the all-NA padding rows that `[1:10, ]`
# produces when fewer than ten event types exist.
highest_events <- head(Economy_Loss[, c("EVTYPE", "ECONOMIC_LOSS")], 10)
highest_events
## EVTYPE ECONOMIC_LOSS
## 48 FLOOD 148919611950
## 88 HURRICANE/TYPHOON 71913712800
## 141 STORM SURGE 43193541000
## 149 TORNADO 24900370720
## 66 HAIL 17071172870
## 46 FLASH FLOOD 16557105610
## 86 HURRICANE 14554229010
## 32 DROUGHT 14413667000
## 152 TROPICAL STORM 8320186550
## 83 HIGH WIND 5881421660
# Economic loss plot
# reorder() ranks the bars from the largest to the smallest loss; mapping
# EVTYPE directly would lay them out alphabetically and hide the ranking.
ggplot(highest_events, aes(x = reorder(EVTYPE, -ECONOMIC_LOSS), y = ECONOMIC_LOSS)) +
  geom_bar(stat = "identity", fill = brewer.pal(9, "YlGnBu")[6]) + # Using a color from RColorBrewer
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") + ylab("Total Property & Crop Damages (USD)") +
  ggtitle("Top 10 Weather Events Causing the Greatest Economic Losses in the U.S. (1996-2011)")
Based on the data, the weather events with the most severe impacts on health and economy have been identified. Excessive heat caused the highest fatalities, while tornadoes were responsible for the most injuries. Economically, floods resulted in the greatest financial losses, amounting to approximately $148.9 billion, followed by hurricanes/typhoons at $71.9 billion. This emphasizes the need for targeted disaster management strategies to mitigate the health and economic toll of these extreme weather events.