This analysis examines severe weather events in the United States using the NOAA storm database. The objective is to identify which types of events are most harmful to population health and which have the greatest economic consequences. Population health impact is measured using fatalities and injuries, while economic impact is evaluated using property and crop damage. The results show that tornadoes and excessive heat have the highest impact on human health, while floods and hurricanes contribute the most to economic losses. These insights can help authorities better prepare for and respond to severe weather conditions.
# Attach dplyr; install it first when it is not available yet
if (!require("dplyr", character.only = TRUE)) {
  install.packages(pkgs = "dplyr")
  library("dplyr")
}
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the raw storm data from the bzip2-compressed CSV
storm <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Show the structure of the loaded data frame
str(object = storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Clean damage exponent columns

# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (e.g. "K", "m", "", "5").
# Returns a numeric vector of the same length: "K", "M" and "B"
# (case-insensitive) become 1e3, 1e6 and 1e9; every other value, including
# blanks and NA, becomes 1 so the damage figure is left unscaled.
exp_to_multiplier <- function(exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(exp_code)])
  # Codes that are not in the lookup table index to NA; fall back to 1.
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

storm$PROPDMGEXP <- exp_to_multiplier(storm$PROPDMGEXP)
storm$CROPDMGEXP <- exp_to_multiplier(storm$CROPDMGEXP)

# Calculate total damages (reported amount scaled by its multiplier)
storm$PROP_DAMAGE <- storm$PROPDMG * storm$PROPDMGEXP
storm$CROP_DAMAGE <- storm$CROPDMG * storm$CROPDMGEXP
# Sum fatalities and injuries per event type and rank the event types by
# their combined total.
health_data <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(total_harm = fatalities + injuries) %>%
  arrange(desc(total_harm))

# Keep the ten event types with the largest combined total
top_health <- head(health_data, 10)

# The event names are drawn perpendicular to the axis (las = 2), so they need
# a taller bottom margin than the default or long names are clipped. The
# previous graphics settings are restored once the plot is drawn.
old_par <- par(mar = c(11, 6, 4, 2) + 0.1)
barplot(
  top_health$total_harm,
  names.arg = top_health$EVTYPE,
  las = 2,
  cex.names = 0.8,
  col = "steelblue",
  main = "Top 10 Most Harmful Weather Events",
  ylab = "Total Fatalities + Injuries"
)
par(old_par)
The total impact on population health was calculated by combining fatalities and injuries for each event type. The analysis shows that tornadoes, excessive heat, and floods are the most harmful events, causing the highest number of casualties.
# Sum property and crop damage per event type and rank the event types by
# their combined total.
economic_data <- storm %>%
  group_by(EVTYPE) %>%
  summarise(
    property_damage = sum(PROP_DAMAGE, na.rm = TRUE),
    crop_damage = sum(CROP_DAMAGE, na.rm = TRUE)
  ) %>%
  mutate(total_damage = property_damage + crop_damage) %>%
  arrange(desc(total_damage))

# Keep the ten event types with the largest combined damage
top_economic <- head(economic_data, 10)

# The event names are drawn perpendicular to the axis (las = 2), so they need
# a taller bottom margin than the default or long names are clipped. The
# previous graphics settings are restored once the plot is drawn.
old_par <- par(mar = c(11, 6, 4, 2) + 0.1)
barplot(
  top_economic$total_damage,
  names.arg = top_economic$EVTYPE,
  las = 2,
  cex.names = 0.8,
  col = "darkred",
  main = "Top 10 Weather Events by Economic Damage",
  ylab = "Total Damage (USD)"
)
par(old_par)
The economic impact was evaluated by combining property and crop damage. The results indicate that floods, hurricanes, and storm surges cause the greatest economic losses.