This analysis uses the NOAA Storm Database to identify which types of weather events are most harmful to population health (fatalities and injuries) and which cause the greatest economic damage (property plus crop damage). Tornadoes caused the most fatalities and injuries, while floods caused the greatest economic losses.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load data ----
# The path points at a local copy of the bz2-compressed storm CSV; it is
# specific to the machine this report was built on.
storm_data_path <- "/Users/valel/Downloads/repdata_data_StormData.csv.bz2"
storm_data <- read.csv(file = storm_data_path)
# Data cleaning and transformation
# Translate damage-exponent codes into numeric multipliers.
#
# Vectorized: accepts a single code or a whole column of codes.
#
# @param exp Character (or factor) vector of exponent codes.
# @return Unnamed numeric vector, same length as `exp`:
#   "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9.
#   Every other value (empty string, digits, symbols, NA) maps to 1, so the
#   damage figure it is multiplied with is left unscaled.
convert_exp <- function(exp) {
  codes <- c("K", "k", "M", "m", "B", "b")
  multipliers <- c(1e3, 1e3, 1e6, 1e6, 1e9, 1e9)

  # match() returns NA for any code that is not listed (including NA itself).
  idx <- match(exp, codes)
  out <- multipliers[idx]
  out[is.na(idx)] <- 1
  out
}
# Add dollar-valued damage columns to storm_data.
# Each *_MULTIPLIER column is the numeric factor for the matching *DMGEXP
# code; TOTAL_* columns are the reported amounts scaled by that factor, and
# TOTAL_DAMAGE is property plus crop damage.
# vapply() (instead of sapply()) guarantees one numeric per row and, with
# USE.NAMES = FALSE, keeps the new columns free of element names.
storm_data <- storm_data %>%
  mutate(
    PROPDMG_MULTIPLIER = vapply(
      PROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE
    ),
    CROPDMG_MULTIPLIER = vapply(
      CROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE
    ),
    TOTAL_PROPDMG = PROPDMG * PROPDMG_MULTIPLIER,
    TOTAL_CROPDMG = CROPDMG * CROPDMG_MULTIPLIER,
    TOTAL_DAMAGE = TOTAL_PROPDMG + TOTAL_CROPDMG
  )
# Health impact analysis ----
# Sum fatalities and injuries per event type, then rank event types by
# fatalities, using injuries to break ties.
health_impact <- arrange(
  summarise(
    group_by(storm_data, EVTYPE),
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ),
  desc(Total_Fatalities),
  desc(Total_Injuries)
)

# Show the ten highest-ranked event types.
head(health_impact, n = 10)
## # A tibble: 10 × 3
## EVTYPE Total_Fatalities Total_Injuries
## <chr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
# Visualization ----
# Bar chart of the event types with the highest fatality totals, ordered from
# most to fewest fatalities along the x axis.
health_plot <- ggplot(
  data = slice_max(health_impact, Total_Fatalities, n = 10),
  mapping = aes(
    x = reorder(EVTYPE, -Total_Fatalities),
    y = Total_Fatalities
  )
) +
  geom_col(fill = "red") +
  labs(
    title = "Top 10 Most Harmful Events (Fatalities)",
    x = "Event Type",
    y = "Total Fatalities"
  ) +
  # Slant the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(health_plot)
# Economic damage analysis ----
# Sum combined property and crop damage per event type and rank event types
# from most to least costly.
economic_impact <- arrange(
  summarise(
    group_by(storm_data, EVTYPE),
    Total_Economic_Damage = sum(TOTAL_DAMAGE, na.rm = TRUE)
  ),
  desc(Total_Economic_Damage)
)

# Show the ten costliest event types.
head(economic_impact, n = 10)
## # A tibble: 10 × 2
## EVTYPE Total_Economic_Damage
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352114049.
## 4 STORM SURGE 43323541000
## 5 HAIL 18758221521.
## 6 FLASH FLOOD 17562129167.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
# Visualization ----
# Bar chart of the event types with the largest total damage, ordered from
# most to least costly; the y axis is rescaled to billions.
economic_plot <- ggplot(
  data = slice_max(economic_impact, Total_Economic_Damage, n = 10),
  mapping = aes(
    x = reorder(EVTYPE, -Total_Economic_Damage),
    y = Total_Economic_Damage / 1e9
  )
) +
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Events with Greatest Economic Damage",
    x = "Event Type",
    y = "Total Economic Damage (in Billions)"
  ) +
  # Slant the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(economic_plot)