Severe weather events can have significant impacts on human health and the economy. This analysis explores the NOAA Storm Database to determine (1) which types of weather events cause the most harm to population health (injuries and fatalities), and (2) which types of weather events result in the greatest economic consequences (property and crop damage).
The data is processed to clean and standardize event types (event names are converted to uppercase), and damage amounts are scaled by their exponent codes. Summary statistics are then used to assess the impact of each event type, and the results are visualized through bar plots to highlight the most severe weather events in terms of health and economic impact.
library(dplyr)
library(ggplot2)
library(readr)
# Remote location of the compressed storm data and the local file name it is
# stored under.
file_url <- "https://d396qusza40orc.cloudfront.net/repdata/data/StormData.csv.bz2"
file_name <- "StormData.csv.bz2"

# Download only when no local copy is present; the file is written in binary
# mode (`mode = "wb"`).
if (!file.exists(file_name)) {
  download.file(url = file_url, destfile = file_name, mode = "wb")
}

# Load the dataset; read_csv() is handed the .bz2 path directly.
storm_data <- read_csv(file = file_name)
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl (1): COUNTYENDN
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the columns used by the health and economic analyses, and convert
# the event type to uppercase so that differently-cased spellings of the same
# event fall into one group.
storm_data <- storm_data %>%
  select(
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  ) %>%
  mutate(EVTYPE = toupper(EVTYPE))
# Sum fatalities and injuries for each event type, add their combined total,
# and rank the event types from most to least harmful.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(Total_Health_Impact = Total_Fatalities + Total_Injuries) %>%
  arrange(desc(Total_Health_Impact))

# The ten event types with the largest combined casualty count.
top_health_events <- head(health_impact, n = 10)
print(top_health_events)
## # A tibble: 10 × 4
## EVTYPE Total_Fatalities Total_Injuries Total_Health_Impact
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Convert damage-exponent codes into numeric multipliers.
#
# Works on a single code or on a whole vector of codes.
#
# Args:
#   exp: A vector of exponent codes. "H", "K", "M" and "B" (either case) are
#     recognised as letter codes; a string made up solely of digits is
#     treated as a power of ten.
#
# Returns:
#   An unnamed numeric vector with one multiplier per element of `exp`:
#   100, 1e3, 1e6 or 1e9 for H, K, M or B; 10^n for an all-digit code n;
#   and 1 for anything else (NA, "", "+", "-", "?", ...).
convert_exp <- function(exp) {
  code <- as.character(exp)

  # Every code starts at the neutral multiplier; recognised codes are
  # overwritten below.
  multiplier <- rep(1, length(code))

  letter_codes <- c("H", "h", "K", "k", "M", "m", "B", "b")
  letter_values <- c(1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9)
  letter_pos <- match(code, letter_codes)
  has_letter <- !is.na(letter_pos)
  multiplier[has_letter] <- letter_values[letter_pos[has_letter]]

  # The pattern is anchored so only pure digit strings reach as.numeric();
  # grepl() yields FALSE for NA, so missing codes keep the multiplier 1.
  is_digit <- grepl("^[0-9]+$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}
# Replace the exponent codes with their numeric multipliers. vapply() is used
# rather than sapply() so the result is always a double vector with exactly
# one value per row, and USE.NAMES = FALSE avoids attaching the input codes
# as a names attribute on the column.
storm_data$PROPDMGEXP <- vapply(
  storm_data$PROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
storm_data$CROPDMGEXP <- vapply(
  storm_data$CROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)

# Calculate actual damage amounts: the recorded figure times its multiplier,
# plus the combined property + crop total for each row.
storm_data <- storm_data %>%
  mutate(
    Property_Damage = PROPDMG * PROPDMGEXP,
    Crop_Damage = CROPDMG * CROPDMGEXP,
    Total_Economic_Damage = Property_Damage + Crop_Damage
  )
# Sum property and crop damage for each event type, add their combined total,
# and rank the event types from most to least costly.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(Property_Damage, na.rm = TRUE),
    Total_Crop_Damage = sum(Crop_Damage, na.rm = TRUE)
  ) %>%
  mutate(Total_Economic_Impact = Total_Property_Damage + Total_Crop_Damage) %>%
  arrange(desc(Total_Economic_Impact))

# The ten event types with the largest combined damage.
top_economic_events <- head(economic_impact, n = 10)
print(top_economic_events)
## # A tibble: 10 × 4
## EVTYPE Total_Property_Damage Total_Crop_Damage Total_Economic_Impact
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 5661968450 150319678257
## 2 HURRICANE/TYPH… 69305840000 2607872800 71913712800
## 3 TORNADO 56947380676. 414953270 57362333946.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15735267513. 3025954473 18761221986.
## 6 FLASH FLOOD 16822723978. 1421317100 18244041078.
## 7 DROUGHT 1046106000 13972566000 15018672000
## 8 HURRICANE 11868319010 2741910000 14610229010
## 9 RIVER FLOOD 5118945500 5029459000 10148404500
## 10 ICE STORM 3944927860 5022113500 8967041360
# Horizontal bar chart of the ten event types with the highest combined
# fatalities and injuries; bars are ordered by that total.
ggplot(
  top_health_events,
  aes(x = reorder(EVTYPE, -Total_Health_Impact), y = Total_Health_Impact)
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Affecting Population Health",
    x = "Event Type",
    y = "Total Fatalities and Injuries"
  ) +
  theme_minimal()
# Horizontal bar chart of the ten event types with the highest combined
# property and crop damage; bars are ordered by that total.
ggplot(
  top_economic_events,
  aes(x = reorder(EVTYPE, -Total_Economic_Impact), y = Total_Economic_Impact)
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events with Highest Economic Impact",
    x = "Event Type",
    y = "Total Economic Damage (USD)"
  ) +
  theme_minimal()