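This report analyzes the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to identify which types of weather events are most harmful to population health and which have the greatest economic consequences.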
library(ggplot2)
library(dplyr)
library(tidyr)
The data is loaded from the StormData.csv.bz2 file.
# Location of the bzip2-compressed storm data set
file_path <- "StormData.csv.bz2"
# read.csv() is pointed straight at the compressed file; character columns
# are kept as plain strings rather than converted to factors
storm_data <- read.csv(
  file = file_path,
  stringsAsFactors = FALSE
)
# Sanity check: number of rows/columns, then the available column names
dim(storm_data)
## [1] 902297 37
names(storm_data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
We only need the event type, health impact variables (fatalities, injuries), and economic impact variables (property damage, crop damage).
# Keep only the columns the analysis uses: the event type, the two health
# counts, and the damage amounts together with their exponent codes
storm_subset <- select(
  storm_data,
  EVTYPE,
  FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)
head(storm_subset, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
The EVTYPE variable is troublesome (mixed case, extra
whitespace, abbreviations). We apply basic normalization: convert to
uppercase and trim whitespace.
# Upper-case every event label, then strip leading/trailing whitespace, so
# labels that differ only by case or padding fall into the same group
storm_subset[["EVTYPE"]] <- trimws(toupper(storm_subset[["EVTYPE"]]))
The PROPDMGEXP and CROPDMGEXP columns
contain multiplier codes (H = hundreds, K = thousands, M = millions,
B = billions; a single digit d is read as 10^d). We convert these to
numeric multipliers, treating any other or missing code as 1, so we can
compute actual dollar amounts.
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   exp: vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
#        Codes are matched case-insensitively after trimming whitespace.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#     a single digit "0".."9" -> 10^digit,
#     anything else (blank, "+", "-", "?", NA, ...) -> 1.
parse_exp <- function(exp) {
  exp <- toupper(trimws(exp))

  # Start from the fallback multiplier and overwrite only recognised codes.
  multiplier <- rep(1, length(exp))

  letter_codes <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- exp %in% names(letter_codes)
  multiplier[is_letter] <- unname(letter_codes[exp[is_letter]])

  # Convert to numeric only where the code really is a digit. Coercing the
  # whole vector (as a case_when() right-hand side would) turns every letter
  # or blank code into NA and raises "NAs introduced by coercion" warnings.
  is_digit <- exp %in% as.character(0:9)
  multiplier[is_digit] <- 10^as.numeric(exp[is_digit])

  multiplier
}
# Step 1: decode the exponent codes into numeric multipliers.
# Step 2: scale the raw damage figures into dollars and add property and
#         crop damage together per record.
storm_subset <- storm_subset %>%
  mutate(
    prop_multiplier = parse_exp(PROPDMGEXP),
    crop_multiplier = parse_exp(CROPDMGEXP)
  ) %>%
  mutate(
    prop_damage_total = PROPDMG * prop_multiplier,
    crop_damage_total = CROPDMG * crop_multiplier,
    total_damage = prop_damage_total + crop_damage_total
  )
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `prop_multiplier = parse_exp(PROPDMGEXP)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
We aggregate total fatalities and injuries by event type and select the top 10 most harmful events.
# Per event type: total fatalities, total injuries and their sum; keep the
# ten event types with the largest combined count, in descending order
health_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE),
    Total_Health_Impact = Total_Fatalities + Total_Injuries,
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Health_Impact)) %>%
  slice_head(n = 10)
health_impact
## # A tibble: 10 × 4
## EVTYPE Total_Fatalities Total_Injuries Total_Health_Impact
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Long format for the stacked bar chart: one row per (event type, measure).
# EVTYPE becomes a factor whose levels follow the ranking in health_impact,
# which fixes the left-to-right order of the bars.
health_long <- health_impact %>%
  select(EVTYPE, Total_Fatalities, Total_Injuries) %>%
  mutate(EVTYPE = factor(EVTYPE, levels = health_impact$EVTYPE)) %>%
  pivot_longer(
    cols = c(Total_Fatalities, Total_Injuries),
    names_to = "Type",
    values_to = "Count"
  )
# Figure 1: stacked bars of fatalities and injuries for the ten most harmful
# event types.
#
# EVTYPE is already a factor whose levels follow the health-impact ranking
# (set when health_long is built), so it is mapped directly; wrapping it in
# reorder(EVTYPE, -Count) only re-derived the same order from per-type means.
ggplot(health_long, aes(x = EVTYPE, y = Count, fill = Type)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    # Explicit breaks pin each label to its series; without them the labels
    # are matched positionally against the alphabetically sorted Type values.
    breaks = c("Total_Fatalities", "Total_Injuries"),
    values = c("Total_Fatalities" = "#c0392b", "Total_Injuries" = "#e67e22"),
    labels = c("Fatalities", "Injuries")
  ) +
  labs(
    title = "Top 10 Weather Events Most Harmful to Population Health (1950–2011)",
    subtitle = "Stacked bar showing fatalities and injuries by event type",
    x = "Event Type",
    y = "Total Casualties",
    fill = "Impact Type"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold"),
    legend.position = "top"
  )
Figure 1: Top 10 weather event types by total population health impact (fatalities + injuries), 1950–2011. Tornadoes are by far the leading cause of combined casualties.
Tornadoes dominate all other event types in total population health impact. Excessive heat ranks second in both fatalities and total casualties, while thunderstorm winds (TSTM WIND) rank second in injuries.
We aggregate total property and crop damage by event type and select the top 10 costliest events.
# Per event type: total property damage, total crop damage and their sum
# (in dollars); keep the ten costliest event types, in descending order
economic_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Prop_Damage = sum(prop_damage_total, na.rm = TRUE),
    Total_Crop_Damage = sum(crop_damage_total, na.rm = TRUE),
    Total_Economic_Damage = Total_Prop_Damage + Total_Crop_Damage,
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Economic_Damage)) %>%
  slice_head(n = 10)
# Print the ranking in billions of dollars (two decimals) with the unit
# appended to each numeric column name; economic_impact itself is unchanged
economic_impact %>%
  mutate(across(where(is.numeric), function(x) round(x / 1e9, 2))) %>%
  rename_with(function(nm) paste0(nm, " (Billions $)"), where(is.numeric))
## # A tibble: 10 × 4
## EVTYPE Total_Prop_Damage (B…¹ Total_Crop_Damage (B…² Total_Economic_Damag…³
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 145. 5.66 150.
## 2 HURRICA… 69.3 2.61 71.9
## 3 TORNADO 57.0 0.41 57.4
## 4 STORM S… 43.3 0 43.3
## 5 HAIL 15.7 3.03 18.8
## 6 FLASH F… 16.8 1.42 18.2
## 7 DROUGHT 1.05 14.0 15.0
## 8 HURRICA… 11.9 2.74 14.6
## 9 RIVER F… 5.12 5.03 10.2
## 10 ICE STO… 3.94 5.02 8.97
## # ℹ abbreviated names: ¹`Total_Prop_Damage (Billions $)`,
## # ²`Total_Crop_Damage (Billions $)`, ³`Total_Economic_Damage (Billions $)`
# Long format for the stacked bar chart: one row per (event type, damage
# kind), with damage expressed in billions of dollars. EVTYPE becomes a
# factor whose levels follow the ranking in economic_impact.
economic_long <- economic_impact %>%
  select(EVTYPE, Total_Prop_Damage, Total_Crop_Damage) %>%
  mutate(EVTYPE = factor(EVTYPE, levels = economic_impact$EVTYPE)) %>%
  pivot_longer(
    cols = c(Total_Prop_Damage, Total_Crop_Damage),
    names_to = "Type",
    values_to = "Damage"
  ) %>%
  mutate(Damage = Damage / 1e9)
# Figure 2: stacked bars of property and crop damage (billions USD) for the
# ten costliest event types.
#
# EVTYPE is already a factor whose levels follow the economic ranking (set
# when economic_long is built), so it is mapped directly; wrapping it in
# reorder(EVTYPE, -Damage) only re-derived the same order from per-type means.
ggplot(economic_long, aes(x = EVTYPE, y = Damage, fill = Type)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    # Explicit breaks are required here: the default breaks are the sorted
    # Type values ("Total_Crop_Damage", "Total_Prop_Damage"), so positional
    # labels given in property-then-crop order would be attached to the
    # wrong series and the legend would be mislabelled.
    breaks = c("Total_Prop_Damage", "Total_Crop_Damage"),
    values = c("Total_Prop_Damage" = "#2980b9", "Total_Crop_Damage" = "#27ae60"),
    labels = c("Property Damage", "Crop Damage")
  ) +
  labs(
    title = "Top 10 Weather Events with Greatest Economic Consequences (1950–2011)",
    subtitle = "Stacked bar showing property and crop damage by event type (billions USD)",
    x = "Event Type",
    y = "Total Damage (Billions USD)",
    fill = "Damage Type"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(face = "bold"),
    legend.position = "top"
  )
Figure 2: Top 10 weather event types by total economic damage (property + crop, in billions USD), 1950–2011. Floods and hurricanes/typhoons lead in total economic losses.
Floods are the single largest contributor to total economic damage, primarily through property damage.
# Heading, then a markdown table of the five event types with the highest
# combined fatalities and injuries
cat("=== Top 5 Events by Health Impact ===\n")
## === Top 5 Events by Health Impact ===
health_top5 <- head(
  select(health_impact, EVTYPE, Total_Fatalities, Total_Injuries, Total_Health_Impact),
  5
)
knitr::kable(
  health_top5,
  format = "markdown",
  col.names = c("Event Type", "Fatalities", "Injuries", "Total Casualties")
)
| Event Type | Fatalities | Injuries | Total Casualties |
|---|---|---|---|
| TORNADO | 5633 | 91346 | 96979 |
| EXCESSIVE HEAT | 1903 | 6525 | 8428 |
| TSTM WIND | 504 | 6957 | 7461 |
| FLOOD | 470 | 6789 | 7259 |
| LIGHTNING | 816 | 5230 | 6046 |
# Heading, then a markdown table of the five costliest event types, with
# damage rounded to two decimals and expressed in billions of dollars
cat("\n=== Top 5 Events by Economic Damage (Billions USD) ===\n")
##
## === Top 5 Events by Economic Damage (Billions USD) ===
economic_impact %>%
  mutate(across(where(is.numeric), ~ round(. / 1e9, 2))) %>%
  select(EVTYPE, Total_Prop_Damage, Total_Crop_Damage, Total_Economic_Damage) %>%
  head(5) %>%
  knitr::kable(
    format = "markdown",
    # Dollar signs are backslash-escaped: unescaped, the markdown renderer
    # reads the text between two "$" in the header row as inline TeX math
    # and mangles the column titles.
    col.names = c("Event Type", "Property (B\\$)", "Crop (B\\$)", "Total (B\\$)")
  )
| Event Type | Property (B$) | Crop (B$) | Total (B$) |
|---|---|---|---|
| FLOOD | 144.66 | 5.66 | 150.32 |
| HURRICANE/TYPHOON | 69.31 | 2.61 | 71.91 |
| TORNADO | 56.95 | 0.41 | 57.36 |
| STORM SURGE | 43.32 | 0.00 | 43.32 |
| HAIL | 15.74 | 3.03 | 18.76 |