This analysis uses 2024 data from the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Events Database, which documents major storms and weather hazards across the United States. The database includes information on when and where events occur, along with estimates of any fatalities, injuries, and property damage. The purpose of this analysis is to support government and municipal managers responsible for preparing for severe weather by identifying the most harmful storm events to the population, determining the frequency of these events across states and months, and identifying which counties are harmed the most.
Database: U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Events Database
Source:
https://www.ncei.noaa.gov/pub/data/swdi/stormevents/csvfiles/
Downloads:
Copy and paste the source link into your web browser to access the datasets. The link will lead you directly to the NOAA Storm Events Database.
Find and click to download the datasets listed above for Details, Fatalities, and Locations.
Find the downloaded datasets by going to where your downloads are usually stored.
Move the unzipped files to your preferred document location.
Right click on each zipped file and select “Extract All…” to utilize its contents.
required_packages <- c("dplyr", "tidyr", "readr","ggplot2", "knitr")
for(pkg in required_packages) {
if(!require(pkg, character.only = TRUE)) {
cat(paste("Installing", pkg, "...\n"))
install.packages(pkg, repos = "https://cran.r-project.org")
library(pkg, character.only = TRUE)
} else {
cat(paste(pkg, "version", packageVersion(pkg), "loaded\n"))
}
}
dplyr version 1.1.4 loaded
tidyr version 1.3.1 loaded
readr version 2.1.5 loaded
ggplot2 version 4.0.1 loaded
knitr version 1.50 loaded
folder_path <- "C:/Users/kelse/OneDrive/Documents/DAT511/Final Project"
details_file <- file.path(folder_path, "StormEvents_details-ftp_v1.0_d2024_c20251118.csv")
fatalities_file <- file.path(folder_path, "StormEvents_fatalities-ftp_v1.0_d2024_c20251118.csv")
locations_file <- file.path(folder_path, "StormEvents_locations-ftp_v1.0_d2024_c20251118.csv")
details <- read_csv(details_file)
fatalities <- read_csv(fatalities_file)
locations <- read_csv(locations_file)
joined_data <- details %>%
left_join(locations, by = "EVENT_ID") %>%
left_join(fatalities, by = "EVENT_ID")
output_file <- file.path(folder_path, "StormEvents_joined_data.csv")
write_csv(joined_data, output_file)
message("Joined data saved to: ", output_file)
Joined data saved to: C:/Users/kelse/OneDrive/Documents/DAT511/Final Project/StormEvents_joined_data.csv
Combine total number of injuries and deaths into one column
storm <- joined_data
storm$injuries <- storm$INJURIES_DIRECT + storm$INJURIES_INDIRECT
storm$deaths <- storm$DEATHS_DIRECT + storm$DEATHS_INDIRECT
storm$health <- storm$injuries + storm$deaths
Create table for storm event rankings
event_summary <- storm %>%
group_by(EVENT_TYPE) %>%
summarize(
total_inj = sum(injuries, na.rm = TRUE),
total_deaths = sum(deaths, na.rm = TRUE),
total_health_impact = sum(health, na.rm = TRUE)
)
event_ranking <- event_summary %>%
arrange(desc(total_health_impact))
Isolate top ten most harmful storm events
top10_events <- head(event_ranking, 10)
Create a bar chart for top ten events
ggplot(top10_events, aes(x = reorder(EVENT_TYPE, total_health_impact),
y = total_health_impact)) +
geom_col(color = "black", fill = "black") +
coord_flip() +
labs(title = "Top 10 Harmful Storm Events on Population Health",
x = "Event Type",
y = "Injuries & Deaths") +
theme_minimal()
Summary count of storm event type per state/territory
events_by_state <- joined_data %>%
group_by(STATE, EVENT_TYPE) %>%
summarize(count = n())
## `summarise()` has grouped output by 'STATE'. You can override using the
## `.groups` argument.
Storm event type with the highest count in each state/territory
most_common_event_by_state <- events_by_state %>%
group_by(STATE) %>%
filter(count == max(count))
Summary table of most frequent storm event types by state/territory
kable(
most_common_event_by_state,
caption = "Most Frequent Storm Event Type by State/Territory",
col.names = c("State", "Event Type", "Event Count"),
align = c("l", "l", "r")
)
| State | Event Type | Event Count |
|---|---|---|
| ALABAMA | Thunderstorm Wind | 619 |
| ALASKA | Flood | 143 |
| AMERICAN SAMOA | Flash Flood | 24 |
| ARIZONA | Flash Flood | 235 |
| ARKANSAS | Thunderstorm Wind | 339 |
| ATLANTIC NORTH | Marine Thunderstorm Wind | 652 |
| ATLANTIC SOUTH | Marine Thunderstorm Wind | 485 |
| CALIFORNIA | Flood | 1268 |
| COLORADO | Heavy Snow | 439 |
| CONNECTICUT | Thunderstorm Wind | 128 |
| DELAWARE | Thunderstorm Wind | 21 |
| DISTRICT OF COLUMBIA | Thunderstorm Wind | 11 |
| E PACIFIC | Waterspout | 9 |
| FLORIDA | Flash Flood | 762 |
| GEORGIA | Thunderstorm Wind | 1014 |
| GUAM | Drought | 27 |
| GUAM WATERS | Marine Strong Wind | 4 |
| GULF OF ALASKA | Marine Thunderstorm Wind | 4 |
| GULF OF MEXICO | Marine Thunderstorm Wind | 715 |
| HAWAII | High Surf | 77 |
| IDAHO | Thunderstorm Wind | 273 |
| ILLINOIS | Thunderstorm Wind | 981 |
| INDIANA | Thunderstorm Wind | 428 |
| IOWA | Thunderstorm Wind | 660 |
| KANSAS | Thunderstorm Wind | 1043 |
| KENTUCKY | Thunderstorm Wind | 829 |
| LAKE ERIE | Marine Thunderstorm Wind | 92 |
| LAKE HURON | Marine Thunderstorm Wind | 19 |
| LAKE MICHIGAN | Marine Thunderstorm Wind | 117 |
| LAKE ONTARIO | Marine Thunderstorm Wind | 28 |
| LAKE ST CLAIR | Marine Thunderstorm Wind | 31 |
| LAKE SUPERIOR | Marine Thunderstorm Wind | 35 |
| LOUISIANA | Flash Flood | 788 |
| MAINE | Flood | 151 |
| MARYLAND | Thunderstorm Wind | 202 |
| MASSACHUSETTS | Thunderstorm Wind | 127 |
| MICHIGAN | Thunderstorm Wind | 264 |
| MINNESOTA | Thunderstorm Wind | 366 |
| MISSISSIPPI | Thunderstorm Wind | 426 |
| MISSOURI | Thunderstorm Wind | 740 |
| MONTANA | High Wind | 435 |
| NEBRASKA | Thunderstorm Wind | 609 |
| NEVADA | High Wind | 245 |
| NEW HAMPSHIRE | Flash Flood | 168 |
| NEW JERSEY | Thunderstorm Wind | 262 |
| NEW MEXICO | Drought | 375 |
| NEW YORK | Thunderstorm Wind | 1033 |
| NORTH CAROLINA | Flash Flood | 1047 |
| NORTH DAKOTA | Hail | 175 |
| OHIO | Thunderstorm Wind | 645 |
| OKLAHOMA | Heat | 912 |
| OREGON | Winter Storm | 81 |
| PENNSYLVANIA | Thunderstorm Wind | 975 |
| PUERTO RICO | Flash Flood | 572 |
| RHODE ISLAND | Flash Flood | 33 |
| SOUTH CAROLINA | Flash Flood | 533 |
| SOUTH DAKOTA | Thunderstorm Wind | 502 |
| ST LAWRENCE R | Marine Thunderstorm Wind | 3 |
| TENNESSEE | Thunderstorm Wind | 509 |
| TEXAS | Hail | 1602 |
| UTAH | Flash Flood | 239 |
| VERMONT | Flash Flood | 185 |
| VIRGIN ISLANDS | Excessive Heat | 14 |
| VIRGINIA | Flash Flood | 799 |
| WASHINGTON | Heavy Snow | 90 |
| WEST VIRGINIA | Thunderstorm Wind | 518 |
| WISCONSIN | Thunderstorm Wind | 349 |
| WYOMING | High Wind | 271 |
Summary table of storm event count per month
events_per_month_event <- joined_data %>%
group_by(MONTH_NAME, EVENT_TYPE) %>%
summarize(event_count = n(), .groups = "drop") %>%
mutate(MONTH_NAME = factor(MONTH_NAME,
levels = c("January", "February", "March", "April",
"May", "June", "July", "August",
"September", "October", "November", "December"))) %>%
arrange(MONTH_NAME, desc(event_count))
Summary table of most frequent storm event per month
top_event_each_month <- events_per_month_event %>%
group_by(MONTH_NAME) %>%
arrange(desc(event_count)) %>%
slice(1)
Bar chart of most frequent storm event per month
ggplot(top_event_each_month, aes(x = MONTH_NAME, y = event_count, fill = EVENT_TYPE)) +
geom_col() +
labs(
title = "Most Frequent Storm Event Type Per Month",
x = "Month",
y = "Event Count",
fill = "Event Type"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
Filter county FIPS number for only county rows
county_data <- joined_data %>%
filter(CZ_TYPE == "C")
Create unique county FIPS number by including state FIPS number
county_data <- county_data %>%
mutate(
county_fips = sprintf("%02d%03d", STATE_FIPS, CZ_FIPS)
)
Combine total number of injuries and deaths into one column
county_data <- county_data %>%
mutate(
injuries_county = INJURIES_DIRECT + INJURIES_INDIRECT,
deaths_county = DEATHS_DIRECT + DEATHS_INDIRECT,
health_county = injuries_county + deaths_county
)
Total storm harm (injuries and deaths) by county
harm_by_county <- county_data %>%
group_by(county_fips, STATE, CZ_NAME) %>%
summarise(
total_injuries = sum(injuries_county, na.rm = TRUE),
total_deaths = sum(deaths_county, na.rm = TRUE),
total_harm = sum(health_county, na.rm = TRUE),
.groups = "drop"
) %>%
arrange(desc(total_harm))
Bar chart of top ten harmed counties
top10_counties <- harm_by_county %>%
arrange(desc(total_harm)) %>%
head(10)
ggplot(top10_counties,
aes(x = reorder(paste(CZ_NAME, STATE, sep = ", "), total_harm),
y = total_harm)) +
geom_col(fill = "red") +
coord_flip() +
labs(
title = "Top 10 Most Harmed Counties in the U.S.",
x = "County",
y = "Total Injuries & Deaths"
) +
theme_minimal()