Synopsis

This report analyzes severe weather events in the United States using the 2025 NOAA Storm Events Database. The goal is to identify which event types are most harmful to population health, which events occur most frequently in each state, how event types vary by month, and, as a final question, which event type caused the most fatalities in New York State. The analysis uses three NOAA datasets—details, fatalities, and locations—which are merged using the EVENT_ID key. Fatalities, injuries, and event frequencies are summarized to identify national patterns.

Data Processing

This section describes how the raw NOAA CSV files were loaded, merged, and prepared for analysis.
The three datasets—details, fatalities, and locations—are linked by the EVENT_ID variable.

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.5.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(readr)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.5.2

library(lubridate)

## 
## Attaching package: 'lubridate'

## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union

Loading Data

# Directory that holds the three 2025 NOAA Storm Events CSV extracts
folder_path <- "C:/Users/bicki/OneDrive/Desktop/M.S. CYBER/SPRING 26/DAT 511/Final/NOAA_Storm_Data"

# Full path to each input file, built from the shared folder
details_file <- file.path(
  folder_path,
  "StormEvents_details-ftp_v1.0_d2025_c20260323.csv"
)
fatalities_file <- file.path(
  folder_path,
  "StormEvents_fatalities-ftp_v1.0_d2025_c20260323.csv"
)
locations_file <- file.path(
  folder_path,
  "StormEvents_locations-ftp_v1.0_d2025_c20260323.csv"
)

# Read the event details table; readr guesses the column types
details <- read_csv(file = details_file)

## Rows: 72241 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (26): STATE, MONTH_NAME, EVENT_TYPE, CZ_TYPE, CZ_NAME, WFO, BEGIN_DATE_T...
## dbl (24): BEGIN_YEARMONTH, BEGIN_DAY, BEGIN_TIME, END_YEARMONTH, END_DAY, EN...
## lgl  (1): CATEGORY
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read the fatalities table. The column specification echoed below shows it
# carries EVENT_ID, the key used to link it to the details table.
fatalities <- read_csv(fatalities_file)

## Rows: 895 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): FATALITY_TYPE, FATALITY_DATE, FATALITY_SEX, FATALITY_LOCATION
## dbl (6): FAT_YEARMONTH, FAT_DAY, FAT_TIME, FATALITY_ID, EVENT_ID, FATALITY_AGE
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read the locations table, which is joined to the details table on EVENT_ID.
locations <- read_csv(locations_file)

## Rows: 51870 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): AZIMUTH, LOCATION
## dbl (9): YEARMONTH, EPISODE_ID, EVENT_ID, LOCATION_INDEX, RANGE, LATITUDE, L...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Joining Data

# Row-level merge of the three tables on EVENT_ID.
#
# An event can match several location rows and several fatality rows, so the
# result holds one row per (event, location, fatality) combination rather than
# one row per event. The relationship is declared explicitly so the fan-out is
# a documented, intentional property of this table instead of an unexplained
# dplyr warning. Event-level columns (injuries, deaths, damage) are repeated
# on every expanded row: de-duplicate by EVENT_ID before summing or counting.
joined_data <- details %>%
  left_join(locations, by = "EVENT_ID") %>%
  left_join(fatalities, by = "EVENT_ID", relationship = "many-to-many")

## Warning in left_join(., fatalities, by = "EVENT_ID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1142 of `x` matches multiple rows in `y`.
## ℹ Row 729 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

# Write the merged table next to the raw input files
write_csv(
  x = joined_data,
  file = file.path(folder_path, "StormEvents_joined_data.csv")
)

# Preview the first six rows of the merged table
head(joined_data, n = 6L)

## # A tibble: 6 × 70
##   BEGIN_YEARMONTH BEGIN_DAY BEGIN_TIME END_YEARMONTH END_DAY END_TIME
##             <dbl>     <dbl>      <dbl>         <dbl>   <dbl>    <dbl>
## 1          202503        31       1104        202503      31     1106
## 2          202503        30       1552        202503      30     1555
## 3          202501         5       1800        202501       6     2227
## 4          202501         3       1300        202501       3     1900
## 5          202501         3       1300        202501       3     1900
## 6          202501         3       1300        202501       3     1900
## # ℹ 64 more variables: EPISODE_ID.x <dbl>, EVENT_ID <dbl>, STATE <chr>,
## #   STATE_FIPS <dbl>, YEAR <dbl>, MONTH_NAME <chr>, EVENT_TYPE <chr>,
## #   CZ_TYPE <chr>, CZ_FIPS <dbl>, CZ_NAME <chr>, WFO <chr>,
## #   BEGIN_DATE_TIME <chr>, CZ_TIMEZONE <chr>, END_DATE_TIME <chr>,
## #   INJURIES_DIRECT <dbl>, INJURIES_INDIRECT <dbl>, DEATHS_DIRECT <dbl>,
## #   DEATHS_INDIRECT <dbl>, DAMAGE_PROPERTY <chr>, DAMAGE_CROPS <chr>,
## #   SOURCE <chr>, MAGNITUDE <dbl>, MAGNITUDE_TYPE <chr>, FLOOD_CAUSE <chr>, …

Results

Q1: Across the United States, which types of events (as indicated in the EVENT_TYPE variable) are most harmful with respect to population health?

# Fatalities per event: the number of rows in the fatalities table that
# share an EVENT_ID.
fatality_counts <- fatalities %>%
  count(EVENT_ID, name = "FATALITIES")

# Injuries per event (direct + indirect). coalesce() stops a missing value in
# one component from turning the whole per-event total into NA.
injury_counts <- details %>%
  mutate(
    INJURIES = coalesce(INJURIES_DIRECT, 0) + coalesce(INJURIES_INDIRECT, 0)
  ) %>%
  group_by(EVENT_ID) %>%
  summarise(INJURIES = sum(INJURIES, na.rm = TRUE))

# Analysis table: details + locations + per-event fatality and injury totals.
# Joining locations repeats an event once per matching location row, so the
# event-level FATALITIES and INJURIES values are repeated on those rows.
joined_data <- details %>%
  left_join(locations, by = "EVENT_ID") %>%
  left_join(fatality_counts, by = "EVENT_ID") %>%
  left_join(injury_counts, by = "EVENT_ID")

# Health impact by event type.
# Collapse to one row per event before summing; otherwise an event with
# several location rows would contribute its fatalities and injuries several
# times and inflate the totals.
health_impact <- joined_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  group_by(EVENT_TYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    total_health = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_health))

# Plot the ten event types with the largest combined fatalities + injuries.
# head() replaces health_impact[1:10, ] so that a table with fewer than ten
# rows is not padded with all-NA rows (which would add an "NA" bar).
ggplot(
  head(health_impact, 10),
  aes(x = reorder(EVENT_TYPE, total_health), y = total_health)
) +
  geom_col(fill = "red") +
  coord_flip() +
  labs(title = "Top 10 Most Harmful Weather Events (2025)",
       x = "Event Type",
       y = "Fatalities + Injuries")

Plot 1: The plot shows the ten event types with the highest combined fatalities and injuries in 2025.

Q2: Across the United States, which types of events occur most often in which states?

# Number of distinct events per state and event type.
# joined_data repeats an event once per matching location row, so rows are
# de-duplicated by EVENT_ID first; counting raw rows would over-weight event
# types that record several locations per event.
events_by_state <- joined_data %>%
  distinct(EVENT_ID, STATE, EVENT_TYPE) %>%
  count(STATE, EVENT_TYPE, name = "count") %>%
  arrange(desc(count))

# Most frequent event type in each state.
# with_ties = FALSE keeps exactly one row per state; tied rows would be
# stacked by geom_col() into a single bar showing a multiple of the true count.
top_state_events <- events_by_state %>%
  group_by(STATE) %>%
  slice_max(count, n = 1, with_ties = FALSE) %>%
  ungroup()

# Horizontal bar chart: one bar per state, ordered by event count and
# coloured by that state's most frequent event type
ggplot(
  data = top_state_events,
  mapping = aes(x = reorder(STATE, count), y = count, fill = EVENT_TYPE)
) +
  geom_col() +
  coord_flip() +
  ggtitle("Most Frequent Event Type by State (2025)") +
  xlab("State") +
  ylab("Event Count") +
  labs(fill = "Event Type")

Plot 2: Each state’s most common weather event type is shown, highlighting which event type dominates in each state.

Q3: Which types of events are characterized by which months?

# Parse the event start timestamp.
# mdy_hms() failed on every value ("All formats failed to parse"), which left
# the column, and the month derived from it, entirely NA.
# NOTE(review): NOAA bulk files are understood to use day-month-year text such
# as "31-MAR-25 11:04:00", hence dmy_hms(); confirm against the raw CSV.
joined_data$BEGIN_DATE_TIME <- dmy_hms(joined_data$BEGIN_DATE_TIME)

# Month label taken from the numeric BEGIN_YEARMONTH column (e.g. 202503 -> 3
# -> "Mar"), so it does not depend on the free-text timestamp parsing at all.
joined_data$MONTH <- month(joined_data$BEGIN_YEARMONTH %% 100, label = TRUE)

# Number of distinct events per month and event type.
# Events are de-duplicated by EVENT_ID because joined_data repeats an event
# once per matching location row.
events_by_month <- joined_data %>%
  distinct(EVENT_ID, MONTH, EVENT_TYPE) %>%
  count(MONTH, EVENT_TYPE, name = "count") %>%
  arrange(desc(count))

## `summarise()` has grouped output by 'MONTH'. You can override using the
## `.groups` argument.

# Stacked bar chart: events per month, split by event type
ggplot(
  data = events_by_month,
  mapping = aes(x = MONTH, y = count, fill = EVENT_TYPE)
) +
  geom_col() +
  ggtitle("Weather Events by Month and Type (2025)") +
  xlab("Month") +
  ylab("Event Count") +
  labs(fill = "Event Type")

Plot 3: This plot shows the seasonal pattern of each event type across the months of the year.

Q4: Which event type caused the most fatalities in New York State?

# All joined rows for events recorded in New York State
ny_data <- joined_data %>%
  filter(STATE == "NEW YORK")

# Fatalities per event type in New York.
# FATALITIES is an event-level total that joined_data repeats on every
# location row of the event, so keep one row per EVENT_ID before summing;
# otherwise multi-location events are counted more than once.
ny_fatal <- ny_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  group_by(EVENT_TYPE) %>%
  summarise(total_fatalities = sum(FATALITIES, na.rm = TRUE)) %>%
  arrange(desc(total_fatalities))

# Show the event types with the most fatalities
head(ny_fatal)

## # A tibble: 6 × 2
##   EVENT_TYPE     total_fatalities
##   <chr>                     <int>
## 1 Tornado                       6
## 2 Flash Flood                   4
## 3 Winter Weather                3
## 4 Strong Wind                   2
## 5 Excessive Heat                1
## 6 Rip Current                   1

# Plot up to ten event types with the most fatalities in New York.
# head() replaces ny_fatal[1:10, ] so that, if New York has fewer than ten
# event types, the table is not padded with all-NA rows (an "NA" bar).
ggplot(
  head(ny_fatal, 10),
  aes(x = reorder(EVENT_TYPE, total_fatalities), y = total_fatalities)
) +
  geom_col(fill = "blue") +
  coord_flip() +
  labs(title = "Top Fatality-Causing Weather Events in New York (2025)",
       x = "Event Type",
       y = "Total Fatalities")

Plot 4: The bar graph shows weather-related fatalities in New York State by event type.

Conclusion

This analysis focused on the most recent data available from NOAA (2025). The data were loaded, joined, and analyzed to answer specific questions: which weather events were most harmful in the United States in 2025, and which caused the most fatalities. The results highlight that, despite many advances in early warning and prediction models, weather-related events remain a major source of danger and require substantial preparation to curtail their effects.

Final Project(NOAA Storm Analysis)

Bikramjit Singh

2026-03-30