This report explores the 2024 United States NOAA Storm Datasets, joining files with data pertaining to location, fatality, and event type. The primary focus of this analysis is to understand the impact of these severe weather events, particularly with respect to population health, location, and frequency. By better understanding the patterns of severe weather, the United States government and its people can better prepare themselves for future events.
Through analysis, it became evident that excessive heat was the most harmful event in terms of fatalities, with a total of 4042 recorded in 2024. In addition, floods and thunderstorm winds were the most frequent events across states. Breaking the data up by month shows that different event types peak in different months. Lastly, it was learned that during the summer season, especially in May and July, weather events occur at a higher frequency.
This report provides valuable insights for government officials to prioritize resources for disaster preparedness and response. In order to best ensure the public’s safety, a coordinated response that has learned from past data and patterns is critical. The ability to plan ahead, by knowing which events are more likely in which months, could save precious resources.
# Load necessary libraries
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
# Folder holding the unzipped NOAA Storm Events CSV files. Set the
# STORM_EVENTS_DIR environment variable to run the report on another machine;
# when it is unset the original location is used.
folder_path <- Sys.getenv(
  "STORM_EVENTS_DIR",
  unset = "C:/Users/albin/OneDrive/Canisius/DAT 511"
)
# Full paths of the three input files
details_file <- file.path(folder_path, "StormEvents_Details.csv")
fatalities_file <- file.path(folder_path, "StormEvents_Fatalities.csv")
locations_file <- file.path(folder_path, "StormEvents_Locations.csv")
# Check that every input file is present before reading. The earlier check
# tested a hard-coded "Events_Locations.csv", which is not one of the inputs,
# so it reported FALSE regardless of whether the real files existed.
file.exists(c(details_file, fatalities_file, locations_file))
# NOTE(review): the `[1] FALSE` shown below was rendered by the old check;
# re-knit the report to refresh it.
## [1] FALSE
# Read the event details table; column types are guessed by readr
details <- read_csv(file = details_file)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 70196 Columns: 51
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (26): STATE, MONTH_NAME, EVENT_TYPE, CZ_TYPE, CZ_NAME, WFO, BEGIN_DATE_T...
## dbl (24): BEGIN_YEARMONTH, BEGIN_DAY, BEGIN_TIME, END_YEARMONTH, END_DAY, EN...
## lgl (1): CATEGORY
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the fatality records table
fatalities <- read_csv(file = fatalities_file)
## Rows: 1047 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): FATALITY_TYPE, FATALITY_DATE, FATALITY_SEX, FATALITY_LOCATION
## dbl (7): FAT_YEARMONTH, FAT_DAY, FAT_TIME, FATALITY_ID, EVENT_ID, FATALITY_A...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read the event locations table
locations <- read_csv(file = locations_file)
## Rows: 48112 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): AZIMUTH, LOCATION
## dbl (9): YEARMONTH, EPISODE_ID, EVENT_ID, LOCATION_INDEX, RANGE, LATITUDE, L...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Join the datasets by EVENT_ID.
#
# The result is NOT one row per event. After the first join an EVENT_ID can
# sit on several rows (presumably one per location record -- confirm), and
# `fatalities` can also hold several rows per EVENT_ID, so the second join
# multiplies rows within an event. That many-to-many relationship is declared
# explicitly here instead of being left as a warning.
#
# Anything that aggregates joined_data must therefore reduce to distinct
# EVENT_ID values first (e.g. distinct(EVENT_ID, .keep_all = TRUE) or
# n_distinct(EVENT_ID)); summing or counting raw rows counts an event
# several times.
joined_data <- details %>%
  left_join(locations, by = "EVENT_ID") %>%
  left_join(fatalities, by = "EVENT_ID", relationship = "many-to-many")
# NOTE(review): the many-to-many warning shown below was emitted before
# `relationship` was declared; it no longer appears once the report is re-knit.
## Warning in left_join(., fatalities, by = "EVENT_ID"): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 830 of `x` matches multiple rows in `y`.
## ℹ Row 441 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# Write the joined table to a CSV in the same folder as the inputs
output_file <- file.path(folder_path, "StormEvents_joined_data.csv")
write_csv(x = joined_data, file = output_file)
# Report where the file was written
message("Joined data saved to: ", output_file)
## Joined data saved to: C:/Users/albin/OneDrive/Canisius/DAT 511/StormEvents_joined_data.csv
# Optional: preview the first six rows of the joined table
print(head(joined_data, n = 6L))
## # A tibble: 6 × 71
## BEGIN_YEARMONTH BEGIN_DAY BEGIN_TIME END_YEARMONTH END_DAY END_TIME
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 202405 23 1947 202405 23 1947
## 2 202411 16 230 202411 18 1421
## 3 202405 19 1839 202405 19 1902
## 4 202405 23 2155 202405 23 2155
## 5 202405 24 1405 202405 24 1410
## 6 202411 1 0 202411 1 1600
## # ℹ 65 more variables: EPISODE_ID.x <dbl>, EVENT_ID <dbl>, STATE <chr>,
## # STATE_FIPS <dbl>, YEAR <dbl>, MONTH_NAME <chr>, EVENT_TYPE <chr>,
## # CZ_TYPE <chr>, CZ_FIPS <dbl>, CZ_NAME <chr>, WFO <chr>,
## # BEGIN_DATE_TIME <chr>, CZ_TIMEZONE <chr>, END_DATE_TIME <chr>,
## # INJURIES_DIRECT <dbl>, INJURIES_INDIRECT <dbl>, DEATHS_DIRECT <dbl>,
## # DEATHS_INDIRECT <dbl>, DAMAGE_PROPERTY <chr>, DAMAGE_CROPS <chr>,
## # SOURCE <chr>, MAGNITUDE <dbl>, MAGNITUDE_TYPE <chr>, FLOOD_CAUSE <chr>, …
# Fatalities and injuries per event type.
#
# DEATHS_* and INJURIES_* are columns of the details table, so they repeat on
# every row that joined_data holds for the same event. Summing the raw rows
# counts an event's casualties once per matching location/fatality row, so
# keep one row per EVENT_ID before aggregating.
health_impact <- joined_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  group_by(EVENT_TYPE) %>%
  summarise(
    total_fatalities = sum(DEATHS_DIRECT, DEATHS_INDIRECT, na.rm = TRUE),
    total_injuries = sum(INJURIES_DIRECT, INJURIES_INDIRECT, na.rm = TRUE),
    total_harmed = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_harmed))
# Show top 10 most harmful event types
head(health_impact, 10)
# NOTE(review): the table shown below was rendered before events were
# de-duplicated, so its totals are inflated; re-knit the report to refresh it.
## # A tibble: 10 × 4
## EVENT_TYPE total_fatalities total_injuries total_harmed
## <chr> <dbl> <dbl> <dbl>
## 1 Excessive Heat 4042 100 4142
## 2 Tornado 235 1646 1881
## 3 Flash Flood 1422 68 1490
## 4 Heat 674 173 847
## 5 Tropical Storm 603 14 617
## 6 Thunderstorm Wind 119 172 291
## 7 Winter Storm 101 170 271
## 8 Debris Flow 193 24 217
## 9 Hurricane 37 162 199
## 10 Flood 192 0 192
# Number of events per state and event type.
# joined_data can hold several rows for one event, so count distinct EVENT_ID
# values rather than rows.
event_counts_by_state <- joined_data %>%
  group_by(STATE, EVENT_TYPE) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop") %>%
  arrange(desc(event_count))
# View top 10 most frequent event types by state
head(event_counts_by_state, 10)
# NOTE(review): the table shown below was rendered from row counts, not event
# counts; re-knit the report to refresh it.
## # A tibble: 10 × 3
## STATE EVENT_TYPE event_count
## <chr> <chr> <int>
## 1 TEXAS Flash Flood 1654
## 2 TEXAS Hail 1613
## 3 CALIFORNIA Flood 1268
## 4 NORTH CAROLINA Flash Flood 1175
## 5 KANSAS Thunderstorm Wind 1081
## 6 PENNSYLVANIA Thunderstorm Wind 1039
## 7 GEORGIA Flash Flood 1037
## 8 NEW YORK Thunderstorm Wind 1036
## 9 TEXAS Thunderstorm Wind 1020
## 10 GEORGIA Thunderstorm Wind 1017
library(dplyr)
library(ggplot2)
# Number of events per month.
# joined_data can hold several rows for one event, so count distinct EVENT_ID
# values rather than rows.
events_by_month <- joined_data %>%
  group_by(MONTH_NAME) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop")
# Put the months in calendar order (as a factor) so the x axis is not sorted
# alphabetically
month_levels <- c("January", "February", "March", "April", "May", "June",
                  "July", "August", "September", "October", "November", "December")
events_by_month$MONTH_NAME <- factor(events_by_month$MONTH_NAME, levels = month_levels)
# Bar graph of events by month; geom_col() draws bars whose height is the
# y value, the same as geom_bar(stat = "identity")
ggplot(events_by_month, aes(x = MONTH_NAME, y = event_count, fill = MONTH_NAME)) +
  geom_col() +
  labs(title = "Number of Events by Month", x = "Month", y = "Number of Events") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Load necessary libraries
library(dplyr)
# Number of events for every month / event type combination.
# joined_data can hold several rows for one event, so count distinct EVENT_ID
# values rather than rows. Rows are ordered by calendar month (MONTH_NAME
# stays a character column; match() against month.name supplies the order),
# then by descending count within each month.
event_month_summary <- joined_data %>%
  group_by(MONTH_NAME, EVENT_TYPE) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop") %>%
  arrange(match(MONTH_NAME, month.name), desc(event_count))
# For each month, keep the event type with the highest count.
# slice_max() keeps ties by default, so a month with several event types
# sharing the top count contributes more than one row.
# The result is ungrouped so later steps do not silently run per month, and
# is listed in calendar order instead of alphabetical month order.
most_common_event_per_month <- event_month_summary %>%
  group_by(MONTH_NAME) %>%
  slice_max(event_count, n = 1) %>%
  ungroup() %>%
  select(MONTH_NAME, EVENT_TYPE, event_count) %>%
  arrange(match(MONTH_NAME, month.name))
# Print the most common event for each month
print(most_common_event_per_month)
# NOTE(review): the table shown below was rendered while the result was still
# grouped and sorted alphabetically; re-knit the report to refresh it.
## # A tibble: 12 × 3
## # Groups: MONTH_NAME [12]
## MONTH_NAME EVENT_TYPE event_count
## <chr> <chr> <int>
## 1 April Flash Flood 1832
## 2 August Thunderstorm Wind 3272
## 3 December Winter Weather 581
## 4 February Flood 734
## 5 January Winter Weather 1593
## 6 July Thunderstorm Wind 4323
## 7 June Thunderstorm Wind 3960
## 8 March Hail 849
## 9 May Thunderstorm Wind 4755
## 10 November Flash Flood 902
## 11 October Drought 612
## 12 September Flash Flood 1911
# Most common event -top 10
# Load necessary libraries
library(dplyr)
# Number of events per event type, most frequent first.
# joined_data can hold several rows for one event, so count distinct EVENT_ID
# values rather than rows.
event_type_summary <- joined_data %>%
  group_by(EVENT_TYPE) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop") %>%
  arrange(desc(event_count))
# Get the top 10 most common event types
top_10_event_types <- event_type_summary %>%
  slice_head(n = 10)
# Print the top 10 most common event types
print(top_10_event_types)
# NOTE(review): the table shown below was rendered from row counts, not event
# counts; re-knit the report to refresh it.
## # A tibble: 10 × 2
## EVENT_TYPE event_count
## <chr> <int>
## 1 Thunderstorm Wind 20628
## 2 Flash Flood 15620
## 3 Hail 9099
## 4 Flood 8582
## 5 High Wind 3994
## 6 Drought 3570
## 7 Winter Weather 3564
## 8 Tornado 3248
## 9 Heat 3202
## 10 Excessive Heat 2717