This analysis was done to evaluate the economic and population health impact on the United States in 2024. The NOAA storm database was used to gather data on three categories, fatalities, details and locations. The findings of this analysis may be used for resource and budget allocation across all states.
The findings of this analysis suggest that resource allocation should have high priority on execcive heat, the largest impact on population health. Furthermore, the resource and budget should be in line with the seasonality of event occurrences in a state by state manner. When considering the variable of property damage, the budget allocation should prioritize flash floods to be the most costly. With a dynamic approach to mirror the loactional seasonality, the budget and resources can be utilized in the most efficient manner. It is essential to update the changes on a yearly basis to account for global warming as a consideration for gradual large scale changes.
### Set working directory
folder_path <- "C:/Users/playe/Desktop/Data Stew/Final Proj"
### Download Packages
library(dplyr)
library(readr)
library(ggplot2)
### Define file paths for unzipped CSV files
details_file <- file.path(folder_path, "StormEvents_details", "StormEventsdetails.csv")
fatalities_file <- file.path(folder_path, "StormEvents_fatalities", "StormEventsfatalities.csv")
locations_file <- file.path(folder_path, "StormEvents_locations", "StormEventslocations.csv")
### load csv files into R
details <- read.csv(details_file)
fatalities <- read.csv(fatalities_file)
locations <- read_csv(locations_file)
### Join datasets by variable titled "EVENT_ID"
joined_data <- details %>%
left_join(locations, by = "EVENT_ID") %>%
left_join(fatalities, by = "EVENT_ID")
### Create a new csv file for housing the joined dataset
output_file <- file.path(folder_path, "StormEvents_joined_data.csv")
write_csv(joined_data, output_file)
### Question 1
# Sum health variables by event types
harm_by_event <- joined_data %>%
group_by(EVENT_TYPE) %>%
summarise(
total_deaths = sum(DEATHS_DIRECT + DEATHS_INDIRECT, na.rm = TRUE),
total_injuries = sum(INJURIES_DIRECT + INJURIES_INDIRECT, na.rm = TRUE),
total_harm = total_deaths + total_injuries
) %>%
arrange(desc(total_harm))
head(harm_by_event, 10)
### Create Results Plot for Question 1
ggplot(head(harm_by_event, 10)
, aes(x = reorder(EVENT_TYPE, -total_harm), y = total_harm)) +
geom_col(fill = "firebrick") +
labs(title = "Top 10 Event Types Harmful to Population Health",
x = "Event Type", y = "Total Harm (Deaths + Injuries)") +
theme_minimal() +
coord_flip()
### Question 2
# Calculate the event type to have highest occurance per top 10 states
event_counts <- joined_data %>%
group_by(STATE, EVENT_TYPE) %>%
summarise(event_count = n(), .groups = "drop")
### Find the most frequent event per state
most_common_events <- event_counts %>%
group_by(STATE) %>%
slice_max(event_count, n = 1)
head(most_common_events, 10)
# Create plot for most common event in the top ten states
# Get the top 10 states with most total events
top_states <- joined_data %>%
count(STATE, sort = TRUE) %>%
slice_max(n, n = 10) %>%
pull(STATE)
# Filter and get most common event per top 10 states
top_events_per_state <- joined_data %>%
filter(STATE %in% top_states) %>%
count(STATE, EVENT_TYPE, sort = TRUE) %>%
group_by(STATE) %>%
slice_max(n, n = 1) %>%
ungroup()
# Plot of results
ggplot(top_events_per_state, aes(x = reorder(STATE, n), y = n, fill = EVENT_TYPE)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(
title = "Most Common Event Type in Top 10 States by Weather Event Count",
x = "State",
y = "Number of Events",
fill = "Event Type"
) +
theme_minimal()
### Question 3
# Calculating seasonality of events through monthly occurrences
events_by_month <- joined_data %>%
group_by(EVENT_TYPE, MONTH_NAME) %>%
summarise(event_count = n(), .groups = "drop")
head(events_by_month[order(-events_by_month$event_count), ], 10)
### Questoin 4
# Calculating total event damage and consolidating results to top 10 highest property damage
damage_by_event <- joined_data %>%
mutate(DAMAGE_PROPERTY_NUMERIC = as.numeric(gsub("[^0-9]", "", DAMAGE_PROPERTY))) %>% # basic conversion
group_by(EVENT_TYPE) %>%
summarise(total_damage = sum(DAMAGE_PROPERTY_NUMERIC, na.rm = TRUE)) %>%
arrange(desc(total_damage))
head(damage_by_event, 10)
### Plot for question 4
# Creating plot to show amount of property damage by event type
top_damage_events <- damage_by_event %>%
slice_max(total_damage, n = 10)
ggplot(top_damage_events, aes(x = reorder(EVENT_TYPE, total_damage), y = total_damage)) +
geom_bar(stat = "identity", fill = "tomato") +
coord_flip() +
labs(
title = "Top 10 Weather Events by Total Property Damage (2024)",
x = "Event Type",
y = "Total Property Damage (numeric scale)",
caption = "Source: NOAA Storm Events Database"
) +
theme_minimal()
## # A tibble: 10 × 4
## EVENT_TYPE total_deaths total_injuries total_harm
## <chr> <int> <int> <int>
## 1 Excessive Heat 4042 100 4142
## 2 Tornado 235 1646 1881
## 3 Flash Flood 1422 68 1490
## 4 Heat 674 173 847
## 5 Tropical Storm 603 14 617
## 6 Thunderstorm Wind 119 172 291
## 7 Winter Storm 101 170 271
## 8 Debris Flow 193 24 217
## 9 Hurricane 37 162 199
## 10 Flood 192 0 192
## # A tibble: 10 × 3
## # Groups: STATE [10]
## STATE EVENT_TYPE event_count
## <chr> <chr> <int>
## 1 ALABAMA Thunderstorm Wind 619
## 2 ALASKA Flood 143
## 3 AMERICAN SAMOA Flash Flood 24
## 4 ARIZONA Flash Flood 235
## 5 ARKANSAS Thunderstorm Wind 339
## 6 ATLANTIC NORTH Marine Thunderstorm Wind 652
## 7 ATLANTIC SOUTH Marine Thunderstorm Wind 486
## 8 CALIFORNIA Flood 1268
## 9 COLORADO Heavy Snow 439
## 10 CONNECTICUT Thunderstorm Wind 128
## # A tibble: 52 × 3
## EVENT_TYPE MONTH_NAME event_count
## <chr> <chr> <int>
## 1 Thunderstorm Wind May 4755
## 2 Thunderstorm Wind July 4323
## 3 Thunderstorm Wind June 3960
## 4 Hail May 3275
## 5 Thunderstorm Wind August 3272
## 6 Flash Flood August 2837
## 7 Flash Flood July 2663
## 8 Flash Flood September 1911
## 9 Flash Flood May 1855
## 10 Flash Flood April 1832
## # ℹ 42 more rows
## # A tibble: 10 × 2
## EVENT_TYPE total_damage
## <chr> <dbl>
## 1 Flash Flood 25467131
## 2 Tornado 18860111
## 3 Thunderstorm Wind 17938300
## 4 Flood 4742261
## 5 Hail 4165448
## 6 High Wind 2057550
## 7 Tropical Storm 1900684
## 8 Wildfire 1537223
## 9 Lightning 1024170
## 10 Strong Wind 479788