This analysis was carried out across multiple variables to establish the overall risk associated with weather events. The findings suggest that the events affecting population health and the events causing economic damage are not correlated. This suggests that specific events can be assumed to impact one variable more than another, although exceptions such as excessive heat can potentially be tied to events like wildfires. Furthermore, the data suggest a seasonality of events, typically spiking in the summer months of July and August with Thunderstorms and Flash Floods; notably, May has a large number of occurrences as well. When considering the variable of property damage, Flash Floods clearly have the largest impact, with Tornadoes a not-so-close second (about 7 million less). Overall, the takeaway is that the three most impactful events are Flash Flood, Thunderstorm and Tornado.
### Set data folder path
# Folder that contains the unzipped Storm Events CSV sub-folders.
# The location can be overridden without editing this script by setting the
# STORM_EVENTS_DIR environment variable; when it is not set, the original
# hard-coded path is used, so existing runs behave exactly as before.
folder_path <- Sys.getenv(
  "STORM_EVENTS_DIR",
  unset = "C:/Users/playe/Desktop/Data Stew/Final Proj"
)
### Load packages
library(dplyr)
library(readr)
library(ggplot2)
### Define file paths for unzipped CSV files
# Small wrapper that resolves path components relative to folder_path.
in_data_folder <- function(...) {
  file.path(folder_path, ...)
}

# Each CSV sits in its own sub-folder of the data folder.
details_file <- in_data_folder("StormEvents_details", "StormEventsdetails.csv")
fatalities_file <- in_data_folder("StormEvents_fatalities", "StormEventsfatalities.csv")
locations_file <- in_data_folder("StormEvents_locations", "StormEventslocations.csv")
### Load CSV files into R
# Read the three input tables.
# details and fatalities go through base R's reader (plain data.frame);
# locations goes through readr's reader (tibble). The two readers are kept
# as they were because they differ in type guessing and in how they treat
# empty fields, which would change what ends up in the joined data.
details <- utils::read.csv(file = details_file)
fatalities <- utils::read.csv(file = fatalities_file)
locations <- readr::read_csv(file = locations_file)
### Join datasets by EVENT_ID
# Attach location columns, then fatality columns, to every details row,
# matching on EVENT_ID. Both steps are left joins, so every details row is
# kept even when it has no match.
# NOTE(review): if locations or fatalities hold more than one row for an
# EVENT_ID, the details columns are repeated once per matching row, so
# joined_data is not guaranteed to be one row per event - confirm before
# counting rows or summing per-event columns.
joined_data <- left_join(details, locations, by = "EVENT_ID")
joined_data <- left_join(joined_data, fatalities, by = "EVENT_ID")
### Save the joined data to a new CSV file
# Write the joined table next to the input folders and report where it went.
output_file <- file.path(folder_path, "StormEvents_joined_data.csv")
write_csv(joined_data, output_file)
# Plain ASCII quotes are required here: typographic quotes are not valid R
# string delimiters. message() pastes its arguments with no separator, so the
# space after the colon is part of the literal.
message("Joined data saved to: ", output_file)
### Across the United States, which types of events are most harmful with respect to population health?
# Deaths and injuries (direct + indirect) per event type, most harmful first.
#
# joined_data repeats an event's details columns once for every matching
# locations/fatalities row, so summing the casualty columns over all rows adds
# the same event's figures several times. Keep a single row per EVENT_ID
# before aggregating.
# NOTE(review): assumes the DEATHS_* / INJURIES_* columns are per-event totals
# carried over from the details table - confirm against the column docs.
# NOTE(review): tables rendered from earlier runs were computed on the
# duplicated rows and will show larger totals; re-knit to refresh them.
harm_by_event <- joined_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  group_by(EVENT_TYPE) %>%
  summarise(
    # Sum each column separately so an NA in one column does not discard the
    # value recorded in the other column for the same event.
    total_deaths = sum(DEATHS_DIRECT, na.rm = TRUE) +
      sum(DEATHS_INDIRECT, na.rm = TRUE),
    total_injuries = sum(INJURIES_DIRECT, na.rm = TRUE) +
      sum(INJURIES_INDIRECT, na.rm = TRUE),
    total_harm = total_deaths + total_injuries,
    .groups = "drop"
  ) %>%
  arrange(desc(total_harm))
head(harm_by_event, 10)
## # A tibble: 10 × 4
## EVENT_TYPE total_deaths total_injuries total_harm
## <chr> <int> <int> <int>
## 1 Excessive Heat 4042 100 4142
## 2 Tornado 235 1646 1881
## 3 Flash Flood 1422 68 1490
## 4 Heat 674 173 847
## 5 Tropical Storm 603 14 617
## 6 Thunderstorm Wind 119 172 291
## 7 Winter Storm 101 170 271
## 8 Debris Flow 193 24 217
## 9 Hurricane 37 162 199
## 10 Flood 192 0 192
### Across the United States, which types of events happen most often in each state?
# Number of events per state and event type.
# Count distinct EVENT_IDs rather than rows: joined_data can hold several rows
# for one event (one per matching locations/fatalities row), and n() would
# count each of those rows as a separate event.
event_counts <- joined_data %>%
  group_by(STATE, EVENT_TYPE) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop")

### Find the most frequent event per state
# slice_max() keeps ties by default, so a state can contribute more than one
# row when several event types share the top count. The result is ungrouped so
# later steps do not silently operate per state.
most_common_events <- event_counts %>%
  group_by(STATE) %>%
  slice_max(event_count, n = 1) %>%
  ungroup()
head(most_common_events, 10)
## # A tibble: 10 × 3
## # Groups: STATE [10]
## STATE EVENT_TYPE event_count
## <chr> <chr> <int>
## 1 ALABAMA Thunderstorm Wind 619
## 2 ALASKA Flood 143
## 3 AMERICAN SAMOA Flash Flood 24
## 4 ARIZONA Flash Flood 235
## 5 ARKANSAS Thunderstorm Wind 339
## 6 ATLANTIC NORTH Marine Thunderstorm Wind 652
## 7 ATLANTIC SOUTH Marine Thunderstorm Wind 486
## 8 CALIFORNIA Flood 1268
## 9 COLORADO Heavy Snow 439
## 10 CONNECTICUT Thunderstorm Wind 128
### Which types of events are characteristic of which months?
# Number of events per event type and calendar month.
# Count distinct EVENT_IDs rather than rows, because joined_data can contain
# several rows for the same event (one per matching locations/fatalities row).
events_by_month <- joined_data %>%
  group_by(EVENT_TYPE, MONTH_NAME) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop")

# Show the ten busiest event-type/month combinations.
events_by_month %>%
  arrange(desc(event_count)) %>%
  head(10)
## # A tibble: 10 × 3
## EVENT_TYPE MONTH_NAME event_count
## <chr> <chr> <int>
## 1 Thunderstorm Wind May 4755
## 2 Thunderstorm Wind July 4323
## 3 Thunderstorm Wind June 3960
## 4 Hail May 3275
## 5 Thunderstorm Wind August 3272
## 6 Flash Flood August 2837
## 7 Flash Flood July 2663
## 8 Flash Flood September 1911
## 9 Flash Flood May 1855
## 10 Flash Flood April 1832
### Which event does the most property damage?
# Convert damage strings such as "10.00K" or "1.5M" to plain numbers.
#
# Stripping every non-digit (the previous approach) removes the decimal point
# and ignores the magnitude suffix, so "10.00K" became 1000 and "1.50M" became
# 150. Here the numeric part is parsed as written and scaled by the suffix.
# NOTE(review): K / M / B are taken to mean thousand / million / billion -
# confirm against the Storm Events column documentation.
#
# x: character vector of damage amounts (other types are coerced).
# Returns a numeric vector the same length as x; values that are missing,
# empty or not in the expected "<number>[K|M|B]" form become NA.
parse_damage_amount <- function(x) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  x <- toupper(trimws(as.character(x)))
  well_formed <- !is.na(x) &
    grepl("^([0-9]+\\.?[0-9]*|\\.[0-9]+)[KMB]?$", x)

  amounts <- rep(NA_real_, length(x))
  number_part <- sub("[KMB]$", "", x[well_formed])
  suffix_part <- sub("^[0-9.]+", "", x[well_formed])
  # No suffix means the number is already in base units.
  scale <- ifelse(suffix_part == "", 1, unname(multipliers[suffix_part]))
  amounts[well_formed] <- as.numeric(number_part) * scale
  amounts
}

# Total property damage per event type, largest first.
# joined_data can hold several rows per event (one per matching
# locations/fatalities row), so keep one row per EVENT_ID before summing to
# avoid adding the same event's damage more than once.
# NOTE(review): tables rendered from earlier runs used the digit-stripping
# conversion on duplicated rows; re-knit to refresh them.
damage_by_event <- joined_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  mutate(DAMAGE_PROPERTY_NUMERIC = parse_damage_amount(DAMAGE_PROPERTY)) %>%
  group_by(EVENT_TYPE) %>%
  summarise(
    total_damage = sum(DAMAGE_PROPERTY_NUMERIC, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(desc(total_damage))
head(damage_by_event, 10)
## # A tibble: 10 × 2
## EVENT_TYPE total_damage
## <chr> <dbl>
## 1 Flash Flood 25467131
## 2 Tornado 18860111
## 3 Thunderstorm Wind 17938300
## 4 Flood 4742261
## 5 Hail 4165448
## 6 High Wind 2057550
## 7 Tropical Storm 1900684
## 8 Wildfire 1537223
## 9 Lightning 1024170
## 10 Strong Wind 479788
# Keep the ten event types with the largest total damage (ties are kept).
top_damage_events <- slice_max(damage_by_event, total_damage, n = 10)

# Horizontal bar chart; bars are ordered by total damage so the largest
# ends up at the top after the axes are flipped.
ggplot(
  data = top_damage_events,
  mapping = aes(x = reorder(EVENT_TYPE, total_damage), y = total_damage)
) +
  geom_col(fill = "tomato") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Total Property Damage (2024)",
    x = "Event Type",
    y = "Total Property Damage (numeric scale)",
    caption = "Source: NOAA Storm Events Database"
  ) +
  theme_minimal()