Synopsis

This analysis was carried out across multiple variables to establish the overall risk associated with weather events. The findings suggest that the events that most affect population health and those that cause the most economic damage are not correlated. This suggests that a specific event type can be assumed to impact one variable more than the other, although exceptions exist: excessive heat, for example, can potentially be tied to events like wildfires. Furthermore, the data suggest a seasonality of events, typically spiking in the summer months of July and August with Thunderstorms and Flash Floods; notably, May has a large number of occurrences as well. When considering property damage, Flash Floods clearly have the largest impact, with Tornadoes a not-so-close second (7 million less). Overall, the takeaway is that the three most impactful events are Flash Flood, Thunderstorm and Tornado.

Data Processing

### Set the data folder
# Root folder that the file paths in this script are built from. This does not
# change R's working directory. Set the STORM_EVENTS_DIR environment variable
# to run the analysis on another machine; when it is unset or empty, the
# original hard-coded location is used.
folder_path <- local({
  override <- Sys.getenv("STORM_EVENTS_DIR", unset = "")
  if (nzchar(override)) {
    override
  } else {
    "C:/Users/playe/Desktop/Data Stew/Final Proj"
  }
})

### Load packages
library(dplyr)
library(readr)
library(ggplot2)

### Build the full path to each unzipped CSV file
# Each table sits in its own sub-folder underneath folder_path.
details_file <- file.path(
  folder_path, "StormEvents_details", "StormEventsdetails.csv"
)
fatalities_file <- file.path(
  folder_path, "StormEvents_fatalities", "StormEventsfatalities.csv"
)
locations_file <- file.path(
  folder_path, "StormEvents_locations", "StormEventslocations.csv"
)

### Read the three CSV files into R
# details and fatalities are read with base read.csv (plain data frames);
# locations is read with readr's read_csv (a tibble).
details <- read.csv(file = details_file)
fatalities <- read.csv(file = fatalities_file)
locations <- read_csv(file = locations_file)

### Combine the three tables on EVENT_ID
# Both joins are left joins, so every row of `details` is kept; location
# columns are attached first, then fatality columns.
# NOTE(review): a left join repeats the left-hand row once per matching
# right-hand row, so an event with several location or fatality records would
# appear several times in joined_data -- confirm downstream steps expect that.
joined_data <- left_join(
  left_join(details, locations, by = "EVENT_ID"),
  fatalities,
  by = "EVENT_ID"
)

### Write the joined table to disk as a new CSV file
# The file is created inside folder_path, next to the input sub-folders.
output_file <- file.path(folder_path, "StormEvents_joined_data.csv")
readr::write_csv(joined_data, output_file)

Inform the user where the file was saved.

# Report the output location. Plain ASCII double quotes are required here:
# typographic quotes are not valid R string delimiters. message() pastes its
# arguments together with no separator, hence the trailing space in the label.
message("Joined data saved to: ", output_file)

Results

Question 1

### Across the United States, which types of events are most harmful with respect to population health?
# joined_data can hold several rows for one event (one per matching location
# and fatality record). The DEATHS_* / INJURIES_* columns are assumed to be
# per-event totals repeated on each of those rows (as in the NOAA details
# table -- TODO confirm), so keep one row per EVENT_ID before adding them up;
# otherwise the same casualties are counted more than once.
# Each column is summed separately so that an NA in one column does not
# discard the value recorded in the other.
# NOTE: the printed table that follows in this report was produced before this
# de-duplication; re-knit the report to refresh it.
harm_by_event <- joined_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  group_by(EVENT_TYPE) %>%
  summarise(
    total_deaths = sum(DEATHS_DIRECT, na.rm = TRUE) +
      sum(DEATHS_INDIRECT, na.rm = TRUE),
    total_injuries = sum(INJURIES_DIRECT, na.rm = TRUE) +
      sum(INJURIES_INDIRECT, na.rm = TRUE),
    total_harm = total_deaths + total_injuries
  ) %>%
  arrange(desc(total_harm))

# Ten most harmful event types.
head(harm_by_event, 10)
## # A tibble: 10 × 4
##    EVENT_TYPE        total_deaths total_injuries total_harm
##    <chr>                    <int>          <int>      <int>
##  1 Excessive Heat            4042            100       4142
##  2 Tornado                    235           1646       1881
##  3 Flash Flood               1422             68       1490
##  4 Heat                       674            173        847
##  5 Tropical Storm             603             14        617
##  6 Thunderstorm Wind          119            172        291
##  7 Winter Storm               101            170        271
##  8 Debris Flow                193             24        217
##  9 Hurricane                   37            162        199
## 10 Flood                      192              0        192

Question 2

### Across the United States, which types of events happen most often in each state?

# Count distinct events, not rows: joined_data can contain several rows for a
# single EVENT_ID (one per matching location and fatality record), so n()
# would overstate how often an event type occurred.
# NOTE: printed counts later in this report were produced with row counts;
# re-knit the report to refresh them.
event_counts <- joined_data %>%
  group_by(STATE, EVENT_TYPE) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop")

### Find the most frequent event per state
# slice_max() keeps ties, so a state whose top event types share the same
# count contributes more than one row. The grouping is dropped afterwards so
# later operations on this table are not silently applied per state.
most_common_events <- event_counts %>%
  group_by(STATE) %>%
  slice_max(event_count, n = 1) %>%
  ungroup()

# First ten rows (states appear in alphabetical order).
head(most_common_events, 10)
## # A tibble: 10 × 3
## # Groups:   STATE [10]
##    STATE          EVENT_TYPE               event_count
##    <chr>          <chr>                          <int>
##  1 ALABAMA        Thunderstorm Wind                619
##  2 ALASKA         Flood                            143
##  3 AMERICAN SAMOA Flash Flood                       24
##  4 ARIZONA        Flash Flood                      235
##  5 ARKANSAS       Thunderstorm Wind                339
##  6 ATLANTIC NORTH Marine Thunderstorm Wind         652
##  7 ATLANTIC SOUTH Marine Thunderstorm Wind         486
##  8 CALIFORNIA     Flood                           1268
##  9 COLORADO       Heavy Snow                       439
## 10 CONNECTICUT    Thunderstorm Wind                128

Question 3

### Which types of events are characteristic of which months?

# Count distinct events per event type and month. joined_data can contain
# several rows for a single EVENT_ID (one per matching location and fatality
# record), so counting rows with n() would overstate the number of events.
# NOTE: the printed table that follows in this report was produced with row
# counts; re-knit the report to refresh it.
events_by_month <- joined_data %>%
  group_by(EVENT_TYPE, MONTH_NAME) %>%
  summarise(event_count = n_distinct(EVENT_ID), .groups = "drop")

# Ten busiest event-type / month combinations.
events_by_month %>%
  arrange(desc(event_count)) %>%
  head(10)
## # A tibble: 10 × 3
##    EVENT_TYPE        MONTH_NAME event_count
##    <chr>             <chr>            <int>
##  1 Thunderstorm Wind May               4755
##  2 Thunderstorm Wind July              4323
##  3 Thunderstorm Wind June              3960
##  4 Hail              May               3275
##  5 Thunderstorm Wind August            3272
##  6 Flash Flood       August            2837
##  7 Flash Flood       July              2663
##  8 Flash Flood       September         1911
##  9 Flash Flood       May               1855
## 10 Flash Flood       April             1832

Question 4

### Which event does the most property damage?

# Convert damage strings such as "10.00K" or "1.5M" into US dollars.
# The trailing letter is read as a magnitude suffix (H = hundreds,
# K = thousands, M = millions, B = billions -- per the NOAA Storm Events
# format; confirm against the data). Values with no recognised suffix are
# taken as plain dollars; blank or missing entries become NA.
# Stripping every non-digit instead would drop the decimal point and the
# suffix, turning "1.50M" into 150.
parse_damage_amount <- function(x) {
  text <- toupper(trimws(as.character(x)))
  amount <- as.numeric(gsub("[^0-9.]", "", text))
  suffix <- substr(text, nchar(text), nchar(text))
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  scale <- unname(multipliers[suffix])
  scale[is.na(scale)] <- 1
  amount * scale
}

# Keep one row per EVENT_ID before summing: joined_data can repeat an event
# once per matching location and fatality record, which would otherwise count
# the same damage figure several times.
# NOTE: the printed table and plot that follow in this report were produced
# with the earlier digit-only conversion; re-knit the report to refresh them.
damage_by_event <- joined_data %>%
  distinct(EVENT_ID, .keep_all = TRUE) %>%
  mutate(DAMAGE_PROPERTY_NUMERIC = parse_damage_amount(DAMAGE_PROPERTY)) %>%
  group_by(EVENT_TYPE) %>%
  summarise(total_damage = sum(DAMAGE_PROPERTY_NUMERIC, na.rm = TRUE)) %>%
  arrange(desc(total_damage))

# Ten most damaging event types.
head(damage_by_event, 10)
## # A tibble: 10 × 2
##    EVENT_TYPE        total_damage
##    <chr>                    <dbl>
##  1 Flash Flood           25467131
##  2 Tornado               18860111
##  3 Thunderstorm Wind     17938300
##  4 Flood                  4742261
##  5 Hail                   4165448
##  6 High Wind              2057550
##  7 Tropical Storm         1900684
##  8 Wildfire               1537223
##  9 Lightning              1024170
## 10 Strong Wind             479788

Plots

# Keep the ten event types with the largest damage totals (ties are kept).
top_damage_events <- slice_max(damage_by_event, total_damage, n = 10)

# Horizontal bar chart, bars ordered by total damage.
ggplot(
  data = top_damage_events,
  mapping = aes(x = reorder(EVENT_TYPE, total_damage), y = total_damage)
) +
  geom_col(fill = "tomato") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Total Property Damage (2024)",
    x = "Event Type",
    y = "Total Property Damage (numeric scale)",
    caption = "Source: NOAA Storm Events Database"
  ) +
  theme_minimal()