Introduction

This analysis investigates the impacts of various types of severe weather events across the United States. The focus is on:

  1. Identifying the most harmful weather events with respect to population health.
  2. Assessing which events have the greatest economic consequences.

This report provides insights on how resources can be prioritized to mitigate the damage caused by these events.

Data Processing

We begin by loading the raw data from the NOAA Storm Database and processing it to extract the fields needed for the analysis. The dataset records severe weather events together with the fatalities, injuries, and economic damage attributed to each event.

# Load necessary libraries
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Load the dataset
# The CSV location defaults to the path used when this report was first
# produced, but it can be overridden by setting the STORM_DATA_CSV environment
# variable, so the report can be re-run on another machine without editing the
# code.
storm_data_path <- Sys.getenv(
  "STORM_DATA_CSV",
  unset = "C:/Users/medin/Downloads/repdata_data_StormData.csv/repdata_data_StormData.csv"
)

# Fail early with an actionable message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(storm_data_path)) {
  stop(
    "Storm data file not found: ", storm_data_path,
    ". Set the STORM_DATA_CSV environment variable to its location.",
    call. = FALSE
  )
}

storm_data <- read.csv(storm_data_path)

# Display the first few rows of the data
head(storm_data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
# Results Section

### Most Harmful Events to Population Health

# Aggregate fatalities and injuries by event type.
# EVTYPE is upper-cased and trimmed before grouping so that labels differing
# only in letter case or surrounding whitespace are counted as a single event
# type rather than being split across several groups.
health_impact <- storm_data %>%
  mutate(EVTYPE = toupper(trimws(EVTYPE))) %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(total_fatalities + total_injuries))

# Select top 10 most harmful events.
# head() returns at most 10 rows; indexing with [1:10, ] would pad the result
# with all-NA rows whenever fewer than 10 event types are present.
top_health_impact <- head(health_impact, 10)

# Plot the most harmful events, ordered by combined fatalities and injuries.
# geom_col() draws bar heights directly from the y aesthetic.
ggplot(
  top_health_impact,
  aes(
    x = reorder(EVTYPE, total_fatalities + total_injuries),
    y = total_fatalities + total_injuries
  )
) +
  geom_col(fill = "steelblue") +
  labs(
    title = "Top 10 Most Harmful Severe Weather Events (Population Health)",
    x = "Event Type",
    y = "Total Impact (Fatalities + Injuries)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

### Events with the Greatest Economic Impact

# Upper-case the damage exponent columns so that lower- and upper-case codes
# (e.g. "k" and "K") are scaled identically.
storm_data$PROPDMGEXP <- toupper(storm_data$PROPDMGEXP)
storm_data$CROPDMGEXP <- toupper(storm_data$CROPDMGEXP)

# Translate damage exponent codes into numeric multipliers.
#
# exp_code: vector of (upper-cased) exponent codes.
# Returns a numeric vector of the same length: 1e2 for "H", 1e3 for "K",
# 1e6 for "M", 1e9 for "B", 1 for any other non-missing code, and NA for a
# missing code.
#
# "H" is read as hundreds; previously it fell through to the default of 1.
# NOTE(review): every other code (including blank) is still treated as a
# multiplier of 1, as before - confirm the intended handling of any remaining
# codes against the NWS Storm Data documentation.
damage_multiplier <- function(exp_code) {
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[as.character(exp_code)])
  # Unrecognised codes keep the raw damage figure; missing codes stay NA and
  # are dropped by na.rm = TRUE during aggregation.
  multiplier[is.na(multiplier) & !is.na(exp_code)] <- 1
  multiplier
}

# Convert the damage columns to dollar amounts
storm_data$property_damage <-
  storm_data$PROPDMG * damage_multiplier(storm_data$PROPDMGEXP)
storm_data$crop_damage <-
  storm_data$CROPDMG * damage_multiplier(storm_data$CROPDMGEXP)

# Aggregate economic damage by event type.
# EVTYPE is upper-cased and trimmed before grouping so that labels differing
# only in letter case or surrounding whitespace are counted as a single event
# type.
economic_impact <- storm_data %>%
  mutate(EVTYPE = toupper(trimws(EVTYPE))) %>%
  group_by(EVTYPE) %>%
  summarise(
    total_property_damage = sum(property_damage, na.rm = TRUE),
    total_crop_damage = sum(crop_damage, na.rm = TRUE)
  ) %>%
  arrange(desc(total_property_damage + total_crop_damage))

# Select top 10 events with greatest economic impact.
# head() returns at most 10 rows; indexing with [1:10, ] would pad the result
# with all-NA rows whenever fewer than 10 event types are present.
top_economic_impact <- head(economic_impact, 10)

# Plot economic impact, ordered by combined property and crop damage.
# geom_col() draws bar heights directly from the y aesthetic.
ggplot(
  top_economic_impact,
  aes(
    x = reorder(EVTYPE, total_property_damage + total_crop_damage),
    y = total_property_damage + total_crop_damage
  )
) +
  geom_col(fill = "tomato") +
  labs(
    title = "Top 10 Severe Weather Events with Greatest Economic Impact",
    x = "Event Type",
    y = "Total Economic Impact (Property + Crop Damage)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))