Synopsis

This report explores the NOAA Storm Database to identify which types of severe weather events are most harmful to population health and which have the greatest economic consequences in the United States. The database covers events from 1950 to November 2011 and records the fatalities, injuries, property damage, and crop damage attributed to each event. The analysis addresses two questions: (1) which event types are most harmful to population health, and (2) which event types have the greatest economic impact. After loading and cleaning the data, we total fatalities, injuries, property damage, and crop damage for each event type and rank the event types by those totals. The results show that tornadoes are the most harmful in terms of fatalities and injuries, while floods and hurricanes contribute most to economic damage.

Data Processing

# Load required packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
# Read the bzip2-compressed storm CSV into a data frame
storm_data <- utils::read.csv(file = "repdata_data_StormData.csv.bz2")

# Print a compact overview of the columns and their types
utils::str(object = storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Upper-case the event-type labels so that differently-cased spellings fall
# into the same group, then keep only the columns the analysis uses
storm_data <- storm_data %>%
  mutate(EVTYPE = toupper(EVTYPE)) %>%
  select(
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  )

# Translate damage exponent codes into numeric multipliers.
#
# exp: vector of exponent codes.
# Returns a numeric vector, one multiplier per element of `exp`:
#   "H"/"h" -> 1e2, "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9.
# Every other value (including "" and NA) maps to 0.
exp_to_num <- function(exp) {
  multiplier_by_code <- c(
    H = 1e2, h = 1e2,
    K = 1e3, k = 1e3,
    M = 1e6, m = 1e6,
    B = 1e9, b = 1e9
  )
  # match() yields NA for codes that are not in the table
  position <- match(exp, names(multiplier_by_code))
  multiplier <- unname(multiplier_by_code[position])
  # Unrecognised codes contribute nothing
  multiplier[is.na(position)] <- 0
  multiplier
}

# Derive dollar amounts for property and crop damage: make sure the exponent
# codes are character, then scale each reported figure by the multiplier
# that exp_to_num() assigns to its code
storm_data <- storm_data %>%
  mutate(
    PROPDMGEXP = as.character(PROPDMGEXP),
    CROPDMGEXP = as.character(CROPDMGEXP),
    PROPDMGVAL = PROPDMG * exp_to_num(PROPDMGEXP),
    CROPDMGVAL = CROPDMG * exp_to_num(CROPDMGEXP)
  )

# Create summary data

# Health impact: total fatalities and injuries per event type, restricted to
# the ten event types with the largest combined casualty count.
# slice_max() replaces the superseded top_n(); like top_n() it keeps ties, so
# more than ten rows can be returned when the tenth place is tied.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  slice_max(order_by = Fatalities + Injuries, n = 10) %>%
  # Make the largest-first ordering explicit rather than relying on slice_max()
  arrange(desc(Fatalities + Injuries))

# Economic impact: total property and crop damage per event type, restricted
# to the ten event types with the largest combined damage.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    PropertyDamage = sum(PROPDMGVAL, na.rm = TRUE),
    CropDamage = sum(CROPDMGVAL, na.rm = TRUE),
    # Later summaries can refer to the ones defined just above
    TotalDamage = PropertyDamage + CropDamage,
    .groups = "drop"
  ) %>%
  slice_max(order_by = TotalDamage, n = 10) %>%
  arrange(desc(TotalDamage))

# Reshape to long form: one row per event type and casualty kind
health_melt <- health_impact %>%
  tidyr::pivot_longer(
    cols = c(Fatalities, Injuries),
    names_to = "Type",
    values_to = "Count"
  )

# Horizontal, side-by-side bars of fatalities and injuries per event type
ggplot(
  data = health_melt,
  mapping = aes(x = reorder(EVTYPE, Count), y = Count, fill = Type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  ggtitle("Top 10 Weather Events Impacting Population Health") +
  xlab("Event Type") +
  ylab("Number of People Affected") +
  theme_minimal()

# Reshape to long form: one row per event type and damage kind
economic_melt <- economic_impact %>%
  tidyr::pivot_longer(
    cols = c(PropertyDamage, CropDamage),
    names_to = "Type",
    values_to = "Damage"
  )

# Horizontal, side-by-side bars of property and crop damage per event type;
# the damage values are divided by 1e9 to match the axis label
ggplot(
  data = economic_melt,
  mapping = aes(x = reorder(EVTYPE, Damage), y = Damage / 1e9, fill = Type)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  ggtitle("Top 10 Weather Events by Economic Damage") +
  xlab("Event Type") +
  ylab("Damage (in Billions USD)") +
  theme_minimal()