Synopsis

This analysis explores the NOAA Storm Database to determine the types of severe weather events that are most harmful to population health and those that have the greatest economic consequences across the United States. The analysis includes data processing steps, summary statistics, and visualizations to identify the most impactful event types.

# Load required packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

# Download the compressed NOAA Storm Database once and reuse the local copy
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
file <- "StormData.csv.bz2"
if (!file.exists(file)) {
  # The archive is a binary bzip2 file: request a binary transfer explicitly
  # so the download cannot be damaged by a text-mode transfer on Windows.
  download.file(url, file, mode = "wb")
}

# Read the data; bzfile() opens the bzip2 archive so read.csv() can parse it
# without a separate decompression step
storm_data <- read.csv(bzfile(file))

# Display the structure of the data
str(storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Keep only the event type plus the casualty and damage variables used below
storm_data <- select(
  storm_data,
  EVTYPE,
  FATALITIES,
  INJURIES,
  PROPDMG,
  PROPDMGEXP,
  CROPDMG,
  CROPDMGEXP
)

# Convert property and crop damage exponent codes to numeric multipliers.
#
# Args:
#   exp: Character (or factor) vector of exponent codes; may be length 0,
#        length 1 (as when called through sapply()/vapply()) or longer.
#
# Returns:
#   An unnamed numeric vector, the same length as `exp`, holding the
#   multiplier for each code (matching is case-insensitive):
#     "H" -> 100, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9
#     NA, "", "0", "+", "-", "?" -> 1 (damage value taken at face value)
#     any other code -> as.numeric(code); a digit yields itself, anything
#     unparseable yields NA (with the usual coercion warning).
convert_exp <- function(exp) {
  # Record missing codes before any string handling
  missing_code <- is.na(exp)
  code <- toupper(as.character(exp))

  known_codes <- c("H", "K", "M", "B", "", "0", "+", "-", "?")
  known_mult <- c(1e2, 1e3, 1e6, 1e9, 1, 1, 1, 1, 1)

  # Vectorised table lookup; codes not in the table come back as NA here
  idx <- match(code, known_codes)
  multiplier <- known_mult[idx]

  # NOTE(review): remaining codes (e.g. the digits "1".."9") are used as the
  # multiplier itself, as before; confirm whether they should instead be
  # treated as powers of ten.
  unresolved <- is.na(idx) & !missing_code
  multiplier[unresolved] <- as.numeric(code[unresolved])

  multiplier[missing_code] <- 1
  multiplier
}

# Replace each exponent code with its numeric multiplier, then scale the
# recorded damage figures by that multiplier.
# vapply() guarantees one numeric value per row (sapply() silently changes its
# return type on empty or ragged input), and USE.NAMES = FALSE avoids tagging
# every row of the column with its exponent code as an element name.
storm_data <- storm_data %>%
  mutate(
    PROPDMGEXP = vapply(PROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE),
    CROPDMGEXP = vapply(CROPDMGEXP, convert_exp, numeric(1), USE.NAMES = FALSE),
    PROPDMG = PROPDMG * PROPDMGEXP,
    CROPDMG = CROPDMG * CROPDMGEXP
  )

# Total fatalities and injuries per event type, deadliest first
# (ties on fatalities are broken by the injury count)
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    TotalFatalities = sum(FATALITIES, na.rm = TRUE),
    TotalInjuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(
    desc(TotalFatalities),
    desc(TotalInjuries)
  )

# Total property and crop damage per event type, costliest first
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    TotalPropDamage = sum(PROPDMG, na.rm = TRUE),
    TotalCropDamage = sum(CROPDMG, na.rm = TRUE)
  ) %>%
  # Combined damage is the sum of the two per-event totals
  mutate(TotalDamage = TotalPropDamage + TotalCropDamage) %>%
  arrange(desc(TotalDamage))

Results

Events Most Harmful to Population Health

# Bar chart of the ten event types with the most fatalities
top_health_impact <- top_n(health_impact, n = 10, wt = TotalFatalities)

ggplot(
  top_health_impact,
  aes(
    x = reorder(EVTYPE, -TotalFatalities),  # tallest bar first
    y = TotalFatalities,
    label = TotalFatalities
  )
) +
  geom_col(fill = "red") +
  geom_text(vjust = -0.5) +  # print each count just above its bar
  ggtitle("Top 10 Events Causing Fatalities") +
  xlab("Event Type") +
  ylab("Total Fatalities") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of the ten event types with the most injuries
top_injuries_impact <- top_n(health_impact, n = 10, wt = TotalInjuries)

ggplot(
  top_injuries_impact,
  aes(
    x = reorder(EVTYPE, -TotalInjuries),  # tallest bar first
    y = TotalInjuries,
    label = TotalInjuries
  )
) +
  geom_col(fill = "blue") +
  geom_text(vjust = -0.5) +  # print each count just above its bar
  ggtitle("Top 10 Events Causing Injuries") +
  xlab("Event Type") +
  ylab("Total Injuries") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Events with Greatest Economic Consequences

# Plot combined property and crop damage for the ten costliest event types
top_economic_impact <- economic_impact %>%
  top_n(10, wt = TotalDamage)

# TotalDamage holds raw dollar amounts (multipliers go up to 1e9), so the
# unformatted values make the bar labels and axis ticks unreadable. Show the
# figures in billions of dollars, rounded to one decimal place, instead.
ggplot(
  top_economic_impact,
  aes(x = reorder(EVTYPE, -TotalDamage), y = TotalDamage / 1e9)
) +
  geom_bar(stat = "identity", fill = "green") +
  geom_text(aes(label = sprintf("%.1f", TotalDamage / 1e9)), vjust = -0.5) +
  labs(
    title = "Top 10 Events with Greatest Economic Consequences",
    x = "Event Type",
    y = "Total Damage (billions USD)"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Conclusion

The analysis shows that tornadoes are the most harmful event type with respect to population health, causing the highest number of both fatalities and injuries. In terms of economic consequences, floods cause the greatest combined property and crop damage.