Synopsis

This analysis explores the NOAA Storm Database to identify which types of severe weather events are most harmful to population health and have the greatest economic consequences across the United States.

Data Processing

# Load necessary packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the compressed storm records; the archive is expected to sit in the
# current working directory.
storm_data <- read.csv(file = "storm_data.csv.bz2", header = TRUE)

# Show column names, types and the first few values of every variable
str(object = storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Data cleaning and transformation

# Derive a Date-typed DATE column from the BGN_DATE text. BGN_DATE looks like
# "4/18/1950 0:00:00"; only the month/day/year part is named in the format,
# and as.Date() ignores the time text that follows it.
storm_data[["DATE"]] <- as.Date(storm_data[["BGN_DATE"]], format = "%m/%d/%Y")

Results

Types of Events Most Harmful to Population Health

# Total fatalities and injuries per event type, plus their sum as a single
# health-impact figure; keep the ten event types with the largest sum.
events_health <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES, na.rm = TRUE),
    total_injuries = sum(INJURIES, na.rm = TRUE),
    # Summaries defined above are already available inside this call
    total_health_impact = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_health_impact)) %>%
  head(n = 10)

# Bar chart of the ten event types in events_health, bars ordered by their
# combined fatalities and injuries.
library(ggplot2)
ggplot(
  data = events_health,
  mapping = aes(
    x = reorder(EVTYPE, total_health_impact),
    y = total_health_impact
  )
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Event Types by Total Health Impact",
    x = "Event Type",
    y = "Total Health Impact (Fatalities + Injuries)",
    caption = "Data source: NOAA Storm Database"
  )

Types of Events with the Greatest Economic Consequences

# Calculate property and crop damage by event type.
#
# PROPDMG and CROPDMG hold only the leading figures of each estimate; the
# magnitude is stored separately in PROPDMGEXP / CROPDMGEXP (for example
# 25 with "K" means 25,000). Summing the raw columns would weigh a 2.5K loss
# and a 2.5B loss identically, so every amount is scaled by its magnitude
# code before it is aggregated.

# Translate damage magnitude codes into numeric multipliers.
#
# exp_code: vector of magnitude codes (e.g. PROPDMGEXP).
# Returns a numeric vector of the same length. H/K/M/B in either case map to
# hundred/thousand/million/billion.
# NOTE(review): blank, missing and any other codes are left unscaled
# (multiplier 1) -- confirm this reading against the data documentation.
damage_multiplier <- function(exp_code) {
  known_codes <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Lookup by name yields NA for anything that is not a known code
  multiplier <- unname(known_codes[toupper(trimws(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

events_economic <- storm_data %>%
  mutate(property_damage = PROPDMG * damage_multiplier(PROPDMGEXP),
         crop_damage = CROPDMG * damage_multiplier(CROPDMGEXP)) %>%
  group_by(EVTYPE) %>%
  summarize(total_property_damage = sum(property_damage, na.rm = TRUE),
            total_crop_damage = sum(crop_damage, na.rm = TRUE)) %>%
  mutate(total_economic_impact = total_property_damage + total_crop_damage) %>%
  arrange(desc(total_economic_impact)) %>%
  head(10)

# Bar chart of the ten event types in events_economic, bars ordered by their
# combined property and crop damage.
ggplot(
  data = events_economic,
  mapping = aes(
    x = reorder(EVTYPE, total_economic_impact),
    y = total_economic_impact
  )
) +
  # geom_col() draws bar heights straight from the y values
  geom_col(fill = "green") +
  labs(
    title = "Top 10 Event Types by Total Economic Impact",
    x = "Event Type",
    y = "Total Economic Impact (Property + Crop Damage)",
    caption = "Data source: NOAA Storm Database"
  )

Conclusions

This analysis of the NOAA Storm Database highlights the severe weather events that pose the highest risks to population health and have the greatest economic consequences. Municipalities and government agencies can use this information to prioritize resources and improve preparedness efforts.