This report explores the NOAA Storm Database to identify which types of severe weather events are most harmful to public health and which have the greatest economic consequences in the United States. The data span 1950 to November 2011 and record the fatalities, injuries, property damage, and crop damage caused by various weather events. The analysis focuses on two key questions: (1) which events are most harmful to population health, and (2) which have the most significant economic impact. After loading and cleaning the data, we summarize fatalities, injuries, and combined property and crop damage by event type. The results reveal that tornadoes are the most harmful in terms of fatalities and injuries, while floods and hurricanes contribute most to economic damages.
# Load required packages
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
# Read the raw storm records straight from the bzip2-compressed CSV
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Print a compact overview of every column: its type and first few values
str(storm_data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the columns used by the health and economic summaries, then
# upper-case the event labels so that spellings differing only by letter
# case are grouped as a single event type.
storm_data <- storm_data %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  ) %>%
  mutate(EVTYPE = toupper(EVTYPE))
# Translate damage-exponent codes into numeric multipliers.
#
# Args:
#   exp: Vector of exponent codes (character, or anything match() can
#        compare against character values, such as a factor).
#
# Returns:
#   A double vector the same length as `exp`: 1e2 for "H"/"h", 1e3 for
#   "K"/"k", 1e6 for "M"/"m", 1e9 for "B"/"b", and 0 for every other value
#   (including "" and NA). A damage figure paired with an unrecognised code
#   is therefore multiplied by 0 and contributes nothing to the totals.
exp_to_num <- function(exp) {
  # One lookup table instead of nested ifelse() calls. Both letter cases are
  # listed explicitly so matching stays exact and locale-independent.
  codes <- c("H", "h", "K", "k", "M", "m", "B", "b")
  multipliers <- c(1e2, 1e2, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9)

  idx <- match(exp, codes)
  known <- !is.na(idx)

  # Start from an all-zero double vector so the result type is always
  # numeric, even for zero-length input (the nested ifelse() form returned
  # logical(0) in that case).
  result <- numeric(length(exp))
  result[known] <- multipliers[idx[known]]
  result
}
# Combine each damage figure with its exponent code to get the damage value.
# The exponent columns are coerced to character first so exp_to_num()
# always receives plain strings.
storm_data <- storm_data %>%
  mutate(
    PROPDMGEXP = as.character(PROPDMGEXP),
    CROPDMGEXP = as.character(CROPDMGEXP),
    PROPDMGVAL = PROPDMG * exp_to_num(PROPDMGEXP),
    CROPDMGVAL = CROPDMG * exp_to_num(CROPDMGEXP)
  )
# Health summary: total fatalities and injuries per event type, keeping the
# ten event types with the largest combined count. slice_max() replaces the
# superseded arrange(desc(x)) %>% top_n(10, x) idiom: it ranks on the given
# expression, keeps ties, and returns the rows in descending order.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Fatalities = sum(FATALITIES, na.rm = TRUE),
    Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  slice_max(Fatalities + Injuries, n = 10)

# Economic summary: total property and crop damage per event type, keeping
# the ten event types with the largest combined damage. TotalDamage is
# built from the two sums defined just before it in the same summarise().
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    PropertyDamage = sum(PROPDMGVAL, na.rm = TRUE),
    CropDamage = sum(CROPDMGVAL, na.rm = TRUE),
    TotalDamage = PropertyDamage + CropDamage
  ) %>%
  slice_max(TotalDamage, n = 10)
# Reshape to long form: one row per (event type, harm type) pair
health_melt <- health_impact %>%
  tidyr::pivot_longer(
    cols = c(Fatalities, Injuries),
    names_to = "Type",
    values_to = "Count"
  )
# Horizontal, side-by-side bars. reorder() sorts the event types by their
# mean Count across the two harm types, which is the same order as sorting
# by the combined total.
ggplot(
  health_melt,
  aes(x = reorder(EVTYPE, Count), y = Count, fill = Type)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Impacting Population Health",
    x = "Event Type",
    y = "Number of People Affected"
  ) +
  theme_minimal()
# Reshape to long form: one row per (event type, damage type) pair.
# Columns not listed in `cols` (such as TotalDamage) are carried along
# unchanged.
economic_melt <- economic_impact %>%
  tidyr::pivot_longer(
    cols = c(PropertyDamage, CropDamage),
    names_to = "Type",
    values_to = "Damage"
  )
# Horizontal, side-by-side bars. Damage is divided by 1e9 in the aesthetic
# so the axis reads in billions.
ggplot(
  economic_melt,
  aes(x = reorder(EVTYPE, Damage), y = Damage / 1e9, fill = Type)
) +
  geom_col(position = "dodge") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Damage (in Billions USD)"
  ) +
  theme_minimal()