This analysis uses the NOAA Storm Database to assess the effects of
severe weather in the United States. The aim is to determine which types
of events are most harmful to population health and which have the
greatest economic impact. Population health effects are quantified as the
number of fatalities and injuries; economic impact as the cost of damage
to property and crops. The results indicate that a small number of event
types contribute significantly to human and economic losses. Knowledge of
these patterns can help decision-makers prioritize disaster
preparedness and resource allocation.
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Load the raw storm records; text columns are kept as character vectors
# rather than being converted to factors.
storm <- read.csv(
  file = "repdata_data_StormData.csv",
  stringsAsFactors = FALSE
)
# Print the column names and types of the loaded data frame.
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the columns used by the rest of the analysis: the event type,
# the casualty counts, and the damage amounts with their exponent codes.
storm <- select(
  storm,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)
# Translate damage exponent codes into numeric multipliers.
#
# exp:     vector of exponent codes (character, or coercible to character).
# returns: numeric vector of the same length as `exp`:
#            "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9
#          Matching is case-insensitive. Any other non-missing code
#          (including the empty string) yields 1; NA input yields NA.
#
# NOTE(review): "H" is treated as hundreds, the commonly used reading of
# this code; confirm against the Storm Data documentation. Other codes are
# deliberately left at a multiplier of 1, as before.
damage_multiplier <- function(exp) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

  codes <- as.character(exp)
  # Record missing inputs before upper-casing so they stay NA in the result.
  missing_code <- is.na(codes)

  result <- unname(multipliers[match(toupper(codes), names(multipliers))])
  # Unrecognised (but present) codes leave the damage figure unscaled.
  result[is.na(result) & !missing_code] <- 1
  result
}
# Upper-case the exponent codes so that e.g. "k" and "K" are treated alike.
storm[["PROPDMGEXP"]] <- toupper(storm[["PROPDMGEXP"]])
storm[["CROPDMGEXP"]] <- toupper(storm[["CROPDMGEXP"]])

# Scale each recorded damage amount by the multiplier for its exponent code.
storm[["PROP_DAMAGE"]] <-
  damage_multiplier(storm[["PROPDMGEXP"]]) * storm[["PROPDMG"]]
storm[["CROP_DAMAGE"]] <-
  damage_multiplier(storm[["CROPDMGEXP"]]) * storm[["CROPDMG"]]
# Combined per-record measures: casualties (fatalities plus injuries) and
# total damage (property plus crop).
storm$HEALTH_IMPACT <- storm$FATALITIES + storm$INJURIES
storm$ECONOMIC_DAMAGE <- storm$PROP_DAMAGE + storm$CROP_DAMAGE
# Total casualties per event type, largest first, limited to the first
# ten rows.
health_summary <- storm %>%
  group_by(EVTYPE) %>%
  summarise(total_health = sum(HEALTH_IMPACT, na.rm = TRUE)) %>%
  arrange(desc(total_health)) %>%
  slice_head(n = 10)
# Horizontal bar chart of the casualty totals; event types are ordered by
# their total so the bars are sorted.
ggplot(
  data = health_summary,
  mapping = aes(
    x = reorder(EVTYPE, total_health),
    y = total_health
  )
) +
  labs(
    title = "Top 10 Weather Events Harmful to Population Health",
    x = "Event Type",
    y = "Fatalities + Injuries"
  ) +
  geom_bar(stat = "identity") +
  coord_flip()

# Total damage per event type, largest first, limited to the first ten rows.
economic_summary <- storm %>%
  group_by(EVTYPE) %>%
  summarise(total_damage = sum(ECONOMIC_DAMAGE, na.rm = TRUE)) %>%
  arrange(desc(total_damage)) %>%
  slice_head(n = 10)
# Horizontal bar chart of the damage totals; event types are ordered by
# their total so the bars are sorted.
ggplot(
  data = economic_summary,
  mapping = aes(
    x = reorder(EVTYPE, total_damage),
    y = total_damage
  )
) +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Damage (USD)"
  ) +
  geom_bar(stat = "identity") +
  coord_flip()
