This analysis utilizes the NOAA Storm database (1950–2011) to address
two key questions:
1. Which severe weather event types cause the most harm to population
health (fatalities and injuries)?
2. Which event types result in the greatest economic consequences
(property and crop damage)?
By processing raw data, cleaning event types, and aggregating impacts,
the analysis identifies tornadoes as the primary health threat and
floods as the leading economic burden. Results are visualized to inform
resource prioritization for severe weather preparedness.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Source archive for the NOAA storm data set and the local path it is cached at.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_archive <- "storm_data.csv.bz2"

# Fetch the archive only when no local copy exists, so re-running the analysis
# does not download it again. The default download method is used instead of
# forcing an external curl binary, and mode = "wb" keeps the compressed
# (binary) file intact on platforms that distinguish text from binary writes.
if (!file.exists(storm_archive)) {
  download.file(url, destfile = storm_archive, mode = "wb")
}

# read.csv() is handed the compressed file directly, as in the original code.
data <- read.csv(storm_archive, header = TRUE, sep = ",")

# Quick structural check: row/column counts, then the column names.
dim(data)
## [1] 902297 37
names(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Per-record casualty count: injuries and fatalities combined.
data <- mutate(data, pop_casualties = INJURIES + FATALITIES)

# Total the casualty figures for each event type and rank the event types by
# their combined total, largest first. Columns are produced directly in the
# order EVTYPE, pop_casualties, INJURIES, FATALITIES.
casualties <- data %>%
  group_by(EVTYPE) %>%
  summarize(
    pop_casualties = sum(pop_casualties),
    INJURIES = sum(INJURIES),
    FATALITIES = sum(FATALITIES)
  ) %>%
  arrange(desc(pop_casualties))

# Keep the ten highest-ranked event types and print them.
top10 <- casualties[seq_len(10), ]
top10
## # A tibble: 10 × 4
## EVTYPE pop_casualties INJURIES FATALITIES
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 96979 91346 5633
## 2 EXCESSIVE HEAT 8428 6525 1903
## 3 TSTM WIND 7461 6957 504
## 4 FLOOD 7259 6789 470
## 5 LIGHTNING 6046 5230 816
## 6 HEAT 3037 2100 937
## 7 FLASH FLOOD 2755 1777 978
## 8 ICE STORM 2064 1975 89
## 9 THUNDERSTORM WIND 1621 1488 133
## 10 WINTER STORM 1527 1321 206
# Reshape to one row per (event type, casualty type). The combined total column
# is dropped first so only INJURIES and FATALITIES are stacked.
top10_pivoted <- top10 %>%
  select(-pop_casualties) %>%
  pivot_longer(cols = c(INJURIES, FATALITIES), names_to = "casualty_type")

# Stacked bar per event type, filled by casualty type.
p <- ggplot(top10_pivoted, aes(x = EVTYPE, y = value, fill = casualty_type)) +
  geom_col(position = "stack")

# Tilt the x-axis labels and attach the titles; evaluating this expression at
# top level renders the plot.
p +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.75)) +
  labs(
    title = "Casualties By Storm Type",
    x = "Storm Type",
    y = "Casualties",
    fill = "Casualty Type"
  )
# Translate a damage-exponent code (PROPDMGEXP / CROPDMGEXP) into a numeric
# multiplier. The damage columns hold a mantissa only; the companion *EXP
# column carries the magnitude, so the raw figures cannot be summed directly.
#
# Mapping used here:
#   H/h -> 1e2, K/k -> 1e3, M/m -> 1e6, B/b -> 1e9
#   a single digit d -> 10^d
#   anything else (blank, "+", "-", "?", NA) -> 1, i.e. the figure is left as is
# NOTE(review): the treatment of digit and symbol codes is a judgement call;
# confirm against the NOAA storm data documentation if those rows matter.
#
# exp_code: vector of exponent codes (character or factor).
# Returns:  numeric vector of multipliers, same length as exp_code.
damage_multiplier <- function(exp_code) {
  code <- toupper(trimws(as.character(exp_code)))
  multiplier <- rep(1, length(code))

  letter_scale <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  is_letter <- code %in% names(letter_scale)
  multiplier[is_letter] <- unname(letter_scale[code[is_letter]])

  is_digit <- grepl("^[0-9]$", code)
  multiplier[is_digit] <- 10^as.numeric(code[is_digit])

  multiplier
}

# Scale property and crop damage by their exponent codes, then combine them
# into a single per-record economic damage figure.
data <- data %>%
  mutate(
    prop_damage_scaled = PROPDMG * damage_multiplier(PROPDMGEXP),
    crop_damage_scaled = CROPDMG * damage_multiplier(CROPDMGEXP),
    econ_damage = prop_damage_scaled + crop_damage_scaled
  )

# Total the scaled damage per event type, largest combined total first.
econ_damage <- data %>%
  group_by(EVTYPE) %>%
  summarize(
    econ_damage = sum(econ_damage),
    Property_Damage = sum(prop_damage_scaled),
    Crop_Damage = sum(crop_damage_scaled)
  ) %>%
  arrange(desc(econ_damage))

# Keep the ten costliest event types and print them.
# NOTE: any table rendered before the exponent scaling was added shows
# unscaled sums and must be regenerated by re-running the analysis.
top10econ <- head(econ_damage, 10)
top10econ
## # A tibble: 10 × 4
## EVTYPE econ_damage Property_Damage Crop_Damage
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 3312277. 3212258. 100019.
## 2 FLASH FLOOD 1599325. 1420125. 179200.
## 3 TSTM WIND 1445168. 1335966. 109203.
## 4 HAIL 1268290. 688693. 579596.
## 5 FLOOD 1067976. 899938. 168038.
## 6 THUNDERSTORM WIND 943636. 876844. 66791.
## 7 LIGHTNING 606932. 603352. 3581.
## 8 THUNDERSTORM WINDS 464978. 446293. 18685.
## 9 HIGH WIND 342015. 324732. 17283.
## 10 WINTER STORM 134700. 132721. 1979.
# Shorten the damage column names so they read well as legend entries.
top10econ <- rename(top10econ, Property = Property_Damage, Crops = Crop_Damage)

# Reshape to one row per (event type, damage type), dropping the combined
# total, and divide the values by 1000 for plotting.
top10econ_pivoted <- top10econ %>%
  select(-econ_damage) %>%
  pivot_longer(
    cols = c(Property, Crops),
    names_to = "damage_type",
    values_to = "value_thousands"
  ) %>%
  mutate(value_thousands = value_thousands / 1000)

# Stacked bar per event type, filled by damage type.
p <- ggplot(
  top10econ_pivoted,
  aes(x = EVTYPE, y = value_thousands, fill = damage_type)
) +
  geom_col(position = "stack")

# Tilt the x-axis labels and attach the titles; evaluating this expression at
# top level renders the plot.
p +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.75)) +
  labs(
    title = "Economic Damage By Storm Type",
    x = "Storm Type",
    y = "Damage (Thousands)",
    fill = "Damage Type"
  )