Synopsis

This analysis utilizes the NOAA Storm database (1950–2011) to address two key questions:
1. Which severe weather event types cause the most harm to population health (fatalities and injuries)?
2. Which event types result in the greatest economic consequences (property and crop damage)?
By processing raw data, cleaning event types, and aggregating impacts, the analysis identifies tornadoes as the primary health threat and floods as the leading economic burden. Results are visualized to inform resource prioritization for severe weather preparedness.

Data Processing

Loading and processing the data

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Location of the compressed NOAA storm data and the local path it is cached at.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
archive_path <- "storm_data.csv.bz2"

# Fetch the archive only when it is not already on disk, so re-running the
# analysis does not download it again. The default download method is used
# (no external curl binary required) and mode = "wb" keeps the compressed
# file intact on platforms that distinguish text from binary transfers.
if (!file.exists(archive_path)) {
  download.file(url, destfile = archive_path, mode = "wb")
}

# The .bz2 archive is read directly, without a separate decompression step.
data <- read.csv(archive_path, header = TRUE, sep = ",")

Checking data

# Number of rows and columns in the loaded data set.
c(nrow(data), ncol(data))
## [1] 902297     37
# Column names available for the analysis.
colnames(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Combine fatalities and injuries

# Per-record casualty count: injuries plus fatalities.
data$pop_casualties <- data$INJURIES + data$FATALITIES

Summarize the data by the total number of injuries and fatalities that each type of storm has caused

# Total casualties, injuries and fatalities per event type, with the event
# types sorted from most to fewest total casualties.
by_event <- group_by(data, EVTYPE)
casualties <- summarize(
  by_event,
  pop_casualties = sum(pop_casualties),
  INJURIES = sum(INJURIES),
  FATALITIES = sum(FATALITIES)
)
casualties <- arrange(casualties, desc(pop_casualties))

# Keep the ten event types with the most casualties and print them.
top10 <- casualties[seq_len(10), ]
top10
## # A tibble: 10 × 4
##    EVTYPE            pop_casualties INJURIES FATALITIES
##    <chr>                      <dbl>    <dbl>      <dbl>
##  1 TORNADO                    96979    91346       5633
##  2 EXCESSIVE HEAT              8428     6525       1903
##  3 TSTM WIND                   7461     6957        504
##  4 FLOOD                       7259     6789        470
##  5 LIGHTNING                   6046     5230        816
##  6 HEAT                        3037     2100        937
##  7 FLASH FLOOD                 2755     1777        978
##  8 ICE STORM                   2064     1975         89
##  9 THUNDERSTORM WIND           1621     1488        133
## 10 WINTER STORM                1527     1321        206

Result

# Reshape to one row per (event type, casualty type) so injuries and
# fatalities can be stacked in a single bar. The combined total column is
# dropped by name because the stacked bar height already represents it.
top10_pivoted <- pivot_longer(
  select(top10, -pop_casualties),
  cols = c(INJURIES, FATALITIES),
  names_to = "casualty_type"
)

# Order the bars by total casualties (largest first) instead of
# alphabetically, so the chart shows the ranking directly.
p <- ggplot(
  top10_pivoted,
  aes(
    fill = casualty_type,
    x = reorder(EVTYPE, -value, FUN = sum),
    y = value
  )
) +
  geom_col(position = "stack")

p +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.75)) +
  labs(
    title = "Casualties By Storm Type",
    x = "Storm Type",
    y = "Casualties",
    fill = "Casualty Type"
  )

Economic consequences of each type of storm

# Translate a damage-magnitude code (PROPDMGEXP / CROPDMGEXP) into the
# numeric factor the accompanying damage figure must be multiplied by.
#   exp_code: vector of magnitude codes; case and surrounding blanks ignored.
#   returns:  numeric vector of multipliers, same length as exp_code.
# K, M and B stand for thousands, millions and billions; H is treated as
# hundreds.
# NOTE(review): blank, numeric and symbol codes have no documented meaning
# here, so they are taken at face value (multiplier 1) -- confirm this choice.
damage_multiplier <- function(exp_code) {
  lookup <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  scale <- unname(lookup[toupper(trimws(as.character(exp_code)))])
  scale[is.na(scale)] <- 1
  scale
}

# PROPDMG and CROPDMG hold only the mantissa of each damage estimate; the
# magnitude lives in the matching *EXP column. Scale to dollars before adding
# or aggregating, otherwise a 2.5 (billion) entry counts less than a
# 500 (thousand) entry.
data <- data %>%
  mutate(
    prop_damage_usd = PROPDMG * damage_multiplier(PROPDMGEXP),
    crop_damage_usd = CROPDMG * damage_multiplier(CROPDMGEXP),
    econ_damage = prop_damage_usd + crop_damage_usd
  )

# Total, property and crop damage (in dollars) per event type, sorted from
# most to least total damage.
econ_damage <- data %>%
  group_by(EVTYPE) %>%
  summarize(
    econ_damage = sum(econ_damage),
    Property_Damage = sum(prop_damage_usd),
    Crop_Damage = sum(crop_damage_usd)
  ) %>%
  arrange(desc(econ_damage))

# Ten costliest event types.
# NOTE(review): the printed table that follows in this report was produced
# from unscaled figures; re-knit the document to refresh it.
top10econ <- econ_damage[seq_len(10), ]
top10econ
## # A tibble: 10 × 4
##    EVTYPE             econ_damage Property_Damage Crop_Damage
##    <chr>                    <dbl>           <dbl>       <dbl>
##  1 TORNADO               3312277.        3212258.     100019.
##  2 FLASH FLOOD           1599325.        1420125.     179200.
##  3 TSTM WIND             1445168.        1335966.     109203.
##  4 HAIL                  1268290.         688693.     579596.
##  5 FLOOD                 1067976.         899938.     168038.
##  6 THUNDERSTORM WIND      943636.         876844.      66791.
##  7 LIGHTNING              606932.         603352.       3581.
##  8 THUNDERSTORM WINDS     464978.         446293.      18685.
##  9 HIGH WIND              342015.         324732.      17283.
## 10 WINTER STORM           134700.         132721.       1979.

Plot Results

# Shorter column names so the legend reads "Property" / "Crops".
top10econ <- rename(
  top10econ,
  Property = Property_Damage,
  Crops = Crop_Damage
)

# Reshape to one row per (event type, damage type). The combined total column
# is dropped by name because the stacked bar height already represents it.
top10econ_pivoted <- pivot_longer(
  select(top10econ, -econ_damage),
  cols = c(Property, Crops),
  names_to = "damage_type",
  values_to = "value_thousands"
)

# Express the plotted values in thousands.
# NOTE(review): this is "thousands of dollars" only if the damage columns were
# scaled by PROPDMGEXP / CROPDMGEXP upstream -- confirm.
top10econ_pivoted$value_thousands <- top10econ_pivoted$value_thousands / 1000

# Order the bars by total damage (largest first) instead of alphabetically,
# so the chart shows the ranking directly.
p <- ggplot(
  top10econ_pivoted,
  aes(
    fill = damage_type,
    x = reorder(EVTYPE, -value_thousands, FUN = sum),
    y = value_thousands
  )
) +
  geom_col(position = "stack")
p +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.75)) +
  labs(
    title = "Economic Damage By Storm Type",
    x = "Storm Type",
    y = "Damage (Thousands)",
    fill = "Damage Type"
  )