This analysis uses the NOAA Storm Database to assess the effects of severe weather events in the United States. The aim is to determine which types of events are most harmful to population health and which have the greatest economic impact. Population health effects are quantified as the number of fatalities and injuries; economic impact is quantified as the cost of property and crop damage. The results indicate that a small number of event types account for a large share of both the human and the economic losses. Knowledge of these patterns can help decision-makers prioritize disaster preparedness and resource allocation.

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
# Read the raw storm records from the CSV in the working directory, keeping
# text columns as plain character vectors rather than factors.
storm <- read.csv(
  file = "repdata_data_StormData.csv",
  stringsAsFactors = FALSE
)
# Print the column names and types of the loaded data frame.
str(object = storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Keep only the columns used below: event type, casualty counts, and the
# damage amounts with their magnitude codes.
# EVTYPE is free text and is used as the grouping key later on, so labels that
# differ only in letter case or surrounding whitespace would otherwise be
# totalled as separate event types. Normalise them once here.
storm <- storm %>%
  select(EVTYPE, FATALITIES, INJURIES,
         PROPDMG, PROPDMGEXP,
         CROPDMG, CROPDMGEXP) %>%
  mutate(EVTYPE = toupper(trimws(EVTYPE)))
#' Convert damage magnitude codes to numeric multipliers
#'
#' Matching is case-insensitive, so callers do not need to upper-case the
#' codes first.
#'
#' @param exp Vector of magnitude codes (character or factor), e.g. the
#'   PROPDMGEXP / CROPDMGEXP columns.
#' @return A numeric vector the same length as `exp`: 1e2 for "H", 1e3 for
#'   "K", 1e6 for "M", 1e9 for "B", and 1 for any other non-missing code
#'   (including the empty string). `NA` codes yield `NA`.
damage_multiplier <- function(exp) {
  # NOTE(review): "H" is read as hundreds; the official documentation only
  # names K, M and B, so confirm this interpretation against the data notes.
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(as.character(exp))
  # Name lookup returns NA for codes that are not in the table.
  out <- unname(multipliers[code])
  # Unrecognised (but present) codes leave the amount unscaled.
  out[is.na(out) & !is.na(code)] <- 1
  out
}

# Upper-case the magnitude codes, then scale the recorded amounts into
# dollar values. mutate() evaluates its arguments in order, so the damage
# columns see the already upper-cased codes.
storm <- storm %>%
  mutate(
    PROPDMGEXP = toupper(PROPDMGEXP),
    CROPDMGEXP = toupper(CROPDMGEXP),
    PROP_DAMAGE = PROPDMG * damage_multiplier(PROPDMGEXP),
    CROP_DAMAGE = CROPDMG * damage_multiplier(CROPDMGEXP)
  )
# Per-record combined measures: casualties (fatalities plus injuries) and
# total monetary damage (property plus crop).
storm$HEALTH_IMPACT <- storm$FATALITIES + storm$INJURIES
storm$ECONOMIC_DAMAGE <- storm$PROP_DAMAGE + storm$CROP_DAMAGE
# Sum the casualty measure per event type, ignoring missing values.
health_summary <- summarise(
  group_by(storm, EVTYPE),
  total_health = sum(HEALTH_IMPACT, na.rm = TRUE)
)
# Rank from largest to smallest total and keep the first ten rows.
health_summary <- arrange(health_summary, desc(total_health))
health_summary <- slice(health_summary, 1:10)
# Horizontal bar chart of the casualty totals; reorder() sorts the bars by
# total so the ranking is visible at a glance.
ggplot(
  data = health_summary,
  mapping = aes(x = reorder(EVTYPE, total_health), y = total_health)
) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 10 Weather Events Harmful to Population Health") +
  xlab("Event Type") +
  ylab("Fatalities + Injuries")

# Sum the combined property and crop damage per event type, ignoring
# missing values.
economic_summary <- summarise(
  group_by(storm, EVTYPE),
  total_damage = sum(ECONOMIC_DAMAGE, na.rm = TRUE)
)
# Rank from largest to smallest total and keep the first ten rows.
economic_summary <- arrange(economic_summary, desc(total_damage))
economic_summary <- slice(economic_summary, 1:10)
# Horizontal bar chart of the damage totals; reorder() sorts the bars by
# total so the ranking is visible at a glance.
ggplot(
  data = economic_summary,
  mapping = aes(x = reorder(EVTYPE, total_damage), y = total_damage)
) +
  geom_col() +
  coord_flip() +
  ggtitle("Top 10 Weather Events by Economic Damage") +
  xlab("Event Type") +
  ylab("Total Damage (USD)")