Synopsis

This analysis explores the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Database to determine which types of weather events are most harmful to population health and which have the greatest economic consequences. The dataset spans 1950 to 2011 and records fatalities, injuries, and property and crop damage for each event. Data cleaning consisted of converting event type labels to upper case and translating the damage exponent codes (K, M, B) into numeric multipliers so that damage could be expressed in dollars. The analysis shows that tornadoes are the most harmful to population health, causing by far the highest number of fatalities and injuries. Floods and hurricanes account for the most economic damage. These findings can help authorities prioritize disaster preparedness and resource allocation. The results are based on totals aggregated by event type and are visualized using bar plots.

Data Processing

library(tidyverse)
# Set working directory to where the file is located
# setwd("your_folder_path")

# Read the raw storm CSV, keep only the event type together with the
# casualty and damage columns used further down, and upper-case the event
# type labels so that differently-cased spellings share one group.
file <- "repdata_data_StormData.csv"
data <- read.csv(file, stringsAsFactors = FALSE) %>%
  select(
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  ) %>%
  mutate(EVTYPE = toupper(EVTYPE))
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   exp: Character vector (or a single string) of exponent codes such as
#        "K", "M" or "B". Matching ignores case and surrounding whitespace,
#        so "k", "m" and "b" are treated like their upper-case forms.
#
# Returns:
#   An unnamed numeric vector the same length as `exp`: 1e3 for K, 1e6 for
#   M, 1e9 for B and 1 for anything else (blank, NA, digits, symbols).
#
# NOTE(review): every code other than K/M/B keeps a multiplier of 1, as
# before. If the data also uses codes such as "H" or digits, confirm their
# intended meaning against the dataset documentation before extending this.
convert_exp <- function(exp) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(trimws(as.character(exp)))
  # match() yields NA for unknown, empty or missing codes; indexing with NA
  # gives NA, which is then replaced by the neutral multiplier 1.
  result <- unname(multipliers[match(code, names(multipliers))])
  result[is.na(result)] <- 1
  result
}

# Replace each exponent code with its numeric multiplier. vapply() pins the
# result to one unnamed number per row, so a table with zero rows yields
# numeric(0) instead of the empty list that sapply() would return (which
# would make the multiplications below fail).
data$PROPDMGEXP <- vapply(
  data$PROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)
data$CROPDMGEXP <- vapply(
  data$CROPDMGEXP, convert_exp, numeric(1),
  USE.NAMES = FALSE
)

# Damage amounts: reported figure times its multiplier.
data$PROP_DAMAGE <- data$PROPDMG * data$PROPDMGEXP
data$CROP_DAMAGE <- data$CROPDMG * data$CROPDMGEXP
# Per event type: total fatalities, total injuries and their sum, sorted so
# the event types with the largest combined count come first.
health_data <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE),
    total_harm = fatalities + injuries
  ) %>%
  arrange(desc(total_harm))

# Ten most harmful event types; the bare name prints the table.
top_health <- health_data %>%
  head(10)
top_health
## # A tibble: 10 × 4
##    EVTYPE            fatalities injuries total_harm
##    <chr>                  <dbl>    <dbl>      <dbl>
##  1 TORNADO                 5633    91346      96979
##  2 EXCESSIVE HEAT          1903     6525       8428
##  3 TSTM WIND                504     6957       7461
##  4 FLOOD                    470     6789       7259
##  5 LIGHTNING                816     5230       6046
##  6 HEAT                     937     2100       3037
##  7 FLASH FLOOD              978     1777       2755
##  8 ICE STORM                 89     1975       2064
##  9 THUNDERSTORM WIND        133     1488       1621
## 10 WINTER STORM             206     1321       1527
# Horizontal bar chart of the ten event types in top_health, with bars
# ordered by their combined fatality and injury count.
ggplot(
  data = top_health,
  mapping = aes(x = reorder(EVTYPE, total_harm), y = total_harm)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Most Harmful Events (Health Impact)",
    x = "Event Type",
    y = "Total Harm (Fatalities + Injuries)"
  )

# Per event type: summed property damage, summed crop damage and their
# total, sorted from the largest total downwards.
economic_data <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    property = sum(PROP_DAMAGE, na.rm = TRUE),
    crop = sum(CROP_DAMAGE, na.rm = TRUE),
    total_damage = property + crop
  ) %>%
  arrange(desc(total_damage))

# Ten costliest event types; the bare name prints the table.
top_econ <- economic_data %>%
  head(10)
top_econ
## # A tibble: 10 × 4
##    EVTYPE                 property        crop  total_damage
##    <chr>                     <dbl>       <dbl>         <dbl>
##  1 FLOOD             144657709807   5661968450 150319678257 
##  2 HURRICANE/TYPHOON  69305840000   2607872800  71913712800 
##  3 TORNADO            56925660790.   414953270  57340614060.
##  4 STORM SURGE        43323536000         5000  43323541000 
##  5 HAIL               15727367053.  3025537890  18752904943.
##  6 FLASH FLOOD        16140812067.  1421317100  17562129167.
##  7 DROUGHT             1046106000  13972566000  15018672000 
##  8 HURRICANE          11868319010   2741910000  14610229010 
##  9 RIVER FLOOD         5118945500   5029459000  10148404500 
## 10 ICE STORM           3944927860   5022113500   8967041360
# Horizontal bar chart of the ten event types in top_econ, with bars
# ordered by combined property and crop damage.
ggplot(
  data = top_econ,
  mapping = aes(x = reorder(EVTYPE, total_damage), y = total_damage)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 Events by Economic Damage",
    x = "Event Type",
    y = "Total Damage (USD)"
  )