Synopsis

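This report analyzes the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database (1950–2011) to determine which types of severe weather events are most harmful to population health and which have the greatest economic consequences.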


Data Processing

Loading Required Libraries

library(ggplot2)
library(dplyr)
library(tidyr)

Loading the Raw Data

The data is loaded from the StormData.csv.bz2 file.

# Path to the bzip2-compressed storm events file
file_path <- "StormData.csv.bz2"

# read.csv() is pointed straight at the .bz2 file; text columns are kept as
# character vectors rather than being converted to factors
storm_data <- read.csv(file = file_path, stringsAsFactors = FALSE)

# Inspect the size of the data set and the available variables
dim(storm_data)
## [1] 902297     37
names(storm_data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Subsetting Relevant Columns

We only need the event type, health impact variables (fatalities, injuries), and economic impact variables (property damage, crop damage).

# Keep only the event type plus the health and economic impact variables
storm_subset <- select(
  storm_data,
  EVTYPE,
  FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)

head(storm_subset)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Cleaning Event Types

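The EVTYPE variable is troublesome (mixed case, extra whitespace, abbreviations). We apply basic normalization only: convert to uppercase and trim whitespace. Abbreviations and synonyms are not merged, so labels such as TSTM WIND and THUNDERSTORM WIND are counted as separate event types in the results below.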

# Normalise event labels: upper-case first, then strip surrounding whitespace
storm_subset <- storm_subset %>%
  mutate(EVTYPE = trimws(toupper(EVTYPE)))

Parsing Damage Multipliers

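The PROPDMGEXP and CROPDMGEXP columns contain multiplier codes (H = hundreds, K = thousands, M = millions, B = billions; a single digit 0–9 is read as a power of ten). Any other code, including a blank, is treated as a multiplier of 1. We convert these codes to numeric multipliers so we can compute actual dollar amounts.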

# Convert damage exponent codes into numeric multipliers.
#
# Args:
#   exp: vector of exponent codes; letter case and surrounding whitespace are
#        ignored.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#     a single digit "0".."9" -> 10 ^ digit,
#     anything else (blank, "+", "-", "?", NA, ...) -> 1.
#
# A lookup table is used instead of case_when(): case_when() evaluates every
# right-hand side on the whole vector, so as.numeric() was being applied to
# non-digit codes and raised "NAs introduced by coercion" warnings.
parse_exp <- function(exp) {
  exp <- toupper(trimws(exp))

  codes <- c("H", "K", "M", "B", as.character(0:9))
  values <- c(1e2, 1e3, 1e6, 1e9, 10^(0:9))

  idx <- match(exp, codes)
  multiplier <- values[idx]
  # Unrecognised or missing codes leave the recorded amount unscaled
  multiplier[is.na(idx)] <- 1
  multiplier
}

# Step 1: translate the exponent codes into numeric multipliers.
# Step 2: scale the recorded amounts into dollars and add property + crop.
storm_subset <- storm_subset %>%
  mutate(
    prop_multiplier = parse_exp(PROPDMGEXP),
    crop_multiplier = parse_exp(CROPDMGEXP)
  ) %>%
  mutate(
    prop_damage_total = prop_multiplier * PROPDMG,
    crop_damage_total = crop_multiplier * CROPDMG,
    total_damage = prop_damage_total + crop_damage_total
  )
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `prop_multiplier = parse_exp(PROPDMGEXP)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

Results

Question 1: Which Event Types Are Most Harmful to Population Health?

We aggregate total fatalities and injuries by event type and select the top 10 most harmful events.

# Sum fatalities and injuries per event type, rank by their combined total
# and keep the ten highest-ranked event types
health_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE),
    Total_Health_Impact = Total_Fatalities + Total_Injuries,
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Health_Impact)) %>%
  head(10)

health_impact
## # A tibble: 10 × 4
##    EVTYPE            Total_Fatalities Total_Injuries Total_Health_Impact
##    <chr>                        <dbl>          <dbl>               <dbl>
##  1 TORNADO                       5633          91346               96979
##  2 EXCESSIVE HEAT                1903           6525                8428
##  3 TSTM WIND                      504           6957                7461
##  4 FLOOD                          470           6789                7259
##  5 LIGHTNING                      816           5230                6046
##  6 HEAT                           937           2100                3037
##  7 FLASH FLOOD                    978           1777                2755
##  8 ICE STORM                       89           1975                2064
##  9 THUNDERSTORM WIND              133           1488                1621
## 10 WINTER STORM                   206           1321                1527
# Long format for plotting: one row per (event type, casualty type).
# EVTYPE becomes a factor whose levels follow the ranking in health_impact.
health_long <- health_impact %>%
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE)) %>%
  select(EVTYPE, Total_Fatalities, Total_Injuries) %>%
  pivot_longer(
    cols = !EVTYPE,
    names_to = "Type",
    values_to = "Count"
  )

# Stacked bars of fatalities and injuries for the ten ranked event types
ggplot(
  data = health_long,
  mapping = aes(x = reorder(EVTYPE, -Count), y = Count, fill = Type)
) +
  geom_col(position = "stack") +
  scale_fill_manual(
    values = c("Total_Fatalities" = "#c0392b", "Total_Injuries" = "#e67e22"),
    labels = c("Fatalities", "Injuries")
  ) +
  labs(
    title = "Top 10 Weather Events Most Harmful to Population Health (1950–2011)",
    subtitle = "Stacked bar showing fatalities and injuries by event type",
    x = "Event Type",
    y = "Total Casualties",
    fill = "Impact Type"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    legend.position = "top",
    plot.title = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
Figure 1: Top 10 weather event types by total population health impact (fatalities + injuries), 1950–2011. Tornadoes are by far the leading cause of combined casualties.

Tornadoes dominate all other event types in total population health impact. Excessive heat ranks second in both fatalities and total casualties, while thunderstorm winds (TSTM WIND) rank second in injuries.


Question 2: Which Event Types Have the Greatest Economic Consequences?

We aggregate total property and crop damage by event type and select the top 10 costliest events.

# Sum property and crop damage (in dollars) per event type, rank by their
# combined total and keep the ten costliest event types
economic_impact <- storm_subset %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Prop_Damage = sum(prop_damage_total, na.rm = TRUE),
    Total_Crop_Damage = sum(crop_damage_total, na.rm = TRUE),
    Total_Economic_Damage = Total_Prop_Damage + Total_Crop_Damage,
    .groups = "drop"
  ) %>%
  arrange(desc(Total_Economic_Damage)) %>%
  head(10)

# Print a copy scaled to billions (2 decimals) with the unit in the headers
economic_impact %>%
  mutate(across(where(is.numeric), function(x) round(x / 1e9, 2))) %>%
  rename_with(
    function(nm) paste0(nm, " (Billions $)"),
    .cols = where(is.numeric)
  )
## # A tibble: 10 × 4
##    EVTYPE   Total_Prop_Damage (B…¹ Total_Crop_Damage (B…² Total_Economic_Damag…³
##    <chr>                     <dbl>                  <dbl>                  <dbl>
##  1 FLOOD                    145.                     5.66                 150.  
##  2 HURRICA…                  69.3                    2.61                  71.9 
##  3 TORNADO                   57.0                    0.41                  57.4 
##  4 STORM S…                  43.3                    0                     43.3 
##  5 HAIL                      15.7                    3.03                  18.8 
##  6 FLASH F…                  16.8                    1.42                  18.2 
##  7 DROUGHT                    1.05                  14.0                   15.0 
##  8 HURRICA…                  11.9                    2.74                  14.6 
##  9 RIVER F…                   5.12                   5.03                  10.2 
## 10 ICE STO…                   3.94                   5.02                   8.97
## # ℹ abbreviated names: ¹​`Total_Prop_Damage (Billions $)`,
## #   ²​`Total_Crop_Damage (Billions $)`, ³​`Total_Economic_Damage (Billions $)`
# Long format for plotting: one row per (event type, damage type).
# EVTYPE becomes a factor whose levels follow the ranking in economic_impact;
# damage is rescaled from dollars to billions of dollars.
economic_long <- economic_impact %>%
  mutate(EVTYPE = factor(EVTYPE, levels = EVTYPE)) %>%
  select(EVTYPE, Total_Prop_Damage, Total_Crop_Damage) %>%
  pivot_longer(
    cols = !EVTYPE,
    names_to = "Type",
    values_to = "Damage"
  ) %>%
  mutate(Damage = Damage / 1e9)

# Stacked bars of property and crop damage (billions USD) for the ten
# costliest event types.
#
# `breaks` is given explicitly because unnamed `labels` are matched to the
# breaks by position. Without it the breaks follow the alphabetical order of
# `Type` ("Total_Crop_Damage" before "Total_Prop_Damage"), which attached the
# "Property Damage" label to the crop series and vice versa.
ggplot(economic_long, aes(x = reorder(EVTYPE, -Damage), y = Damage, fill = Type)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(
    values = c("Total_Prop_Damage" = "#2980b9", "Total_Crop_Damage" = "#27ae60"),
    breaks = c("Total_Prop_Damage", "Total_Crop_Damage"),
    labels = c("Property Damage", "Crop Damage")
  ) +
  labs(
    title    = "Top 10 Weather Events with Greatest Economic Consequences (1950–2011)",
    subtitle = "Stacked bar showing property and crop damage by event type (billions USD)",
    x        = "Event Type",
    y        = "Total Damage (Billions USD)",
    fill     = "Damage Type"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    axis.text.x  = element_text(angle = 45, hjust = 1),
    plot.title   = element_text(face = "bold"),
    legend.position = "top"
  )
Figure 2: Top 10 weather event types by total economic damage (property + crop, in billions USD), 1950–2011. Floods and hurricanes/typhoons lead in total economic losses.

Floods are the single largest contributor to total economic damage, primarily through property damage.


Summary Table

# Heading, then a markdown table of the five highest-ranked rows
cat("=== Top 5 Events by Health Impact ===\n")
## === Top 5 Events by Health Impact ===
knitr::kable(
  health_impact %>%
    select(EVTYPE, Total_Fatalities, Total_Injuries, Total_Health_Impact) %>%
    slice_head(n = 5),
  format = "markdown",
  col.names = c("Event Type", "Fatalities", "Injuries", "Total Casualties")
)
Event Type Fatalities Injuries Total Casualties
TORNADO 5633 91346 96979
EXCESSIVE HEAT 1903 6525 8428
TSTM WIND 504 6957 7461
FLOOD 470 6789 7259
LIGHTNING 816 5230 6046
cat("\n=== Top 5 Events by Economic Damage (Billions USD) ===\n")
## 
## === Top 5 Events by Economic Damage (Billions USD) ===
# Markdown table of the five costliest rows, scaled to billions (2 decimals).
# The dollar signs in the headers are backslash-escaped: left bare, the text
# between two "$" characters is treated as inline math when the markdown is
# rendered, which garbled the header row.
economic_impact %>%
  mutate(across(where(is.numeric), ~ round(. / 1e9, 2))) %>%
  select(EVTYPE, Total_Prop_Damage, Total_Crop_Damage, Total_Economic_Damage) %>%
  head(5) %>%
  knitr::kable(
    format = "markdown",
    col.names = c(
      "Event Type", "Property (B\\$)", "Crop (B\\$)", "Total (B\\$)"
    )
  )
Event Type Property (B$) Crop (B$) Total (B$)
FLOOD 144.66 5.66 150.32
HURRICANE/TYPHOON 69.31 2.61 71.91
TORNADO 56.95 0.41 57.36
STORM SURGE 43.32 0.00 43.32
HAIL 15.74 3.03 18.76