Synopsis

This analysis explores the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database to identify which types of severe weather events are most harmful to population health and which have the greatest economic consequences across the United States. The data spans from 1950 to November 2011. Population health impact is measured by aggregating fatalities and injuries by event type. Economic impact is measured by aggregating property damage and crop damage values, accounting for the magnitude codes (K, M, B) attached to each damage estimate. The results show that tornadoes are by far the most harmful event type for population health, responsible for the greatest number of fatalities and injuries combined. For economic consequences, floods cause the highest total property and crop damage. These findings suggest that emergency preparedness resources should prioritize tornado response for human safety and flood mitigation for economic protection.


Data Processing

Loading the Data

The raw data file is loaded directly from the CSV file. No preprocessing is done outside this document.

# Read the raw storm CSV; text columns stay as character vectors, not factors.
data <- read.csv(
  file             = "repdata_data_StormData1.csv",
  stringsAsFactors = FALSE
)
# Number of rows and columns in the loaded table.
dim(data)
## [1] 902297     37
# Column names, used below to pick the event, casualty and damage fields.
names(data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Processing Health Data

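We aggregate total fatalities and injuries by event type, using the EVTYPE labels exactly as recorded. Near-duplicate labels (for example TSTM WIND and THUNDERSTORM WIND, or HEAT and EXCESSIVE HEAT) are therefore counted as separate event types.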

# Sum fatalities and injuries separately for every distinct EVTYPE label.
fatalities_by_event <- with(data, tapply(FATALITIES, EVTYPE, sum, na.rm = TRUE))
injuries_by_event   <- with(data, tapply(INJURIES, EVTYPE, sum, na.rm = TRUE))

# One row per event type; both tapply() results are indexed by the same EVTYPE
# grouping, so their elements line up position by position.
health_df <- data.frame(
  EVTYPE     = names(fatalities_by_event),
  FATALITIES = as.numeric(fatalities_by_event),
  INJURIES   = as.numeric(injuries_by_event)
)

# Combined casualties per event type.
health_df[["TOTAL"]] <- health_df[["FATALITIES"]] + health_df[["INJURIES"]]

# Rank from most to fewest casualties and keep the ten highest-ranked rows.
health_df <- health_df[order(health_df[["TOTAL"]], decreasing = TRUE), ]
top10_health <- head(health_df, n = 10L)

# Replace the original row positions with 1..n so the printed table shows ranks.
row.names(top10_health) <- NULL
print(top10_health)
##               EVTYPE FATALITIES INJURIES TOTAL
## 1            TORNADO       5633    91346 96979
## 2     EXCESSIVE HEAT       1903     6525  8428
## 3          TSTM WIND        504     6957  7461
## 4              FLOOD        470     6789  7259
## 5          LIGHTNING        816     5230  6046
## 6               HEAT        937     2100  3037
## 7        FLASH FLOOD        978     1777  2755
## 8          ICE STORM         89     1975  2064
## 9  THUNDERSTORM WIND        133     1488  1621
## 10      WINTER STORM        206     1321  1527

Processing Economic Data

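The damage columns (PROPDMG, CROPDMG) hold a base amount, and the companion columns PROPDMGEXP and CROPDMGEXP hold a magnitude code for it. We convert each code to a numeric multiplier with a vectorized lookup: the letters H, K, M and B stand for hundreds, thousands, millions and billions (case-insensitive), numeric codes such as 2–7 are treated as powers of ten, and blank or unrecognized codes leave the amount unscaled (multiplier 1). The dollar value of each record is the base amount times this multiplier.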

# Vectorized multiplier function
#
# Translate damage magnitude codes into numeric multipliers.
#
# Args:
#   exp_vec: vector of magnitude codes (character, factor, or anything
#            as.character() accepts). Case and surrounding whitespace are
#            ignored.
#
# Returns:
#   A numeric vector the same length as exp_vec:
#     "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#     a single digit d ("0".."9") -> 10^d,
#     any other non-missing code (including "") -> 1,
#     NA -> NA.
get_multiplier <- function(exp_vec) {
  codes <- as.character(exp_vec)
  # Remember missing codes up front so they stay NA rather than becoming 1.
  missing_code <- is.na(codes)
  codes <- toupper(trimws(codes))

  # Single lookup table. Every digit maps to its power of ten, so "1", "8"
  # and "9" are scaled consistently with "2".."7" instead of falling through
  # to the default multiplier.
  multipliers <- c(
    H = 1e2, K = 1e3, M = 1e6, B = 1e9,
    "0" = 1,   "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9
  )

  mult <- unname(multipliers[match(codes, names(multipliers))])
  # Codes not in the table leave the damage amount unscaled.
  mult[is.na(mult)] <- 1
  mult[missing_code] <- NA_real_
  mult
}

# Dollar value of each record: base amount scaled by its magnitude code.
data$PROP_DAMAGE  <- get_multiplier(data$PROPDMGEXP) * data$PROPDMG
data$CROP_DAMAGE  <- get_multiplier(data$CROPDMGEXP) * data$CROPDMG
data$TOTAL_DAMAGE <- with(data, PROP_DAMAGE + CROP_DAMAGE)

# Per-event-type sums of property, crop and combined damage.
prop_by_event  <- with(data, tapply(PROP_DAMAGE, EVTYPE, sum, na.rm = TRUE))
crop_by_event  <- with(data, tapply(CROP_DAMAGE, EVTYPE, sum, na.rm = TRUE))
total_by_event <- with(data, tapply(TOTAL_DAMAGE, EVTYPE, sum, na.rm = TRUE))

# One row per event type; all three tapply() results are indexed by the same
# EVTYPE grouping, so their elements line up position by position.
econ_df <- data.frame(
  EVTYPE       = names(total_by_event),
  PROP_DAMAGE  = as.numeric(prop_by_event),
  CROP_DAMAGE  = as.numeric(crop_by_event),
  TOTAL_DAMAGE = as.numeric(total_by_event)
)

# Rank from most to least combined damage and keep the ten highest-ranked rows.
econ_df <- econ_df[order(econ_df[["TOTAL_DAMAGE"]], decreasing = TRUE), ]
top10_econ <- head(econ_df, n = 10L)
row.names(top10_econ) <- NULL

# Express the damage figures in billions of dollars, rounded to two decimals.
top10_econ[["PROP_B"]]  <- round(top10_econ[["PROP_DAMAGE"]]  / 1e9, digits = 2)
top10_econ[["CROP_B"]]  <- round(top10_econ[["CROP_DAMAGE"]]  / 1e9, digits = 2)
top10_econ[["TOTAL_B"]] <- round(top10_econ[["TOTAL_DAMAGE"]] / 1e9, digits = 2)

# Show only the label and the billions columns.
print(top10_econ[c("EVTYPE", "PROP_B", "CROP_B", "TOTAL_B")])
##               EVTYPE PROP_B CROP_B TOTAL_B
## 1              FLOOD 144.66   5.66  150.32
## 2  HURRICANE/TYPHOON  69.31   2.61   71.91
## 3            TORNADO  56.95   0.41   57.36
## 4        STORM SURGE  43.32   0.00   43.32
## 5               HAIL  15.74   3.03   18.76
## 6        FLASH FLOOD  16.82   1.42   18.24
## 7            DROUGHT   1.05  13.97   15.02
## 8          HURRICANE  11.87   2.74   14.61
## 9        RIVER FLOOD   5.12   5.03   10.15
## 10         ICE STORM   3.94   5.02    8.97

Results

Question 1: Which event types are most harmful to population health?

The chart below shows the top 10 weather event types by total casualties (fatalities + injuries) from 1950 to 2011.

# Widen the left margin so the long event-type labels fit beside the bars.
par(mar = c(5, 11, 4, 2))

# Accent colour for the top-ranked event, grey for the rest. The grey count is
# derived from the table so the accent stays on the first-ranked bar even if
# fewer than ten event types are present.
colors_health <- c(
  "#D85A30",
  rep("#B4B2A9", max(nrow(top10_health) - 1, 0))
)

# Values, labels and colours are all reversed together so that each bar keeps
# its own label and colour; barplot() returns the bar midpoints, kept in `bp`
# for positioning the value labels.
bp <- barplot(
  rev(top10_health$TOTAL),
  names.arg = rev(top10_health$EVTYPE),
  horiz     = TRUE,
  las       = 1,
  col       = rev(colors_health),
  main      = "Top 10 Weather Events by Population Health Impact",
  xlab      = "Total Casualties (Fatalities + Injuries)",
  cex.names = 0.8,
  cex.axis  = 0.85,
  # Extra room past the longest bar for its value label.
  xlim      = c(0, max(top10_health$TOTAL) * 1.15)
)

# Value label just past the end of each bar. trim = TRUE stops format() from
# left-padding the shorter numbers to a common width, which would otherwise
# push those labels away from their bars.
text(
  x      = rev(top10_health$TOTAL) + max(top10_health$TOTAL) * 0.01,
  y      = bp,
  labels = format(rev(top10_health$TOTAL), big.mark = ",", trim = TRUE),
  cex    = 0.7,
  adj    = 0
)
Figure 1: Top 10 weather event types by total population health impact (fatalities + injuries), 1950-2011.

Tornadoes are the most harmful event type for population health, with 96,979 total casualties (5,633 fatalities and 91,346 injuries) — about 11.5 times as many as the second-ranked event type, EXCESSIVE HEAT, with 8,428.


Question 2: Which event types have the greatest economic consequences?

The chart below shows the top 10 event types by total economic damage (property + crop) in billions of USD.

# Widen the left margin so the long event-type labels fit beside the bars.
par(mar = c(5, 12, 4, 2))

# Two-row matrix: property damage in row 1, crop damage in row 2, one column
# per event type. barplot() stacks the rows within each bar.
damage_matrix <- with(top10_econ, rbind(rev(PROP_B), rev(CROP_B)))

# Stacked horizontal bars; the returned bar midpoints are kept in `bp2` for
# positioning the value labels.
bp2 <- barplot(
  height      = damage_matrix,
  horiz       = TRUE,
  names.arg   = rev(top10_econ[["EVTYPE"]]),
  las         = 1,
  cex.names   = 0.8,
  cex.axis    = 0.85,
  col         = c("#185FA5", "#3B6D11"),
  legend.text = c("Property Damage", "Crop Damage"),
  args.legend = list(x = "bottomright", cex = 0.8),
  main        = "Top 10 Weather Events by Economic Damage",
  xlab        = "Total Damage (Billions USD)",
  # Extra room past the longest bar for its value label.
  xlim        = c(0, max(top10_econ[["TOTAL_B"]]) * 1.2)
)

# Total-damage label just past the end of each stacked bar.
text(
  x      = rev(top10_econ[["TOTAL_B"]]) + max(top10_econ[["TOTAL_B"]]) * 0.01,
  y      = bp2,
  labels = paste0("$", rev(top10_econ[["TOTAL_B"]]), "B"),
  adj    = 0,
  cex    = 0.7
)
Figure 2: Top 10 weather event types by total economic damage in billions USD, 1950-2011.

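Floods caused the most total economic damage, at approximately $150.32 billion, followed by hurricanes/typhoons at $71.91 billion and tornadoes at $57.36 billion. Drought stands out as the leading cause of crop damage specifically ($13.97 billion), even though its property damage is comparatively small.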