Synopsis

This report uses the NOAA Storm Database (1950–2011) to find (1) which event types are most harmful to population health and (2) which event types cause the greatest economic damage. Health impact is measured as the sum of fatalities and injuries. Economic impact is measured as the sum of property and crop damage, converted to US dollars. The dataset is loaded from the raw CSV file (`repdata_data_StormData.csv`) and processed entirely inside this document. Results are shown using two tables and two plots.

Data Processing

1) Load the raw data

# Location of the raw storm data CSV, relative to the working directory.
file <- "repdata_data_StormData.csv"

# Read every column; text columns stay character rather than becoming factors.
storm <- read.csv(file = file, stringsAsFactors = FALSE)

# Rows x columns of the raw table.
dim(storm)
## [1] 902297     37
### 2) Keep only needed columns and clean event names
# Work on a narrow copy holding only the event type, the two health counts,
# and the property/crop damage amounts with their exponent codes.
storm2 <- storm[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]

# Normalise event names: strip surrounding whitespace, then upper-case, so
# that labels differing only in case or padding fall into the same group.
storm2[["EVTYPE"]] <- toupper(trimws(storm2[["EVTYPE"]]))

3) Convert damage values to dollars

# Translate damage exponent codes into numeric multipliers.
#
# x: vector of exponent codes (e.g. "K", "m", "5", ""). Codes are trimmed of
#    surrounding whitespace and compared case-insensitively.
#
# Returns a numeric vector the same length as `x`:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#   a single digit d -> 10^d,
#   anything else (blank, "?", "+", "-", ...) -> 1.
exp_to_mult <- function(x) {
  codes <- toupper(trimws(x))

  # Letter suffixes and the power of ten each one stands for.
  suffix_values <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  mult <- unname(suffix_values[match(codes, names(suffix_values))])

  # A lone digit is read as a power-of-ten exponent.
  single_digit <- grepl("^[0-9]$", codes)
  mult[single_digit] <- 10^as.numeric(codes[single_digit])

  # Whatever is still unmatched leaves the damage amount unscaled.
  mult[is.na(mult)] <- 1
  mult
}

# Scale each recorded damage amount by the multiplier for its exponent code
# to obtain dollar values, then add property and crop damage per record.
storm2[["PROP_USD"]] <-
  storm2[["PROPDMG"]] * exp_to_mult(storm2[["PROPDMGEXP"]])
storm2[["CROP_USD"]] <-
  storm2[["CROPDMG"]] * exp_to_mult(storm2[["CROPDMGEXP"]])
storm2[["TOTAL_USD"]] <- storm2[["PROP_USD"]] + storm2[["CROP_USD"]]

Results

1) Most harmful events to population health (fatalities + injuries)

# Total fatalities and injuries per event type.
health <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = storm2,
  FUN = sum
)

# Combined health impact: every fatality and every injury counts as one.
health$TOTAL_HARM <- health$FATALITIES + health$INJURIES

# Rank event types from most to least harmful and keep at most ten.
# head() is used instead of [1:10, ] so that a table with fewer than ten
# event types is returned as-is rather than padded with all-NA rows.
health_top <- head(health[order(-health$TOTAL_HARM), ], 10)
health_top
##                EVTYPE FATALITIES INJURIES TOTAL_HARM
## 750           TORNADO       5633    91346      96979
## 108    EXCESSIVE HEAT       1903     6525       8428
## 771         TSTM WIND        504     6957       7461
## 146             FLOOD        470     6789       7259
## 410         LIGHTNING        816     5230       6046
## 235              HEAT        937     2100       3037
## 130       FLASH FLOOD        978     1777       2755
## 379         ICE STORM         89     1975       2064
## 677 THUNDERSTORM WIND        133     1488       1621
## 880      WINTER STORM        206     1321       1527
# Enlarge the bottom margin (first element of mar) so the rotated event
# names fit under the bars.
par(mar = c(10, 4, 2, 1))

# One bar per top-ranked event type; las = 2 draws the axis labels
# perpendicular to their axis.
barplot(
  height = health_top[["TOTAL_HARM"]],
  names.arg = health_top[["EVTYPE"]],
  main = "Top 10 Events Most Harmful to Health",
  ylab = "Fatalities + Injuries",
  las = 2
)

2) Events with greatest economic consequences (property + crop damage)

# Total damage in dollars (property + crop) per event type.
econ <- aggregate(TOTAL_USD ~ EVTYPE, data = storm2, FUN = sum)

# Rank event types from most to least costly and keep at most ten.
# head() is used instead of [1:10, ] so that a table with fewer than ten
# event types is returned as-is rather than padded with all-NA rows.
econ_top <- head(econ[order(-econ$TOTAL_USD), ], 10)
econ_top
##                EVTYPE    TOTAL_USD
## 146             FLOOD 150319678257
## 364 HURRICANE/TYPHOON  71913712800
## 750           TORNADO  57362333947
## 591       STORM SURGE  43323541000
## 204              HAIL  18761221986
## 130       FLASH FLOOD  18244041079
## 76            DROUGHT  15018672000
## 355         HURRICANE  14610229010
## 521       RIVER FLOOD  10148404500
## 379         ICE STORM   8967041360
# Enlarge the bottom margin (first element of mar) so the rotated event
# names fit under the bars.
par(mar = c(10, 4, 2, 1))

# Bar heights are rescaled from dollars to billions of dollars to match the
# axis label; las = 2 draws the axis labels perpendicular to their axis.
barplot(
  height = econ_top[["TOTAL_USD"]] / 1e9,
  names.arg = econ_top[["EVTYPE"]],
  main = "Top 10 Events by Economic Damage",
  ylab = "Total Damage (Billion USD)",
  las = 2
)