This report uses the NOAA Storm Database (1950–2011) to determine (1) which event types are most harmful to population health and (2) which event types cause the greatest economic damage. Health impact is measured as the sum of fatalities and injuries; economic impact is measured as the sum of property and crop damage in US dollars. The dataset is loaded from the original data file (`repdata_data_StormData.csv`) and processed entirely inside this document. Results are presented as two top-10 tables and two bar plots.
# Read the raw storm records, keeping text columns as character vectors.
file <- "repdata_data_StormData.csv"
storm <- read.csv(file = file, stringsAsFactors = FALSE)
# Print the table size as (rows, columns).
c(nrow(storm), ncol(storm))
## [1] 902297 37
### 2) Keep only needed columns and clean event names
# Keep the event type plus the casualty and damage fields, then normalise
# event labels by trimming surrounding whitespace and upper-casing them.
storm2 <- storm[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
storm2[["EVTYPE"]] <- toupper(trimws(storm2[["EVTYPE"]]))
# Translate damage exponent codes into numeric multipliers.
#
# x: vector of exponent codes; surrounding whitespace and letter case are
#    ignored.
# Returns a numeric vector the same length as `x`:
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9,
#   a single digit d -> 10^d,
#   anything else (empty, symbols, multi-character, missing) -> 1.
exp_to_mult <- function(x) {
  code <- toupper(trimws(x))

  # Letter codes are resolved through a lookup table; codes that are not in
  # the table come back as NA and fall through to the default multiplier.
  letter_mult <- c(K = 1e3, M = 1e6, B = 1e9, H = 1e2)
  mult <- unname(letter_mult[match(code, names(letter_mult))])
  mult[is.na(mult)] <- 1

  # A lone digit is read as a power of ten.
  is_digit <- grepl("^[0-9]$", code)
  mult[is_digit] <- 10^as.numeric(code[is_digit])

  mult
}
# Scale each damage figure by its exponent code to get dollar amounts, then
# add property and crop damage into one total per record.
storm2[["PROP_USD"]] <- exp_to_mult(storm2[["PROPDMGEXP"]]) * storm2[["PROPDMG"]]
storm2[["CROP_USD"]] <- exp_to_mult(storm2[["CROPDMGEXP"]]) * storm2[["CROPDMG"]]
storm2[["TOTAL_USD"]] <- with(storm2, PROP_USD + CROP_USD)
# Total fatalities and injuries per event type, plus their combined count.
health <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE, data = storm2, FUN = sum)
health$TOTAL_HARM <- health$FATALITIES + health$INJURIES
# Rank event types from most to least harmful and keep the ten worst.
# head() returns at most 10 rows, so a dataset with fewer than 10 event types
# yields only the rows that exist; indexing with `[1:10, ]` would instead pad
# the table with all-NA rows.
health_top <- head(health[order(-health$TOTAL_HARM), ], 10)
health_top
## EVTYPE FATALITIES INJURIES TOTAL_HARM
## 750 TORNADO 5633 91346 96979
## 108 EXCESSIVE HEAT 1903 6525 8428
## 771 TSTM WIND 504 6957 7461
## 146 FLOOD 470 6789 7259
## 410 LIGHTNING 816 5230 6046
## 235 HEAT 937 2100 3037
## 130 FLASH FLOOD 978 1777 2755
## 379 ICE STORM 89 1975 2064
## 677 THUNDERSTORM WIND 133 1488 1621
## 880 WINTER STORM 206 1321 1527
# Enlarge the bottom margin so the vertical event labels fit.
par(mar = c(10, 4, 2, 1))
# Bar chart of combined fatalities and injuries for the top event types;
# las = 2 draws the axis labels perpendicular to the axis.
barplot(
  height = health_top$TOTAL_HARM,
  names.arg = health_top$EVTYPE,
  main = "Top 10 Events Most Harmful to Health",
  ylab = "Fatalities + Injuries",
  las = 2
)
# Total property plus crop damage (USD) per event type.
econ <- aggregate(TOTAL_USD ~ EVTYPE, data = storm2, FUN = sum)
# Rank event types from most to least costly and keep the ten worst.
# head() returns at most 10 rows, so a dataset with fewer than 10 event types
# yields only the rows that exist; indexing with `[1:10, ]` would instead pad
# the table with all-NA rows.
econ_top <- head(econ[order(-econ$TOTAL_USD), ], 10)
econ_top
## EVTYPE TOTAL_USD
## 146 FLOOD 150319678257
## 364 HURRICANE/TYPHOON 71913712800
## 750 TORNADO 57362333947
## 591 STORM SURGE 43323541000
## 204 HAIL 18761221986
## 130 FLASH FLOOD 18244041079
## 76 DROUGHT 15018672000
## 355 HURRICANE 14610229010
## 521 RIVER FLOOD 10148404500
## 379 ICE STORM 8967041360
# Enlarge the bottom margin so the vertical event labels fit.
par(mar = c(10, 4, 2, 1))
# Bar chart of total damage for the top event types, rescaled from dollars
# to billions of dollars; las = 2 draws the labels perpendicular to the axis.
barplot(
  height = econ_top$TOTAL_USD / 1e9,
  names.arg = econ_top$EVTYPE,
  main = "Top 10 Events by Economic Damage",
  ylab = "Total Damage (Billion USD)",
  las = 2
)