Synopsis

This report explores the storm data published by the National Oceanic and Atmospheric Administration (NOAA). This report aims to answer two questions:

  1. Across the United States, which types of events are most harmful with respect to population health? And,
  2. Across the United States, which types of events have the greatest economic consequences?

The results show the 10 storm event types that cause the most fatalities and injuries, and the 10 storm event types that cause the most property and crop damage. All results are presented as bar charts.

Data Processing

#Load packages. Download and load data into R

library("dplyr")

# Fetch the compressed storm data once and load it into the data frame `sd`.
# The archive is only downloaded when it is not already in the working directory.
storm_file <- "StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!file.exists(storm_file)) {
        # download.file() returns 0 on success; with method = "curl" a non-zero
        # value is the exit status of the external curl program.
        status <- download.file(storm_url, storm_file, method = "curl")
        if (status != 0) {
                # Remove any partial file so the next run retries the download
                # instead of trying to read a truncated archive.
                unlink(storm_file)
                stop("Download of ", storm_file, " failed (status ", status, ")",
                     call. = FALSE)
        }
}

# read.csv() reads the bzip2-compressed file directly.
sd <- read.csv(storm_file)

Which types of event are most harmful with respect to population health?

#Aggregate fatalities and injuries by event type, then pick the 10 event types with the most deaths and the 10 with the most injuries.

# Total fatalities per event type, sorted from most to fewest, keeping the
# ten largest. head() is used instead of indexing rows 1:10 so that a data set
# with fewer than ten event types does not produce padding rows of NA.
fat_by_type <- aggregate(FATALITIES ~ EVTYPE, data = sd, FUN = sum, na.rm = TRUE)
fat_top_10 <- head(fat_by_type[order(-fat_by_type$FATALITIES), ], 10)

# Same computation for injuries.
injur_by_type <- aggregate(INJURIES ~ EVTYPE, data = sd, FUN = sum, na.rm = TRUE)
injur_top_10 <- head(injur_by_type[order(-injur_by_type$INJURIES), ], 10)

Which types of events have the greatest economic consequences?

# Damage amounts are stored as a value plus a separate exponent code (e.g. "K", "M", "B"), so each code must be converted to a numeric multiplier before the totals can be calculated.

# Build `prop`: one row per storm record with the event type and the property
# and crop damage expressed as full dollar amounts.

# Select the needed columns by name rather than by position, so the result does
# not depend on the column order of the input file.
prop <- sd[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Lookup table from exponent code to numeric multiplier.
#   - "" and "0" mean the amount is taken as-is (multiplier 1)
#   - digits "1".."8" are powers of ten
#   - "h"/"H" = hundred, "k"/"K" = thousand, "m"/"M" = million, "B" = billion
#   - "+", "-" and "?" are mapped to 0, so those records contribute no damage
# The same table is applied to both the property and the crop codes, so a code
# recognised for one column is recognised for the other.
exp_codes <- c("", "0", "1", "2", "3", "4", "5", "6", "7", "8",
               "h", "H", "k", "K", "m", "M", "B",
               "+", "-", "?")
exp_mult <- c(1, 1, 10, 100, 1000, 1e+04, 1e+05, 1e+06, 1e+07, 1e+08,
              100, 100, 1000, 1000, 1e+06, 1e+06, 1e+09,
              0, 0, 0)

# match() is used instead of indexing a named vector because the empty string
# is a valid code here and empty names never match in name-based indexing.
# Codes that are not in the table become NA.
exp_multiplier <- function(codes) {
        exp_mult[match(as.character(codes), exp_codes)]
}

# Convert property and crop exponent codes to multipliers.
prop$PROPEXP <- exp_multiplier(prop$PROPDMGEXP)
prop$CROPEXP <- exp_multiplier(prop$CROPDMGEXP)

# Multiply each damage amount by its multiplier to get the full dollar value.
prop$PROPDMG <- prop$PROPDMG * prop$PROPEXP
prop$CROPDMG <- prop$CROPDMG * prop$CROPEXP

# Top 10 property damage events: total property damage per event type, sorted
# from largest to smallest. head() keeps at most ten rows and, unlike indexing
# rows 1:10, does not add NA rows when fewer than ten event types exist.
prop_by_type <- aggregate(PROPDMG ~ EVTYPE, data = prop, FUN = sum, na.rm = TRUE)
prop_top_10 <- head(prop_by_type[order(-prop_by_type$PROPDMG), ], 10)

# Top 10 crop damage events, computed the same way.
crop_by_type <- aggregate(CROPDMG ~ EVTYPE, data = prop, FUN = sum, na.rm = TRUE)
crop_top_10 <- head(crop_by_type[order(-crop_by_type$CROPDMG), ], 10)

Results

Fatalities and Injuries

# Two side-by-side bar charts: top 10 event types by fatalities (left) and by
# injuries (right). The large bottom margin leaves room for the vertical event
# type labels; the left outer margin holds the shared y-axis label.
# par() returns the previous settings, which are restored after plotting so
# later figures are not affected.
old_par <- par(mfrow = c(1, 2), mar = c(12, 2, 3, 2), mgp = c(3, 1, 0), cex = 0.8, oma = c(0, 4, 0, 0))
barplot(fat_top_10$FATALITIES, las = 3, names.arg = fat_top_10$EVTYPE, main = "Top 10 Deadly Events")
barplot(injur_top_10$INJURIES, las = 3, names.arg = injur_top_10$EVTYPE, main = "Top 10 Injuring Events")
# Shared y-axis label drawn in the outer margin.
title(ylab = "Number of deaths/injuries", outer = TRUE, line = 1, las = 1)
par(old_par)

Property and crop damage

# Two side-by-side bar charts: top 10 event types by property damage (left) and
# by crop damage (right). Amounts are divided by 10^9 so the axes read in
# billions of dollars. The two panels use different y scales.
# par() returns the previous settings, which are restored after plotting so
# later figures are not affected.
old_par <- par(mfrow = c(1, 2), mar = c(12, 2, 3, 2), mgp = c(3, 1, 0), cex = 0.8, oma = c(0, 4, 0, 0))
barplot(prop_top_10$PROPDMG / (10^9), las = 3, names.arg = prop_top_10$EVTYPE, main = "Top 10 Property\nDamaging Events")
barplot(crop_top_10$CROPDMG / (10^9), las = 3, names.arg = crop_top_10$EVTYPE, ylim = c(0, 14), main = "Top 10 Crop\nDamaging Events")
# Shared y-axis label drawn in the outer margin.
title(ylab = "Economic Damage (Billions $)", outer = TRUE, line = 1, las = 1)
par(old_par)

Findings

Tornadoes cause the most fatalities and the most injuries.

Floods cause the most property damage, measured in U.S. dollars lost (more than $140 billion).

Droughts cause the most crop damage, measured in U.S. dollars lost (about $14 billion).