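This report explores the storm data published by the National Oceanic and Atmospheric Administration (NOAA). It aims to answer two questions: which types of storm events are most harmful to population health (fatalities and injuries), and which types of storm events have the greatest economic consequences (property and crop damage)?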
The results show the 10 storm event types that cause the most fatalities and injuries, and the 10 storm event types that cause the most property and crop damage. All results are presented as bar charts.
# Load packages, then download (once) and read the NOAA storm data.
library("dplyr")  # supplies the %>% pipe used by the aggregation steps

# The archive is cached locally: it is fetched only when no file of that name
# already exists in the working directory.
if (!file.exists("StormData.csv.bz2")) {
  # Use R's default download method instead of forcing the external `curl`
  # binary, which is not installed on every system. mode = "wb" keeps the
  # compressed archive byte-exact on platforms that distinguish text/binary.
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "StormData.csv.bz2",
    mode = "wb"
  )
}

# read.csv() is handed the compressed archive directly; there is no separate
# decompression step.
sd <- read.csv("StormData.csv.bz2")
# Sum fatalities and injuries per event type, then keep the 10 event types
# with the largest totals for each measure.
#
# order(-x) sorts descending while leaving tied totals in their original
# relative order. head(n = 10) is used instead of .[1:10, ] so that a data set
# with fewer than 10 event types yields fewer rows rather than all-NA padding
# rows.
fat_top_10 <- aggregate(FATALITIES ~ EVTYPE, sd, FUN = sum, na.rm = TRUE) %>%
  .[order(-.$FATALITIES), ] %>%
  head(n = 10)

injur_top_10 <- aggregate(INJURIES ~ EVTYPE, sd, FUN = sum, na.rm = TRUE) %>%
  .[order(-.$INJURIES), ] %>%
  head(n = 10)
# Damage amounts are stored as a number (PROPDMG / CROPDMG) plus a separate
# exponent code (PROPDMGEXP / CROPDMGEXP). Each code is translated into a
# numeric multiplier and the amounts are scaled to full values before they are
# summed.

# Keep only the columns needed for the damage analysis. They are selected by
# name rather than by position so that a different column order in the input
# cannot silently pick the wrong fields.
prop <- sd[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Translate a vector of exponent codes into numeric multipliers.
#
# codes: character (or factor) vector of exponent codes.
# Returns a numeric vector of the same length:
#   - letters, matched case-insensitively: H = 1e2, K = 1e3, M = 1e6, B = 1e9
#   - digits "0".."8": 10 raised to that digit
#   - empty string: 1 (amount is taken as-is)
#   - "+", "-", "?": 0 (amount is discarded)
#   - anything else: NA
# One table is used for both property and crop codes, so a code recognised for
# one kind of damage is recognised for the other as well.
exponent_multiplier <- function(codes) {
  lookup <- c(
    "H" = 1e+02, "K" = 1e+03, "M" = 1e+06, "B" = 1e+09,
    "0" = 1, "1" = 10, "2" = 100, "3" = 1000, "4" = 1e+04,
    "5" = 1e+05, "6" = 1e+06, "7" = 1e+07, "8" = 1e+08,
    "+" = 0, "-" = 0, "?" = 0
  )
  codes <- toupper(as.character(codes))
  multiplier <- unname(lookup[codes])
  # An empty string never matches a name in a named vector, so the blank code
  # has to be handled explicitly.
  multiplier[!is.na(codes) & codes == ""] <- 1
  multiplier
}

# Convert property and crop exponent codes to multipliers.
prop$PROPEXP <- exponent_multiplier(prop$PROPDMGEXP)
prop$CROPEXP <- exponent_multiplier(prop$CROPDMGEXP)

# Rows with an unrecognised code get an NA multiplier and therefore an NA
# damage value; report them instead of letting them vanish unnoticed.
unknown_exp <- is.na(prop$PROPEXP) | is.na(prop$CROPEXP)
if (any(unknown_exp)) {
  warning(
    sum(unknown_exp),
    " row(s) have an unrecognised damage exponent code; ",
    "their damage values are NA",
    call. = FALSE
  )
}

# Multiply each damage amount by its multiplier to get the full number.
prop$PROPDMG <- prop$PROPDMG * prop$PROPEXP
prop$CROPDMG <- prop$CROPDMG * prop$CROPEXP
# Sum the scaled property and crop damage per event type, then keep the 10
# costliest event types for each kind of damage.
#
# order(-x) sorts descending while leaving tied totals in their original
# relative order. head(n = 10) is used instead of .[1:10, ] so that fewer than
# 10 event types yields fewer rows rather than all-NA padding rows.

# Top 10 property damage events.
prop_top_10 <- aggregate(PROPDMG ~ EVTYPE, prop, FUN = sum, na.rm = TRUE) %>%
  .[order(-.$PROPDMG), ] %>%
  head(n = 10)

# Top 10 crop damage events.
crop_top_10 <- aggregate(CROPDMG ~ EVTYPE, prop, FUN = sum, na.rm = TRUE) %>%
  .[order(-.$CROPDMG), ] %>%
  head(n = 10)
# Two side-by-side bar charts: total fatalities (left) and total injuries
# (right) for the top 10 event types of each measure.
#
# mar reserves 12 lines below each panel for the event names, which are drawn
# perpendicular to the axis (las = 3); oma reserves 4 lines on the left of the
# whole figure for the shared y-axis label added by title(outer = TRUE).
par(mfrow = c(1, 2), mar = c(12, 2, 3, 2), mgp = c(3, 1, 0), cex = 0.8,
    oma = c(0, 4, 0, 0))
barplot(fat_top_10$FATALITIES, las = 3, names.arg = fat_top_10$EVTYPE,
        main = "Top 10 Deadly Events")
barplot(injur_top_10$INJURIES, las = 3, names.arg = injur_top_10$EVTYPE,
        main = "Top 10 Injuring Events")
title(ylab = "Number of deaths/injuries", outer = TRUE, line = 1, las = 1)
# Two side-by-side bar charts: property damage (left) and crop damage (right)
# for the top 10 event types of each kind, expressed in billions.
#
# mar reserves 12 lines below each panel for the event names, which are drawn
# perpendicular to the axis (las = 3); oma reserves 4 lines on the left of the
# whole figure for the shared y-axis label added by title(outer = TRUE).
par(mfrow = c(1, 2), mar = c(12, 2, 3, 2), mgp = c(3, 1, 0), cex = 0.8,
    oma = c(0, 4, 0, 0))

prop_billions <- prop_top_10$PROPDMG / 1e9
crop_billions <- crop_top_10$CROPDMG / 1e9

barplot(prop_billions, las = 3, names.arg = prop_top_10$EVTYPE,
        main = "Top 10 Property\nDamaging Events")
# The crop axis runs to at least 14 and is extended automatically if any total
# is larger, so the tallest bar always fits inside the axis range.
barplot(crop_billions, las = 3, names.arg = crop_top_10$EVTYPE,
        ylim = c(0, max(14, crop_billions, na.rm = TRUE)),
        main = "Top 10 Crop\nDamaging Events")
title(ylab = "Economic Damage (Billions $)", outer = TRUE, line = 1, las = 1)
Tornadoes cause the most fatalities and injuries.
Floods cause the most property damage, with losses of more than $140 billion (U.S. dollars).
Droughts cause the most crop damage, with losses of about $14 billion (U.S. dollars).