Synopsis

This report investigates the most destructive types of storms with respect to fatalities, injuries and financial damage to both property and crops. Tornadoes are the most dangerous weather type, by a margin of nearly 3:1 over the next highest type when measured by fatalities and by 13:1 when measured by injuries. Heat (regular and excessive), flooding (flash and regular) and thunderstorm winds make up 5 of the top 10 weather events most injurious to public health. Flooding is the most damaging extreme weather type for property and crops, by a factor of 2:1 over the next highest type, hurricanes. Hurricanes, tornadoes and flooding (regular floods, storm surges, flash floods and river floods) make up 7 of the top 10 most damaging weather events.

Source Data

The data for this analysis come from the National Weather Service’s National Climatic Data Center Storm Events database.

Documentation for the database can be found here:

The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.

Data Processing

Read in the data

# Load the raw storm data, downloading the archive first if it is not cached.
#
# No setwd() call is made: every path below is resolved relative to the
# directory the script/report is run from, so the analysis is not tied to one
# machine's folder layout. The workspace is also left untouched (no
# rm(list = ls())), so running this does not destroy the caller's objects.

#Connect to the server and download the file if it does not already exist
if (!file.exists("./data")) {
  dir.create("./data")
}

if (!file.exists("./data/StormData.csv.bz2")) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb" requests a binary transfer; a text-mode transfer can corrupt
  # a compressed archive on Windows.
  download.file(fileUrl, destfile = "./data/StormData.csv.bz2", mode = "wb")

  # Fail loudly if the download did not leave a file behind.
  if (!file.exists("./data/StormData.csv.bz2")) {
    stop("Unable to download StormData.csv.bz2 file")
  }
}

# The bzip2-compressed CSV is passed straight to read.csv().
StormData <- read.csv("./data/StormData.csv.bz2")
StormData$EVTYPE <- as.factor(StormData$EVTYPE) #Cast the event type as a factor.

Questions

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? “Population health” is considered against both the total number of FATALITIES and INJURIES.
  2. Across the United States, which types of events have the greatest economic consequences? Economic consequences are calculated as the sum of PROPDMG * PROPDMGEXP plus CROPDMG * CROPDMGEXP to give a complete damage cost. PROPDMGEXP and CROPDMGEXP can have the values “K”, “M” or “B” to indicate “Thousands”, “Millions” and “Billions”, respectively.
#Get libraries
library(dplyr, quietly = TRUE)
library(reshape2, quietly = TRUE)
library(knitr, quietly = TRUE)

#Scale the PROPDMG AND CROPDMG fields

# Translate a damage exponent code into its numeric multiplier:
# "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, any other value -> 1.
# NOTE(review): the comparison is case-sensitive, so a lowercase code such as
# "k" or "m" falls through to a multiplier of 1 - confirm against the data
# whether such codes occur and should be scaled.
damage_multiplier <- function(exp_code) {
  if_else(exp_code == "K", 1E3,
          if_else(exp_code == "M", 1E6,
                  if_else(exp_code == "B", 1E9, 1)))
}

# Dollar value of property and crop damage for every event.
StormData <- transform(StormData,
                       PROPDMG_VAL = PROPDMG * damage_multiplier(PROPDMGEXP),
                       CROPDMG_VAL = CROPDMG * damage_multiplier(CROPDMGEXP))

#Add up the damage value to get a total value
StormData <- transform(StormData, DAMAGE_VAL = PROPDMG_VAL + CROPDMG_VAL)

# Zeros are treated as "nothing recorded": each 0 in the casualty and damage
# columns becomes NA so that sums and means count only events with a value.
zero_to_na <- function(values) {
  if_else(values == 0, NA_real_, values)
}

StormData <- transform(
  StormData,
  FATALITIES = zero_to_na(FATALITIES),
  INJURIES = zero_to_na(INJURIES),
  PROPDMG_VAL = zero_to_na(PROPDMG_VAL),
  CROPDMG_VAL = zero_to_na(CROPDMG_VAL),
  DAMAGE_VAL = zero_to_na(DAMAGE_VAL)
)


# Total fatalities, injuries and damage values per event type.
# na.rm = TRUE (spelled out, since T is an ordinary variable that can be
# reassigned) makes each sum skip NA entries.
by_event <- StormData %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES_TOT = sum(FATALITIES, na.rm = TRUE),
    INJURIES_TOT = sum(INJURIES, na.rm = TRUE),
    PROPDMG_VAL_TOT = sum(PROPDMG_VAL, na.rm = TRUE),
    CROPDMG_VAL_TOT = sum(CROPDMG_VAL, na.rm = TRUE),
    DAMAGE_VAL_TOT = sum(DAMAGE_VAL, na.rm = TRUE)
  )

#Get the top event types by fatalities, injuries and total damage
how_many <- 10

# Row indices of the largest entries of `totals`, biggest first, limited to at
# most `n` rows. head() truncates safely, so having fewer than `n` event types
# (or n = 0) cannot produce NA or out-of-range rows the way [1:n] would.
top_rows <- function(totals, n = how_many) {
  head(order(-totals), n)
}

top_by_fatalities <- by_event[top_rows(by_event$FATALITIES_TOT),
                              c("EVTYPE", "FATALITIES_TOT")]
top_by_injuries <- by_event[top_rows(by_event$INJURIES_TOT),
                            c("EVTYPE", "INJURIES_TOT")]
top_by_damage <- by_event[top_rows(by_event$DAMAGE_VAL_TOT),
                          c("EVTYPE", "DAMAGE_VAL_TOT", "PROPDMG_VAL_TOT", "CROPDMG_VAL_TOT")]

Results

Fatalities and Injuries

The top 10 most harmful weather events over all time by fatalities and injuries are:

# Two panels side by side; the enlarged bottom margin leaves room for the
# vertical (las = 2) event-type labels.
par(mfrow = c(1, 2), mar = c(11, 4, 4, 2) + 0.1)
with(
  top_by_fatalities,
  barplot(FATALITIES_TOT, names.arg = EVTYPE, las = 2, main = "Fatalities")
)
with(
  top_by_injuries,
  barplot(INJURIES_TOT, names.arg = EVTYPE, las = 2, main = "Injuries")
)

Fatality and Injury Details

# Fatalities table with reader-friendly column headers.
top_by_fatalities %>%
  rename(Event.Type = EVTYPE, Fatalities.Total = FATALITIES_TOT) %>%
  kable()
Event.Type Fatalities.Total
TORNADO 5633
EXCESSIVE HEAT 1903
FLASH FLOOD 978
HEAT 937
LIGHTNING 816
TSTM WIND 504
FLOOD 470
RIP CURRENT 368
HIGH WIND 248
AVALANCHE 224
# Injuries table with reader-friendly column headers.
top_by_injuries %>%
  rename(Event.Type = EVTYPE, Injuries.Total = INJURIES_TOT) %>%
  kable()
Event.Type Injuries.Total
TORNADO 91346
TSTM WIND 6957
FLOOD 6789
EXCESSIVE HEAT 6525
LIGHTNING 5230
HEAT 2100
ICE STORM 1975
FLASH FLOOD 1777
THUNDERSTORM WIND 1488
HAIL 1361

Table Caption: Top 10 Fatalities and Injuries by Event Type

Financial Impact

The top 10 most harmful weather events over all time by total damage are:

#Simple plot of the total damage
#barplot(top_by_damage$DAMAGE_VAL_TOT, names.arg = top_by_damage$EVTYPE, las=2, main="Total Damage")

#Making a stacked bar chart of the values
#Pivot using reshape2 library and then scale down by $Billion
# Columns are selected by name and id.vars / value.var are given explicitly,
# so reshape2 does not have to guess them (and no "Using ... as id variables"
# message is emitted).
x <- melt(top_by_damage[, c("EVTYPE", "PROPDMG_VAL_TOT", "CROPDMG_VAL_TOT")],
          id.vars = "EVTYPE")
x$value <- x$value / 1E9 #Scale to Billions
# One row per damage kind (property, crops), one column per event type.
plot_data <- dcast(x, variable ~ EVTYPE, value.var = "value")
row.names(plot_data) <- plot_data$variable
plot_data$variable <- NULL

plot_data <- plot_data[, as.character(top_by_damage$EVTYPE)] #Rearrange the columns to order by total descending
par(mfrow = c(1, 1), mar = c(11, 4, 4, 2) + 0.1)
# The default axis is suppressed so a custom "$<n>B" axis can be drawn below.
barplot(as.matrix(plot_data),
        main = "Economic Damage by Event Type",
        ylab = "Dollars",
        las = 2,
        col = c("red", "green"),
        axes = FALSE)
y_lab_val <- pretty(top_by_damage$DAMAGE_VAL_TOT / 1E9)
axis(2, at = y_lab_val, labels = paste0("$", y_lab_val, "B"))
legend("topright", legend = c("Property", "Crops"), fill = c("red", "green"))

Financial Impact Details

# Damage table with reader-friendly headers and thousands separators.
top_by_damage[, 1:4] %>%
  rename(
    Event.Type = EVTYPE,
    Damage.Total = DAMAGE_VAL_TOT,
    Property.Damage = PROPDMG_VAL_TOT,
    Crop.Damage = CROPDMG_VAL_TOT
  ) %>%
  kable(format.args = list(big.mark = ','))
Event.Type Damage.Total Property.Damage Crop.Damage
FLOOD 150,319,678,257 144,657,709,807 5,661,968,450
HURRICANE/TYPHOON 71,913,712,800 69,305,840,000 2,607,872,800
TORNADO 57,340,614,060 56,925,660,790 414,953,270
STORM SURGE 43,323,541,000 43,323,536,000 5,000
HAIL 18,752,904,943 15,727,367,053 3,025,537,890
FLASH FLOOD 17,562,129,167 16,140,812,067 1,421,317,100
DROUGHT 15,018,672,000 1,046,106,000 13,972,566,000
HURRICANE 14,610,229,010 11,868,319,010 2,741,910,000
RIVER FLOOD 10,148,404,500 5,118,945,500 5,029,459,000
ICE STORM 8,967,041,360 3,944,927,860 5,022,113,500
# Same damage table with the original column names, thousands separators added.
top_by_damage[, 1:4] %>%
  kable(format.args = list(big.mark = ','))
EVTYPE DAMAGE_VAL_TOT PROPDMG_VAL_TOT CROPDMG_VAL_TOT
FLOOD 150,319,678,257 144,657,709,807 5,661,968,450
HURRICANE/TYPHOON 71,913,712,800 69,305,840,000 2,607,872,800
TORNADO 57,340,614,060 56,925,660,790 414,953,270
STORM SURGE 43,323,541,000 43,323,536,000 5,000
HAIL 18,752,904,943 15,727,367,053 3,025,537,890
FLASH FLOOD 17,562,129,167 16,140,812,067 1,421,317,100
DROUGHT 15,018,672,000 1,046,106,000 13,972,566,000
HURRICANE 14,610,229,010 11,868,319,010 2,741,910,000
RIVER FLOOD 10,148,404,500 5,118,945,500 5,029,459,000
ICE STORM 8,967,041,360 3,944,927,860 5,022,113,500

Table Caption: Top 10 Financial Damage by Event Type