Synopsis

The purpose of this analysis is to explore the NOAA Storm Database to answer two questions:

  1. Across the US which types of events are most harmful to population health
  2. Across the US which types of events have the greatest economic consequences

Events in the database start in the year 1950 and end in November of 2011. The database is available on the Coursera Reproducible Research class website, along with additional National Weather Service data documentation and the National Climatic Data Center Storm Events FAQ (see the References section for URLs).

The results are meant to serve as input to Governments and/or Municipalities for preparing for severe weather events.

The results will show that tornadoes have by far the most devastating health effects (fatalities + injuries) on the population. They will also show that floods are the most devastating in terms of property damage and that drought is the most devastating in terms of crop damage.

Data Processing

Load the following libraries

library(dplyr)
library(R.utils)
library(data.table)
library(ggplot2)
library(reshape2)
library(scales)

Download and unzip the datafile and record the download date.

# Fetch the compressed storm database into a temporary file
temp <- tempfile()
download.file(
  url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
  destfile = temp
)
# Decompress to StormData.csv and load the result into a data frame
storm_data <- temp %>%
  bunzip2("StormData.csv", skip = TRUE) %>%
  read.csv()
# Remove the temporary download
unlink(temp)
# Record the download date
date()
## [1] "Sun Mar 11 09:31:00 2018"

Subset the relevant variables, check for NAs, and explore the structure

# Keep only the event type plus the health and damage columns used below
storm <- storm_data %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
# Copy of the subset with every row containing an NA dropped
storm_na <- storm %>% na.omit()
# Inspect column types and the first few values
str(storm)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...

Reduce the size of the dataset for faster processing by keeping only those events where fatalities, injuries, property damage, or crop damage are greater than 0. Events with no recorded harm cannot contribute to the totals we are ranking, so dropping them does not change the results.

# Keep events that recorded at least one fatality, injury, or damage amount
storm_r <- storm %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)
# Number of events remaining after the filter
nrow(storm_r)
## [1] 254633

The resulting data set has 254,633 rows versus 902,297 rows for the original, so it is much smaller and performance improves considerably.

Analysis of the EVTYPE factor shows that a lot of the levels are redundant and messy. The code below cleans up the most significant ones and consolidates them into a smaller set.

# Consolidate redundant / inconsistently spelled EVTYPE values into a smaller
# set of categories.
#
# evtype: factor or character vector of raw event type names.
# Returns a factor of the same length whose levels are only the categories
# that remain after consolidation.
consolidate_evtype <- function(evtype) {
  # Ordered pattern -> replacement rules, applied one after another to the
  # level names. Order matters: "FLOOD/FLASH..." must become "FLASH FLOOD"
  # before the generic "^FLOOD.*" rule runs.
  # Every alternative is anchored with "^" and ends in ".*" so the WHOLE name
  # is replaced; a prefix-only match would leave leftovers such as
  # "COASTAL FLOODING" -> "FLOODING" or "Heat Wave" -> "HEAT Wave".
  # Each pattern is kept on a single line: a line break inside the string
  # literal becomes part of the regex and that alternative can never match.
  rules <- c(
    "^TSTM.*|^THUNDERSTORM.*" = "THUNDERSTORM WIND",
    "^RECORD HEAT.*|^RECORD/HEAT.*|^EXTREME HEAT.*" = "EXCESSIVE HEAT",
    "^FLASH.*|^FLOOD/FLASH.*" = "FLASH FLOOD",
    # Mixed case kept on purpose so the plot label is unchanged
    "^TORN.*" = "Tornado",
    "^FLOOD.*|^Coastal Flood.*|^COASTAL FLOOD.*|^TIDAL.*|^Tidal.*|^URBAN.*" = "FLOOD",
    "^UNSEASONABLY WARM.*|^Heat.*|^HEAT.*" = "HEAT",
    "^ICE.*|^ICY.*|^SLEET.*" = "ICE STORM",
    "^HURRICANE.*|^Hurricane.*|^TYPHOON.*" = "HURRICANE",
    "^HAIL.*" = "HAIL",
    "^DROUGHT.*" = "DROUGHT",
    "^LIGHTNING.*|^LIGNT.*" = "LIGHTNING",
    "^STORM SURGE.*" = "STORM SURGE"
  )

  # read.csv() returns character columns by default since R 4.0; coerce so the
  # level-based clean-up works whether EVTYPE arrives as factor or character.
  evtype <- factor(evtype)
  new_levels <- levels(evtype)
  for (i in seq_along(rules)) {
    new_levels <- gsub(names(rules)[i], rules[[i]], new_levels)
  }
  # Assigning duplicated level names merges the corresponding levels
  levels(evtype) <- new_levels

  # Reduce EVTYPE factor levels to those left after cleaning
  factor(evtype)
}

storm_r$EVTYPE <- consolidate_evtype(storm_r$EVTYPE)

RESULTS

Prepare Health Casualty Data for plotting

#Group and summarize fatalities and injuries by event type
# Per event type: total fatalities, total injuries, and their sum
casualties <- storm_r %>%
  group_by(EVTYPE) %>%
  summarise(
    tot_fatal = sum(FATALITIES, na.rm = TRUE),
    tot_inj = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(tot_cas = tot_fatal + tot_inj) %>%
  data.frame()

# Sort by total casualties (largest first, ties broken by event type) and
# keep the top 8 categories
max_casualties <- arrange(casualties, desc(tot_cas), EVTYPE)[1:8, ]

# Drop factor levels that are no longer present, then reshape to long format
# (one row per event type and casualty kind) for plotting
max_casualties <- max_casualties %>%
  mutate(EVTYPE = factor(EVTYPE)) %>%
  select(EVTYPE, tot_fatal, tot_inj) %>%
  melt(id.vars = "EVTYPE")

Plot max casualties

# Stacked bar chart of fatalities and injuries for the top 8 event types,
# with bars ordered from most to fewest casualties
plot_max_casualties <- ggplot(
  data = max_casualties,
  mapping = aes(x = reorder(EVTYPE, -value), y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_discrete(name = "Type", labels = c("Fatality", "Injury")) +
  labs(
    x = "Event Type",
    y = "Total Casualties",
    title = "Top 8 Causes of Storm Casualties"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )
plot_max_casualties

Figure 1: Plot of Storm Casualties by Event Type

The results show that tornadoes are by far the most devastating events in terms of both injuries and fatalities. Thunderstorms are second in terms of injuries and excessive heat is second in terms of fatalities.

Prepare Economic Consequences Data for Plotting

Analysis of CROPDMG shows Multipliers need to be converted to numbers

#Convert Property and Crop Damage Mutlipliers to numbers
# Convert property and crop damage exponent codes to numeric multipliers.
#
# One lookup table is applied to both columns so that every code is handled
# for both damage types. Assigning the two columns case by case makes it easy
# to test the wrong column or to give a property-only code ("1", "4", "5",
# "7", "8") to the crop multiplier, which leaves those property rows with an
# NA multiplier and silently drops their damage from the totals.
#
# code: factor or character vector of exponent codes.
# Returns a numeric vector of multipliers; codes not in the table (and NA)
# yield NA.
exp_to_multiplier <- function(code) {
  exp_codes <- c(
    "", "?", "-", "+", "0",   # missing / unknown / zero -> 1
    "1",                      # 10
    "2", "h", "H",            # hundreds
    "3", "k", "K",            # thousands
    "4", "5",                 # 10^4, 10^5
    "6", "m", "M",            # millions
    "7", "8",                 # 10^7, 10^8
    "B"                       # billions
  )
  exp_values <- c(
    rep(1, 5),
    10,
    rep(100, 3),
    rep(1000, 3),
    10^4, 10^5,
    rep(10^6, 3),
    10^7, 10^8,
    10^9
  )
  # match() is used instead of name-based indexing because the empty string
  # cannot serve as a lookup name
  exp_values[match(as.character(code), exp_codes)]
}

storm_r$cropexp <- exp_to_multiplier(storm_r$CROPDMGEXP)
storm_r$propexp <- exp_to_multiplier(storm_r$PROPDMGEXP)

Calculate total crop damage and storm damage, summarize by category and add together to get total damage

# Dollar value of each event's crop and property damage
# (reported amount times its multiplier)
storm_r$cropdmg_tot <- storm_r$cropexp * storm_r$CROPDMG
storm_r$propdmg_tot <- storm_r$propexp * storm_r$PROPDMG

# Total property and crop damage per event type
damage <- summarise(
  group_by(storm_r, EVTYPE),
  tot_prop = sum(propdmg_tot, na.rm = TRUE),
  tot_crop = sum(cropdmg_tot, na.rm = TRUE)
)

# Combined damage per event type, as a plain data frame
tot_damage <- data.frame(mutate(damage, tot_dmg = tot_prop + tot_crop))

Arrange data in descending order and choose top 8 categories, melt data for plotting

# Sort by total damage (largest first, ties broken by event type) and keep
# the top 8 categories
max_dmg <- arrange(tot_damage, desc(tot_dmg), EVTYPE)[1:8, ]

# Drop factor levels that are no longer present, then reshape to long format
# (one row per event type and damage kind) for plotting
max_dmg <- max_dmg %>%
  mutate(EVTYPE = factor(EVTYPE)) %>%
  select(EVTYPE, tot_prop, tot_crop) %>%
  melt(id.vars = "EVTYPE")

Plot Max Damage

# Stacked bar chart of property and crop damage (in billions of dollars) for
# the top 8 event types, with bars ordered from most to least damage
plot_max_dmg <- ggplot(
  data = max_dmg,
  mapping = aes(x = reorder(EVTYPE, -value), y = value/10^9, fill = variable)
) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_discrete(name = "Type", labels = c("Property", "Crops")) +
  labs(
    x = "Event Type",
    y = "Total Damages ($B)",
    title = "Top 8 Causes of Storm Damage"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 60, hjust = 1)
  )
plot_max_dmg

Figure 2: Plot of Economic Consequences by Event Type

The results show that floods are the most devastating events in terms of property damage, followed by hurricanes, tornadoes, and storm surge. Drought is the most damaging in terms of crop damage.

References

Storm Data Documentation: https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf

National Climatic Data Center Storm Events FAQ: https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf

Storm Database: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2