This analysis of the NOAA Storm database evaluates the types of events that are most harmfull to health (fatalities and injuries) as well as have the largets economic impact. Tornados are the leading cause of death and injuries while temperature extremes, flooding, and ligntning are the next largest contributors to both. Property damage is primarly done by Flooding and Hurricanes while Crop Damage is inflicted by Drought and its partner in irony … flooding.
The NOAA database includes the field “EVTYPE” which classifies the storms into various categories. The database is downloaded, unzipped, and read in the function “ReadDataWeb”.
These categories in “EVTYPE” are not classifeid well, for example some hurricanes are classified simply as “HURRICANE” while others are classified by their name. This analysis groups them together using the function CombineEVTYPE. For example, if “EVTYPE” field with the words “Thunder”, “Tunder”, and “TSTM” were all reclassified “Thunder Storm”. This analysis creates Pareto of Fatalities and Injuries by Storm Type to access the events that are most harmful to public health.
The column “PROPDMG” contains the USD numerical value for damage while “PROPDMGEXP” has a letter code for the units (seriously). Replacing “K” with 1000, “M” with 1000000, etc. then multiplying PROPDMG times PROPDMGEXP provides a calculated cost of each event. The same was done for crop damage fields. The analysis of damage by type results in a Pareto for both.
These support functions do the bulk of the heaving lifting. Note that
knitr::opts_chunk$set(echo = TRUE, cache=TRUE)
##Read the data from the web
ReadDataWeb <- function()
{
##from https://stackoverflow.com/questions/38676283/extract-bz2-file-and-open-ncdf-using-r
library(ncdf4)
library(R.utils)
##download file
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
bzfil <- basename(URL)
if (!file.exists(bzfil)) download.file(URL, bzfil)
##Read file
fil <- bunzip2(bzfil, overwrite=TRUE, remove=FALSE)
userdata <- read.csv(fil)
userdata
}
##Read the data local. This is only used for debugging and testing. The course requried read from web is ##above.
ReadLocal <- function()
{
StormData <- read.csv("StormData.csv")
StormData
}
ReadData <- function()
{
ReadLocal()
}
##The data from National Weather Service in the field EVTYPE has the type of weather event
##This data has incorrectly spelled words (TUNDER instead of THUNDER) as well as poor grouping
##such as "HURRICANE" vs "TYPHOON" or even "HURRICANE <name>"
##This renames the EVTYPE to include better descriptors
CombineEVTYPE <- function(SData)
{
SData$EVTYPE <- as.character( SData$EVTYPE)
SData$EVTYPE <- gsub(".*HURRICANE.*", "Hurricane", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*TYPHOON.*", "Hurricane", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*HEAVY RAIN/SEVERE WEATHER.*", "Hurricane", SData$EVTYPE, ignore.case = TRUE) ##hurricane Erin
SData$EVTYPE <- gsub(".*REMNANTS OF FLOYD.*", "Hurricane", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Tropical Storm.*", "Hurricane", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*THUNDER.*", "Thunder Storm", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*TUNDER.*", "Thunder Storm", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*TSTM.*", "Thunder Storm", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*LIGHTNING.*", "Lightning", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*LIGHTING.*", "Lightning", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*LIGNTNING.*", "Lightning", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*TORNADO.*", "Tornado", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Funnel.*", "Tornado", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Water Spout.*", "Tornado", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*WATERSPOUT.*", "Tornado", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Flood.*", "Flood", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Fire.*", "Fire", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Volcan.*", "Volcanic Activity", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Snow.*", "Show Storm", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Wind.*", "Wind", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Hail.*", "Hail/Sleet", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Sleet.*", "Hail/Sleet", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Heat.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Hot.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Record High.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Record Temperatures.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Record.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Cold.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Record Low.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Cool.*", "Temp Extreme", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Microburst.*", "Microburst", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Dry.*", "Dry/Drought", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Ice.*", "Ice Related", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*MUD.*", "Mud Slides", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Summary.*", "Summary", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Storm Surge.*", "Storm Surge", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Winter Storm.*", "Winter Storm", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Drought.*", "Drought", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Freeze.*", "Frost/Freeze", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Frost.*", "Frost/Freeze", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Rain.*", "Rain", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Rip.*", "Rip Currents", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- gsub(".*Avalanche.*", "Avalanche", SData$EVTYPE, ignore.case = TRUE)
SData$EVTYPE <- factor(SData$EVTYPE)
SData
}
##Calculates the total deaths by event type
PrepDeathData <- function(SData)
{
##column 23 is the fatalities, get a sum of all of them by type
Fatal <- aggregate(SData[,23], list(SData$EVTYPE), sum)
colnames(Fatal) <- c("evtype","fatalities")
Fatal <- Fatal[order(-Fatal$fatalities),] ##rank order for Pareto
Fatal <- Fatal[1:10,] ##The majority of fatalities are in first 10 bars
Fatal$fatalities <-Fatal$fatalities /1000
##column 24 is the injuries, get a sum of all of them by type
Injure <- aggregate(SData[,24], list(SData$EVTYPE), sum)
colnames(Injure) <- c("evtype","injuries")
Injure <- Injure[order(-Injure$injuries),] ##rank order for Pareto
Injure <- Injure[1:10,] ##The majority of fatalities are in first 10 bars
Injure$injuries <- Injure$injuries/1000
par(mfrow = c(1,2), mar = c(7, 4, 2, 2) + 0.2) #add room for the rotated labels
##Create Pareto of damage
barplot(Fatal$fatalities, names.arg = Fatal$evtype, las=2, ylab = "Number of Fatalities (Thousands)", main = "Fatalities by Storm Type")
barplot(Injure$injuries, names.arg = Injure$evtype, las=2, ylab = "Number of Injuries (Thousands)", main = "Injuries by Storm Type")
Fatal
}
##Calcualtes the property damage by event type
PrepPropDamage <- function(SData)
{
##The only value levels for property damage are K, M, and B remove everything else including blanks
returndata <- subset(SData, SData$PROPDMGEXP %in% c("K","m","M","B"))
##make PROPDMGEXP not be a factor and then replace the text values of K,M,B with their numeric equivalents
returndata$PROPDMGEXP <- as.character(returndata$PROPDMGEXP)
returndata$PROPDMGEXP[returndata$PROPDMGEXP == "M"] <- 1000000
returndata$PROPDMGEXP[returndata$PROPDMGEXP == "m"] <- 1000000
returndata$PROPDMGEXP[returndata$PROPDMGEXP == "B"] <- 1000000000
returndata$PROPDMGEXP[returndata$PROPDMGEXP == "K"] <- 1000
returndata$PROPDMGEXP[returndata$PROPDMGEXP == "H"] <- 100
returndata$PROPDMGEXP <- as.numeric(returndata$PROPDMGEXP)
##calculate the total damage as the product the two fields
returndata$PropDamageTot <- returndata$PROPDMGEXP * returndata$PROPDMG
PropDamage <- aggregate(returndata[,38], list(returndata$EVTYPE), sum)
colnames(PropDamage) <- c("evtype","total.damage")
PropDamage <- PropDamage[order(-PropDamage$total.damage),]
PropDamage <- PropDamage[1:10,]
PropDamage$total.damage <- PropDamage$total.damage/1000000000 ##make graph in Billions
par(mar = c(8, 4, 2, 2) + 0.2) #add room for the rotated labels
##Create Pareto of damage
barplot(PropDamage$total.damage, names.arg = PropDamage$evtype, las=2, ylab = "Damage (Billions)", main = "Damage to Property by Event Type")
PropDamage
}
##Calculate the crop damage by event type
PrepCropDamage <- function(SData)
{
##The only value levels for property damage are K, M, and B remove everything else including blanks
returndata <- subset(SData, SData$CROPDMGEXP %in% c("K","m","M","B"))
##make CROPDMGEXP not be a factor and then replace the text values of K,M,B with their numeric equivalents
returndata$CROPDMGEXP <- as.character(returndata$CROPDMGEXP)
returndata$CROPDMGEXP[returndata$CROPDMGEXP == "M"] <- 1000000
returndata$CROPDMGEXP[returndata$CROPDMGEXP == "m"] <- 1000000
returndata$CROPDMGEXP[returndata$CROPDMGEXP == "B"] <- 1000000000
returndata$CROPDMGEXP[returndata$CROPDMGEXP == "K"] <- 1000
returndata$CROPDMGEXP <- as.numeric(returndata$CROPDMGEXP)
##calculate the total damage as the product the two fields
returndata$CropDamageTot <- returndata$CROPDMGEXP * returndata$CROPDMG
CropDamage <- aggregate(returndata[,38], list(returndata$EVTYPE), sum)
colnames(CropDamage) <- c("evtype","total.damage")
CropDamage <- CropDamage[order(-CropDamage$total.damage),]
CropDamage <- CropDamage[1:10,]
CropDamage$total.damage <- CropDamage$total.damage/1000000000 ##make graph in Millions
par(mar = c(10, 4, 2, 2) + 0.2) #add room for the rotated labels
##Create Pareto of damage
barplot(CropDamage$total.damage, names.arg = CropDamage$evtype, las=2,ylab = "Damage (Billions)", main = "Damage to Crops by Event Type")
CropDamage
}
Tornados are the leading cause of death and injuries while temperature extremes, flooding, and ligntning are the next largest contributors to both.
StormData <- ReadDataWeb()
## Warning: package 'R.utils' was built under R version 3.4.2
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.5.0 (2016-11-07) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
StormData <- CombineEVTYPE(StormData)
sd <- PrepDeathData(StormData)
### Financial Impact Flooding and Hurricanes are responsible for over 250 Billion USD in damage to property. Drought and flooding are responsible for over 20 Billion USD in damage to crops.
pd <-PrepPropDamage(StormData)
cd <- PrepCropDamage(StormData)