This document illustrates the procedure used to analyze NOAA severe weather event data. Based on the data, the event types that were most harmful to population health and that had the greatest economic consequences were identified. To determine how harmful an event type was to health, its fatalities and injuries were totalled; events with the greatest total therefore posed the greatest health risk. Likewise, property damage and crop damage were totalled; events with the greatest (cost) value therefore caused the greatest economic distress.

Data Processing

The data were downloaded from the “Storm Data” link on the Course Project 2 page at the following URL: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. The code used to import and process the data is below:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Load the bz2-compressed CSV; text columns stay character, not factor.
originalData <- read.csv(
  "repdata-data-StormData.csv.bz2",
  stringsAsFactors = FALSE
)

# Narrow the table to the event type plus the four impact measures.
s <- originalData %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG)

# Discard events where all four measures are zero, then order the
# remaining rows alphabetically by event type.
su <- s %>%
  filter(
    !(FATALITIES == 0 & INJURIES == 0 & PROPDMG == 0 & CROPDMG == 0)
  ) %>%
  arrange(EVTYPE)

In calculating harm to health and to the economy, I assume that a greater total of FATALITIES and INJURIES corresponds to a greater health risk, and that a greater total of PROPDMG and CROPDMG corresponds to a greater economic consequence.

# Collapse the four impact measures into two scores per event and keep
# only EVTYPE alongside them:
#   HealthRisk = FATALITIES + INJURIES
#   EcoConseq  = PROPDMG + CROPDMG
# NOTE(review): PROPDMG and CROPDMG are added as-is. If the dataset carries
# separate magnitude/exponent columns for these figures (e.g. PROPDMGEXP,
# CROPDMGEXP), they are not applied here -- confirm this is intended.
su <- transmute(
  su,
  EVTYPE,
  HealthRisk = FATALITIES + INJURIES,
  EcoConseq = PROPDMG + CROPDMG
)

Now comes the lengthy process of consolidating EVTYPE names into common groups:

# Move every row of `events` whose EVTYPE matches `pattern` to the bottom
# of the data frame and overwrite its EVTYPE with `label`.
#
# events:  data frame with a character EVTYPE column.
# pattern: regular expression passed to grepl().
# label:   event-type name written into the matching rows.
#
# Returns `events` with the unmatched rows first (original order kept)
# followed by the relabelled matches. If nothing matches, `events` is
# returned untouched; the su[-grep(...), ] idiom is avoided because
# negating an empty index selects zero rows instead of all of them.
relabel_events <- function(events, pattern, label) {
  hit <- grepl(pattern, events$EVTYPE)
  if (!any(hit)) {
    return(events)
  }
  matched <- events[hit, ]
  matched$EVTYPE <- label
  rbind(events[!hit, ], matched)
}

# The rules run in order and later rules see the labels written by earlier
# ones, so the sequence matters.
# NOTE(review): a trailing `*` in these patterns is a regex quantifier on
# the preceding character (e.g. "TORNADO*" matches "TORNAD" plus any number
# of "O"), not a wildcard. Patterns are kept as they were so the groupings
# do not change.

# Tornado and thunderstorm abbreviations.
su <- relabel_events(su, "TORNADO*", "TORNADO")
su <- relabel_events(su, "MARINE TSTM*", "MARINE THUNDERSTORM")
su <- relabel_events(su, "[Tt][Ss][Tt][Mm]", "THUNDERSTORM")

# NOTE(review): this relabels whichever row is third at this point, so it
# depends on the row order produced by the steps above -- confirm which
# event type it is meant to target and match on its name instead.
su$EVTYPE[3] <- "OTHER"

su <- relabel_events(su, "[Oo][Tt][Hh][Ee][Rr]", "OTHER")

# Thunderstorm spellings and misspellings.
# NOTE(review): the first pattern below also matches the
# "MARINE THUNDERSTORM" label assigned above, folding those rows into
# "THUNDERSTORM" -- confirm that is intended.
su <- relabel_events(su, "[Tt][Hh][Uu][Nn][Dd][Ee][Rr][Ss]*", "THUNDERSTORM")
# These two rules previously selected rows with "...WINDS" but removed
# rows with the looser "...WINDS*", which could delete rows that were
# never re-added. One pattern now drives both steps.
su <- relabel_events(su, "THUDERSTORM WINDS", "THUNDERSTORM")
su <- relabel_events(su, "THUNERSTORM WINDS", "THUNDERSTORM")
su <- relabel_events(su, "TUNDERSTORM WIND", "THUNDERSTORM")
su <- relabel_events(su, "THUNDEERSTORM WINDS", "THUNDERSTORM")

# Wind chill is labelled "W CHILL" so the later WIND rule does not
# capture it.
su <- relabel_events(su, "[Ww][Ii][Nn][Dd][Cc][Hh]*", "W CHILL")
su <- relabel_events(su, "[Ww][Ii][Nn][Dd] [Cc][Hh]*", "W CHILL")

# Winter conditions, avalanches, snow and ice.
su <- relabel_events(su, "[Ww][Ii][Nn][Tt][Ee][Rr]*", "WINTER WEATHER")
su <- relabel_events(su, "[Ww][Ii][Nn][Tt][Rr][Yy]*", "WINTER WEATHER")
su <- relabel_events(
  su, "[Aa][Vv][Aa][Ll][Aa][Nn][Cc][Hh][Ee]*", "AVALANCHE"
)
su <- relabel_events(su, "[Ss][Nn][Oo][Ww]*", "WINTER WEATHER")
su <- relabel_events(su, "ICY|ICE", "WINTER WEATHER")

# Every remaining wind event.
su <- relabel_events(su, "[Ww][Ii][Nn][Dd][Ss]*", "WIND")

# Flooding and rising water.
su <- relabel_events(su, "[Ff][Ll][Oo][Oo][Dd][Ss]*", "FLOOD")
su <- relabel_events(su, "HIGH WATER|RAPIDLY RISING WATER", "FLOOD")

# Hurricanes, waterspouts and high surf.
su <- relabel_events(
  su, "[Hh][Uu][Rr][Rr][Ii][Cc][Aa][Nn][Ee]*", "HURRICANE"
)
su <- relabel_events(su, "[Ww][Aa][Tt][Ee][Rr][Ss][Pp]*", "WATERSPOUT")
su <- relabel_events(su, "[Hh][Ii][Gg][Hh] [Ss][Uu][Rr][Ff]*", "HIGH SURF")

Results

Let’s see how HealthRisk and EcoConseq compare per EVTYPE:

# Total both scores for each event type.
su <- su %>%
  group_by(EVTYPE) %>%
  summarise(HealthRisk = sum(HealthRisk), EcoConseq = sum(EcoConseq))

# Two rankings of the same totals: highest health score first, and
# highest economic score first.
suHR <- su %>% arrange(desc(HealthRisk))
suEC <- su %>% arrange(desc(EcoConseq))

# Leading rows of the health ranking.
head(suHR)
## Source: local data frame [6 x 3]
## 
##           EVTYPE HealthRisk EcoConseq
##            (chr)      (dbl)     (dbl)
## 1        TORNADO      97068 3315778.8
## 2   THUNDERSTORM      10276 2877933.7
## 3          FLOOD      10131 2799617.5
## 4 EXCESSIVE HEAT       8428    1954.4
## 5      LIGHTNING       6046  606932.4
## 6 WINTER WEATHER       5866  384986.8
# Leading rows of the economic ranking.
head(suEC)
## Source: local data frame [6 x 3]
## 
##         EVTYPE HealthRisk EcoConseq
##          (chr)      (dbl)     (dbl)
## 1      TORNADO      97068 3315778.8
## 2 THUNDERSTORM      10276 2877933.7
## 3        FLOOD      10131 2799617.5
## 4         HAIL       1376 1268289.7
## 5    LIGHTNING       6046  606932.4
## 6         WIND       2349  476649.0

From this, we can see that for both HealthRisk and EcoConseq, the top three are the same: Tornadoes, Thunderstorms and Floods. Let’s see what the HealthRisk and EcoConseq graphs look like for these three:

# Keep only the three event types that head both rankings. %in% never
# yields NA, so a missing EVTYPE cannot introduce an all-NA row.
top_events <- c("TORNADO", "THUNDERSTORM", "FLOOD")
su <- su[su$EVTYPE %in% top_events, ]
library(ggplot2)

# qplot() is deprecated in current ggplot2; each chart is built with the
# explicit ggplot() + geom_point() form, which draws the same scatter plot.

# Health score for each of the three event types.
ggplot(su, aes(x = EVTYPE, y = HealthRisk, colour = EVTYPE)) +
  geom_point() +
  ggtitle("HealthRisk per Event Type")

# Economic score for each of the three event types.
ggplot(su, aes(x = EVTYPE, y = EcoConseq, colour = EVTYPE)) +
  geom_point() +
  ggtitle("Economic Consequence per Event Type")

# Health score plotted against economic score, one point per event type.
ggplot(su, aes(x = EcoConseq, y = HealthRisk, colour = EVTYPE)) +
  geom_point() +
  ggtitle("HealthRisk per Economic Consequence")

Per the charts, Tornadoes, Thunderstorms and Floods, in that order, pose the greatest risk to public health and have the greatest economic consequences.