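This document illustrates the procedure used to analyze NOAA severe weather event data. From the data, the event types that were most harmful to population health and those that caused the greatest economic consequences were identified. To determine how harmful an event type was to health, its fatalities and injuries were totaled; event types with the greatest totals were considered the most harmful. Similarly, property and crop damage were totaled for each event type; events with the greatest (cost) value therefore caused the greatest economic distress.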
Data were collected from the “Storm Data” link on the Course Project 2 page at the following URL: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. The code to import and process the data is below:
# Load the raw Storm Data file.
#
# The compressed CSV is fetched on first run so the analysis can be reproduced
# from a clean working directory; on later runs the local copy is reused.
# mode = "wb" keeps the binary .bz2 archive intact on Windows.
storm_file <- "repdata-data-StormData.csv.bz2"
if (!file.exists(storm_file)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = storm_file,
    mode = "wb"
  )
}
# Strings are kept as character (not factor) because EVTYPE is rewritten
# with free-form labels later on.
originalData <- read.csv(storm_file, stringsAsFactors = FALSE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Narrow the raw data to the event type plus the four impact measures.
s <- originalData %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG)

# Discard events with no recorded impact on any measure, then order the
# remaining rows alphabetically by event type.
su <- s %>%
  filter(!(FATALITIES == 0 & INJURIES == 0 & PROPDMG == 0 & CROPDMG == 0)) %>%
  arrange(EVTYPE)
In calculating harm to health and to the economy, I assume that a greater combined total of FATALITIES and INJURIES corresponds to a greater health risk, and that a greater combined total of PROPDMG and CROPDMG corresponds to a greater economic consequence.
# Collapse the four impact measures into two scores per event:
#   HealthRisk = fatalities + injuries
#   EcoConseq  = property damage + crop damage
# and keep only the event type alongside the two scores.
# NOTE(review): PROPDMG and CROPDMG are added exactly as recorded. If the
# dataset stores their magnitude in separate exponent columns (presumably
# PROPDMGEXP / CROPDMGEXP), EcoConseq is not in dollars -- confirm.
su <- su %>%
  mutate(
    HealthRisk = FATALITIES + INJURIES,
    EcoConseq = PROPDMG + CROPDMG
  ) %>%
  select(EVTYPE, HealthRisk, EcoConseq)
Now comes the lengthy process of consolidating EVTYPE names into common groups:
# Consolidate the many EVTYPE spellings into a small set of common groups.
#
# Each rule is a (pattern, label) pair. Rules are applied in the order listed,
# and every row whose *current* EVTYPE matches the pattern is relabelled in
# place. Because later rules see the labels written by earlier ones, the order
# is significant:
#   * rows labelled "MARINE THUNDERSTORM" are folded into "THUNDERSTORM" by
#     the later THUNDER rule;
#   * wind chill is labelled "W CHILL" so that the generic WIND rule further
#     down does not absorb it.
#
# Patterns are kept as originally written. Note that a trailing `*` only makes
# the final character optional/repeatable (e.g. "TORNADO*" matches "TORNAD"
# followed by zero or more "O"); it is not a wildcard.
evtype_rules <- matrix(
  c(
    "TORNADO*",                                  "TORNADO",
    "MARINE TSTM*",                              "MARINE THUNDERSTORM",
    "[Tt][Ss][Tt][Mm]",                          "THUNDERSTORM",
    "[Oo][Tt][Hh][Ee][Rr]",                      "OTHER",
    "[Tt][Hh][Uu][Nn][Dd][Ee][Rr][Ss]*",         "THUNDERSTORM",
    # Misspelled thunderstorm variants. The wider "WINDS*" form is used for
    # the first two so that a singular "... WIND" spelling is relabelled too
    # (previously such rows were removed without being added back).
    "THUDERSTORM WINDS*",                        "THUNDERSTORM",
    "THUNERSTORM WINDS*",                        "THUNDERSTORM",
    "TUNDERSTORM WIND",                          "THUNDERSTORM",
    "THUNDEERSTORM WINDS",                       "THUNDERSTORM",
    "[Ww][Ii][Nn][Dd][Cc][Hh]*",                 "W CHILL",
    "[Ww][Ii][Nn][Dd] [Cc][Hh]*",                "W CHILL",
    "[Ww][Ii][Nn][Tt][Ee][Rr]*",                 "WINTER WEATHER",
    "[Ww][Ii][Nn][Tt][Rr][Yy]*",                 "WINTER WEATHER",
    "[Aa][Vv][Aa][Ll][Aa][Nn][Cc][Hh][Ee]*",     "AVALANCHE",
    "[Ss][Nn][Oo][Ww]*",                         "WINTER WEATHER",
    "ICY|ICE",                                   "WINTER WEATHER",
    "[Ww][Ii][Nn][Dd][Ss]*",                     "WIND",
    "[Ff][Ll][Oo][Oo][Dd][Ss]*",                 "FLOOD",
    "HIGH WATER|RAPIDLY RISING WATER",           "FLOOD",
    "[Hh][Uu][Rr][Rr][Ii][Cc][Aa][Nn][Ee]*",     "HURRICANE",
    "[Ww][Aa][Tt][Ee][Rr][Ss][Pp]*",             "WATERSPOUT",
    "[Hh][Ii][Gg][Hh] [Ss][Uu][Rr][Ff]*",        "HIGH SURF"
  ),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(NULL, c("pattern", "label"))
)

# Apply an ordered set of relabelling rules to a character vector.
#
# evtype: character vector of event type names.
# rules:  character matrix with columns "pattern" (regular expression) and
#         "label" (replacement name), applied top to bottom.
# Returns a character vector of the same length as `evtype`.
#
# Relabelling through a logical mask (grepl) rather than the earlier
# su[-grep(...), ] / rbind() approach matters for correctness: when a pattern
# matches nothing, -integer(0) selects zero rows and would silently empty the
# entire data frame. With a mask, a rule that matches nothing is a no-op.
consolidate_evtype <- function(evtype, rules) {
  for (i in seq_len(nrow(rules))) {
    hit <- grepl(rules[i, "pattern"], evtype)
    evtype[hit] <- rules[i, "label"]
  }
  evtype
}

# The earlier code relabelled row 3 by position at this point, which depends
# on the incidental row order. NOTE(review): that row was presumably the "?"
# placeholder event type (it sorts near the top) -- confirm. Matching on the
# value makes the step independent of row order.
su$EVTYPE[su$EVTYPE == "?"] <- "OTHER"

# Rows are relabelled in place, so row order is unchanged; the per-type totals
# computed afterwards do not depend on row order.
su$EVTYPE <- consolidate_evtype(su$EVTYPE, evtype_rules)
Let’s see how HealthRisk and EcoConseq compare per EVTYPE:
# Total both scores within each consolidated event type.
su <- su %>%
  group_by(EVTYPE) %>%
  summarise(HealthRisk = sum(HealthRisk), EcoConseq = sum(EcoConseq))

# Two rankings of the same summary: one by health score, one by economic score.
suHR <- su %>% arrange(desc(HealthRisk))
suEC <- su %>% arrange(desc(EcoConseq))

# Show the six event types with the highest HealthRisk.
head(suHR, n = 6L)
## Source: local data frame [6 x 3]
##
## EVTYPE HealthRisk EcoConseq
## (chr) (dbl) (dbl)
## 1 TORNADO 97068 3315778.8
## 2 THUNDERSTORM 10276 2877933.7
## 3 FLOOD 10131 2799617.5
## 4 EXCESSIVE HEAT 8428 1954.4
## 5 LIGHTNING 6046 606932.4
## 6 WINTER WEATHER 5866 384986.8
# Show the six event types with the highest EcoConseq.
head(suEC, n = 6L)
## Source: local data frame [6 x 3]
##
## EVTYPE HealthRisk EcoConseq
## (chr) (dbl) (dbl)
## 1 TORNADO 97068 3315778.8
## 2 THUNDERSTORM 10276 2877933.7
## 3 FLOOD 10131 2799617.5
## 4 HAIL 1376 1268289.7
## 5 LIGHTNING 6046 606932.4
## 6 WIND 2349 476649.0
From this, we can see that for both HealthRisk and EcoConseq, the top three are the same: Tornadoes, Thunderstorms and Floods. Let’s see what the HealthRisk and EcoConseq graphs look like for these three:
library(ggplot2)

# Restrict the summary to the three event types that lead both rankings.
# %in% never yields NA, unlike a chain of == comparisons.
su <- su[su$EVTYPE %in% c("TORNADO", "THUNDERSTORM", "FLOOD"), ]

# qplot() is deprecated as of ggplot2 3.4.0; the explicit ggplot() calls below
# draw the same point charts (qplot's default geom when x and y are given).

# Health score for each of the three event types.
ggplot(su, aes(x = EVTYPE, y = HealthRisk, colour = EVTYPE)) +
  geom_point() +
  ggtitle("HealthRisk per Event Type")

# Economic score for each of the three event types.
ggplot(su, aes(x = EVTYPE, y = EcoConseq, colour = EVTYPE)) +
  geom_point() +
  ggtitle("Economic Consequence per Event Type")

# Health score against economic score, one point per event type.
ggplot(su, aes(x = EcoConseq, y = HealthRisk, colour = EVTYPE)) +
  geom_point() +
  ggtitle("HealthRisk per Economic Consequence")
Per the charts, Tornadoes, Thunderstorms and Floods, in that order, pose the greatest risk to public health and carry the greatest economic consequence.