Synopsis

For this report, we analyze the weather events that had the greatest impact on human health and the economy between 1950 and 2011, using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.

Human health: We will look at the top 5 causes of total injuries and fatalities.

Economic damage: We will find the top 5 events that accounted for the greatest cumulative loss in crop production and property over the period during which the data were collected.

Data processing

# Load the raw storm events file and report its size as (rows, columns).
dat <- read.csv(file = "repdata-data-StormData.csv")
dim(dat)
## [1] 902297     37

The data frame consists of 902297 observations of 37 variables. For the purpose of this study, only the following variables are selected: EVTYPE (event type), FATALITIES (number of fatalities), INJURIES (number of injuries), PROPDMG (property damage), and CROPDMG (crop damage). Since we are only interested in the most severe events, we remove cases (rows) in which all four of these measures are zero.

# Keep only the event type and the four outcome measures used in this report.
# NOTE(review): no magnitude/exponent column is retained here, so PROPDMG and
# CROPDMG are later summed exactly as recorded -- confirm against the data
# documentation whether they must be scaled before totals are compared.
vars <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
dat <- dat[, vars]

# Drop events with no recorded impact: a row is kept when at least one of the
# four measures is non-zero.
has_impact <- dat$FATALITIES != 0 | dat$INJURIES != 0 |
  dat$PROPDMG != 0 | dat$CROPDMG != 0
dat <- dat[has_impact, ]

However, a first glance at the EVTYPE column reveals possible duplication issues, most commonly due to typographical errors, e.g. there are “Avalance” and “Avalanche”, “Torndao” and “Tornado”, etc. As such, we need to process the data further by renaming/replacing erroneous entries according to the 48 officially recognized events given in the [National Weather Service Storm Data Documentation](https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf).

# Normalise the free-text event names so that spelling variants collapse onto
# a single label. The names are upper-cased first, so every pattern below is
# written in upper case and needs no case-insensitive matching.
dat$EVTYPE <- local({
  evtype <- toupper(dat$EVTYPE)

  # Treat "-", "/" and "\" as word separators.
  for (separator in c("-", "/", "\\")) {
    evtype <- gsub(separator, " ", evtype, fixed = TRUE)
  }

  thunderstorm_typos <- paste0(
    "(TUNDERSTORM|TSTM|THUNDERSNOW|THUNERSTORM|THUNDEERSTORM|",
    "THUDERSTORM|THUNDERSTROM|THUNDERTORM|THUNDERSTORMS|THUNDERESTORM)"
  )

  # (pattern, replacement) pairs, applied strictly in this order: later rules
  # rely on the output of earlier ones (e.g. plurals are removed before the
  # thunderstorm variants are merged).
  rules <- list(
    c("AVALANCE", "AVALANCHE"),
    c("TORNDAO", "TORNADO"),
    c("STORMS", "STORM"),
    c(thunderstorm_typos, "THUNDERSTORM"),
    c("(THUNDERSTORMW|THUNDERSTORMWINDS)", "THUNDERSTORM WIND"),
    c("WINDS", "WIND"),
    c("WINS", "WIND"),
    # Anything that starts with THUNDERSTORM and has further text is treated
    # as a thunderstorm wind event.
    c("^(THUNDERSTORM).+", "THUNDERSTORM WIND"),
    # The plural forest-fire form must be consumed in the same rule as the
    # singular; replacing the singular first would leave "WILDFIRES" behind.
    c("WILD FOREST FIRES?", "WILDFIRE"),
    c("WILD ?FIRES", "WILDFIRE")
  )
  for (rule in rules) {
    evtype <- gsub(rule[1], rule[2], evtype)
  }

  # remove leading and trailing spaces and replace multiple spaces with a
  # single space
  gsub("^ *|(?<= ) | *$", "", evtype, perl = TRUE)
})

# events.txt contains a list of 48 "official" events, one per line
events <- read.table("events.txt", header = FALSE, sep = "\n")[, 1]
events <- toupper(events)

# Relabel every entry whose text contains an official event name with that
# official name. Each name is passed to grepl() as a regular expression, so
# characters such as "(" and ")" in a name act as regex syntax, not literals.
# NOTE(review): matching is by substring and a later match overwrites an
# earlier one, so if the list holds two names where one contains the other
# (for example a "FLASH FLOOD"/"FLOOD" pair), entries with the longer name end
# up under the shorter one -- confirm this merging is intended.
test <- dat$EVTYPE
for (event in events) {
  test[grepl(event, test, ignore.case = TRUE)] <- event
}

# Map the variants that the official-name matching did not catch.
test <- gsub("LIGHTING", "LIGHTNING", test, ignore.case = TRUE)
test <- gsub("THUNDERSTORM$", "THUNDERSTORM WIND", test, ignore.case = TRUE)
test <- gsub("(WINTER STORM|WINTRY MIX)", "WINTER WEATHER", test,
             ignore.case = TRUE)
test <- gsub("^(URBAN).+", "FLOOD", test, ignore.case = TRUE)

# Hurricanes and typhoons: replace the WHOLE label. Substituting only the
# matched word would leave storm names attached ("HURRICANE (TYPHOON) OPAL")
# and would expand a label that already contains both words twice.
is_hurricane <- grepl("TYPHOON|HURRICANE", test, ignore.case = TRUE)
test[is_hurricane] <- "HURRICANE (TYPHOON)"

# Consume an optional trailing " TIDE" (or "/TIDE") together with
# "STORM SURGE" so that "STORM SURGE TIDE" is rewritten once, not once per
# word. Other labels containing TIDE are still rewritten in place.
test <- gsub("STORM SURGE([ /]TIDE)?|TIDE", "STORM SURGE/TIDE", test,
             ignore.case = TRUE)

# Frost and freeze: replace the whole label, otherwise "FROST FREEZE" would
# become "FROST/FREEZE FROST/FREEZE".
is_frost <- grepl("FROST|FREEZE", test, ignore.case = TRUE)
test[is_frost] <- "FROST/FREEZE"

test <- gsub("COLD WIND CHILL", "COLD/WIND CHILL", test, ignore.case = TRUE)
test <- gsub("^(EXTREME).+", "EXTREME COLD/WIND CHILL", test,
             ignore.case = TRUE)

dat$EVTYPE <- test

We consider fatalities and injuries as detrimental to human health and group these variables into the dataframe datHuman. Likewise, the dataframe datEcon contains property and crop damages that affect the economy.
Only the top 5 events that caused the most damage will be displayed.

library(reshape2)
library(ggplot2)
# Total each measure per event type: reshape to long form, then cast back to
# one row per EVTYPE with the values of every measure summed (NAs ignored).
dat <- melt(
  dat,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
)
dat <- dcast(
  dat,
  EVTYPE ~ variable,
  fun.aggregate = function(v) sum(v, na.rm = TRUE)
)

# Rank event types by combined fatalities and injuries, largest first.
health_cols <- c("FATALITIES", "INJURIES")
health_total <- rowSums(dat[health_cols])
datHuman <- dat[order(-health_total), c("EVTYPE", health_cols)]

# Rank event types by combined property and crop damage, largest first.
damage_cols <- c("PROPDMG", "CROPDMG")
damage_total <- rowSums(dat[damage_cols])
datEcon <- dat[order(-damage_total), c("EVTYPE", damage_cols)]

# Five event types with the highest combined fatalities and injuries.
head(datHuman, n = 5)
##                EVTYPE FATALITIES INJURIES
## 146           TORNADO       5636    91407
## 60               HEAT       3132     9209
## 145 THUNDERSTORM WIND        731     9544
## 39              FLOOD       1553     8683
## 100         LIGHTNING        817     5232
# Five event types with the highest combined property and crop damage.
head(datEcon, n = 5)
##                EVTYPE   PROPDMG   CROPDMG
## 146           TORNADO 3215748.4 100026.77
## 145 THUNDERSTORM WIND 2677160.7 199378.18
## 39              FLOOD 2462168.5 367270.53
## 58               HAIL  689833.4 581471.01
## 100         LIGHTNING  603401.8   3580.61
# Create bar plot of the five event types with the most fatalities and
# injuries. head() is used so a table with fewer than five rows does not
# produce NA rows.
datHuman5 <- head(datHuman, n = 5)

# Long form: one row per (event type, measure) so the measures can be stacked.
datHuman5 <- melt(
  datHuman5,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES")
)

# Order the bars by size. reorder() summarises each event's values with the
# mean by default; with one value per measure that ranks events the same way
# as their combined total.
datHuman5$EVTYPE <- reorder(datHuman5$EVTYPE, datHuman5$value)

# The former `order` aesthetic is deprecated in ggplot2 and is not needed:
# bar order comes from the factor levels set above.
p1 <- ggplot(
  datHuman5,
  aes(x = EVTYPE, y = value, group = variable, fill = variable)
)

# The title is built with an explicit "\n" so that source-code indentation
# does not end up as leading spaces on its second line.
p1 + geom_bar(stat = "identity") +
  xlab("Event name") +
  ylab("Number of incidents") +
  ggtitle(paste0(
    "Figure 1: Weather-related fatalities or injuries",
    "\n",
    "from 1950 to 2011"
  )) +
  coord_flip()

# Bar plot of the five event types with the most property and crop damage.
# head() is used so a table with fewer than five rows does not produce NA
# rows.
datEcon5 <- head(datEcon, n = 5)

# Long form: one row per (event type, measure) so the measures can be stacked.
datEcon5 <- melt(
  datEcon5,
  id.vars = "EVTYPE",
  measure.vars = c("PROPDMG", "CROPDMG")
)

# Order the bars by size. reorder() summarises each event's values with the
# mean by default; with one value per measure that ranks events the same way
# as their combined total.
datEcon5$EVTYPE <- reorder(datEcon5$EVTYPE, datEcon5$value)

# The former `order` aesthetic is deprecated in ggplot2 and is not needed:
# bar order comes from the factor levels set above.
p2 <- ggplot(
  datEcon5,
  aes(x = EVTYPE, y = value, group = variable, fill = variable)
)

# The title is built with an explicit "\n" so that source-code indentation
# does not end up as leading spaces on its second line.
p2 + geom_bar(stat = "identity") +
  xlab("Event name") +
  ylab("Damage") +
  ggtitle(paste0(
    "Figure 2: Weather-related property and crop damage",
    "\n",
    "from 1950 to 2011"
  )) +
  coord_flip()

Results

We can see that tornadoes were the most harmful event across both categories. As far as human safety goes, the number of fatalities and injuries caused by tornadoes was roughly two and a half times that caused by heat, thunderstorm wind, flood, and lightning combined. As for property damage, tornadoes still top the list, followed by thunderstorm wind and flood. However, when it comes to crop damage, hail is the main culprit.