For this report, we analyze the weather events that had the greatest impact on human health and the economy between 1950 and 2011, using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
Human health: We will look at the top 5 causes of total injuries and fatalities.
Economic damage: We will find the top 5 events that accounted for the largest cumulative losses in crops and property over the period during which the data was collected.
# Load the raw NOAA storm data and report its size as (rows, columns).
dat <- read.csv(file = "repdata-data-StormData.csv")
dim(dat)
## [1] 902297 37
The data frame has 902297 observations of 37 variables. For the purpose of this study, only the following variables are selected: EVTYPE (event type), FATALITIES (number of fatalities), INJURIES (number of injuries), PROPDMG (property damage), and CROPDMG (crop damage). Since we are only interested in the most severe events, we remove the rows in which all four of these measures are zero.
# Columns kept for the analysis: the event label plus the four impact measures.
# NOTE(review): the NOAA file also appears to carry magnitude columns
# (PROPDMGEXP / CROPDMGEXP) that scale PROPDMG / CROPDMG. They are dropped
# here, so the damage sums computed later would be in mixed units -- confirm
# against the Storm Data documentation.
vars <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
dat <- dat[, vars]

# Keep a row as soon as any one of the four measures is non-zero, i.e. drop
# only the rows where all of them are zero.
dat <- dat[
  dat$FATALITIES != 0 | dat$INJURIES != 0 | dat$PROPDMG != 0 | dat$CROPDMG != 0,
]
However, a first glance at the EVTYPE column reveals possible duplication issues, most commonly caused by typographical errors, e.g. there are “Avalance” and “Avalanche”, “Torndao” and “Tornado”, etc. We therefore need to process the data further by renaming erroneous entries to match the 48 officially recognized events given in the [National Weather Service Storm Data Documentation](https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf).
# Normalise EVTYPE so that spelling variants of one event collapse to a single
# label before the official-name matching that follows.

# Upper-case everything and turn the separators "-", "/" and "\" into spaces.
dat$EVTYPE <- chartr("-/\\", "   ", toupper(dat$EVTYPE))

# Remove leading and trailing spaces and squeeze runs of spaces to one.
# This is done *before* the rewrites below so that the anchored
# "^THUNDERSTORM" rule and the multi-word patterns also match labels that
# had stray blanks in the raw file.
dat$EVTYPE <- gsub("^ +| +$", "", gsub(" +", " ", dat$EVTYPE))

# Misspellings and abbreviations of THUNDERSTORM seen in the data.
thunderstorm_typos <- paste(
  c(
    "TUNDERSTORM", "TSTM", "THUNDERSNOW", "THUNERSTORM", "THUNDEERSTORM",
    "THUDERSTORM", "THUNDERSTROM", "THUNDERTORM", "THUNDERESTORM"
  ),
  collapse = "|"
)

# Ordered rewrite rules: c(regex, replacement). Order matters:
#  * plurals are reduced before the patterns that rely on the singular form;
#  * "WILD FOREST FIRE(S)" is handled before the plain "WILD FIRES" /
#    "WILDFIRES" rule, so a plural forest-fire label no longer ends up as
#    "WILDFIRES".
# Labels are already upper case, so no case-insensitive matching is needed.
evtype_fixes <- list(
  c("AVALANCE", "AVALANCHE"),
  c("TORNDAO", "TORNADO"),
  c("STORMS", "STORM"),
  c(thunderstorm_typos, "THUNDERSTORM"),
  c("THUNDERSTORMW(INDS?)?", "THUNDERSTORM WIND"),
  c("WINDS", "WIND"),
  c("WINS", "WIND"),
  # Anything that starts with THUNDERSTORM and has a suffix is treated as
  # THUNDERSTORM WIND.
  c("^THUNDERSTORM.+", "THUNDERSTORM WIND"),
  c("WILD FOREST FIRES?", "WILDFIRE"),
  c("WILD ?FIRES", "WILDFIRE")
)
for (fix in evtype_fixes) {
  dat$EVTYPE <- gsub(fix[1], fix[2], dat$EVTYPE)
}
# Map the cleaned EVTYPE labels onto the official event names.
# events.txt contains a list of 48 "official" events, one per line.
events <- read.table("events.txt", header = FALSE, sep = "\n",
                     stringsAsFactors = FALSE)[, 1]
events <- toupper(events)

test <- dat$EVTYPE

# Fix this misspelling first so the official-name pass below can see it.
test <- gsub("LIGHTING", "LIGHTNING", test)

# Any label that contains an official name is replaced by that name.
# The names are matched literally (fixed = TRUE): several of them contain
# regex metacharacters, e.g. the parentheses in "HURRICANE (TYPHOON)".
# Iterating over the values also makes an empty list a no-op.
# NOTE(review): names are applied in file order and later matches overwrite
# earlier ones, so a short name that is a substring of a longer one (e.g.
# FLOOD inside FLASH FLOOD) absorbs the longer category if it comes later in
# events.txt -- confirm this merging is intended.
for (event in events) {
  test[grepl(event, test, fixed = TRUE)] <- event
}

test <- gsub("THUNDERSTORM$", "THUNDERSTORM WIND", test)
test <- gsub("WINTER STORM|WINTRY MIX", "WINTER WEATHER", test)
test <- gsub("^URBAN.+", "FLOOD", test)

# For the next three groups the whole label is replaced, not just the
# matching substring. Substituting in place turned an already official
# label into "HURRICANE (TYPHOON) (HURRICANE (TYPHOON))" and left suffixes
# such as storm names or "DAMAGING ..." attached.
test[grepl("TYPHOON|HURRICANE", test)] <- "HURRICANE (TYPHOON)"
# ASTRONOMICAL LOW TIDE is an official event of its own and must not be
# folded into STORM SURGE/TIDE just because it contains "TIDE".
test[grepl("STORM SURGE|TIDE", test) &
       !(test %in% "ASTRONOMICAL LOW TIDE")] <- "STORM SURGE/TIDE"
test[grepl("FROST|FREEZE", test)] <- "FROST/FREEZE"

# Restore the "/" that the separator clean-up removed from these names.
test <- gsub("COLD WIND CHILL", "COLD/WIND CHILL", test)
test <- gsub("^EXTREME.+", "EXTREME COLD/WIND CHILL", test)

dat$EVTYPE <- test
We consider fatalities and injuries as detrimental to human health and group these variables into the dataframe datHuman. Likewise, the dataframe datEcon contains property and crop damages that affect the economy.
Only the top 5 events that caused the most damage will be displayed.
library(reshape2)
library(ggplot2)
# Total each impact measure per event type: reshape to long form, then cast
# back to one row per EVTYPE with the summed values as columns.
dat <- melt(
  dat,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
)
dat <- dcast(
  dat,
  EVTYPE ~ variable,
  fun.aggregate = function(x) sum(x, na.rm = TRUE)
)

# Health impact: events ranked by fatalities + injuries, largest first.
datHuman <- dat[
  order(-rowSums(dat[c("FATALITIES", "INJURIES")])),
  c("EVTYPE", "FATALITIES", "INJURIES")
]

# Economic impact: events ranked by property + crop damage, largest first.
datEcon <- dat[
  order(-rowSums(dat[c("PROPDMG", "CROPDMG")])),
  c("EVTYPE", "PROPDMG", "CROPDMG")
]
# Five event types with the highest combined fatalities and injuries.
head(datHuman, n = 5)
## EVTYPE FATALITIES INJURIES
## 146 TORNADO 5636 91407
## 60 HEAT 3132 9209
## 145 THUNDERSTORM WIND 731 9544
## 39 FLOOD 1553 8683
## 100 LIGHTNING 817 5232
# Five event types with the highest combined property and crop damage.
head(datEcon, n = 5)
## EVTYPE PROPDMG CROPDMG
## 146 TORNADO 3215748.4 100026.77
## 145 THUNDERSTORM WIND 2677160.7 199378.18
## 39 FLOOD 2462168.5 367270.53
## 58 HAIL 689833.4 581471.01
## 100 LIGHTNING 603401.8 3580.61
# Figure 1: stacked horizontal bars of fatalities and injuries for the five
# top-ranked event types in datHuman.
datHuman5 <- melt(
  datHuman[seq_len(5), ],
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES")
)

# Turn EVTYPE into a factor whose levels follow the casualty counts, so the
# bars are drawn in ranked order (reorder() uses the mean value per level).
datHuman5$EVTYPE <- with(datHuman5, reorder(EVTYPE, value))

p1 <- ggplot(
  datHuman5,
  aes(x = EVTYPE, y = value, group = variable, order = EVTYPE, fill = variable)
)

# Evaluating the expression at top level draws the plot.
p1 +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "Event name",
    y = "Number of incidents",
    title = "Figure 1: Weather-related fatalities or injuries
from 1950 to 2011"
  )
# Figure 2: stacked horizontal bars of property and crop damage for the five
# top-ranked event types in datEcon.
datEcon5 <- melt(
  datEcon[seq_len(5), ],
  id.vars = "EVTYPE",
  measure.vars = c("PROPDMG", "CROPDMG")
)

# Turn EVTYPE into a factor whose levels follow the damage values, so the
# bars are drawn in ranked order (reorder() uses the mean value per level).
datEcon5$EVTYPE <- with(datEcon5, reorder(EVTYPE, value))

p2 <- ggplot(
  datEcon5,
  aes(x = EVTYPE, y = value, group = variable, order = EVTYPE, fill = variable)
)

# Evaluating the expression at top level draws the plot.
p2 +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    x = "Event name",
    y = "Damage",
    title = "Figure 2: Weather-related property and crop damage
from 1950 to 2011"
  )
We can see that tornadoes were the most harmful event type in both categories. As far as human safety goes, the number of fatalities and injuries caused by tornadoes was about two and a half times that caused by heat, thunderstorm wind, flood, and lightning combined. As for property damage, tornadoes again top the list, followed by thunderstorm wind and flood. However, when it comes to crop damage, hail is the main culprit.