This document summarizes the process and results of the analysis of the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. It is used to answer two main questions: 1- Which types of events have the greatest consequences for population health? 2- Which types of events have the greatest economic consequences?
The data for this analysis is downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 and loaded into RStudio. The values of the exponents (CROPDMGEXP and PROPDMGEXP) are stored in the variables expval and expval2, respectively, and then converted to their numeric values (as given in the documentation at https://rstudio-pubs-static.s3.amazonaws.com/58957_37b6723ee52b455990e149edde45e5b6.html).
library(readr)
library(R.utils)
library(ggplot2)
# Decompress the archive (the .bz2 is kept; the step is skipped when the CSV
# already exists) and read the CSV from that very same path. Using one path
# for both steps removes the dependency on the working directory being the
# home directory.
storm_csv <- "~/repdata%2Fdata%2FStormData.csv"
bunzip2(paste0(storm_csv, ".bz2"), destname = storm_csv, remove = FALSE, skip = TRUE)
## [1] "~/repdata%2Fdata%2FStormData.csv"
## attr(,"temporary")
## [1] FALSE
stormdata <- read_csv(storm_csv)
## Parsed with column specification:
## cols(
## .default = col_character(),
## STATE__ = col_double(),
## COUNTY = col_double(),
## BGN_RANGE = col_double(),
## COUNTY_END = col_double(),
## END_RANGE = col_double(),
## LENGTH = col_double(),
## WIDTH = col_double(),
## F = col_integer(),
## MAG = col_double(),
## FATALITIES = col_double(),
## INJURIES = col_double(),
## PROPDMG = col_double(),
## CROPDMG = col_double(),
## LATITUDE = col_double(),
## LONGITUDE = col_double(),
## LATITUDE_E = col_double(),
## LONGITUDE_ = col_double(),
## REFNUM = col_double()
## )
## See spec(...) for full column specifications.
# Translate NOAA damage-exponent codes into numeric multipliers.
#
# codes: vector of raw CROPDMGEXP / PROPDMGEXP values.
# Returns an unnamed numeric vector of the same length. Codes that are not in
# the table (including NA, "-", "?") map to 0.
exponent_multiplier <- function(codes) {
  lookup <- c(
    h = 100, H = 100, # hundreds: previously unhandled and silently mapped to 0
    k = 1000, K = 1000,
    m = 1000000, M = 1000000,
    b = 1000000000, B = 1000000000,
    "+" = 1,
    setNames(rep(10, 11), as.character(0:10)) # numeric codes all mean x10
  )
  # as.character() guards against factor input being used as integer positions.
  multiplier <- unname(lookup[as.character(codes)])
  multiplier[is.na(multiplier)] <- 0
  multiplier
}

# expval holds the crop-damage multiplier, expval2 the property-damage one.
expval <- exponent_multiplier(stormdata$CROPDMGEXP)
expval2 <- exponent_multiplier(stormdata$PROPDMGEXP)
# Working subset: the event type and the four damage measures, with the two
# numeric exponent multipliers appended as columns `expval` and `expval2`.
stormdata1 <- stormdata[, c("EVTYPE", "FATALITIES", "INJURIES", "CROPDMG", "PROPDMG")]
stormdata1 <- cbind(
  stormdata1,
  expval,
  expval2
)
A subset of the main dataset is used to increase efficiency. The columns in the subset are EVTYPE, FATALITIES, INJURIES, CROPDMG and PROPDMG. The columns expval and expval2 store the numeric values of the exponents from the original table.
The EVTYPE variable officially has 48 unique events, although the database shows over 1,000 different unique events. As the questions to be answered focus on the maximum damage that is inflicted, the entries with no damage to human life or property can be safely removed.
Separating the dataset further based on damages allows for more processing of data. The data can be further reduced based on zero damage to either human life or property
The event names need further processing as they are not uniform. The official list of events provided by the NOAA is used to guide this processing. Some of the events have been combined for simplicity (e.g. all marine events have been grouped under the event “MARINE OCCURENCES”).
The main database has been divided into 4 parts, each focusing on one of: fatalities, injuries, crop damage and property damage. Each dataset is used to plot a graph, and the graphs are grouped together to represent either human damage or property damage.
# Drop rows that record no harm at all, then split into the human-harm and the
# economic-harm subsets.
#
# Logical masks are used instead of `df[-which(cond), ]`, which would return
# zero rows whenever no row satisfies `cond`. `%in% 0` is FALSE for NA, so rows
# with missing values are kept, exactly as `which()` kept them.
no_human_harm <- stormdata1$FATALITIES %in% 0 & stormdata1$INJURIES %in% 0
no_economic_harm <- stormdata1$CROPDMG %in% 0 & stormdata1$PROPDMG %in% 0
stormdata2 <- stormdata1[!(no_human_harm & no_economic_harm), ]

# Normalise the case so later pattern matching sees one spelling per event.
stormdata2$EVTYPE <- toupper(stormdata2$EVTYPE)

stormdatahuman <- stormdata2[
  !(stormdata2$FATALITIES %in% 0 & stormdata2$INJURIES %in% 0),
]
stormdataproperty <- stormdata2[
  !(stormdata2$CROPDMG %in% 0 & stormdata2$PROPDMG %in% 0),
]
# Fatalities: keep events with at least one fatality, restrict them to the
# upper quartile of fatality counts, harmonise the event names and total the
# fatalities per harmonised event.
#
# A logical mask replaces `-which(...)`, which would drop every row if no row
# had zero fatalities.
stormdatahumanf <- stormdatahuman[!(stormdatahuman$FATALITIES %in% 0), ]
quantiles1 <- quantile(stormdatahumanf$FATALITIES, probs = 0.75)
stormdatahumanfcut <- subset(
  stormdatahumanf,
  stormdatahumanf$FATALITIES >= quantiles1
)
##stormdatahumanicut <- cut(stormdatahumani$INJURIES, breaks =as.numeric(quantiles2))

# Ordered recoding rules (regex -> label). Order matters: each rule is applied
# to the names as already rewritten by the rules before it.
# NOTE(review): unlike the injury/crop/property sections, this list has no
# "STRONG" alternative for HIGH WIND and no RAIN rule -- confirm whether that
# difference is intended.
fatality_rules <- c(
  "HEAT|WARM" = "HEAT/EXCESSIVE HEAT",
  "HURRICANE" = "HURRICANE",
  "COLD|CHILL|HYPOTHERMIA|WINTER" = "COLD/ CHILL",
  "FLOOD|FLD" = "FLOODING",
  "HIGH WIND|^WIND" = "HIGH WIND",
  "SNOW|AVALANCHE|ICE|ICY|HAIL" = "SNOW",
  "SURF" = "HIGH SURF",
  "^TSTM|^THUNDERSTORM" = " THUNDERSTORMS",
  "MARINE" = "MARINE OCCURENCES",
  "FIRE" = "WILDFIRE",
  "TORNADO" = "TORNADO",
  "TROPICAL" = "TROPICAL STORM",
  "RIP" = "RIP CURRENT",
  "^STORM" = "STORM SURGE"
)
for (pattern in names(fatality_rules)) {
  hit <- grepl(pattern, stormdatahumanfcut$EVTYPE, ignore.case = TRUE)
  stormdatahumanfcut$EVTYPE[hit] <- fatality_rules[[pattern]]
}

# One row per harmonised event; columns are `Category` and `x` (the sum).
fata1 <- aggregate(
  stormdatahumanfcut$FATALITIES,
  by = list(Category = stormdatahumanfcut$EVTYPE),
  FUN = sum
)
# Injuries: keep events with at least one injury, restrict them to the upper
# quartile of injury counts, harmonise the event names and total the injuries
# per harmonised event.
#
# A logical mask replaces `-which(...)`, which would drop every row if no row
# had zero injuries.
stormdatahumani <- stormdatahuman[!(stormdatahuman$INJURIES %in% 0), ]
quantiles2 <- quantile(stormdatahumani$INJURIES, probs = 0.75)
stormdatahumanicut <- subset(
  stormdatahumani,
  stormdatahumani$INJURIES >= quantiles2
)

# Ordered recoding rules (regex -> label). Order matters: each rule is applied
# to the names as already rewritten by the rules before it.
injury_rules <- c(
  "HEAT|WARM" = "HEAT/EXCESSIVE HEAT",
  "HURRICANE" = "HURRICANE",
  "COLD|CHILL|HYPOTHERMIA|WINTER" = "COLD/ CHILL",
  "FLOOD|FLD" = "FLOODING",
  "HIGH WIND|^WIND|STRONG" = "HIGH WIND",
  "SNOW|AVALANCHE|ICE|ICY|HAIL" = "SNOW",
  "SURF" = "HIGH SURF",
  "^TSTM|^THUNDERSTORM" = " THUNDERSTORMS",
  "MARINE" = "MARINE OCCURENCES",
  "TORNADO" = "TORNADO",
  "FIRE" = "WILDFIRE",
  "TROPICAL" = "TROPICAL STORM",
  "RAIN" = "HEAVY RAIN",
  "RIP" = "RIP CURRENT",
  "^STORM" = "STORM SURGE"
)
for (pattern in names(injury_rules)) {
  hit <- grepl(pattern, stormdatahumanicut$EVTYPE, ignore.case = TRUE)
  stormdatahumanicut$EVTYPE[hit] <- injury_rules[[pattern]]
}

# One row per harmonised event; columns are `Category` and `x` (the sum).
inj1 <- aggregate(
  stormdatahumanicut$INJURIES,
  by = list(Category = stormdatahumanicut$EVTYPE),
  FUN = sum
)
# Rename the grouping column of both summaries from `Category` to `event`.
names(fata1)[1] <- "event"
names(inj1)[1] <- "event"
The following panel plots show the damage inflicted by various events on human life: the first plot shows the fatalities and the second the (non-fatal) injuries.
library(ggplot2)
library(gridExtra)
# Two stacked bar charts: total fatalities and total injuries per event group.
# `event` is the grouping column and `x` is the summed column that aggregate()
# produced.
plot1 <- ggplot(fata1, aes(x = event, y = x, fill = event)) +
  geom_bar(stat = "identity") +
  labs(
    x = "event",
    y = "fatalities",
    fill = "Event",
    title = "Human Fatalities"
  )
plot2 <- ggplot(inj1, aes(x = event, y = x, fill = event)) +
  geom_bar(stat = "identity") +
  labs(
    x = "event",
    y = "injuries",
    fill = "Event",
    title = "Human Injuries"
  )
grid.arrange(plot1, plot2, nrow = 2)
The following computations calculate the total damage to crops and property using the exponent values given in the data along with the processing needed for the event variable as described above
# Crop damage: compute dollar damage per record (amount x multiplier), keep
# records with non-zero crop damage, restrict them to the upper quartile,
# harmonise the event names and total the crop damage per harmonised event.
stormdatapropertyc <- data.frame(
  event = stormdataproperty$EVTYPE,
  totcropdmg = stormdataproperty$CROPDMG * stormdataproperty$expval,
  totpropdmg = stormdataproperty$PROPDMG * stormdataproperty$expval2,
  stringsAsFactors = FALSE
)

# A logical mask replaces `-which(...)`, which would drop every row if no row
# had zero crop damage. From here on `stormdatapropertyc` holds only records
# with non-zero crop damage.
stormdatapropertyc <- stormdatapropertyc[!(stormdatapropertyc$totcropdmg %in% 0), ]
stormdatapropccut <- subset(
  stormdatapropertyc,
  stormdatapropertyc$totcropdmg >= quantile(stormdatapropertyc$totcropdmg, probs = 0.75)
)

# Ordered recoding rules (regex -> label). Order matters: each rule is applied
# to the names as already rewritten by the rules before it.
crop_rules <- c(
  "HEAT|WARM" = "HEAT/EXCESSIVE HEAT",
  "HURRICANE" = "HURRICANE",
  "COLD|CHILL|HYPOTHERMIA|WINTER" = "COLD/ CHILL",
  "FLOOD|FLD" = "FLOODING",
  "HIGH WIND|^WIND|STRONG" = "HIGH WIND",
  "SNOW|AVALANCHE|ICE|ICY|HAIL" = "SNOW",
  "SURF" = "HIGH SURF",
  "^TSTM|^THUNDERSTORM" = " THUNDERSTORMS",
  "MARINE" = "MARINE OCCURENCES",
  "TORNADO" = "TORNADO",
  "FIRE" = "WILDFIRE",
  "TROPICAL" = "TROPICAL STORM",
  "RAIN|WET" = "HEAVY RAIN",
  "RIP" = "RIP CURRENT",
  "^STORM" = "STORM SURGE"
)
event1 <- as.character(stormdatapropccut$event)
for (pattern in names(crop_rules)) {
  hit <- grepl(pattern, event1, ignore.case = TRUE)
  event1[hit] <- crop_rules[[pattern]]
}

# One row per harmonised event; columns are `Category` and `x` (the sum).
crp1 <- aggregate(
  stormdatapropccut$totcropdmg,
  by = list(Category = event1),
  FUN = sum
)
# Property damage: keep records with non-zero property damage, restrict them
# to the upper quartile, harmonise the event names and total the property
# damage per harmonised event.
#
# The table is rebuilt from `stormdataproperty` rather than taken from
# `stormdatapropertyc`: by this point `stormdatapropertyc` has been reduced to
# records with non-zero CROP damage, so starting from it would exclude every
# event that damaged property but not crops.
stormdatapropertyp <- data.frame(
  event = stormdataproperty$EVTYPE,
  totcropdmg = stormdataproperty$CROPDMG * stormdataproperty$expval,
  totpropdmg = stormdataproperty$PROPDMG * stormdataproperty$expval2,
  stringsAsFactors = FALSE
)

# A logical mask replaces `-which(...)`, which would drop every row if no row
# had zero property damage.
stormdatapropertyp <- stormdatapropertyp[!(stormdatapropertyp$totpropdmg %in% 0), ]
stormdataproppcut <- subset(
  stormdatapropertyp,
  stormdatapropertyp$totpropdmg >= quantile(stormdatapropertyp$totpropdmg, probs = 0.75)
)

# Ordered recoding rules (regex -> label). Order matters: each rule is applied
# to the names as already rewritten by the rules before it.
property_rules <- c(
  "HEAT|WARM" = "HEAT/EXCESSIVE HEAT",
  "HURRICANE" = "HURRICANE",
  "COLD|CHILL|HYPOTHERMIA|WINTER" = "COLD/ CHILL",
  "FLOOD|FLD" = "FLOODING",
  "HIGH WIND|^WIND|STRONG" = "HIGH WIND",
  "SNOW|AVALANCHE|ICE|ICY|HAIL" = "SNOW",
  "SURF" = "HIGH SURF",
  "^TSTM|^THUNDERSTORM" = " THUNDERSTORMS",
  "MARINE" = "MARINE OCCURENCES",
  "TORNADO" = "TORNADO",
  "FIRE" = "WILDFIRE",
  "TROPICAL" = "TROPICAL STORM",
  "RAIN|WET" = "HEAVY RAIN",
  "RIP" = "RIP CURRENT",
  "^STORM" = "STORM SURGE"
)
event2 <- as.character(stormdataproppcut$event)
for (pattern in names(property_rules)) {
  hit <- grepl(pattern, event2, ignore.case = TRUE)
  event2[hit] <- property_rules[[pattern]]
}

# One row per harmonised event; columns are `Category` and `x` (the sum).
crp2 <- aggregate(
  stormdataproppcut$totpropdmg,
  by = list(Category = event2),
  FUN = sum
)
# Rename the grouping column of both damage summaries from `Category` to `event`.
names(crp1)[1] <- "event"
names(crp2)[1] <- "event"
The following panel plot shows the damage inflicted by various events on property: the first plot shows the crop damage and the second the property damage.
library(ggplot2)
# Print large dollar amounts in fixed rather than scientific notation.
options(scipen = 100)
##library(gridExtra)
# Two stacked bar charts: total crop damage and total property damage per
# event group. The x-axis tick labels are hidden; the fill legend identifies
# the events. `x` is the summed column that aggregate() produced.
plot3 <- ggplot(crp1, aes(x = event, y = x, fill = event)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_blank()) +
  labs(
    x = "event",
    y = "crop damage",
    fill = "Event",
    title = "Crop Damage"
  )
plot4 <- ggplot(crp2, aes(x = event, y = x, fill = event)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_blank()) +
  labs(
    x = "event",
    y = "prop damage",
    fill = "Event",
    title = "Property Damage"
  )
grid.arrange(plot3, plot4, nrow = 2)
The plots clearly show that tornadoes are the major cause of human fatalities closely followed by heat conditions
The injuries plot also shows that tornadoes are chiefly responsible for human injuries
The crop damage plot shows that droughts and floods are the major causes of crop destruction closely followed by snow and hurricanes
The property damage plot shows that flooding is the major cause of property damage followed by hurricanes