Storms and other sever weather events can cause both public and eonomic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involved exploring the U.S. National Oceanic and Atmospheric Adminnistration’s (NOAA) storm database to determine the severe weather events that had the greatest health and economic impacts.
library(ggplot2)
library(knitr)
The data was downloaded from the NOAA database and loaded into R as a data frame.
##URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStrormData.csv.bz2"
destfile <- "C:/Users/Dave/Documents/DataScienceSpecialization/ReproducibleResearch/Project02/Working/StormData.csv"
##download.file(URL,destfile)
StormDataRaw <- read.csv((destfile), strip.white = TRUE)
On examining the data it was determined that only select columns were needed for the analysis and that data prior to 1996 was incomplete. In addition the Event Type data column was formated to uppper case as the case of the events was not always consistent and could cause problems with summarizing the data.
## Convert BGN_DATE to date type
StormDataRaw$BGN_DATE <- as.Date(StormDataRaw$BGN_DATE, format = "%m/%d/%Y")
## Subset the data on health and damage relevant columns
StormData <- subset(StormDataRaw, select = c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP", "BGN_DATE", "REMARKS"))
StromData <- subset(StormData, StormData$BGN_DATE > as.Date("1995-12-31"))
## Convert EVTYPE to upper case
StormData$EVTYPE <- toupper(StormData$EVTYPE)
Property and crop damage columns recorded economic data in two columns. PROPDMGEXP and CROPDMGEXP recorded the modifier of the damage columns as characters. It was necessary to convert the modifier to numeric and then calculate the economic damage in U.S. dollars.
## Calculate DMG in US dollars
format <- function(x){
x <- as.character(x)
ifelse (x == "H", as.numeric(100),
ifelse (x == "K", as.numeric(1000),
ifelse (x == "M", as.numeric(1000000),
ifelse (x == "B", as.numeric(1000000000), 0))))
}
## Calculate property DMG in US dollars
StormData$PROPDMGEXP <- toupper(StormData$PROPDMGEXP)
StormData$PROPDMGEXP <- format(StormData$PROPDMGEXP)
StormData$PDMGUSD <- as.numeric(StormData$PROPDMG*StormData$PROPDMGEXP)
## Calculate crop DMG in US dollars
StormData$CROPDMGEXP <- toupper(StormData$CROPDMGEXP)
StormData$CROPDMGEXP <- format(StormData$CROPDMGEXP)
StormData$CDMGUSD <- as.numeric(StormData$CROPDMG*StormData$CROPDMGEXP)
To prepare the data for plotting, all relevant datasets were summarized by Event Type and records with no impact recorded were excluded.
## Sum of Health and DMG types
PropDMG <- aggregate(PDMGUSD ~ EVTYPE, StormData, FUN = sum)
CropDMG <- aggregate(CDMGUSD ~ EVTYPE, StormData, FUN = sum)
Fatal <- aggregate(FATALITIES ~ EVTYPE, StormData, FUN= sum)
Injury <- aggregate(INJURIES ~ EVTYPE, StormData, FUN = sum)
For the purposes of this analysis, only the top ten events causing the greatest health and economic impact will be used.
PropDMG <- PropDMG[(PropDMG$PDMGUSD > 0),]
CropDMG <- CropDMG[(CropDMG$CDMGUSD > 0),]
Fatal <- Fatal[(Fatal$FATALITIES > 0),]
Injury <- Injury[(Injury$INJURIES > 0),]
PropDMGsorted <- PropDMG[order(PropDMG$PDMGUSD, decreasing = TRUE),]
CropDMGsorted <- CropDMG[order(CropDMG$CDMGUSD, decreasing = TRUE),]
Fatalsorted <- Fatal[order(Fatal$FATALITIES, decreasing = TRUE),]
Injurysorted <- Injury[order(Injury$INJURIES, decreasing = TRUE),]
An R code snippet was taken from the online R Cookbook and used to create a function that plots multiple plots in one panel.
## From R Cookbook (http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/)
# Multiple plot function
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
library(grid)
# Make a list from the ... arguments and plotlist
plots <- c(list(...), plotlist)
numPlots = length(plots)
# If layout is NULL, then use 'cols' to determine layout
if (is.null(layout)) {
# Make the panel
# ncol: Number of columns of plots
# nrow: Number of rows needed, calculated from # of cols
layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
ncol = cols, nrow = ceiling(numPlots/cols))
}
if (numPlots==1) {
print(plots[[1]])
} else {
# Set up the page
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
# Make each plot, in the correct location
for (i in 1:numPlots) {
# Get the i,j matrix positions of the regions that contain this subplot
matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
layout.pos.col = matchidx$col))
}
}
}
pInjury <- ggplot(data = head(Injurysorted,10), aes(x=reorder(EVTYPE, INJURIES), y = INJURIES)) +
geom_bar(fill="blue", stat="identity") + coord_flip() +
ylab("Total number of injuries") + xlab("Event type") +
ggtitle("Injuries") + theme(legend.position="none")
pFatal <- ggplot(data = head(Fatalsorted,10), aes(x=reorder(EVTYPE, FATALITIES), y = FATALITIES)) +
geom_bar(fill = "red", stat="identity") + coord_flip() +
ylab("Total number of fatalities") + xlab("Event Type") + ggtitle("Fatalities") + theme(legend.position="none")
multiplot(pInjury, pFatal, cols = 2)
pPropDMG <- ggplot(data=head(PropDMGsorted,10), aes(x=reorder(EVTYPE, PDMGUSD), y = PDMGUSD)) +
geom_bar(fill = "green", stat="identity") + coord_flip() + ylab("Damage (USD)") + xlab("Event Type") + ggtitle("Property damage (USD)") + theme(legend.position="none")
pCropDMG <- ggplot(data=head(CropDMGsorted,10), aes(x=reorder(EVTYPE,CDMGUSD), y = CDMGUSD)) + geom_bar(fill="orange", stat="identity") + coord_flip() +
ylab("Damage (USD)") + xlab("Event Type") + ggtitle("Crop damage (USD)") + theme(legend.position="none")
multiplot(pPropDMG, pCropDMG, cols = 2)