Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
library(knitr)
library(ggplot2)
library(plyr)
require(gridExtra)
## Loading required package: gridExtra
## Loading required package: grid
opts_chunk$set(echo=TRUE)
setwd("C:/Users/Brian/Documents")
data <- read.csv("repdata-data-StormData.csv")
head(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
if (dim(data)[2] == 37) {
data$YEAR <- as.numeric(format(as.Date(data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"), "%Y"))
}
hist(data$YEAR, breaks = 61, xlab="Year", main="Histogram of Events by Year", col="red")
abline(v=1993,col=3,lty=3)
abline(h=20000,col=3,lty=3)
# subset the data to health and economic impact analysis against weather
# event
mycol <- c("YEAR", "EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
mydata <- data[mycol]
studydata <- mydata[mydata$YEAR >= 1993, ]
head(studydata)
## YEAR EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP
## 187560 1995 FREEZING RAIN 0 0 0
## 187561 1995 SNOW 0 0 0
## 187562 1994 ICE STORM/FLASH FLOOD 0 2 0
## 187563 1995 SNOW/ICE 0 0 0
## 187564 1993 WINTER STORM 4 0 5 B
## 187565 1995 SNOW/ICE 0 0 0
## CROPDMG CROPDMGEXP
## 187560 0
## 187561 0
## 187562 0
## 187563 0
## 187564 0
## 187565 0
convert <- function(dataset = studydata, fieldName, newFieldName) {
totalLen <- dim(dataset)[2]
index <- which(colnames(dataset) == fieldName)
dataset[, index] <- as.character(dataset[, index])
logic <- !is.na(toupper(dataset[, index]))
dataset[logic & toupper(dataset[, index]) == "B", index] <- "9"
dataset[logic & toupper(dataset[, index]) == "M", index] <- "6"
dataset[logic & toupper(dataset[, index]) == "K", index] <- "3"
dataset[logic & toupper(dataset[, index]) == "H", index] <- "2"
dataset[logic & toupper(dataset[, index]) == "", index] <- "0"
dataset[, index] <- as.numeric(dataset[, index])
dataset[is.na(dataset[, index]), index] <- 0
dataset <- cbind(dataset, dataset[, index - 1] * 10^dataset[, index])
names(dataset)[totalLen + 1] <- newFieldName
return(dataset)
}
studydata <- convert(studydata, "PROPDMGEXP", "propertyDamage")
## Warning in convert(studydata, "PROPDMGEXP", "propertyDamage"): NAs
## introduced by coercion
studydata <- convert(studydata, "CROPDMGEXP", "cropDamage")
## Warning in convert(studydata, "CROPDMGEXP", "cropDamage"): NAs introduced
## by coercion
head(studydata)
## YEAR EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP
## 187560 1995 FREEZING RAIN 0 0 0 0
## 187561 1995 SNOW 0 0 0 0
## 187562 1994 ICE STORM/FLASH FLOOD 0 2 0 0
## 187563 1995 SNOW/ICE 0 0 0 0
## 187564 1993 WINTER STORM 4 0 5 9
## 187565 1995 SNOW/ICE 0 0 0 0
## CROPDMG CROPDMGEXP propertyDamage cropDamage
## 187560 0 0 0e+00 0
## 187561 0 0 0e+00 0
## 187562 0 0 0e+00 0
## 187563 0 0 0e+00 0
## 187564 0 0 5e+09 0
## 187565 0 0 0e+00 0
# aggregate the data by event
fatal <- aggregate(FATALITIES ~ EVTYPE, data = studydata, FUN = sum)
injury <- aggregate(INJURIES ~ EVTYPE, data = studydata, FUN = sum)
# get top 10 event with highest fatalities
fatal10 <- fatal[order(-fatal$FATALITIES), ][1:10, ]
# get top 10 event with highest injuries
injury10 <- injury[order(-injury$INJURIES), ][1:10, ]
fatal10
## EVTYPE FATALITIES
## 130 EXCESSIVE HEAT 1903
## 834 TORNADO 1621
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 856 TSTM WIND 241
## 19 AVALANCHE 224
injury10
## EVTYPE INJURIES
## 834 TORNADO 23310
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 856 TSTM WIND 3631
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 972 WINTER STORM 1321
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(fatal10$FATALITIES, las = 3, names.arg = fatal10$EVTYPE, main = "Weather Events With Highest Fatalities",
ylab = "Number of Fatalities", col = "purple")
barplot(injury10$INJURIES, las = 3, names.arg = injury10$EVTYPE, main = "Weather Events With Highest Injuries",
ylab = "Number of Injuries", col = "blue")
# aggregate the data by event
property <- aggregate(propertyDamage ~ EVTYPE, data = studydata, FUN = sum)
crop <- aggregate(cropDamage ~ EVTYPE, data = studydata, FUN = sum)
# get top 10 event with highest property damage
property10 <- property[order(-property$propertyDamage), ][1:10, ]
# get top 10 event with highest crop damage
crop10 <- crop[order(-crop$cropDamage), ][1:10, ]
property10
## EVTYPE propertyDamage
## 170 FLOOD 144657709807
## 411 HURRICANE/TYPHOON 69305840000
## 670 STORM SURGE 43323536000
## 834 TORNADO 26349182107
## 153 FLASH FLOOD 16822673979
## 244 HAIL 15735267513
## 402 HURRICANE 11868319010
## 848 TROPICAL STORM 7703890550
## 972 WINTER STORM 6688497251
## 359 HIGH WIND 5270046295
crop10
## EVTYPE cropDamage
## 95 DROUGHT 13972566000
## 170 FLOOD 5661968450
## 590 RIVER FLOOD 5029459000
## 427 ICE STORM 5022113500
## 244 HAIL 3025954473
## 402 HURRICANE 2741910000
## 411 HURRICANE/TYPHOON 2607872800
## 153 FLASH FLOOD 1421317100
## 140 EXTREME COLD 1292973000
## 212 FROST/FREEZE 1094086000
propertyPlot <- qplot(EVTYPE, data = property10, weight = propertyDamage, geom = "bar", binwidth = 1, color="lightgreen") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_y_continuous("Property Damage in US dollars") + xlab("Severe Weather Type") + ggtitle("Total Property Damage by\n Severe Weather Events in\n the U.S. from 1993 - 2011")
cropPlot<- qplot(EVTYPE, data = crop10, weight = cropDamage, geom = "bar", binwidth = 1, color="brown") + theme(axis.text.x = element_text(angle = 45, hjust = 1)) + scale_y_continuous("Crop Damage in US dollars") + xlab("Severe Weather Type") + ggtitle("Total Crop Damage by\n Severe Weather Events in\n the U.S. from 1993 - 2011")
grid.arrange(propertyPlot, cropPlot, ncol = 2)