This analysis aims to determine which severe weather events have the highest injury and fatality rates and the greatest economic effects in terms of property and crop damage. To answer these questions, we will analyze data from the U.S. National Oceanic and Atmospheric Administration’s storm database.
Download the data over HTTP and save it to a local destination file, then read it into memory.
# Fetch the NOAA storm data archive and load it into memory.
# The archive is downloaded only when no local copy exists, so re-running the
# analysis does not hit the network again; delete the local file to force a
# fresh download.
storm_url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "Storms.csv.bz2"
# Stormsbz2 holds the download.file() status code; 0 means success (or that the
# cached copy was reused).
Stormsbz2 <- 0L
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive byte-exact on every platform.
  Stormsbz2 <- download.file(storm_url, destfile = storm_file, mode = "wb")
  if (Stormsbz2 != 0) {
    # Do not leave a partial file behind that the next run would treat as valid.
    unlink(storm_file)
    stop("Download of ", storm_url, " failed with status ", Stormsbz2,
         call. = FALSE)
  }
}
# bzfile() lets read.csv() read the compressed file directly.
Strms <- read.csv(bzfile(storm_file))
# Derive a numeric year column from BGN_DATE, parsed as
# month/day/year hour:minute:second.
Strms$year <- as.numeric(format(as.Date(Strms$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"), "%Y"))
After reading in the data and taking a look at its structure (with `str`), let's see how many records there are for each year.
# Histogram of the number of records per year (about 30 bins requested), used
# to judge from which year onward the data set is well populated.
hist(Strms$year, breaks = 30, col="lightblue")
It looks like 1995 is a good place to start from, as there are so few records per year prior to 1995.
# Keep only the events recorded from 1995 onward.
# which() drops rows whose year is NA; a bare logical index would instead
# return an all-NA row for each of them, and those rows would turn the later
# per-event sums into NA.
Storms <- Strms[which(Strms$year >= 1995), ]
Now let's look at which severe weather events have the greatest effects on agriculture and property. First we need to replace the codes for billion (B), million (M), thousand (K), and hundred (H) with their order of magnitude.
# Translate damage-magnitude codes into powers of ten.
#
# Letter codes are matched case-insensitively: B = 9, M = 6, K = 3, H = 2.
# Codes that are already numeric are used as the exponent directly. Anything
# else (for example a blank code or NA) becomes 0, i.e. no scaling.
#
# code: character or factor vector of exponent codes.
# Returns a numeric vector of exponents with the same length as `code`.
exponent_power <- function(code) {
  code <- toupper(as.character(code))
  letter_power <- c(B = 9, M = 6, K = 3, H = 2)
  # Look up the letter codes; every other code comes back as NA.
  power <- unname(letter_power[code])
  unmapped <- is.na(power)
  # Non-numeric leftovers become NA here by design, so silence only this
  # single coercion instead of letting a warning leak into the report.
  power[unmapped] <- suppressWarnings(as.numeric(code[unmapped]))
  power[is.na(power)] <- 0
  power
}

# Replace each exponent code column with its numeric exponent, then scale the
# raw damage figures into absolute amounts.
Storms$PROPDMGEXP <- exponent_power(Storms$PROPDMGEXP)
Storms$PropertyDamage <- Storms$PROPDMG * 10^Storms$PROPDMGEXP
Storms$CROPDMGEXP <- exponent_power(Storms$CROPDMGEXP)
Storms$CropDamage <- Storms$CROPDMG * 10^Storms$CROPDMGEXP
Use the ddply function in the plyr package to find the sums of fatalities and injuries by event type. Order the data for each outcome in descending order, from highest to lowest, and select the first 5 rows.
library(plyr); library(ggplot2); library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:plyr':
##
## join
# Total fatalities and injuries for every event type.
SummarizeEventType <- ddply(
  .data = Storms,
  .variables = "EVTYPE",
  .fun = summarize,
  SumOfFatalities = sum(FATALITIES),
  SumOfInjuries = sum(INJURIES)
)

# Row orderings that put the largest totals first.
injury_rank <- order(-SummarizeEventType$SumOfInjuries)
fatality_rank <- order(-SummarizeEventType$SumOfFatalities)

# Five worst event types per outcome, keeping only the event name and the
# ranked total.
TopInjuries <- head(
  SummarizeEventType[injury_rank, c("EVTYPE", "SumOfInjuries")],
  n = 5
)
TopFatalities <- head(
  SummarizeEventType[fatality_rank, c("EVTYPE", "SumOfFatalities")],
  n = 5
)
Again, use the ddply function to summarize PropertyDamage and CropDamage by event type. Order by each damage total in descending order and select the top 5 rows.
# Total property and crop damage for every event type.
SummarizeDamageType <- ddply(
  .data = Storms,
  .variables = "EVTYPE",
  .fun = summarize,
  SumOfPropertyDamage = sum(PropertyDamage),
  SumOfCropDamage = sum(CropDamage)
)

# Row orderings that put the costliest totals first.
property_rank <- order(-SummarizeDamageType$SumOfPropertyDamage)
crop_rank <- order(-SummarizeDamageType$SumOfCropDamage)

# Five costliest event types per damage category, keeping only the event name
# and the ranked total.
TopPropertyDamage <- head(
  SummarizeDamageType[property_rank, c("EVTYPE", "SumOfPropertyDamage")],
  n = 5
)
TopCropDamage <- head(
  SummarizeDamageType[crop_rank, c("EVTYPE", "SumOfCropDamage")],
  n = 5
)
Display the top 5 event types by total fatalities and by total injuries.
# Auto-print the top-5 fatalities table, then the top-5 injuries table.
TopFatalities; TopInjuries
## EVTYPE SumOfFatalities
## 112 EXCESSIVE HEAT 1903
## 666 TORNADO 1545
## 134 FLASH FLOOD 934
## 231 HEAT 924
## 358 LIGHTNING 729
## EVTYPE SumOfInjuries
## 666 TORNADO 21765
## 144 FLOOD 6769
## 112 EXCESSIVE HEAT 6525
## 358 LIGHTNING 4631
## 683 TSTM WIND 3630
We can see in the output above that excessive heat, tornadoes, and flash floods are the most fatal events, and that tornadoes, floods, and excessive heat cause the highest numbers of injuries. See the figure below.
# Lollipop-style chart of the five event types with the most injuries: a point
# marks each total and a near-zero-width bar draws the stem. Events are sorted
# by their total and the axes are flipped so the event names read horizontally.
p1a <- ggplot(
  TopInjuries,
  aes(x = reorder(EVTYPE, SumOfInjuries), y = SumOfInjuries)
) +
  geom_point(size = 3, colour = "darkblue") +
  geom_bar(stat = "identity", width = 0.001, colour = "darkblue") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Injuries") +
  ggtitle("Most Prone To Injury") +
  theme_bw()

# Same chart for the five event types with the most fatalities.
p1b <- ggplot(
  TopFatalities,
  aes(x = reorder(EVTYPE, SumOfFatalities), y = SumOfFatalities)
) +
  geom_point(size = 3, colour = "darkred") +
  geom_bar(stat = "identity", width = 0.001, colour = "darkred") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Fatalities") +
  ggtitle("Most Prone To Death") +
  theme_bw()

# Stack the two charts vertically in a single figure.
grid.arrange(p1a, p1b, ncol = 1)
Display the top 5 event types for property damage and for crop damage.
# Auto-print the top-5 property damage table, then the top-5 crop damage table.
TopPropertyDamage; TopCropDamage
## EVTYPE SumOfPropertyDamage
## 144 FLOOD 144022037057
## 313 HURRICANE/TYPHOON 69305840000
## 519 STORM SURGE 43193536000
## 666 TORNADO 24935939545
## 134 FLASH FLOOD 16047794571
## EVTYPE SumOfCropDamage
## 84 DROUGHT 13922066000
## 144 FLOOD 5422810400
## 306 HURRICANE 2741410000
## 206 HAIL 2614127070
## 313 HURRICANE/TYPHOON 2607872800
We can see from the output above that the most expensive severe weather events to __property__ were floods, hurricanes/typhoons, and storm surges. The most expensive severe weather events to crops were droughts, floods, and hurricanes. See the figure below.
# Lollipop-style chart of the five event types with the highest property
# damage: a point marks each total and a near-zero-width bar draws the stem.
# Events are sorted by their total and the axes are flipped so the event names
# read horizontally.
p3a <- ggplot(
  TopPropertyDamage,
  aes(x = reorder(EVTYPE, SumOfPropertyDamage), y = SumOfPropertyDamage)
) +
  geom_point(size = 3, colour = "darkblue") +
  geom_bar(stat = "identity", width = 0.001, colour = "blue") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Property Damage") +
  ggtitle("Most Expensive To Property") +
  theme_bw()

# Same chart for the five event types with the highest crop damage.
p3b <- ggplot(
  TopCropDamage,
  aes(x = reorder(EVTYPE, SumOfCropDamage), y = SumOfCropDamage)
) +
  geom_point(size = 3, colour = "darkred") +
  geom_bar(stat = "identity", width = 0.001, colour = "red") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Crop Damage") +
  ggtitle("Most Expensive To Crops") +
  theme_bw()

# Stack the two charts vertically in a single figure.
grid.arrange(p3a, p3b, ncol = 1)