Severe Weather in the United States

Economic and Public Health Effects.

Synopsis

This analysis aims to determine which severe weather events have the highest injury and fatality counts and the greatest economic effects in terms of property and crop damage. To answer these questions we will analyze data from the U.S. National Oceanic and Atmospheric Administration’s storm database.

Data Processing:

Download the data from the source URL, save it to a destination file, then read it into memory.

# Fetch the NOAA storm data archive (once) and load it into memory.
# The archive is cached on disk so re-running the analysis does not
# download it again.
storm_url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "Storms.csv.bz2"

# Stormsbz2 holds the integer status code of download.file() (0 = success),
# not the data itself; it stays 0 when the cached archive is reused.
Stormsbz2 <- 0L
if (!file.exists(storm_file)) {
  # mode = "wb" keeps the compressed archive byte-exact on every platform.
  Stormsbz2 <- download.file(storm_url, destfile = storm_file, mode = "wb")
  if (Stormsbz2 != 0L) {
    # Remove any partial file so the next run does not treat it as a cache.
    unlink(storm_file)
    stop("Download of ", storm_url, " failed with status ", Stormsbz2,
         call. = FALSE)
  }
}

# read.csv() decompresses on the fly through the bzfile() connection.
Strms <- read.csv(bzfile(storm_file))


# Derive the calendar year of each event from its begin-date string
# (month/day/year followed by a time of day).
Strms$year <- local({
  begin_dates <- as.Date(Strms$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
  as.numeric(format(begin_dates, "%Y"))
})

After reading in the data and taking a look at its structure with str(), let's see how many records there are for each year.

# Distribution of records across years; used to choose the analysis window.
# The data expression is kept as Strms$year because hist() builds its default
# title and axis label from it.
hist(x = Strms$year, col = "lightblue", breaks = 30)

It looks like 1995 is a good place to start from, as there are so few records per year prior to 1995.

# Keep only events from the first well-populated year onwards.
# which() drops rows whose year is NA (e.g. an unparseable date); a bare
# logical index would turn each NA into an all-NA row, and those rows would
# make the later sum() calls return NA.
first_year <- 1995
Storms <- Strms[which(Strms$year >= first_year), ]

Convert the Crop and Property Damages

Now let's look at which severe weather events have the greatest effects on agriculture and property. First we need to replace the codes for billion (B), million (M), thousand (K), and hundred (H) with their order of magnitude.

# Translate damage-exponent codes into powers of ten.
#
# code: vector of exponent codes (character, factor or numeric).
# Returns a numeric vector of the same length:
#   * letter codes, case-insensitive: H = 2, K = 3, M = 6, B = 9;
#   * codes that already read as numbers are used as the exponent directly;
#   * anything else (blank, "?", "+", "-", ...) gets exponent 0, so the
#     damage figure is taken at face value.
damage_exponent <- function(code) {
  code <- as.character(code)
  letter_exponents <- c(H = "2", K = "3", M = "6", B = "9")
  hit <- match(toupper(code), names(letter_exponents))
  code[!is.na(hit)] <- letter_exponents[hit[!is.na(hit)]]
  # Unrecognised codes are expected here: they coerce to NA and are zeroed
  # on the next line, so the coercion warning carries no information.
  exponent <- suppressWarnings(as.numeric(code))
  exponent[is.na(exponent)] <- 0
  exponent
}

# The raw exponent columns are overwritten with their numeric exponents,
# then combined with the damage figures to give amounts in dollars.
Storms$PROPDMGEXP <- damage_exponent(Storms$PROPDMGEXP)
Storms$PropertyDamage <- Storms$PROPDMG * 10^Storms$PROPDMGEXP

Storms$CROPDMGEXP <- damage_exponent(Storms$CROPDMGEXP)
Storms$CropDamage <- Storms$CROPDMG * 10^Storms$CROPDMGEXP

Find the events with the highest injuries and fatalities

Use the ddply function in the plyr package to find the sums of fatalities and injuries by event type. Order the data for each outcome in descending order, from highest to lowest, and select the first 5 rows.

library(plyr); library(ggplot2); library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:plyr':
## 
##     join
# Total fatalities and injuries for every event type.
SummarizeEventType <- ddply(
  .data = Storms,
  .variables = "EVTYPE",
  .fun = summarize,
  SumOfFatalities = sum(FATALITIES),
  SumOfInjuries = sum(INJURIES)
)

# Five event types with the largest totals for each outcome, largest first.
TopInjuries <- head(
  SummarizeEventType[order(-SummarizeEventType$SumOfInjuries),
                     c("EVTYPE", "SumOfInjuries")],
  n = 5
)
TopFatalities <- head(
  SummarizeEventType[order(-SummarizeEventType$SumOfFatalities),
                     c("EVTYPE", "SumOfFatalities")],
  n = 5
)

Find the events with the most damage to crops and property

Again, use the ddply function to summarize the PropertyDamage and CropDamage by event type. Order by the damage type and select the top 5 rows.

# Total property and crop damage for every event type.
SummarizeDamageType <- ddply(
  .data = Storms,
  .variables = "EVTYPE",
  .fun = summarize,
  SumOfPropertyDamage = sum(PropertyDamage),
  SumOfCropDamage = sum(CropDamage)
)

# Five event types with the largest totals for each damage kind, largest first.
TopPropertyDamage <- head(
  SummarizeDamageType[order(-SummarizeDamageType$SumOfPropertyDamage),
                      c("EVTYPE", "SumOfPropertyDamage")],
  n = 5
)
TopCropDamage <- head(
  SummarizeDamageType[order(-SummarizeDamageType$SumOfCropDamage),
                      c("EVTYPE", "SumOfCropDamage")],
  n = 5
)

Results

Injury and Fatality

Display the 5 event types with the highest total fatalities and the 5 with the highest total injuries.

 # Show the two top-5 casualty tables, fatalities first.
 print(TopFatalities)
 print(TopInjuries)
##             EVTYPE SumOfFatalities
## 112 EXCESSIVE HEAT            1903
## 666        TORNADO            1545
## 134    FLASH FLOOD             934
## 231           HEAT             924
## 358      LIGHTNING             729
##             EVTYPE SumOfInjuries
## 666        TORNADO         21765
## 144          FLOOD          6769
## 112 EXCESSIVE HEAT          6525
## 358      LIGHTNING          4631
## 683      TSTM WIND          3630

We can see in the output above that excessive heat, tornadoes, and flash floods are the most fatal events, and that tornadoes, floods, and excessive heat cause the highest number of injuries. See the figure below.

# Lollipop-style charts: one point per event type, with a hairline bar
# acting as the stem. Events are ordered by their total.
p1a <- ggplot(
  TopInjuries,
  aes(x = reorder(EVTYPE, SumOfInjuries), y = SumOfInjuries)
) +
  geom_point(size = 3, colour = "darkblue") +
  geom_bar(stat = "identity", width = 0.001, colour = "darkblue") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Injuries") +
  ggtitle("Most Prone To Injury") +
  theme_bw()

p1b <- ggplot(
  TopFatalities,
  aes(x = reorder(EVTYPE, SumOfFatalities), y = SumOfFatalities)
) +
  geom_point(size = 3, colour = "darkred") +
  geom_bar(stat = "identity", width = 0.001, colour = "darkred") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Fatalities") +
  ggtitle("Most Prone To Death") +
  theme_bw()

# Stack the injuries panel above the fatalities panel.
grid.arrange(p1a, p1b, ncol = 1)

Property and Crop Damage

Display top 5 events for property and crop damage.

# Show the two top-5 damage tables, property first.
print(TopPropertyDamage)
print(TopCropDamage)
##                EVTYPE SumOfPropertyDamage
## 144             FLOOD        144022037057
## 313 HURRICANE/TYPHOON         69305840000
## 519       STORM SURGE         43193536000
## 666           TORNADO         24935939545
## 134       FLASH FLOOD         16047794571
##                EVTYPE SumOfCropDamage
## 84            DROUGHT     13922066000
## 144             FLOOD      5422810400
## 306         HURRICANE      2741410000
## 206              HAIL      2614127070
## 313 HURRICANE/TYPHOON      2607872800

We can see from the output above that the most expensive severe weather events to __property__ were floods, hurricanes/typhoons, and storm surges. The most expensive severe weather events to crops were droughts, floods, and hurricanes. See the figure below.

# Lollipop-style charts: one point per event type, with a hairline bar
# acting as the stem. Events are ordered by their total damage.
p3a <- ggplot(
  TopPropertyDamage,
  aes(x = reorder(EVTYPE, SumOfPropertyDamage), y = SumOfPropertyDamage)
) +
  geom_point(size = 3, colour = "darkblue") +
  geom_bar(stat = "identity", width = 0.001, colour = "blue") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Property Damage") +
  ggtitle("Most Expensive To Property") +
  theme_bw()

p3b <- ggplot(
  TopCropDamage,
  aes(x = reorder(EVTYPE, SumOfCropDamage), y = SumOfCropDamage)
) +
  geom_point(size = 3, colour = "darkred") +
  geom_bar(stat = "identity", width = 0.001, colour = "red") +
  coord_flip() +
  xlab("EVENT") +
  ylab("Sum Of Crop Damage") +
  ggtitle("Most Expensive To Crops") +
  theme_bw()

# Stack the property panel above the crop panel.
grid.arrange(p3a, p3b, ncol = 1)