Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Download the data set:

“The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. You can download the file from the course web site.”

# Fetch the bzip2-compressed storm data set, skipping the download when a
# local copy already exists. mode = "wb" keeps the binary archive intact on
# platforms that distinguish text and binary writes, and the default download
# method avoids depending on an external curl executable.
if (!file.exists("stormData.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "stormData.csv.bz2",
    mode = "wb"
  )
}

        


  # Read the bzip2-compressed CSV through an explicit connection.
  # stringsAsFactors = TRUE makes text columns such as EVTYPE factors on every
  # R version (the read.csv default changed in R 4.0.0).
  strmDataZip <- "stormData.csv.bz2"
  stormDataFile <- bzfile(description = strmDataZip, open = "r")
  # tryCatch(finally = ) closes the connection even when parsing fails.
  stormData <- tryCatch(
    read.csv(stormDataFile, fill = TRUE, header = TRUE,
             stringsAsFactors = TRUE),
    finally = close(stormDataFile)
  )

Data Preparation

Process the data into a workable format.

# View() opens an interactive data viewer, so only call it from an interactive
# session; it is skipped when the script runs non-interactively.
if (interactive()) {
  View(stormData)
}

Load R packages:

library(plyr)
## Warning: package 'plyr' was built under R version 3.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.4
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4

Question 1:

Across the United States, which types of events (EVTYPE variable) are most harmful with respect to population health?

Variables: event type (EVTYPE), fatalities (FATALITIES) and injuries (INJURIES)

Create a data frame of event type and the combined count of fatalities and injuries.

# Pair each event type with its combined casualty count (fatalities plus
# injuries). data.frame() is used instead of cbind() because cbind() builds a
# matrix and coerces both columns to one type: event labels become integer
# codes when EVTYPE is a factor, and the counts become text when EVTYPE is a
# character vector.
StormDataEVFAT <- data.frame(
  EVENT_TYPE = stormData$EVTYPE,
  fatal_and_injury = stormData$FATALITIES + stormData$INJURIES
)

Factorise

# Store the event type as a factor so it can serve as the grouping key.
# as.factor() returns a factor unchanged and derives levels from the distinct
# values of a character vector, so no separate levels<- assignment is needed.
# That assignment fails when EVTYPE is a character vector, because levels()
# is then NULL.
StormDataEVFAT$EVENT_TYPE <- as.factor(stormData$EVTYPE)

Processing the data

# Total casualties per event type. The aggregate column is named inside the
# ddply() call, and plyr::summarise is referenced explicitly because dplyr is
# attached after plyr and masks summarise/summarize.
# NOTE(review): the names `summary` and `median` shadow base/stats functions;
# they are kept so any later code that reads these objects still works.
summary <- ddply(
  StormDataEVFAT,
  .(EVENT_TYPE),
  plyr::summarise,
  fatal_and_injury = sum(fatal_and_injury)
)
# The grouping column keeps its own labels; they are not overwritten by
# position from another factor, which would silently mislabel rows if the two
# level sets ever differed.
summary$EVENT_TYPE <- as.factor(summary$EVENT_TYPE)
ord_summary <- summary[order(summary$fatal_and_injury, decreasing = TRUE), ]

# Keep only the event types whose total exceeds the mean total.
m <- mean(ord_summary$fatal_and_injury)
subfi <- subset(ord_summary, fatal_and_injury > m)

# Record-level subset: rows whose casualty count exceeds the median of the
# distinct per-record counts.
median <- median(unique(StormDataEVFAT$fatal_and_injury))
subData <- subset(StormDataEVFAT, fatal_and_injury > median)

Plot : Fatalities and Injuries from Major Disasters

# Point plot of total casualties for the above-average event types. The legend
# is hidden and the x-axis labels are rotated 90 degrees.
ggplot(data = subfi, mapping = aes(x = EVENT_TYPE, y = fatal_and_injury)) +
  geom_point(mapping = aes(colour = EVENT_TYPE)) +
  ggtitle('Fatalities and Injuries from Major Disasters ') +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

# Results: top 10 adverse weather events that cause fatalities and injuries

# First ten rows of subfi, i.e. the event types with the highest casualty
# totals. Rows are renumbered from the actual row count, so this also works
# when subfi holds fewer than ten event types (1:10 would then error).
top10 <- head(subfi, 10)
rownames(top10) <- seq_len(nrow(top10))
print(top10)
##           EVENT_TYPE fatal_and_injury
## 1            TORNADO            96979
## 2     EXCESSIVE HEAT             8428
## 3          TSTM WIND             7461
## 4              FLOOD             7259
## 5          LIGHTNING             6046
## 6               HEAT             3037
## 7        FLASH FLOOD             2755
## 8          ICE STORM             2064
## 9  THUNDERSTORM WIND             1621
## 10      WINTER STORM             1527

Question 2

Across the United States, which types of events have the greatest economic consequences?

Variables: event type, property damage and crop damage

Create a data frame of event type and property damage

# Pair each event type with its recorded property damage. data.frame() is
# used instead of cbind() because cbind() builds a matrix and coerces both
# columns to one type: event labels become integer codes when EVTYPE is a
# factor, and the damage values become text when EVTYPE is a character vector.
# NOTE(review): PROPDMG is taken as-is. The NOAA data set presumably carries a
# PROPDMGEXP magnitude column (K/M/B) that scales these values; confirm
# whether it should be applied before summing.
StormDataEVPC <- data.frame(
  EVENT_TYPE = stormData$EVTYPE,
  Property = stormData$PROPDMG
)

Factorise

# Store the event type as a factor so it can serve as the grouping key.
# as.factor() returns a factor unchanged and derives levels from the distinct
# values of a character vector, so no separate levels<- assignment is needed.
# That assignment fails when EVTYPE is a character vector, because levels()
# is then NULL.
StormDataEVPC$EVENT_TYPE <- as.factor(stormData$EVTYPE)

Processing the data

# Total recorded property damage per event type. The aggregate column is
# named inside the ddply() call, and plyr::summarise is referenced explicitly
# because dplyr is attached after plyr and masks summarise/summarize.
summaryCP <- ddply(
  StormDataEVPC,
  .(EVENT_TYPE),
  plyr::summarise,
  Property = sum(Property)
)
# The grouping column keeps its own labels; they are not overwritten by
# position from another factor, which would silently mislabel rows if the two
# level sets ever differed.
summaryCP$EVENT_TYPE <- as.factor(summaryCP$EVENT_TYPE)
ord_summaryCP <- summaryCP[order(summaryCP$Property, decreasing = TRUE), ]

# Keep only the event types whose total exceeds the mean total.
n <- mean(ord_summaryCP$Property)
subpc <- subset(ord_summaryCP, Property > n)

# Record-level subset: rows whose property damage exceeds the median of the
# distinct per-record values.
mediancp <- median(unique(StormDataEVPC$Property))
subDatacp <- subset(StormDataEVPC, Property > mediancp)

Plot 2: Economic Consequences from Major Disasters on Property

# Point plot of total recorded property damage for the above-average event
# types. The legend is hidden and the x-axis labels are rotated 90 degrees.
ggplot(data = subpc, mapping = aes(x = EVENT_TYPE, y = Property)) +
  geom_point(mapping = aes(colour = EVENT_TYPE)) +
  ggtitle('Economic Consequences from Major Disasters on Property ') +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )

Top 10 adverse weather events that cause economic consequences due to property damage

# First ten rows of subpc, i.e. the event types with the highest property
# damage totals. Rows are renumbered from the actual row count, so this also
# works when subpc holds fewer than ten event types (1:10 would then error).
top10 <- head(subpc, 10)
rownames(top10) <- seq_len(nrow(top10))
print(top10)
##            EVENT_TYPE  Property
## 1             TORNADO 3212258.2
## 2         FLASH FLOOD 1420124.6
## 3           TSTM WIND 1335965.6
## 4               FLOOD  899938.5
## 5   THUNDERSTORM WIND  876844.2
## 6                HAIL  688693.4
## 7           LIGHTNING  603351.8
## 8  THUNDERSTORM WINDS  446293.2
## 9           HIGH WIND  324731.6
## 10       WINTER STORM  132720.6

Plot 3: Economic Consequences from Major Disasters on Crops

Create a data frame of event type and crop damage

# Pair each event type with its recorded crop damage. data.frame() is used
# instead of cbind() because cbind() builds a matrix and coerces both columns
# to one type: event labels become integer codes when EVTYPE is a factor, and
# the damage values become text when EVTYPE is a character vector.
# The column keeps its existing name 'Corp' (crop damage) because the
# following steps refer to it by that name.
# NOTE(review): CROPDMG is taken as-is. The NOAA data set presumably carries a
# CROPDMGEXP magnitude column (K/M/B) that scales these values; confirm
# whether it should be applied before summing.
StormDataEVPC <- data.frame(
  EVENT_TYPE = stormData$EVTYPE,
  Corp = stormData$CROPDMG
)

Factorise

# Store the event type as a factor so it can serve as the grouping key.
# as.factor() returns a factor unchanged and derives levels from the distinct
# values of a character vector, so no separate levels<- assignment is needed.
# That assignment fails when EVTYPE is a character vector, because levels()
# is then NULL.
StormDataEVPC$EVENT_TYPE <- as.factor(stormData$EVTYPE)

Processing the data

# Total recorded crop damage per event type ('Corp' is the existing column
# name for crop damage). The aggregate column is named inside the ddply()
# call, and plyr::summarise is referenced explicitly because dplyr is attached
# after plyr and masks summarise/summarize.
summaryCR <- ddply(
  StormDataEVPC,
  .(EVENT_TYPE),
  plyr::summarise,
  Corp = sum(Corp)
)
# The grouping column keeps its own labels; they are not overwritten by
# position from another factor, which would silently mislabel rows if the two
# level sets ever differed.
summaryCR$EVENT_TYPE <- as.factor(summaryCR$EVENT_TYPE)
ord_summaryCR <- summaryCR[order(summaryCR$Corp, decreasing = TRUE), ]

# Keep only the event types whose total exceeds the mean total.
n <- mean(ord_summaryCR$Corp)
subcr <- subset(ord_summaryCR, Corp > n)

# Record-level subset: rows whose crop damage exceeds the median of the
# distinct per-record crop values. The filter uses mediancr, the crop median
# computed here, not the property-damage median mediancp.
mediancr <- median(unique(StormDataEVPC$Corp))
subDatacr <- subset(StormDataEVPC, Corp > mediancr)

Generating the Plot

# Point plot of total recorded crop damage for the above-average event types.
# The legend is hidden and the x-axis labels are rotated 90 degrees. The title
# and y-axis label spell out "Crop"; the data column itself is named 'Corp'.
ggplot(subcr, aes(EVENT_TYPE, Corp)) +
  geom_point(aes(colour = EVENT_TYPE)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  ylab("Crop damage") +
  ggtitle("Economic Consequences from Major Disasters on Crops")

Top 10 adverse weather events that cause economic consequences due to crop damage

# First ten rows of subcr, i.e. the event types with the highest crop damage
# totals. Rows are renumbered from the actual row count, so this also works
# when subcr holds fewer than ten event types (1:10 would then error).
top10 <- head(subcr, 10)
rownames(top10) <- seq_len(nrow(top10))
print(top10)
##            EVENT_TYPE      Corp
## 1                HAIL 579596.28
## 2         FLASH FLOOD 179200.46
## 3               FLOOD 168037.88
## 4           TSTM WIND 109202.60
## 5             TORNADO 100018.52
## 6   THUNDERSTORM WIND  66791.45
## 7             DROUGHT  33898.62
## 8  THUNDERSTORM WINDS  18684.93
## 9           HIGH WIND  17283.21
## 10         HEAVY RAIN  11122.80