Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
“The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. You can download the file from the course web site.”
# Fetch the bzip2-compressed storm data (once) and load it into `stormData`.
strmDataZip <- "stormData.csv.bz2"
# Skip the download when a local copy is already present, so re-running the
# analysis does not fetch the archive again.
if (!file.exists(strmDataZip)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = strmDataZip,
    method = "curl",
    mode = "wb" # the archive is binary data, not text
  )
}
stormDataFile <- bzfile(description = strmDataZip, open = "r")
# stringsAsFactors is pinned explicitly: code further down calls
# levels(stormData$EVTYPE), which needs EVTYPE to be a factor, and the default
# for this argument differs between R versions.
# `finally` closes the connection even when read.csv() stops with an error.
stormData <- tryCatch(
  read.csv(stormDataFile, fill = TRUE, header = TRUE, stringsAsFactors = TRUE),
  finally = close(stormDataFile)
)
Process the data into a workable format.
# View() opens a data viewer window, so only call it when the code runs in an
# interactive session; a rendered (non-interactive) run simply skips it.
if (interactive()) {
  View(stormData)
}
Load the R packages used in the analysis:
library(plyr)
## Warning: package 'plyr' was built under R version 3.4.1
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.4
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.4
Across the United States, which types of events (EVTYPE variable) are most harmful with respect to population health?
Variables: event type (EVTYPE),
fatalities and injuries (FATALITIES, INJURIES)
Create a data frame of event type and the combined count of fatalities and injuries.
# Pair every record's event type with its combined casualty count
# (fatalities + injuries).
# data.frame() is used rather than cbind(): cbind() builds a matrix and so
# forces both columns into one common type, which turns the counts into text
# when EVTYPE is a character vector and reduces a factor EVTYPE to its
# integer codes.
StormDataEVFAT <- data.frame(
  EVENT_TYPE = as.factor(stormData$EVTYPE),
  fatal_and_injury = stormData$FATALITIES + stormData$INJURIES
)
Factorise
# Store the event type as a factor taken directly from the source column.
# as.factor() already carries the full set of level labels, so no separate
# `levels<-` step is needed. Copying levels(stormData$EVTYPE) is also unsafe:
# levels() of a character vector is NULL, which cannot replace the labels.
StormDataEVFAT$EVENT_TYPE <- as.factor(stormData$EVTYPE)
Processing the data
# Total casualties per event type. The result column is named inside the
# summarize call, so no positional rename is needed afterwards.
# NOTE: `summary` and `median` below reuse the names of base functions; the
# names are kept unchanged in case other code refers to these objects.
summary <- ddply(
  .data = StormDataEVFAT,
  .(EVENT_TYPE),
  summarize,
  fatal_and_injury = sum(fatal_and_injury)
)
summary$EVENT_TYPE <- as.factor(summary$EVENT_TYPE)

# Rank event types from most to fewest casualties.
ord_summary <- summary[order(summary$fatal_and_injury, decreasing = TRUE), ]

# Keep only the event types whose total exceeds the mean total.
m <- mean(ord_summary$fatal_and_injury)
subfi <- subset(ord_summary, fatal_and_injury > m)

# Individual records whose casualty count exceeds the median of the distinct
# per-record counts.
median <- median(unique(StormDataEVFAT$fatal_and_injury))
subData <- subset(StormDataEVFAT, fatal_and_injury > median)

# One point per above-average event type; x labels are rotated so the event
# names stay readable, and the colour legend is hidden.
ggplot(subfi, aes(EVENT_TYPE, fatal_and_injury)) +
  geom_point(aes(colour = EVENT_TYPE)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  ggtitle("Fatalities and Injuries from Major Disasters ")
# Results: top 10 adverse weather events that cause fatalities and injuries
# Show the ten event types with the most casualties (subfi is already sorted
# in decreasing order).
top10 <- head(subfi, 10)
# Resetting the row names renumbers the rows from 1 and, unlike assigning
# 1:10, still works when fewer than ten rows are available.
rownames(top10) <- NULL
print(top10)
## EVENT_TYPE fatal_and_injury
## 1 TORNADO 96979
## 2 EXCESSIVE HEAT 8428
## 3 TSTM WIND 7461
## 4 FLOOD 7259
## 5 LIGHTNING 6046
## 6 HEAT 3037
## 7 FLASH FLOOD 2755
## 8 ICE STORM 2064
## 9 THUNDERSTORM WIND 1621
## 10 WINTER STORM 1527
Across the United States, which types of events have the greatest economic consequences?
Variables: event type, property damage, and crop damage
Create a data frame for property damage
# Pair every record's event type with its property-damage figure.
# data.frame() is used rather than cbind(): cbind() builds a matrix and so
# forces both columns into one common type, which turns the damage values
# into text when EVTYPE is a character vector.
# NOTE(review): PROPDMG is used as-is. If the dataset stores a magnitude
# multiplier in a separate column (presumably PROPDMGEXP - confirm against the
# NOAA documentation), the sums computed from this column mix units.
StormDataEVPC <- data.frame(
  EVENT_TYPE = as.factor(stormData$EVTYPE),
  Property = stormData$PROPDMG
)
Factorise
# Store the event type as a factor taken directly from the source column.
# as.factor() already carries the full set of level labels, so no separate
# `levels<-` step is needed. Copying levels(stormData$EVTYPE) is also unsafe:
# levels() of a character vector is NULL, which cannot replace the labels.
StormDataEVPC$EVENT_TYPE <- as.factor(stormData$EVTYPE)
Processing the data
# Total property damage per event type. The result column is named inside the
# summarize call, so no positional rename is needed afterwards.
summaryCP <- ddply(
  .data = StormDataEVPC,
  .(EVENT_TYPE),
  summarize,
  Property = sum(Property)
)
summaryCP$EVENT_TYPE <- as.factor(summaryCP$EVENT_TYPE)

# Rank event types from most to least property damage.
ord_summaryCP <- summaryCP[order(summaryCP$Property, decreasing = TRUE), ]

# Keep only the event types whose total exceeds the mean total.
n <- mean(ord_summaryCP$Property)
subpc <- subset(ord_summaryCP, Property > n)

# Individual records whose damage figure exceeds the median of the distinct
# per-record figures.
mediancp <- median(unique(StormDataEVPC$Property))
subDatacp <- subset(StormDataEVPC, Property > mediancp)

# One point per above-average event type; x labels are rotated so the event
# names stay readable, and the colour legend is hidden.
ggplot(subpc, aes(EVENT_TYPE, Property)) +
  geom_point(aes(colour = EVENT_TYPE)) +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  ) +
  ggtitle("Economic Consequences from Major Disasters on Property ")
Top 10 adverse weather events by economic consequences due to property damage
# Show the ten event types with the largest property-damage totals (subpc is
# already sorted in decreasing order).
top10 <- head(subpc, 10)
# Resetting the row names renumbers the rows from 1 and, unlike assigning
# 1:10, still works when fewer than ten rows are available.
rownames(top10) <- NULL
print(top10)
## EVENT_TYPE Property
## 1 TORNADO 3212258.2
## 2 FLASH FLOOD 1420124.6
## 3 TSTM WIND 1335965.6
## 4 FLOOD 899938.5
## 5 THUNDERSTORM WIND 876844.2
## 6 HAIL 688693.4
## 7 LIGHTNING 603351.8
## 8 THUNDERSTORM WINDS 446293.2
## 9 HIGH WIND 324731.6
## 10 WINTER STORM 132720.6
Create a data frame for crop damage
# Pair every record's event type with its crop-damage figure. The name
# StormDataEVPC is reused, so this replaces the property-damage frame built
# earlier under the same name.
# data.frame() is used rather than cbind(): cbind() builds a matrix and so
# forces both columns into one common type, which turns the damage values
# into text when EVTYPE is a character vector.
# NOTE(review): CROPDMG is used as-is. If the dataset stores a magnitude
# multiplier in a separate column (presumably CROPDMGEXP - confirm against the
# NOAA documentation), the sums computed from this column mix units.
StormDataEVPC <- data.frame(
  EVENT_TYPE = as.factor(stormData$EVTYPE),
  Corp = stormData$CROPDMG
)
Factorise
# Store the event type as a factor taken directly from the source column.
# as.factor() already carries the full set of level labels, so no separate
# `levels<-` step is needed. Copying levels(stormData$EVTYPE) is also unsafe:
# levels() of a character vector is NULL, which cannot replace the labels.
StormDataEVPC$EVENT_TYPE <- as.factor(stormData$EVTYPE)
Processing the data
# Total crop damage per event type. The result column is named inside the
# summarize call, so no positional rename is needed afterwards.
summaryCR <- ddply(
  .data = StormDataEVPC,
  .(EVENT_TYPE),
  summarize,
  Corp = sum(Corp)
)
summaryCR$EVENT_TYPE <- as.factor(summaryCR$EVENT_TYPE)

# Rank event types from most to least crop damage.
ord_summaryCR <- summaryCR[order(summaryCR$Corp, decreasing = TRUE), ]

# Keep only the event types whose total exceeds the mean total.
n <- mean(ord_summaryCR$Corp)
subcr <- subset(ord_summaryCR, Corp > n)

# Individual records whose crop-damage figure exceeds the median of the
# distinct per-record figures. The filter uses the crop median computed on
# the line above (mediancr), not the property-damage median (mediancp).
mediancr <- median(unique(StormDataEVPC$Corp))
subDatacr <- subset(StormDataEVPC, Corp > mediancr)
Generating the Plot
# Point plot of the above-average crop-damage totals: one coloured point per
# event type, legend hidden, x-axis labels rotated to vertical.
ggplot(data = subcr, mapping = aes(x = EVENT_TYPE, y = Corp)) +
  geom_point(mapping = aes(colour = EVENT_TYPE)) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "none"
  ) +
  ggtitle("Economic Consequences from Major Disasters on Corp ")
Top 10 adverse weather events by economic consequences due to crop damage
# Show the ten event types with the largest crop-damage totals (subcr is
# already sorted in decreasing order).
top10 <- head(subcr, 10)
# Resetting the row names renumbers the rows from 1 and, unlike assigning
# 1:10, still works when fewer than ten rows are available.
rownames(top10) <- NULL
print(top10)
## EVENT_TYPE Corp
## 1 HAIL 579596.28
## 2 FLASH FLOOD 179200.46
## 3 FLOOD 168037.88
## 4 TSTM WIND 109202.60
## 5 TORNADO 100018.52
## 6 THUNDERSTORM WIND 66791.45
## 7 DROUGHT 33898.62
## 8 THUNDERSTORM WINDS 18684.93
## 9 HIGH WIND 17283.21
## 10 HEAVY RAIN 11122.80