Created by Varunesh Rai on March 22, 2015
In this report, we aim to analyze the impact of different weather events on public health and economy based on the storm database collected from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) from 1950 - 2011. We will use the estimates of fatalities, injuries, property and crop damage to decide which types of event are most harmful to the population health and economy.
echo=TRUE
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
## Loading required package: grid
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.0 (2015-02-19) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.19.0 (2015-02-27) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
##
## R.utils v2.0.0 (2015-02-28) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
if (!"repdata-data-StormData.csv" %in% dir())
{
download.file("http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", destfile="repdata-data-StormData.csv.bz2")
bunzip2("repdata-data-StormData.csv.bz2", overwrite=T, remove=F)
}
data<-read.csv(file="repdata-data-StormData.csv",header=TRUE,stringsAsFactor=FALSE)
options(scipen = 999)
data_health<-aggregate(data[,c("FATALITIES","INJURIES")],list(data$EVTYPE),sum)
names(data_health)[1]<-"EVTYPE"
health_1<-select(data_health,EVTYPE,FATALITIES)
health_1<-arrange(health_1,desc(FATALITIES))
health_1<-head(health_1,n=10L)
health_2<-select(data_health,EVTYPE,INJURIES)
health_2<-arrange(health_2,desc(INJURIES))
health_2<-head(health_2,n=10L)
fig1<-ggplot(health_1,aes(x=EVTYPE,weight=FATALITIES)) + xlab("Weather Event Type") + ylab("Number of Fatalities") +
geom_bar(aes(x=EVTYPE,y=FATALITIES),stat="identity",fill="steelblue") + theme(axis.text.x = element_text(angle = 45,hjust = 1)) +
ggtitle("Total Fatalities by Severe Weather")
fig2<-ggplot(health_2,aes(x=EVTYPE,weight=INJURIES)) + xlab("Weather Event Type") + ylab("Number of Injuries") +
geom_bar(aes(x=EVTYPE,y=INJURIES),stat="identity",fill="steelblue") + theme(axis.text.x = element_text(angle = 45,hjust = 1)) +
ggtitle("Total Injuries by Severe Weather")
grid.arrange(fig1, fig2, ncol = 2)
TORNADO cause maximum number of fatalities and injuries.
options(scipen = 999)
data_economy<-select(data, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
economy_1<-data_economy[toupper(data_economy$PROPDMGEXP)=="H" | toupper(data_economy$PROPDMGEXP)=="K" |
toupper(data_economy$PROPDMGEXP)=="M" |toupper(data_economy$PROPDMGEXP)=="B" |
toupper(data_economy$PROPDMGEXP)=="" | is.na(data_economy$PROPDMGEXP),c("EVTYPE", "PROPDMG", "PROPDMGEXP")]
economy_2<-data_economy[toupper(data_economy$CROPDMGEXP)=="H" | toupper(data_economy$CROPDMGEXP)=="K" |
toupper(data_economy$CROPDMGEXP)=="M" |toupper(data_economy$CROPDMGEXP)=="B" |
toupper(data_economy$CROPDMGEXP)=="" | is.na(data_economy$CROPDMGEXP),c("EVTYPE", "CROPDMG", "CROPDMGEXP")]
economy_1[toupper(economy_1$PROPDMGEXP)=="H","PROPDMGEXP"]<-2
economy_1[toupper(economy_1$PROPDMGEXP)=="K","PROPDMGEXP"]<-3
economy_1[toupper(economy_1$PROPDMGEXP)=="M","PROPDMGEXP"]<-6
economy_1[toupper(economy_1$PROPDMGEXP)=="B","PROPDMGEXP"]<-9
economy_1[toupper(economy_1$PROPDMGEXP)=="" | is.na(economy_1$PROPDMGEXP),"PROPDMGEXP"]<-0
economy_1$result <- economy_1$PROPDMG * 10^(as.numeric(economy_1$PROPDMGEXP))
eco1<-aggregate(economy_1$result,list(economy_1$EVTYPE),sum)
names(eco1)<-c("EVTYPE","PROPERTY_DAMAGE")
eco1<- arrange(eco1,desc(PROPERTY_DAMAGE))
eco1<-head(eco1,n=10L)
economy_2[toupper(economy_2$CROPDMGEXP)=="H","CROPDMGEXP"]<-2
economy_2[toupper(economy_2$CROPDMGEXP)=="K","CROPDMGEXP"]<-3
economy_2[toupper(economy_2$CROPDMGEXP)=="M","CROPDMGEXP"]<-6
economy_2[toupper(economy_2$CROPDMGEXP)=="B","CROPDMGEXP"]<-9
economy_2[toupper(economy_2$CROPDMGEXP)=="" | is.na(economy_2$CROPDMGEXP),"CROPDMGEXP"]<-0
economy_2$result <- economy_2$CROPDMG * 10^(as.numeric(economy_2$CROPDMGEXP))
eco2<-aggregate(economy_2$result,list(economy_2$EVTYPE),sum)
names(eco2)<-c("EVTYPE","CROP_DAMAGE")
eco2<- arrange(eco2,desc(CROP_DAMAGE))
eco2<-head(eco2,n=10L)
fig3<-ggplot(eco1, aes(x=EVTYPE,weight = PROPERTY_DAMAGE)) +
geom_bar(aes(x=EVTYPE, y=PROPERTY_DAMAGE),stat="identity",fill="steelblue") +
ylab("Property Damage (US$)") +
xlab("Weather Event Type") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ggtitle("Total property loss by Severe Weather")
fig4<-ggplot(eco2, aes(x=EVTYPE,weight = CROP_DAMAGE)) +
geom_bar(aes(x=EVTYPE, y=CROP_DAMAGE),stat="identity",fill="steelblue") +
ylab("Crop Damage (US$)") +
xlab("Weather Event Type") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
ggtitle("Total Crop loss by Severe Weather")
grid.arrange(fig3, fig4, ncol = 2)
FLOODS cause maximum property damage whereas DROUGHT cause maximum crop damage.
Weather event causing the most number of fatalities - TORNADO
Weather event causing the most number of injuries - TORNADO
Weather event causing the most number of property damage - FLOODS
Weather event causing the most number of crop damage - DROUGHT