This report uses the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database to analyze which types of severe weather events cause the most economic damage and which have the greatest effect on human health.
# Directory that holds the downloaded storm data archive. The path is specific
# to the original author's machine, so only switch to it when it actually
# exists; otherwise stay in the current working directory (the data file is
# then expected to be there) instead of aborting the whole analysis.
data_dir <- "C:\\Tejo\\Datascience\\ReproducibleResearch\\Week4"
if (dir.exists(data_dir)) {
  setwd(data_dir)
} else {
  message("Directory '", data_dir, "' not found; staying in ", getwd())
}
getwd()
## [1] "C:/Tejo/Datascience/ReproducibleResearch/Week4"
Load the data, which has already been downloaded to the current working directory, into the variable stormdata.
# Read the bzip2-compressed CSV from the working directory. The first row is
# the header and fields are comma-separated (the read.csv defaults).
storm_file <- "repdata_data_StormData.csv.bz2"
stormdata <- read.csv(storm_file)
Since we are analyzing only the data related to property damage and human health, let's subset the data to the relevant columns.
# Keep only the columns needed for the health and economic analysis.
# Selecting by name (rather than by position 8, 23-28) keeps the subset correct
# even if the column order of the input file changes.
keep_cols <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
stormdata <- stormdata[, keep_cols]
Let’s see what the first 6 records look like.
# Show the first six rows of the subsetted data.
print(head(stormdata, n = 6L))
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
Load all the required libraries.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.1
library(digest)
## Warning: package 'digest' was built under R version 3.3.1
# Convert a column of damage exponent codes into numeric powers of ten.
#   codes        raw exponent codes (case-insensitive)
#   zero_codes   codes that are treated as exponent 0
#   letter_codes named character vector: lower-case letter -> exponent digit
# Codes not covered by either set are passed to as.numeric() unchanged, so
# digit codes keep their numeric value.
recode_exponent <- function(codes, zero_codes, letter_codes) {
  codes <- tolower(codes)
  codes[codes %in% zero_codes] <- "0"
  for (letter in names(letter_codes)) {
    codes[codes == letter] <- letter_codes[[letter]]
  }
  as.numeric(codes)
}

# Property damage: h(undred) = 2, k = 3, m = 6, b = 9.
stormdata$PROPDMGEXP <- recode_exponent(
  stormdata$PROPDMGEXP,
  zero_codes = c("+", "0", "?", "-", ""),
  letter_codes = c(k = "3", m = "6", h = "2", b = "9")
)

# Crop damage: k = 3, m = 6, b = 9.
stormdata$CROPDMGEXP <- recode_exponent(
  stormdata$CROPDMGEXP,
  zero_codes = c("", "?"),
  letter_codes = c(m = "6", k = "3", b = "9")
)

# Damage amount = mantissa * 10^exponent.
stormdata$CROP <- stormdata$CROPDMG * 10^stormdata$CROPDMGEXP
stormdata$PROP <- stormdata$PROPDMG * 10^stormdata$PROPDMGEXP
# Total fatalities, injuries, property damage and crop damage per raw event
# type.
aggstormdata <- aggregate(
  cbind(FATALITIES, INJURIES, PROP, CROP) ~ EVTYPE,
  data = stormdata,
  FUN = sum
)

# Keep only event types that caused at least some harm or damage.
has_impact <- aggstormdata$FATALITIES > 0 | aggstormdata$INJURIES > 0 |
  aggstormdata$PROP > 0 | aggstormdata$CROP > 0
stormdata.filtered <- aggstormdata[has_impact, ]

# Normalise the free-text event types into a small set of categories.
stormdata.filtered$EVTYPE <- tolower(stormdata.filtered$EVTYPE)

# Pattern -> category rules. ORDER MATTERS: the rules are applied one after
# another and each rule sees the labels produced by the earlier ones (e.g. a
# label already rewritten to "wind" can no longer match "thunderstorm").
evtype_rules <- c(
  "avalan"           = "avalanche",
  "snow"             = "snow",
  "heat"             = "heat",
  "cold"             = "cold",
  "drought|dry"      = "drought",
  "flood"            = "flood",
  "rain"             = "rain",
  "wind"             = "wind",
  "hurricane"        = "hurricane",
  "ic[ey]"           = "ice",
  "light[n ]ing"     = "lightning",
  "winter"           = "winter",
  "thunderstorm"     = "thunderstorm",
  "tornado"          = "tornado",
  "wild(.*)fire"     = "wildfire",
  "free"             = "freeze",
  "[^r]storm|^storm" = "storm",
  "mud"              = "mudslide",
  "hail"             = "hail"
)
for (pattern in names(evtype_rules)) {
  matched <- grepl(pattern, stormdata.filtered$EVTYPE)
  stormdata.filtered$EVTYPE[matched] <- evtype_rules[[pattern]]
}

# The totals above were computed per RAW event type, so after normalisation
# several rows share the same category. Sum them again so that each category
# appears exactly once; otherwise rankings and plots built from this table
# compare fragments of a category and show duplicated categories.
stormdata.filtered <- aggregate(
  cbind(FATALITIES, INJURIES, PROP, CROP) ~ EVTYPE,
  data = stormdata.filtered,
  FUN = sum
)
# Event types with any property or crop damage, plus their combined damage.
damage_rows <- stormdata.filtered$PROP > 0 | stormdata.filtered$CROP > 0
stormimpacted.economy <- stormdata.filtered[
  damage_rows,
  c("EVTYPE", "PROP", "CROP")
]
stormimpacted.economy$totDMG <- with(stormimpacted.economy, PROP + CROP)

# Ten rows with the largest combined damage, largest first.
damage_rank <- order(stormimpacted.economy$totDMG, decreasing = TRUE)
stormimpacted.economyTop10 <- stormimpacted.economy[damage_rank[1:10], ]

# Long format (one row per event type and damage kind) for plotting.
Meltstormimpacted.economy <- melt(
  stormimpacted.economyTop10[, c("EVTYPE", "PROP", "CROP")],
  id.vars = "EVTYPE"
)

# Fix the factor level order to the ranking order so the x axis keeps it.
Meltstormimpacted.economy$EVTYPE <- factor(
  Meltstormimpacted.economy$EVTYPE,
  levels = unique(Meltstormimpacted.economy$EVTYPE)
)
# Total fatalities and injuries per event type; naming the grouping list
# element gives the grouping column the name EVTYPE directly.
casualties <- aggregate(
  stormdata[, c("FATALITIES", "INJURIES")],
  by = list(EVTYPE = stormdata$EVTYPE),
  FUN = sum
)
casualties$totalCasualities <- with(casualties, FATALITIES + INJURIES)

# Ten event types with the most casualties, largest first.
casualty_rank <- order(casualties$totalCasualities, decreasing = TRUE)
Top10Casualities <- casualties[casualty_rank[1:10], ]
Top10Casualities$EVTYPE <- tolower(Top10Casualities$EVTYPE)

# Long format (one row per event type and casualty kind) for plotting.
meltCasualities <- melt(
  Top10Casualities[, c("EVTYPE", "FATALITIES", "INJURIES")],
  id.vars = "EVTYPE"
)

# Fix the factor level order to the ranking order so the x axis keeps it.
meltCasualities$EVTYPE <- factor(
  meltCasualities$EVTYPE,
  levels = unique(meltCasualities$EVTYPE)
)
Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Side-by-side bars of fatalities and injuries for the ten selected event
# types; x labels are rotated 45 degrees so they do not overlap.
health_plot <- ggplot(
  meltCasualities,
  aes(x = EVTYPE, y = value, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("The 10 most harmful events to population health") +
  xlab("") +
  ylab("Number of people affected") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
print(health_plot)
Question 2: Across the United States, which types of events have the greatest economic consequences?
# Side-by-side bars of property (PROP) and crop (CROP) damage for the ten
# selected event types; x labels are rotated 45 degrees so they do not overlap.
# The title grammar and the "damge" typo in the y-axis label are corrected.
ggplot(Meltstormimpacted.economy, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  ggtitle("The 10 events that caused the most economic damage") +
  labs(x = "", y = "Economic damage ($)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))