This analysis is intended to help protect population health from severe weather events. Its goal is to explore the storm data in the NOAA Storm Database and to assess the effects of severe weather events on both population health and the economy.
The data used in this analysis cover the period from 1950 to November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
Finally, this analysis aims to investigate which types of events are most harmful to population health and which types of events have had the greatest economic consequences across the United States. To achieve this, we will explore the relevant data to answer these questions.
The data can be downloaded from the course website: StormData. Some documentation of the database is also available, where you will find how some of the variables are constructed/defined: - National Weather Service Storm Data Documentation - National Climatic Data Center Storm Events FAQ
The following packages are required.
library(ggplot2)
library(gridExtra)
The first step is to read the data. Set the working directory to the folder that contains the data file.
# Folder that holds the storm data file; adjust this path for your machine.
# data_dir <- "E:/github/datascientist/coursera_reproducible_research"
data_dir <- "C:/Users/acorn/git/data scientist/coursera_reproducible_research"
# Change directory only when the folder exists, so the script can still be run
# from a project folder on machines without this author-specific path (an
# unconditional setwd() would stop the script there).
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
# Uncompressed alternative:
# storm_data <- read.csv("repdata-data-StormData.csv", stringsAsFactors = FALSE)
# Read the bzip2-compressed CSV, keeping text columns as character vectors.
stormdata <- read.csv(
  bzfile("repdata-data-StormData.csv.bz2"),
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
After reading the data, I subset the columns needed for the analysis.
# Keep only the event type plus the casualty and damage columns.
storm_data_sub <- stormdata[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
# Show the first rows of the reduced data frame.
head(storm_data_sub)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0.00 15.00 25.00 K 0.00
## 2 TORNADO 0.00 0.00 2.50 K 0.00
## 3 TORNADO 0.00 2.00 25.00 K 0.00
## 4 TORNADO 0.00 2.00 2.50 K 0.00
## 5 TORNADO 0.00 2.00 2.50 K 0.00
## 6 TORNADO 0.00 6.00 2.50 K 0.00
# Number of leading rows of storm_data_sub that enter the analysis.
# NOTE(review): this cutoff is hard-coded and its rationale is not documented
# here -- confirm whether the full data set should be used instead.
max_rows <- 547364
# head() never returns more rows than exist, whereas indexing with 1:547364
# would pad a shorter data set with all-NA rows.
stormdatasub <- head(storm_data_sub, max_rows)

# Make sure both casualty columns are numeric before summing them.
stormdatasub$FATALITIES <- as.numeric(stormdatasub$FATALITIES)
stormdatasub$INJURIES <- as.numeric(stormdatasub$INJURIES)

# Total fatalities per event type, largest total first.
fatalities <- aggregate(
  stormdatasub$FATALITIES,
  by = list(stormdatasub$EVTYPE),
  FUN = sum
)
names(fatalities) <- c("EVTYPE", "fatalities")
fatalities <- fatalities[order(fatalities$fatalities, decreasing = TRUE), ]

# Total injuries per event type, largest total first.
injuries <- aggregate(
  stormdatasub$INJURIES,
  by = list(stormdatasub$EVTYPE),
  FUN = sum
)
names(injuries) <- c("EVTYPE", "injuries")
injuries <- injuries[order(injuries$injuries, decreasing = TRUE), ]
Second, I prepared the data for the economic consequences. Because the exponent values (PROPDMGEXP, CROPDMGEXP) are stored as letters (h = hundred, k = thousand, m = million, b = billion), they must be converted to numbers. For this, I created a conversion function.
# Convert a single damage-exponent code into a power of ten.
#
# e: one exponent code (a length-one value), e.g. "K", "m", "5" or "".
# Returns one number: 2 for h/H, 3 for k/K, 6 for m/M, 9 for b/B, the numeric
# value itself for codes that parse as a number, and 0 for "?", "", "+", "-".
# Stops with "invalid value" for any other code.
# NOTE: this definition masks base::exp() for code evaluated after it.
exp <- function(e) {
  if (e %in% c("h", "H")) {
    return(2)
  }
  if (e %in% c("k", "K")) {
    return(3)
  }
  if (e %in% c("m", "M")) {
    return(6)
  }
  if (e %in% c("b", "B")) {
    return(9)
  }
  # Coerce once and silence the "NAs introduced by coercion" warning here:
  # symbol codes such as "?" are expected and are handled further down.
  numeric_code <- suppressWarnings(as.numeric(e))
  if (!is.na(numeric_code)) {
    return(numeric_code)
  }
  if (e %in% c("?", "", "+", "-")) {
    return(0)
  }
  stop("invalid value")
}
After this is done, I created new columns (propdamage, cropdamage).
# Property damage in dollars: PROPDMG scaled by ten to the power of its
# exponent code. vapply() always yields a plain numeric vector (also for zero
# rows), and USE.NAMES = FALSE avoids carrying the codes as element names.
proexp <- vapply(
  stormdatasub$PROPDMGEXP,
  FUN = exp,
  FUN.VALUE = numeric(1),
  USE.NAMES = FALSE
)
stormdatasub$PROPDMG <- as.numeric(stormdatasub$PROPDMG)
stormdatasub$propdamage <- stormdatasub$PROPDMG * 10^proexp

# Crop damage in dollars, computed the same way from CROPDMG / CROPDMGEXP.
cropexp <- vapply(
  stormdatasub$CROPDMGEXP,
  FUN = exp,
  FUN.VALUE = numeric(1),
  USE.NAMES = FALSE
)
stormdatasub$CROPDMG <- as.numeric(stormdatasub$CROPDMG)
stormdatasub$cropdamage <- stormdatasub$CROPDMG * 10^cropexp
Then I aggregated the data for the economic consequences.
# Total property damage per event type, largest total first.
propdamage <- aggregate(
  stormdatasub$propdamage,
  by = list(stormdatasub$EVTYPE),
  FUN = sum
)
names(propdamage) <- c("EVTYPE", "propdamage")
propdamage <- propdamage[order(propdamage$propdamage, decreasing = TRUE), ]

# Total crop damage per event type, largest total first.
cropdamage <- aggregate(
  stormdatasub$cropdamage,
  by = list(stormdatasub$EVTYPE),
  FUN = sum
)
names(cropdamage) <- c("EVTYPE", "cropdamage")
cropdamage <- cropdamage[order(cropdamage$cropdamage, decreasing = TRUE), ]
# --- Fatalities: bar chart of the first ten rows ---------------------------
# Turn EVTYPE into a factor whose levels follow the current row order.
fatalities$EVTYPE <- factor(fatalities$EVTYPE, levels = fatalities$EVTYPE)
fatal <- fatalities[seq_len(10), ]
g1 <- ggplot(
  fatal,
  aes(x = reorder(EVTYPE, fatalities), y = fatalities, fill = fatalities)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = fatalities)) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(
    x = "EVTYPE",
    y = "Number of fatalities",
    title = "Weather type TOP10 that affected the population health in US"
  )

# --- Injuries: bar chart of the first ten rows ------------------------------
injuries$EVTYPE <- factor(injuries$EVTYPE, levels = injuries$EVTYPE)
inju <- injuries[seq_len(10), ]
g2 <- ggplot(
  inju,
  aes(x = reorder(EVTYPE, injuries), y = injuries, fill = injuries)
) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = injuries)) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 90)) +
  labs(x = "EVTYPE", y = "Number of injuries")

# Stack both health charts in a single figure.
grid.arrange(g1, g2, nrow = 2)
# --- Property damage: bar chart of the first ten rows -----------------------
# Turn EVTYPE into a factor whose levels follow the current row order.
propdamage$EVTYPE <- factor(propdamage$EVTYPE, levels = propdamage$EVTYPE)
propdmg <- propdamage[seq_len(10), ]
g3 <- ggplot(
  propdmg,
  aes(x = reorder(EVTYPE, propdamage), y = propdamage)
) +
  geom_bar(stat = "identity", fill = "orange") +
  geom_text(aes(label = propdamage)) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    x = "EVTYPE",
    y = "the economic damage of property by weather type(US dollar)",
    title = "Weather type TOP10 that affected the economic consequences in US"
  )

# --- Crop damage: bar chart of the first ten rows ---------------------------
cropdamage$EVTYPE <- factor(cropdamage$EVTYPE, levels = cropdamage$EVTYPE)
cropdmg <- cropdamage[seq_len(10), ]
g4 <- ggplot(
  cropdmg,
  aes(x = reorder(EVTYPE, cropdamage), y = cropdamage)
) +
  geom_bar(stat = "identity", fill = "red") +
  geom_text(aes(label = cropdamage)) +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 0)) +
  labs(
    x = "EVTYPE",
    y = "the economic damage of crops by weather type(US dollar)"
  )

# Stack both economic charts in a single figure.
grid.arrange(g3, g4, nrow = 2)