# Set the knitr default chunk option `echo` to FALSE for every chunk that
# does not override it.
knitr::opts_chunk$set(echo = FALSE)
Storm Event Impact on Health and Economy
The goal of this project is to assess the impact of various storm events on the public health and economy of communities and municipalities in the United States, using the data available from the National Oceanic and Atmospheric Administration's storm database.
The impact on health is analyzed using the fatalities and injuries columns provided in the dataset.
The impact on economy is analyzed using the crop damage and property damage columns provided in the dataset.
The sections below describe in detail all the individual steps and code written for this analysis.
I downloaded the data from the NOAA Storm Database, read it using the read.csv function, and explored what is in the data set.
# Fetch the compressed storm data set into the working directory.
# The download is skipped when a local copy already exists so that re-running
# the analysis does not hit the network again, and mode = "wb" writes the
# archive as binary so it is byte-exact on every platform.
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("StormData.csv.bz2")) {
  download.file(data_url, "StormData.csv.bz2", mode = "wb")
}
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.7.0 successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
# Decompress the archive and load it into a data frame.
# The decompressed file name must be exactly the name read.csv() opens below;
# a mismatch leaves read.csv() looking for a file that was never written.
# skip = TRUE turns a re-run into a no-op when the CSV already exists, and
# remove = FALSE keeps the downloaded archive on disk.
bunzip2("StormData.csv.bz2", "StormData.csv", skip = TRUE, remove = FALSE)
Stormdata <- read.csv(
  "StormData.csv",
  sep = ",",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Show the first rows to get a feel for the available columns.
head(Stormdata)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Creating a new dataset with columns related to health and economy
# Narrow the raw storm data to the event date, the event type, and the
# casualty and damage columns used by the rest of the analysis.
Health_Economy <- Stormdata[
  ,
  c(
    "BGN_DATE", "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  )
]
Creating a subset for health using the fatalities and injuries columns
# Keep every event that harmed people in any way: at least one fatality OR at
# least one injury. Requiring both at once would drop, for example, events
# with deaths but no recorded injuries, and the per-event totals computed
# from this subset would then undercount.
Health <- subset(
  Health_Economy,
  FATALITIES != 0 | INJURIES != 0,
  select = c(EVTYPE, FATALITIES, INJURIES)
)
# Preview the first rows of the health subset.
head(Health)
## EVTYPE FATALITIES INJURIES
## 9 TORNADO 1 14
## 13 TORNADO 1 26
## 16 TORNADO 4 50
## 26 TORNADO 1 8
## 34 TORNADO 6 195
## 36 TORNADO 7 12
Creating a subset for Economy using Property Damage and Crop Damage columns
# Keep every event that caused economic loss of either kind: non-zero
# property damage OR non-zero crop damage. Requiring both at once would drop
# events that damaged property but no crops (and vice versa), so their losses
# would never reach the totals.
Economy <- subset(
  Health_Economy,
  PROPDMG != 0 | CROPDMG != 0,
  select = c(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
)
# Preview the first rows of the economy subset.
head(Economy)
## EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 187566 HURRICANE OPAL/HIGH WINDS 0.1 B 10 M
## 187571 THUNDERSTORM WINDS 5.0 M 500 K
## 187581 HURRICANE ERIN 25.0 M 1 M
## 187583 HURRICANE OPAL 48.0 M 4 M
## 187584 HURRICANE OPAL 20.0 m 10 m
## 187653 THUNDERSTORM WINDS 50.0 K 50 K
Separating the fatalities and injuries data sets so we can look at the impact on each of these variables individually
# Total fatalities per event type; naming the grouping list and passing a
# one-column data frame yields the EVTYPE / FATALITIES column names directly.
Health_Fatalities <- aggregate(
  Health["FATALITIES"],
  by = list(EVTYPE = Health$EVTYPE),
  FUN = sum
)

# Total injuries per event type, built the same way.
Health_Injuries <- aggregate(
  Health["INJURIES"],
  by = list(EVTYPE = Health$EVTYPE),
  FUN = sum
)
Reordering the data in decreasing order and including the top 10 events
# Rank event types from highest to lowest total and keep the first ten rows
# of each ranking.
Health_Fatalities <- Health_Fatalities[
  order(Health_Fatalities$FATALITIES, decreasing = TRUE)[1:10],
]
Health_Injuries <- Health_Injuries[
  order(Health_Injuries$INJURIES, decreasing = TRUE)[1:10],
]
Further Subsetting economy data by including “K”,“k”,“M”,“m”,“B”, or “b” based on the information provided in the documentation of the database
# Magnitude codes accepted for the damage exponent columns, in either case.
valid_exponents <- c("K", "k", "M", "m", "B", "b")

# A zero amount contributes nothing to the total whatever its exponent, but
# such an amount may carry a blank or unrecognised exponent. Give zero
# amounts a valid placeholder so that the row is not thrown away together
# with the other, non-zero amount it holds.
Economy$PROPDMGEXP[which(Economy$PROPDMG == 0)] <- "K"
Economy$CROPDMGEXP[which(Economy$CROPDMG == 0)] <- "K"

# Keep only rows where both exponents are accepted codes. %in% never yields
# NA, so rows with a missing exponent are dropped.
Economy <- subset(
  Economy,
  PROPDMGEXP %in% valid_exponents & CROPDMGEXP %in% valid_exponents
)
Converting the values in economy to numbers using the information provided in the documentation
# Turn a vector of exponent letters into numeric multipliers by replacing,
# case-insensitively and in this order, "m" with 1e+06, "k" with 1000 and
# "b" with 1e+09, then converting the resulting text to numbers.
exponent_to_multiplier <- function(codes) {
  multipliers <- list(m = 1e+06, k = 1000, b = 1e+09)
  for (letter in names(multipliers)) {
    codes <- gsub(letter, multipliers[[letter]], codes, ignore.case = TRUE)
  }
  as.numeric(codes)
}

# Apply the same conversion to the property and the crop exponent columns.
Economy$PROPDMGEXP <- exponent_to_multiplier(Economy$PROPDMGEXP)
Economy$CROPDMGEXP <- exponent_to_multiplier(Economy$CROPDMGEXP)
Total of all the damages (property and crop)
# Combined loss per record: each amount scaled by its multiplier, crop plus
# property.
Economy$ALLDAMAGE <- with(
  Economy,
  (CROPDMG * CROPDMGEXP) + (PROPDMG * PROPDMGEXP)
)

# Sum the combined loss per event type; the named grouping list and the
# one-column data frame give the EVTYPE / ALLDAMAGE column names directly.
Economy <- aggregate(
  Economy["ALLDAMAGE"],
  by = list(EVTYPE = Economy$EVTYPE),
  FUN = sum
)

# Rank event types by total loss, highest first, and keep the first ten rows.
Economy <- Economy[order(Economy$ALLDAMAGE, decreasing = TRUE)[1:10], ]
loading the library required to make the plots
library(ggplot2)
Making three plots
Based on these results we can conclude that 1) Tornadoes were most harmful for public health 2) Floods were most harmful for the economy
# One bar chart per outcome. Bars are ordered from the largest to the
# smallest total (reorder() on the negated value) so the ranking can be read
# straight off the x axis, and each event type gets its own fill colour.
# geom_col() draws bar heights from the y values as given; the legend is
# switched off because the x axis already names every bar.

# Fatalities by event type.
ggplot(
  Health_Fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event types") +
  ylab("Fatalities") +
  ggtitle("Impact of storm events on fatalities")

# Injuries by event type.
ggplot(
  Health_Injuries,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event types") +
  ylab("Injuries") +
  ggtitle("Impact of storm events on Injuries")

# Combined property and crop damage by event type.
ggplot(
  Economy,
  aes(x = reorder(EVTYPE, -ALLDAMAGE), y = ALLDAMAGE, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Events") +
  ylab("Economy") +
  ggtitle("Impact of storm events on economy")