# Set the knitr chunk default `echo` to FALSE for the chunks that follow.
# NOTE(review): code is still displayed throughout this report, so individual
# chunks presumably override this default - confirm against the .Rmd source.
knitr::opts_chunk$set(echo = FALSE)

Title

Storm Event Impact on Health and Economy

Synopsis

The goal of this project is to assess the impact of various storm events on the public health and economy of communities and municipalities in the United States, using the data available from the National Oceanic and Atmospheric Administration's (NOAA) storm database.

The impact on health is analyzed using the fatalities and injuries columns provided in the dataset.

The impact on economy is analyzed using the crop damage and property damage columns provided in the dataset.

The sections below describe in detail all the individual steps and code written for this analysis.

Data Processing

I downloaded the data from the NOAA Storm Database, read it using the read.csv function, and explored what is in the data set.

Downloading the data

# Location of the compressed NOAA storm data and the name of the local copy.
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
archive_file <- "StormData.csv.bz2"
# Fetch the archive only when it is not already on disk, so re-running the
# analysis does not download it again. mode = "wb" requests a binary transfer,
# which a compressed file needs on every platform.
if (!file.exists(archive_file)) {
  download.file(data_url, archive_file, mode = "wb")
}
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.7.0 successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
# Decompress the archive to the file name that read.csv() opens below. The
# destination used to be spelled "StromData.csv", so the file produced here
# was not the one that was subsequently read.
# overwrite = TRUE lets this step be re-run when the CSV already exists;
# remove = FALSE keeps the downloaded archive instead of deleting it.
bunzip2("StormData.csv.bz2", "StormData.csv", overwrite = TRUE, remove = FALSE)

Reading the data

# Load the decompressed CSV, keeping text columns as character vectors rather
# than factors. A comma separator and a header row are read.csv()'s defaults,
# so they are not spelled out.
Stormdata <- read.csv(
  file = "StormData.csv",
  stringsAsFactors = FALSE
)
# Show the first rows to get a feel for the columns.
head(Stormdata)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

Preprocessing the data

Creating a new dataset with columns related to health and economy

# Keep only the columns used by the health and economy analyses; all rows are
# retained.
Health_Economy <- subset(
  Stormdata,
  select = c(
    BGN_DATE, EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  )
)

Creating a subset for health using the fatalities and injuries columns

# Keep every event that caused at least one fatality OR at least one injury.
# The conditions are joined with `|`: requiring both to be non-zero would leave
# out events that killed without injuring (or injured without killing) and so
# understate the per-event totals computed from this subset.
Health <- subset(
  Health_Economy,
  FATALITIES != 0 | INJURIES != 0,
  select = c(EVTYPE, FATALITIES, INJURIES)
)
head(Health)
##     EVTYPE FATALITIES INJURIES
## 9  TORNADO          1       14
## 13 TORNADO          1       26
## 16 TORNADO          4       50
## 26 TORNADO          1        8
## 34 TORNADO          6      195
## 36 TORNADO          7       12

Creating a subset for Economy using Property Damage and Crop Damage columns

# Keep every event that reported some property damage OR some crop damage.
# The conditions are joined with `|`: requiring both to be non-zero would leave
# out events that damaged only property or only crops and so understate the
# damage attributed to each event type.
Economy <- subset(
  Health_Economy,
  PROPDMG != 0 | CROPDMG != 0,
  select = c(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
)
head(Economy)
##                           EVTYPE PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 187566 HURRICANE OPAL/HIGH WINDS     0.1          B      10          M
## 187571        THUNDERSTORM WINDS     5.0          M     500          K
## 187581            HURRICANE ERIN    25.0          M       1          M
## 187583            HURRICANE OPAL    48.0          M       4          M
## 187584            HURRICANE OPAL    20.0          m      10          m
## 187653        THUNDERSTORM WINDS    50.0          K      50          K

Organizing the data

Separating the fatalities and injuries data sets so we can look at the impact on each of these variables individually

# Total fatalities per event type, with descriptive column names.
Health_Fatalities <- setNames(
  aggregate(Health$FATALITIES, by = list(Health$EVTYPE), FUN = sum),
  c("EVTYPE", "FATALITIES")
)
# Total injuries per event type, with descriptive column names.
Health_Injuries <- setNames(
  aggregate(Health$INJURIES, by = list(Health$EVTYPE), FUN = sum),
  c("EVTYPE", "INJURIES")
)

Reordering the data in decreasing order and including the top 10 events

# Row order that sorts event types from most to fewest fatalities; keep the
# first ten positions of that order.
fatality_rank <- order(Health_Fatalities$FATALITIES, decreasing = TRUE)
Health_Fatalities <- Health_Fatalities[fatality_rank[1:10], ]
# Same ranking for injuries.
injury_rank <- order(Health_Injuries$INJURIES, decreasing = TRUE)
Health_Injuries <- Health_Injuries[injury_rank[1:10], ]

Further subsetting the economy data to keep only the rows whose exponent codes are “K”, “k”, “M”, “m”, “B”, or “b”, based on the information provided in the documentation of the database

# A zero amount contributes nothing whatever its multiplier, so give such
# entries a valid code. Otherwise a blank or unrecognised exponent on an
# undamaged column would discard the row together with its other, real damage
# figure.
Economy$PROPDMGEXP[Economy$PROPDMG == 0] <- "K"
Economy$CROPDMGEXP[Economy$CROPDMG == 0] <- "K"
# Keep only rows whose property and crop exponents are both one of the
# thousand / million / billion codes (either case).
valid_exp <- c("K", "k", "M", "m", "B", "b")
Economy <- subset(
  Economy,
  PROPDMGEXP %in% valid_exp & CROPDMGEXP %in% valid_exp
)

Converting the values in economy to numbers using the information provided in the documentation

# Replace the exponent letters with their multipliers and convert the result
# from text to numeric. The substitutions are case-insensitive and applied in
# the order m, k, b.
exp_to_multiplier <- function(codes) {
  codes <- gsub("m", 1e+06, codes, ignore.case = TRUE)
  codes <- gsub("k", 1000, codes, ignore.case = TRUE)
  codes <- gsub("b", 1e+09, codes, ignore.case = TRUE)
  as.numeric(codes)
}
Economy$PROPDMGEXP <- exp_to_multiplier(Economy$PROPDMGEXP)
Economy$CROPDMGEXP <- exp_to_multiplier(Economy$CROPDMGEXP)

Total of all the damages (property and crop)

# Combined damage of each event: crop amount and property amount, each scaled
# by its own multiplier.
crop_cost <- Economy$CROPDMG * Economy$CROPDMGEXP
property_cost <- Economy$PROPDMG * Economy$PROPDMGEXP
Economy$ALLDAMAGE <- crop_cost + property_cost
# Sum the combined damage per event type and name the resulting columns.
Economy <- setNames(
  aggregate(Economy$ALLDAMAGE, by = list(Economy$EVTYPE), FUN = sum),
  c("EVTYPE", "ALLDAMAGE")
)
# Sort from largest to smallest total and keep the first ten positions.
damage_rank <- order(Economy$ALLDAMAGE, decreasing = TRUE)
Economy <- Economy[damage_rank[1:10], ]

Loading the library required to make the plots

library(ggplot2)

Results

Making three plots

  1. Fatalities Vs Event type
  2. Injuries Vs Event type
  3. All damages Vs Event type

Based on these results we can conclude that 1) tornadoes were the most harmful to public health and 2) floods were the most harmful to the economy.

# Bar chart of total fatalities per event type in Health_Fatalities.
# Bars are ordered from the highest to the lowest total (rather than
# alphabetically) so the ranking can be read straight off the axis. Each event
# type gets its own fill colour and the legend is suppressed because the x-axis
# already names the events.
ggplot(
  Health_Fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event types") +
  ylab("Fatalities") +
  ggtitle("Impact of storm events on fatalities")

# Bar chart of total injuries per event type in Health_Injuries.
# Bars are ordered from the highest to the lowest total (rather than
# alphabetically) so the ranking can be read straight off the axis. Each event
# type gets its own fill colour and the legend is suppressed because the x-axis
# already names the events.
ggplot(
  Health_Injuries,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Event types") +
  ylab("Injuries") +
  ggtitle("Impact of storm events on Injuries")

# Bar chart of combined property and crop damage per event type in Economy.
# Bars are ordered from the highest to the lowest total (rather than
# alphabetically) so the ranking can be read straight off the axis. Each event
# type gets its own fill colour and the legend is suppressed because the x-axis
# already names the events.
ggplot(
  Economy,
  aes(x = reorder(EVTYPE, -ALLDAMAGE), y = ALLDAMAGE, fill = EVTYPE)
) +
  geom_col(show.legend = FALSE) +
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
  xlab("Events") +
  ylab("Economy") +
  ggtitle("Impact of storm events on economy")