Synopsis

Severe weather can have a huge negative impact on society, in terms of both economics and health. The purpose of this study is to investigate the damage caused by different weather events. Toward that end, I analyze the Storm Data set from the course website to quantify these impacts. In particular, I look at health costs in terms of injuries and fatalities, and at economic costs in terms of property and crop damage in dollars. The study identifies the top 10 event types that have the highest economic and health costs.

Data Processing

The data were downloaded from the Coursera website. I first load the data and extract the relevant variables, which include “EVTYPE”, “FATALITIES”, “INJURIES”, “PROPDMG”, “PROPDMGEXP”, “CROPDMG” and “CROPDMGEXP”.

library(dplyr)
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Load the raw storm data from the bzip2-compressed CSV in the working
# directory. `TRUE` is spelled out because `T` is an ordinary variable that
# can be reassigned.
dat <- read.csv("repdata-data-StormData.csv.bz2", header = TRUE)
# Keep only the event type plus the health and damage columns used below.
dat1 <- dat %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

For PROPDMGEXP and CROPDMGEXP, I keep the values ‘B’, ‘m’, ‘M’, ‘k’, ‘K’, ‘h’, ‘H’, change all other values to ‘0’, and then transform them to numeric multipliers as follows: ‘B’ for billion, ‘m’ and ‘M’ for million, ‘k’ and ‘K’ for thousand, ‘h’ and ‘H’ for hundred, and ‘0’ for one. The property cost and crop cost are calculated, respectively, as PROPDMG*PROPDMGEXP and CROPDMG*CROPDMGEXP. Health damages are proxied by the numbers of fatalities and injuries.

# Dollar multiplier for each recognised damage-exponent code. The same table
# is applied to both exponent columns, so a code is never honoured in one
# column and silently ignored in the other.
exp_multiplier <- c(
  B = 1e9,
  M = 1e6, m = 1e6,
  K = 1e3, k = 1e3,
  H = 1e2, h = 1e2
)

# Translate a vector of exponent codes into numeric multipliers.
#
# code: character or factor vector of exponent codes.
# Returns an unnamed numeric vector of the same length as `code`. Any code
# that is not a name in `exp_multiplier` (including NA) maps to 1, i.e. the
# damage figure is used as recorded.
exp_to_multiplier <- function(code) {
  multiplier <- unname(exp_multiplier[as.character(code)])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

dat1$PROPDMGEXP <- exp_to_multiplier(dat1$PROPDMGEXP)
dat1$CROPDMGEXP <- exp_to_multiplier(dat1$CROPDMGEXP)

# Damages: scale the recorded figures by their multipliers and switch to
# lower-case column names for the rest of the analysis.
dat2 <- dat1 %>%
  transmute(
    evtype = EVTYPE,
    fatalities = FATALITIES,
    injuries = INJURIES,
    propdmg = PROPDMG * PROPDMGEXP,
    cropdmg = CROPDMG * CROPDMGEXP
  )

Results

1. Top 10 events that are most harmful with respect to population health:

# Styling shared by both health charts: centred bold title, vertical x-axis
# labels and compact text sizes.
health_bar_theme <- theme(
  plot.title = element_text(hjust = 0.5, size = 10, face = "bold"),
  axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7),
  axis.title.x = element_text(size = 8),
  axis.title.y = element_text(size = 9)
)

# Fatalities: total per event type, largest first
fat <- dat2 %>%
  group_by(evtype) %>%
  summarise(fatalities = sum(fatalities)) %>%
  arrange(desc(fatalities))
## `summarise()` ungrouping output (override with `.groups` argument)
fat10 <- slice(fat, 1:10) # Top 10 fatality event types
# Fix the factor level order so the bars are drawn from highest to lowest.
fat10$evtype <- factor(fat10$evtype, levels = fat10$evtype[order(-fat10$fatalities)])

fatplot <- ggplot(fat10, aes(x = evtype, y = fatalities)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Total Number of Fatalities by Event Type",
    x = "Event Type",
    y = "Number of Fatalities"
  ) +
  health_bar_theme

# Injuries: total per event type, largest first
inj <- dat2 %>%
  group_by(evtype) %>%
  summarise(injuries = sum(injuries)) %>%
  arrange(desc(injuries))
## `summarise()` ungrouping output (override with `.groups` argument)
inj10 <- slice(inj, 1:10) # Top 10 injury event types
inj10$evtype <- factor(inj10$evtype, levels = inj10$evtype[order(-inj10$injuries)])

injplot <- ggplot(inj10, aes(x = evtype, y = injuries)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Total Number of Injuries by Event Type",
    x = "Event Type",
    y = "Number of Injuries"
  ) +
  health_bar_theme

# Show the fatalities and injuries charts side by side.
grid.arrange(fatplot, injplot, nrow = 1, ncol = 2)

2. Top 10 events that have the greatest economic consequences.

First, I report the top 10 events that have the greatest economic consequences for property and crops.

# Property damage: total per event type in billions of dollars, largest first
prop <- dat2 %>%
  group_by(evtype) %>%
  summarise(propdmg = sum(propdmg) / 10^9) %>%
  arrange(desc(propdmg))
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the ten largest totals and order the factor levels to match, so the
# bars are drawn from highest to lowest.
prop10 <- prop %>%
  slice(1:10) %>%
  mutate(evtype = factor(evtype, levels = evtype[order(-propdmg)]))

proplot <- ggplot(prop10, aes(x = evtype, y = propdmg)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Total Property Damage  by Event Type",
    x = "Event Type",
    y = "Damage (in Billion Dollar)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 10, face = "bold"),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7),
    axis.title.x = element_text(size = 8),
    axis.title.y = element_text(size = 9)
  )

# Crop damage: same treatment as property damage
crop <- dat2 %>%
  group_by(evtype) %>%
  summarise(cropdmg = sum(cropdmg) / 10^9) %>%
  arrange(desc(cropdmg))
## `summarise()` ungrouping output (override with `.groups` argument)
crop10 <- crop %>%
  slice(1:10) %>%
  mutate(evtype = factor(evtype, levels = evtype[order(-cropdmg)]))

croplot <- ggplot(crop10, aes(x = evtype, y = cropdmg)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Total Crop Damage  by Event Type",
    x = "Event Type",
    y = "Damage (in Billion Dollar)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 10, face = "bold"),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7),
    axis.title.x = element_text(size = 8),
    axis.title.y = element_text(size = 9)
  )

# Show the property and crop charts side by side.
grid.arrange(proplot, croplot, nrow = 1)

Next, I report the top 10 events that have the greatest total economic consequences.

# Total economic damage: property plus crop damage per record, converted to
# billions of dollars, then summed per event type and sorted largest first
total <- dat2 %>%
  mutate(dmg = (propdmg + cropdmg) / 10^9) %>%
  group_by(evtype) %>%
  summarise(dmg = sum(dmg)) %>%
  arrange(desc(dmg))
## `summarise()` ungrouping output (override with `.groups` argument)
# Keep the ten largest totals and order the factor levels to match, so the
# bars are drawn from highest to lowest.
total10 <- total %>%
  slice(1:10) %>%
  mutate(evtype = factor(evtype, levels = evtype[order(-dmg)]))

ggplot(total10, aes(x = evtype, y = dmg)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Total Economic Damage  by Event Type",
    x = "Event Type",
    y = "Damage (in Billion Dollar)"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 10, face = "bold"),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 7),
    axis.title.x = element_text(size = 8),
    axis.title.y = element_text(size = 9)
  )