Effects of Weather-Related Events on Population Health and Economic Damage

Synopsis: This analysis shows the effects weather-related events have on population health, measured by fatalities and injuries, and on economic damage, measured by the cost of property and crop damage. Tornadoes caused the most fatalities and injuries by a significant margin. Floods caused the most property damage, and drought caused the most crop damage.

Data Processing

library(dplyr)
library(stringr) #for str_detect
library(ggplot2)
library(gridExtra)
library(scales)
# Load the raw storm data set; read.csv() decompresses the .bz2 file itself.
stormData <- read.csv("repdata_data_StormData.csv.bz2")
# Keep only the columns the analysis uses: the event type, the two casualty
# counts, and the damage figures together with their exponent codes.
analysis_cols <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
ADJ_stormData <- stormData[analysis_cols]
# Convert the PROPDMG and CROPDMG values to absolute amounts, as indicated by
# their exponent columns PROPDMGEXP / CROPDMGEXP (0-9 is a factor of 10,
# H/h - hundreds, K/k - thousands, M/m - millions, B/b - billions).

# Translate a vector of exponent codes into numeric multipliers.
#
# exp_code: character (or factor) vector of exponent codes.
# Returns a numeric vector of the same length. Rules are tried in the order
# listed and the first match wins. Any code that matches no rule -- including
# "" and NA -- gets a multiplier of 1, i.e. the damage value is left unscaled.
damage_multiplier <- function(exp_code) {
  exp_code <- as.character(exp_code)
  case_when(
    str_detect(exp_code, "[0-9]") ~ 10,
    str_detect(exp_code, "[Hh]") ~ 1e2,
    str_detect(exp_code, "[Kk]") ~ 1e3,
    str_detect(exp_code, "[Mm]") ~ 1e6,
    str_detect(exp_code, "[Bb]") ~ 1e9,
    TRUE ~ 1
  )
}

# A single vectorised multiplication per column; no assignments are hidden
# inside the conditional branches, so each column is written exactly once.
ADJ_stormData$ADJ_PROPDMG <-
  ADJ_stormData$PROPDMG * damage_multiplier(ADJ_stormData$PROPDMGEXP)
ADJ_stormData$ADJ_CROPDMG <-
  ADJ_stormData$CROPDMG * damage_multiplier(ADJ_stormData$CROPDMGEXP)


# Sum each outcome of interest within every event type. The helper wraps the
# aggregate() call that is otherwise repeated verbatim for all four measures;
# `model` is a formula of the form <measure> ~ EVTYPE.
sum_by_event <- function(model) {
  aggregate(model, data = ADJ_stormData, FUN = sum)
}

num_fatal   <- sum_by_event(FATALITIES ~ EVTYPE)
num_injury  <- sum_by_event(INJURIES ~ EVTYPE)
num_propdmg <- sum_by_event(ADJ_PROPDMG ~ EVTYPE)
num_cropdmg <- sum_by_event(ADJ_CROPDMG ~ EVTYPE)

# For each measure, sort the event types from largest to smallest total and
# keep the first ten rows.
top10_fatal <- num_fatal %>%
  arrange(desc(FATALITIES)) %>%
  head(n = 10)
top10_injury <- num_injury %>%
  arrange(desc(INJURIES)) %>%
  head(n = 10)
top10_propdmg <- num_propdmg %>%
  arrange(desc(ADJ_PROPDMG)) %>%
  head(n = 10)
top10_cropdmg <- num_cropdmg %>%
  arrange(desc(ADJ_CROPDMG)) %>%
  head(n = 10)

Results

# Event types causing the most harm to population health.
# Theme settings shared by both casualty charts: vertical event-type labels,
# slanted value labels and a smaller title so two panels fit side by side.
casualty_theme <- theme(
  axis.text.x = element_text(angle = 90),
  axis.text.y = element_text(angle = 45),
  plot.title = element_text(size = 10)
)

# Bars are ordered from the largest total to the smallest via reorder().
fatalPlot <- ggplot(
  data = top10_fatal,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "#B2182B", colour = "black") +
  casualty_theme +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events with Most Fatalities",
    x = "Event Type",
    y = "Number of Fatalities"
  )

injuryPlot <- ggplot(
  data = top10_injury,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "#B2182B", colour = "black") +
  casualty_theme +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events with Most Injuries",
    x = "Event Type",
    y = "Number of Injuries"
  )

# Show the two charts next to each other in a single figure.
grid.arrange(fatalPlot, injuryPlot, ncol = 2)

# Event types causing the most economic damage.
# Theme settings shared by both damage charts: vertical event-type labels,
# slanted value labels and a smaller title so two panels fit side by side.
damage_theme <- theme(
  axis.text.x = element_text(angle = 90),
  axis.text.y = element_text(angle = 45),
  plot.title = element_text(size = 10)
)

# geom_col() draws bars whose heights are the y values themselves, which is
# what geom_bar(stat = "identity") was being used for. Bars are ordered from
# the largest total to the smallest via reorder().
propdmgPlot <- ggplot(
  data = top10_propdmg,
  aes(x = reorder(EVTYPE, -ADJ_PROPDMG), y = ADJ_PROPDMG)
) +
  geom_col(fill = "#B2182B", colour = "black") +
  damage_theme +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events with Most Property Damage",
    x = "Event Type",
    y = "Cost of Damage"
  )

cropdmgPlot <- ggplot(
  data = top10_cropdmg,
  aes(x = reorder(EVTYPE, -ADJ_CROPDMG), y = ADJ_CROPDMG)
) +
  geom_col(fill = "#B2182B", colour = "black") +
  damage_theme +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events with Most Crop Damage",
    x = "Event Type",
    y = "Cost of Damage"
  )

# Show the two charts next to each other in a single figure.
grid.arrange(propdmgPlot, cropdmgPlot, ncol = 2)