## Data Processing
library(dplyr)
library(stringr) #for str_detect
library(ggplot2)
library(gridExtra)
library(scales)
# Load the raw storm data set; the compressed file is handed straight to
# read.csv().
stormData <- read.csv("repdata_data_StormData.csv.bz2")

# Keep only the columns used below: the event type, the two casualty counts,
# and the property/crop damage amounts with their exponent codes.
ADJ_stormData <- stormData[
  ,
  c(
    "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  ),
  drop = FALSE
]
# Scale a raw damage amount by the magnitude encoded in its exponent column.
#
# Multipliers, first matching rule wins:
#   any digit 0-9 -> 10
#   H / h         -> 100
#   K / k         -> 1,000
#   M / m         -> 1,000,000
#   B / b         -> 1,000,000,000
#   anything else -> amount left unchanged
#
# Args:
#   amount:   numeric vector of raw damage values (PROPDMG or CROPDMG).
#   exponent: vector of exponent codes (PROPDMGEXP or CROPDMGEXP), same length.
# Returns:
#   Numeric vector of scaled damage values; NA where `exponent` is NA.
#
# NOTE(review): any exponent code not listed above is treated as "no scaling";
# confirm against the data set's documentation that this is the intended
# handling for such codes.
scale_damage <- function(amount, exponent) {
  # Tolerate factor columns (e.g. when strings were read in as factors).
  exponent <- as.character(exponent)
  multiplier <- case_when(
    is.na(exponent) ~ NA_real_,
    str_detect(exponent, "[0-9]") ~ 10,
    str_detect(exponent, "[Hh]") ~ 100,
    str_detect(exponent, "[Kk]") ~ 1e3,
    str_detect(exponent, "[Mm]") ~ 1e6,
    str_detect(exponent, "[Bb]") ~ 1e9,
    TRUE ~ 1
  )
  amount * multiplier
}

# Convert property and crop damage to absolute values using the same rules.
ADJ_stormData$ADJ_PROPDMG <- scale_damage(
  ADJ_stormData$PROPDMG,
  ADJ_stormData$PROPDMGEXP
)
ADJ_stormData$ADJ_CROPDMG <- scale_damage(
  ADJ_stormData$CROPDMG,
  ADJ_stormData$CROPDMGEXP
)
# Per-event-type totals: sum each outcome of interest within every EVTYPE.
# Each result is a two-column data frame (EVTYPE plus the summed variable).
num_fatal   <- aggregate(FATALITIES ~ EVTYPE, ADJ_stormData, sum)
num_injury  <- aggregate(INJURIES ~ EVTYPE, ADJ_stormData, sum)
num_propdmg <- aggregate(ADJ_PROPDMG ~ EVTYPE, ADJ_stormData, sum)
num_cropdmg <- aggregate(ADJ_CROPDMG ~ EVTYPE, ADJ_stormData, sum)
# Rank the event types by each total, largest first, and keep the ten highest.
top10_fatal <- num_fatal %>%
  arrange(desc(FATALITIES)) %>%
  head(n = 10)
top10_injury <- num_injury %>%
  arrange(desc(INJURIES)) %>%
  head(n = 10)
top10_propdmg <- num_propdmg %>%
  arrange(desc(ADJ_PROPDMG)) %>%
  head(n = 10)
top10_cropdmg <- num_cropdmg %>%
  arrange(desc(ADJ_CROPDMG)) %>%
  head(n = 10)
## Results
# Event types most harmful to population health.
# Each chart shows the ten event types with the largest total, bars ordered
# from highest to lowest, with comma-formatted counts on the y axis.
fatalPlot <- ggplot(
  top10_fatal,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", colour = "black", fill = "#B2182B") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events with Most Fatalities",
    x = "Event Type",
    y = "Number of Fatalities"
  ) +
  theme(
    # Rotate tick labels so long event names and large counts stay legible.
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(angle = 45),
    plot.title = element_text(size = 10)
  )

injuryPlot <- ggplot(
  top10_injury,
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", colour = "black", fill = "#B2182B") +
  scale_y_continuous(labels = comma) +
  labs(
    title = "Top 10 Events with Most Injuries",
    x = "Event Type",
    y = "Number of Injuries"
  ) +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(angle = 45),
    plot.title = element_text(size = 10)
  )

# Show the two health-impact charts side by side.
grid.arrange(fatalPlot, injuryPlot, ncol = 2)
# Event types causing the most economic damage.
# Each chart shows the ten event types with the largest summed, exponent-scaled
# damage, bars ordered from highest to lowest, with comma-formatted y labels.
# Titles follow the same "Top 10 Events with Most ..." wording as the
# fatalities/injuries charts (the property title previously lacked "with" and
# both titles carried a trailing space).
propdmgPlot <- ggplot(
  data = top10_propdmg,
  aes(x = reorder(EVTYPE, -ADJ_PROPDMG), y = ADJ_PROPDMG)
) +
  geom_bar(stat = "identity", fill = "#B2182B", col = "black") +
  theme(
    # Rotate tick labels so long event names and large amounts stay legible.
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(angle = 45),
    plot.title = element_text(size = 10)
  ) +
  scale_y_continuous(labels = comma) +
  labs(
    y = "Cost of Damage",
    x = "Event Type",
    title = "Top 10 Events with Most Property Damage"
  )

cropdmgPlot <- ggplot(
  data = top10_cropdmg,
  aes(x = reorder(EVTYPE, -ADJ_CROPDMG), y = ADJ_CROPDMG)
) +
  geom_bar(stat = "identity", fill = "#B2182B", col = "black") +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.text.y = element_text(angle = 45),
    plot.title = element_text(size = 10)
  ) +
  scale_y_continuous(labels = comma) +
  labs(
    y = "Cost of Damage",
    x = "Event Type",
    title = "Top 10 Events with Most Crop Damage"
  )

# Show the two economic-impact charts side by side.
grid.arrange(propdmgPlot, cropdmgPlot, ncol = 2)