# Acquire the raw storm data: download the compressed CSV once, then load it
# into `storm_data`.
#
# NOTE(review): this working directory is specific to one machine; every
# relative path below is resolved against it. Adjust or remove before running
# elsewhere.
setwd("E:/R/coursera/Assignments/Reproducible Research/Project 2")

data_dir <- "./data"
data_file <- "./data/stormdata.csv.bz2"
data.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}

# Check for the archive itself rather than only its directory, so that an
# existing but empty ./data folder still triggers the download.
if (!file.exists(data_file)) {
  # The archive is binary; "wb" prevents any newline translation on Windows.
  download.file(data.url,
                destfile = data_file,
                mode = "wb")
}

# Read the bzip2-compressed CSV through a decompressing connection.
storm_data <- read.csv(bzfile(data_file),
                       sep = ",",
                       header = TRUE)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
# Per-record population-health impact: fatalities and injuries combined.
storm_data <- storm_data %>%
  mutate(IMPACT = FATALITIES + INJURIES)

# Sum the impact for every event type, sort from largest to smallest, and
# keep the first three rows for plotting.
damaging_plot <- (
  aggregate(IMPACT ~ EVTYPE, data = storm_data, FUN = sum) %>%
    arrange(desc(IMPACT))
)[1:3, ]

# The event type with the highest total impact.
damaging_event <- damaging_plot[1, ]
The damage exponent codes (PROPDMGEXP and CROPDMGEXP) are mapped to numeric multipliers as follows:
# Translate a vector of damage-exponent codes into numeric multipliers.
#
# Mapping:
#   "0".."8"              -> 10^digit (1, 10, 100, ..., 1e8)
#   "h" / "H"             -> 100
#   "k" / "K"             -> 1,000
#   "m" / "M"             -> 1,000,000
#   "b" / "B"             -> 1,000,000,000
#   "+", "-", "?" and ""  -> 0
#
# Args:
#   exponent: character vector (or factor) of exponent codes.
# Returns:
#   A numeric vector the same length as `exponent`. Codes that are not in the
#   table above, and NA inputs, yield NA.
exponent_to_multiplier <- function(exponent) {
  codes <- c("0", "1", "2", "3", "4", "5", "6", "7", "8",
             "h", "H", "k", "K", "m", "M", "b", "B",
             "+", "", "?", "-")
  multipliers <- c(10^(0:8),
                   100, 100, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9,
                   0, 0, 0, 0)
  # match() handles the empty-string code, which name-based indexing cannot.
  multipliers[match(as.character(exponent), codes)]
}

# Apply the same table to both exponent columns. A single lookup replaces the
# sequence of in-place overwrites, whose crop-damage half mapped "h"/"H"/2 on
# PROPDMGEXP instead of CROPDMGEXP and so left those crop codes unconverted.
storm_data$PROPDMGEXP <- exponent_to_multiplier(storm_data$PROPDMGEXP)
storm_data$CROPDMGEXP <- exponent_to_multiplier(storm_data$CROPDMGEXP)
#
# Coerce the damage amounts (DMG) and their multipliers (DMGEXP) to numeric
# so they can be multiplied together.
damage_columns <- c("PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
storm_data[damage_columns] <- lapply(storm_data[damage_columns], as.numeric)
# Per-record financial impact: property damage plus crop damage, each scaled
# by its multiplier column.
storm_data <- storm_data %>%
  mutate(FIN_IMPACT = (PROPDMG * PROPDMGEXP) + (CROPDMG * CROPDMGEXP))

# Sum the financial impact for every event type, sort from largest to
# smallest, and keep the first three rows for plotting.
fin_impact_plot <- (
  aggregate(FIN_IMPACT ~ EVTYPE, data = storm_data, FUN = sum) %>%
    arrange(desc(FIN_IMPACT))
)[1:3, ]

# The event type with the highest total financial impact.
fin_impact_event <- fin_impact_plot[1, ]
# Bar chart of the three event types with the highest combined fatalities and
# injuries. Bar heights are taken directly from the IMPACT column.
ggplot(damaging_plot,
       aes(x = as.factor(EVTYPE),
           y = IMPACT)) +
  geom_col() +
  # Title spelling corrected ("Fatalities").
  labs(title = "Total Fatalities + Injuries for top 3 Events")
The event TORNADO is most harmful to population health, with 96979 combined fatalities and injuries.
# Bar chart of the three event types with the largest total financial impact.
# Bar heights are taken directly from the FIN_IMPACT column.
ggplot(data = fin_impact_plot,
       mapping = aes(x = as.factor(EVTYPE), y = FIN_IMPACT)) +
  geom_col() +
  ggtitle("Total Fin Impact for top 3 Event types")
The event FLOOD has the greatest economic consequences, with 150319678250 USD in combined property and crop damages.