Reproducible Research - Project assignment 2

Read the data and set working directory

# Switch to the author's project folder only when it exists. The path is
# machine-specific, so on any other machine the analysis runs from the
# current working directory instead of aborting inside setwd().
project_dir <- "E:/R/coursera/Assignments/Reproducible Research/Project 2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Where the compressed storm data set is stored locally and fetched from.
data_dir <- "./data"
data_file <- file.path(data_dir, "stormdata.csv.bz2")
data.url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

if (!dir.exists(data_dir)) {
  dir.create(data_dir)
}

# Key the download on the file itself, not on the folder: an existing but
# empty ./data directory must still trigger the download. mode = "wb"
# requests a binary transfer explicitly so the archive is stored unchanged.
if (!file.exists(data_file)) {
  download.file(data.url, destfile = data_file, mode = "wb")
}

# read.csv() decompresses the archive on the fly through the bzfile connection.
storm_data <- read.csv(bzfile(data_file), sep = ",", header = TRUE)

Load the required R packages.

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2

Data transformation

To find the most harmful event to population health, we calculate the

impact on the human population as the sum of FATALITIES and INJURIES.

We add a new column “IMPACT” to the table as sum of both variables

# Per-record human impact: fatalities and injuries added together.
storm_data <- storm_data %>%
  mutate(IMPACT = FATALITIES + INJURIES)
## Warning: package 'bindrcpp' was built under R version 3.4.3

The top damaging event will be the one with the highest total in column

“IMPACT”. We find the sum of IMPACT for all the events and sort them

in descending order. This will give the highest impact event at top

of the list.

# Total IMPACT per event type, ranked from highest to lowest; the first
# three rows feed the plot and the first row is the single worst event.
impact_totals <- aggregate(IMPACT ~ EVTYPE, data = storm_data, FUN = sum)
damaging_plot <- arrange(impact_totals, desc(IMPACT))[1:3, ]
damaging_event <- damaging_plot[1, ]

Exponent Handling

The exponents are mapped as follows:

  • h, H, 2 = 100
  • k, K, 3 = 1,000
  • m, M, 6 = 1,000,000
  • b, B, 9 = 1,000,000,000
  • 1 = 10
  • 4 = 10,000
  • 5 = 100,000
  • 7 = 10,000,000
  • 8 = 100,000,000
  • “+”, “-”, “?”, blank = 0
  • 0 = 1
# Translate damage-exponent codes into numeric multipliers.
#
# codes: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length. Codes that are not listed
# below (including NA) map to NA.
#
# The result is written into a separate numeric vector rather than replacing
# codes in place, so one rule can never re-match the output of an earlier
# rule and every column goes through exactly the same table.
exponent_to_multiplier <- function(codes) {
  codes <- as.character(codes)
  multiplier <- rep(NA_real_, length(codes))

  multiplier[codes %in% c("+", "", "?", "-")] <- 0
  multiplier[codes %in% "0"] <- 1
  multiplier[codes %in% "1"] <- 10
  multiplier[codes %in% c("h", "H", "2")] <- 100
  multiplier[codes %in% c("k", "K", "3")] <- 1000
  multiplier[codes %in% "4"] <- 10000
  multiplier[codes %in% "5"] <- 100000
  multiplier[codes %in% c("m", "M", "6")] <- 1000000
  multiplier[codes %in% "7"] <- 10000000
  multiplier[codes %in% "8"] <- 100000000
  multiplier[codes %in% c("b", "B", "9")] <- 1000000000

  multiplier
}

# Both exponent columns share one mapping. Previously the h/H/2 rule was
# applied to PROPDMGEXP twice and never to CROPDMGEXP.
storm_data$PROPDMGEXP <- exponent_to_multiplier(storm_data$PROPDMGEXP)
storm_data$CROPDMGEXP <- exponent_to_multiplier(storm_data$CROPDMGEXP)

# Make sure the damage amounts themselves are numeric before multiplying.
storm_data$PROPDMG <- as.numeric(storm_data$PROPDMG)
storm_data$CROPDMG <- as.numeric(storm_data$CROPDMG)

Adding a new column FIN_IMPACT to storm data table with sum of property

and crop damages.

# Per-record financial impact: each damage amount scaled by its multiplier,
# property and crop damage added together.
storm_data <- storm_data %>%
  mutate(FIN_IMPACT = PROPDMG * PROPDMGEXP + CROPDMG * CROPDMGEXP)

Aggregate and Sort the storm data as per Event Type for FIN_IMPACT

# Total FIN_IMPACT per event type, ranked from highest to lowest; the first
# three rows feed the plot and the first row is the costliest event.
fin_totals <- aggregate(FIN_IMPACT ~ EVTYPE, data = storm_data, FUN = sum)
fin_impact_plot <- arrange(fin_totals, desc(FIN_IMPACT))[1:3, ]
fin_impact_event <- fin_impact_plot[1, ]

RESULT

# Bar chart of the three event types with the highest combined fatalities
# and injuries. Explicit axis labels replace the raw expression
# "as.factor(EVTYPE)" that ggplot would otherwise print on the x axis.
ggplot(damaging_plot, aes(x = as.factor(EVTYPE), y = IMPACT)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Fatalities + Injuries for top 3 Events",
    x = "Event type",
    y = "Fatalities + injuries"
  )

The event TORNADO is the most harmful to population health, with 96979 fatalities / injuries.

# Bar chart of the three event types with the highest combined property and
# crop damage. Explicit axis labels replace the raw expression
# "as.factor(EVTYPE)" and the bare column name on the axes.
ggplot(fin_impact_plot, aes(x = as.factor(EVTYPE), y = FIN_IMPACT)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Total Fin Impact for top 3 Event types",
    x = "Event type",
    y = "Property + crop damage (USD)"
  )

The event FLOOD has the greatest economic consequences with 150319678250 USD in damages.

END