Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Fetch the two reference PDFs that accompany the storm dataset. Each download
# is skipped when the target file already exists in the working directory.
if (!file.exists("StormDataDocumentation.pdf")) {
        url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf"
        fileName <- file.path(getwd(), "StormDataDocumentation.pdf")
        # mode = "wb" requests a binary transfer so the PDF is written unmodified.
        download.file(url, destfile = fileName, mode = "wb")
}
if (!file.exists("StormEventsFAQ.pdf")) {
        url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf"
        fileName <- file.path(getwd(), "StormEventsFAQ.pdf")
        download.file(url, destfile = fileName, mode = "wb")
}


# Download the main dataset
if (!file.exists("StormData.csv.bz2")) {
        url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
        fileName <- file.path(getwd(), "StormData.csv.bz2")
        # The archive is binary as well: ask for a binary transfer explicitly,
        # matching the PDF downloads above, rather than relying on the default
        # write mode.
        download.file(url, destfile = fileName, mode = "wb")
}

# Read the bzip2-compressed CSV and keep only the event type together with the
# casualty and damage columns, then show the structure of the result.
stormData <- read.csv(bzfile("StormData.csv.bz2"), header = TRUE) %>%
        select(EVTYPE, FATALITIES, INJURIES,
               PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
str(stormData)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Sum fatalities per event type, order from most to fewest, and keep the 15
# largest totals.
getFATALITIES <- group_by(stormData, EVTYPE)
# `total` is passed to top_n() explicitly: without a weight column top_n()
# ranks on whichever column happens to be last and prints a
# "Selecting by total" message.
top15_FATALITIES <- getFATALITIES %>%
        summarise(total = sum(FATALITIES)) %>%
        arrange(desc(total)) %>%
        top_n(15, total)
top15_FATALITIES
## # A tibble: 15 x 2
##    EVTYPE            total
##    <fct>             <dbl>
##  1 TORNADO            5633
##  2 EXCESSIVE HEAT     1903
##  3 FLASH FLOOD         978
##  4 HEAT                937
##  5 LIGHTNING           816
##  6 TSTM WIND           504
##  7 FLOOD               470
##  8 RIP CURRENT         368
##  9 HIGH WIND           248
## 10 AVALANCHE           224
## 11 WINTER STORM        206
## 12 RIP CURRENTS        204
## 13 HEAT WAVE           172
## 14 EXTREME COLD        160
## 15 THUNDERSTORM WIND   133
# Sum injuries per event type, order from most to fewest, and keep the 15
# largest totals.
getINJURIES <- group_by(stormData, EVTYPE)
# `total` is passed to top_n() explicitly: without a weight column top_n()
# ranks on whichever column happens to be last and prints a
# "Selecting by total" message.
top15_INJURIES <- getINJURIES %>%
        summarise(total = sum(INJURIES)) %>%
        arrange(desc(total)) %>%
        top_n(15, total)
top15_INJURIES
## # A tibble: 15 x 2
##    EVTYPE            total
##    <fct>             <dbl>
##  1 TORNADO           91346
##  2 TSTM WIND          6957
##  3 FLOOD              6789
##  4 EXCESSIVE HEAT     6525
##  5 LIGHTNING          5230
##  6 HEAT               2100
##  7 ICE STORM          1975
##  8 FLASH FLOOD        1777
##  9 THUNDERSTORM WIND  1488
## 10 HAIL               1361
## 11 WINTER STORM       1321
## 12 HURRICANE/TYPHOON  1275
## 13 HIGH WIND          1137
## 14 HEAVY SNOW         1021
## 15 WILDFIRE            911
# List the distinct exponent codes present in the property- and crop-damage
# columns; these are the codes xformData() has to translate.
unique(stormData[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
unique(stormData[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
# Translate damage-exponent codes into numeric powers of ten.
#
# code: factor or character vector of exponent codes.
#
# Returns a numeric vector of the same length as `code`:
#   "B"/"b" -> 9, "M"/"m" -> 6, "K"/"k" -> 3, "H"/"h" -> 2,
#   a string of digits -> that number,
#   anything else ("", "-", "+", "?", NA, ...) -> 0.
.expToPower <- function(code) {
        code <- toupper(as.character(code))
        letterPower <- c(B = 9, M = 6, K = 3, H = 2)

        # Name-based lookup; codes that are not one of the letters give NA.
        power <- unname(letterPower[code])

        # Digit codes are used as the exponent directly.
        isDigit <- grepl("^[0-9]+$", code)
        power[isDigit] <- as.numeric(code[isDigit])

        # Blank, symbolic and unrecognised codes fall back to 10^0 = 1.
        power[is.na(power)] <- 0
        power
}

# Convert the exponent columns to numeric powers of ten and add the actual
# damage amounts.
#
# dataset: data frame with columns PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP
#          (defaults to the global `stormData`).
#
# Returns `dataset` with PROPDMGEXP and CROPDMGEXP replaced by numeric
# exponents and two new columns:
#   ActPropDam = PROPDMG * 10^PROPDMGEXP
#   ActCropDam = CROPDMG * 10^CROPDMGEXP
xformData <- function(dataset = stormData) {
        dataset$PROPDMGEXP <- .expToPower(dataset$PROPDMGEXP)
        dataset$ActPropDam <- dataset$PROPDMG * 10^dataset$PROPDMGEXP

        dataset$CROPDMGEXP <- .expToPower(dataset$CROPDMGEXP)
        dataset$ActCropDam <- dataset$CROPDMG * 10^dataset$CROPDMGEXP

        dataset
}
        # Storm data with numeric exponents and the ActPropDam / ActCropDam
        # columns added by xformData().
        s <- xformData(stormData)

        # Property damage summed per event type, largest first.
        propertyDamage <- aggregate(ActPropDam ~ EVTYPE, data = s, FUN = sum)
        propertyDamage_sort <- propertyDamage[order(-propertyDamage$ActPropDam), ]
        # head() rather than [1:15, ]: with fewer than 15 event types the index
        # form would pad the result with all-NA rows.
        top15_PROPDAM <- head(propertyDamage_sort, 15)

        # Crop damage summed per event type, largest first.
        cropDamage <- aggregate(ActCropDam ~ EVTYPE, data = s, FUN = sum)
        cropDamage_sort <- cropDamage[order(-cropDamage$ActCropDam), ]
        top15_CROPDAM <- head(cropDamage_sort, 15)

        # Print the large totals in fixed notation instead of scientific.
        # Note: this changes a session-wide option.
        options(scipen = 999)
        # Property plus crop damage summed per event type.
        totalDamages <- aggregate(ActPropDam + ActCropDam ~ EVTYPE, data = s, FUN = sum)
        names(totalDamages)[2] <- "total"
        # Rank on `total` explicitly so top_n() does not depend on column
        # order and does not print a "Selecting by total" message.
        top15_TotalDamages <- arrange(totalDamages, desc(total)) %>% top_n(15, total)
        # Show the 15 event types with the largest summed property damage.
        top15_PROPDAM
##                EVTYPE   ActPropDam
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380676
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673978
## 244              HAIL  15735267513
## 402         HURRICANE  11868319010
## 848    TROPICAL STORM   7703890550
## 972      WINTER STORM   6688497251
## 359         HIGH WIND   5270046295
## 590       RIVER FLOOD   5118945500
## 957          WILDFIRE   4765114000
## 671  STORM SURGE/TIDE   4641188000
## 856         TSTM WIND   4484928495
## 427         ICE STORM   3944927860
        # Show the 15 event types with the largest summed crop damage.
        top15_CROPDAM
##                EVTYPE  ActCropDam
## 95            DROUGHT 13972566000
## 170             FLOOD  5661968450
## 590       RIVER FLOOD  5029459000
## 427         ICE STORM  5022113500
## 244              HAIL  3025954473
## 402         HURRICANE  2741910000
## 411 HURRICANE/TYPHOON  2607872800
## 153       FLASH FLOOD  1421317100
## 140      EXTREME COLD  1292973000
## 212      FROST/FREEZE  1094086000
## 290        HEAVY RAIN   733399800
## 848    TROPICAL STORM   678346000
## 359         HIGH WIND   638571300
## 856         TSTM WIND   554007350
## 130    EXCESSIVE HEAT   492402000
        # Show the 15 event types with the largest combined property and crop damage.
        top15_TotalDamages
##               EVTYPE        total
## 1              FLOOD 150319678257
## 2  HURRICANE/TYPHOON  71913712800
## 3            TORNADO  57362333946
## 4        STORM SURGE  43323541000
## 5               HAIL  18761221986
## 6        FLASH FLOOD  18243991078
## 7            DROUGHT  15018672000
## 8          HURRICANE  14610229010
## 9        RIVER FLOOD  10148404500
## 10         ICE STORM   8967041360
## 11    TROPICAL STORM   8382236550
## 12      WINTER STORM   6715441251
## 13         HIGH WIND   5908617595
## 14          WILDFIRE   5060586800
## 15         TSTM WIND   5038935845

Results

Top Fatalities and Top Injuries Plots Below: