library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fetch the two reference PDFs and the compressed storm dataset into the
# working directory. Each download is skipped when a file of that name is
# already present, so re-running the script does not hit the network again.
if (!file.exists("StormDataDocumentation.pdf")) {
  url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf"
  fileName <- file.path(getwd(), "StormDataDocumentation.pdf")
  download.file(url, destfile = fileName, mode = "wb")
}
if (!file.exists("StormEventsFAQ.pdf")) {
  url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf"
  fileName <- file.path(getwd(), "StormEventsFAQ.pdf")
  download.file(url, destfile = fileName, mode = "wb")
}
# Download the main dataset
if (!file.exists("StormData.csv.bz2")) {
  url <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  fileName <- file.path(getwd(), "StormData.csv.bz2")
  # The archive is binary, so request binary mode explicitly, exactly as
  # the PDF downloads do, instead of relying on the platform default.
  download.file(url, destfile = fileName, mode = "wb")
}
# Read the bzip2-compressed CSV and keep only the event type together with
# the casualty and damage columns, then show the resulting structure.
stormData <- bzfile("StormData.csv.bz2") %>%
  read.csv(header = TRUE) %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
  )
str(stormData)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Sum fatalities per event type and keep the 15 largest totals.
getFATALITIES <- stormData %>% group_by(EVTYPE)
top15_FATALITIES <- getFATALITIES %>%
  summarise(total = sum(FATALITIES)) %>%
  arrange(desc(total)) %>%
  top_n(15)
## Selecting by total
top15_FATALITIES
## # A tibble: 15 x 2
## EVTYPE total
## <fct> <dbl>
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
## 11 WINTER STORM 206
## 12 RIP CURRENTS 204
## 13 HEAT WAVE 172
## 14 EXTREME COLD 160
## 15 THUNDERSTORM WIND 133
# Sum injuries per event type and keep the 15 largest totals.
getINJURIES <- stormData %>% group_by(EVTYPE)
top15_INJURIES <- getINJURIES %>%
  summarise(total = sum(INJURIES)) %>%
  arrange(desc(total)) %>%
  top_n(15)
## Selecting by total
top15_INJURIES
## # A tibble: 15 x 2
## EVTYPE total
## <fct> <dbl>
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
## 11 WINTER STORM 1321
## 12 HURRICANE/TYPHOON 1275
## 13 HIGH WIND 1137
## 14 HEAVY SNOW 1021
## 15 WILDFIRE 911
# Show the distinct codes stored in the property-damage exponent column;
# xformData() below turns these codes into powers of ten.
unique(stormData$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Same inspection for the crop-damage exponent column.
unique(stormData$CROPDMGEXP)
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
# Convert the PROPDMGEXP / CROPDMGEXP codes into numeric powers of ten and
# derive the damage amounts from them.
#
# dataset: data frame with PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP
#          columns (defaults to the global stormData).
# Returns: the data frame with both *EXP columns replaced by numeric
#          exponents and with ActPropDam and ActCropDam appended, where
#          Act*Dam = *DMG * 10^*DMGEXP.
xformData <- function(dataset = stormData) {
  # Ordered (pattern, replacement) rewrites applied to the exponent codes.
  rewrites <- list(
    c("\\-|\\+|\\?", "0"),
    c("B|b", "9"),
    c("M|m", "6"),
    c("K|k", "3"),
    c("H|h", "2")
  )

  # Translate one column of codes into numeric exponents. Whatever still
  # fails to parse as a number afterwards (e.g. the empty code) becomes 0.
  toExponent <- function(codes) {
    codes <- as.character(codes)
    for (rule in rewrites) {
      codes <- gsub(rule[1], rule[2], codes)
    }
    exponent <- as.numeric(codes)
    exponent[is.na(exponent)] <- 0
    exponent
  }

  dataset$PROPDMGEXP <- toExponent(dataset$PROPDMGEXP)
  dataset$ActPropDam <- dataset$PROPDMG * 10^dataset$PROPDMGEXP

  dataset$CROPDMGEXP <- toExponent(dataset$CROPDMGEXP)
  dataset$ActCropDam <- dataset$CROPDMG * 10^dataset$CROPDMGEXP

  dataset
}
# Add the ActPropDam / ActCropDam columns via xformData(); the result is kept
# in `s`, separate from stormData.
s <- xformData(stormData)

# Total property damage per event type, largest first.
propertyDamage <- aggregate(ActPropDam ~ EVTYPE, data = s, FUN = sum)
propertyDamage_sort <- propertyDamage[order(-propertyDamage$ActPropDam), ]
# head() returns at most 15 rows; indexing with [1:15, ] would pad the
# result with all-NA rows whenever fewer than 15 event types are present.
top15_PROPDAM <- head(propertyDamage_sort, 15)

# Total crop damage per event type, largest first.
cropDamage <- aggregate(ActCropDam ~ EVTYPE, data = s, FUN = sum)
cropDamage_sort <- cropDamage[order(-cropDamage$ActCropDam), ]
top15_CROPDAM <- head(cropDamage_sort, 15)
# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)

# Combined property and crop damage per event type, keeping the 15 largest.
totalDamages <- aggregate(ActPropDam + ActCropDam ~ EVTYPE, data = s, FUN = sum)
colnames(totalDamages)[2] <- "total"
top15_TotalDamages <- totalDamages %>%
  arrange(desc(total)) %>%
  top_n(15)
## Selecting by total
# Print the 15 event types with the highest summed property damage.
top15_PROPDAM
## EVTYPE ActPropDam
## 170 FLOOD 144657709807
## 411 HURRICANE/TYPHOON 69305840000
## 834 TORNADO 56947380676
## 670 STORM SURGE 43323536000
## 153 FLASH FLOOD 16822673978
## 244 HAIL 15735267513
## 402 HURRICANE 11868319010
## 848 TROPICAL STORM 7703890550
## 972 WINTER STORM 6688497251
## 359 HIGH WIND 5270046295
## 590 RIVER FLOOD 5118945500
## 957 WILDFIRE 4765114000
## 671 STORM SURGE/TIDE 4641188000
## 856 TSTM WIND 4484928495
## 427 ICE STORM 3944927860
# Print the 15 event types with the highest summed crop damage.
top15_CROPDAM
## EVTYPE ActCropDam
## 95 DROUGHT 13972566000
## 170 FLOOD 5661968450
## 590 RIVER FLOOD 5029459000
## 427 ICE STORM 5022113500
## 244 HAIL 3025954473
## 402 HURRICANE 2741910000
## 411 HURRICANE/TYPHOON 2607872800
## 153 FLASH FLOOD 1421317100
## 140 EXTREME COLD 1292973000
## 212 FROST/FREEZE 1094086000
## 290 HEAVY RAIN 733399800
## 848 TROPICAL STORM 678346000
## 359 HIGH WIND 638571300
## 856 TSTM WIND 554007350
## 130 EXCESSIVE HEAT 492402000
# Print the event types with the highest combined property and crop damage.
top15_TotalDamages
## EVTYPE total
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57362333946
## 4 STORM SURGE 43323541000
## 5 HAIL 18761221986
## 6 FLASH FLOOD 18243991078
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
## 11 TROPICAL STORM 8382236550
## 12 WINTER STORM 6715441251
## 13 HIGH WIND 5908617595
## 14 WILDFIRE 5060586800
## 15 TSTM WIND 5038935845
Plots of the top fatalities and top injuries are shown below: