# Load the raw storm data; the bzip2-compressed CSV is handed to read.csv()
# as-is, without a separate decompression step.
dd <- read.csv(file = "repdata_data_StormData.csv.bz2")
Checking the data and transforming some variables
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
# Count missing begin dates in the raw data before the column is converted.
sum(is.na(dd[["BGN_DATE"]]))
## [1] 0
# Work on a copy of the raw data: parse BGN_DATE as a Date (month/day/year)
# and derive the calendar year from the parsed value. mutate() evaluates its
# arguments in order, so YEAR sees the already converted BGN_DATE.
stormData <- dd %>%
  mutate(
    BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y"),
    YEAR = year(BGN_DATE)
  )
Transforming the damage variables
# Upper-case the exponent codes so lower- and upper-case spellings of a code
# resolve to the same table entry below.
stormData$PROPDMGEXP <- toupper(stormData$PROPDMGEXP)
stormData$CROPDMGEXP <- toupper(stormData$CROPDMGEXP)

# Crop damage multiplier: look each exponent code up in a small table and
# turn it into a power of ten. Codes that are not in the table yield NA.
stormData$CROPDMGFACTOR <- local({
  codes <- c("", "?", "0", "2", "K", "M", "B")
  powers <- c(0, 0, 0, 2, 3, 6, 9)
  (10^powers)[match(stormData$CROPDMGEXP, codes)]
})

# Property damage multiplier: same lookup with the property-specific table
# (symbols count as 10^0, digits as their own power, H/K/M/B as 2/3/6/9).
stormData$PROPDMGFACTOR <- local({
  codes <- c(
    "", "-", "?", "+", "0",
    "1", "2", "3", "4", "5", "6", "7", "8",
    "H", "K", "M", "B"
  )
  powers <- c(
    0, 0, 0, 0, 0,
    1, 2, 3, 4, 5, 6, 7, 8,
    2, 3, 6, 9
  )
  (10^powers)[match(stormData$PROPDMGEXP, codes)]
})
# Derive the two impact measures used in the rest of the analysis:
#   HEALTHIMP    - fatalities plus injuries
#   ECONOMICCOST - property and crop damage, each scaled by its multiplier
stormData <- stormData %>%
  mutate(
    HEALTHIMP = FATALITIES + INJURIES,
    ECONOMICCOST = PROPDMG * PROPDMGFACTOR + CROPDMG * CROPDMGFACTOR
  )
# Keep only events that report some property damage, crop damage, fatalities
# or injuries, and upper-case the event type so spellings that differ only in
# case are aggregated together.
stormData <- stormData %>%
  filter(PROPDMG > 0 | CROPDMG > 0 | FATALITIES > 0 | INJURIES > 0) %>%
  mutate(EVTYPE = toupper(EVTYPE))

# Total health impact per event type, then show the event types whose total
# lies above the 95th percentile of all totals.
healthImpact <- aggregate(HEALTHIMP ~ EVTYPE, data = stormData, FUN = sum)
subset(healthImpact, HEALTHIMP > quantile(HEALTHIMP, probs = 0.95))
## EVTYPE HEALTHIMP
## 14 BLIZZARD 906
## 50 DUST STORM 462
## 54 EXCESSIVE HEAT 8428
## 65 FLASH FLOOD 2755
## 78 FLOOD 7259
## 90 FOG 796
## 116 HAIL 1376
## 133 HEAT 3037
## 134 HEAT WAVE 551
## 151 HEAVY SNOW 1148
## 179 HIGH WIND 1385
## 203 HURRICANE/TYPHOON 1339
## 216 ICE STORM 2064
## 233 LIGHTNING 6046
## 276 RIP CURRENT 600
## 277 RIP CURRENTS 501
## 329 THUNDERSTORM WIND 1621
## 346 THUNDERSTORM WINDS 972
## 371 TORNADO 96979
## 386 TSTM WIND 7461
## 431 WILD/FOREST FIRE 557
## 433 WILDFIRE 986
## 441 WINTER STORM 1527
Getting economic cost
# Total economic cost per event type, then show the event types whose total
# lies above the 95th percentile of all totals.
economicCost <- aggregate(ECONOMICCOST ~ EVTYPE, data = stormData, FUN = sum)
subset(economicCost, ECONOMICCOST > quantile(ECONOMICCOST, probs = 0.95))
## EVTYPE ECONOMICCOST
## 43 DROUGHT 15018672000
## 65 FLASH FLOOD 18243991079
## 78 FLOOD 150319678257
## 116 HAIL 18761221986
## 140 HEAVY RAIN 1427647890
## 144 HEAVY RAIN/SEVERE WEATHER 2500000000
## 179 HIGH WIND 5908617595
## 194 HURRICANE 14610229010
## 201 HURRICANE OPAL 3191846000
## 203 HURRICANE/TYPHOON 71913712800
## 216 ICE STORM 8967041360
## 280 RIVER FLOOD 10148404500
## 317 STORM SURGE 43323541000
## 318 STORM SURGE/TIDE 4642038000
## 329 THUNDERSTORM WIND 3897965522
## 346 THUNDERSTORM WINDS 2135245647
## 371 TORNADO 57362333947
## 377 TORNADOES, TSTM WIND, HAIL 1602500000
## 381 TROPICAL STORM 8382236550
## 386 TSTM WIND 5038965845
## 431 WILD/FOREST FIRE 3108626330
## 433 WILDFIRE 5060586800
## 441 WINTER STORM 6715441251
Getting health impact
# Sum the health impact per event type and order the result from the largest
# total to the smallest.
healthImpact <- group_by(stormData, EVTYPE)
healthImpact <- summarise(healthImpact, HEALTHIMP = sum(HEALTHIMP))
healthImpact <- arrange(healthImpact, desc(HEALTHIMP))
Plotting the result for health impact:
library(ggplot2)
# Bar chart of the event types with the largest health impact.
# healthImpact is sorted in descending order, so its first rows are the top
# entries. head() returns at most ten rows and never pads the data with NA
# rows when fewer than ten event types are present (unlike `[1:10, ]`).
# Bars are ordered by decreasing HEALTHIMP; x labels are rotated 90 degrees.
# NOTE(review): `color` only sets the bar outline; map EVTYPE to `fill`
# instead if the bars themselves are meant to be colored - confirm intent.
g <- ggplot(
  head(healthImpact, 10),
  aes(x = reorder(EVTYPE, -HEALTHIMP), y = HEALTHIMP, color = EVTYPE)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
g
Plotting results for economic impact
# Sum the economic cost per event type and order the result from the largest
# total to the smallest.
economicCost <- group_by(stormData, EVTYPE)
economicCost <- summarise(economicCost, ECONOMICCOST = sum(ECONOMICCOST))
economicCost <- arrange(economicCost, desc(ECONOMICCOST))
# Bar chart of the event types with the largest economic cost.
# economicCost is sorted in descending order, so its first rows are the top
# entries. head() returns at most ten rows and never pads the data with NA
# rows when fewer than ten event types are present (unlike `[1:10, ]`).
# Bars are white with an outline colored by event type, ordered by decreasing
# ECONOMICCOST; x labels are rotated 90 degrees.
g1 <- ggplot(
  head(economicCost, 10),
  aes(x = reorder(EVTYPE, -ECONOMICCOST), y = ECONOMICCOST, color = EVTYPE)
) +
  geom_bar(stat = "identity", fill = "white") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
g1