Loading Data

# Read the raw storm data from the bzip2-compressed CSV (read.csv opens
# compressed files transparently). stringsAsFactors = FALSE keeps text columns
# as character on every R version; before R 4.0.0 the default turned them into
# factors.
dd <- read.csv("repdata_data_StormData.csv.bz2", stringsAsFactors = FALSE)

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Checking the data and transforming some variables

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.3
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Number of missing begin dates in the raw data.
sum(is.na(dd[["BGN_DATE"]]))
## [1] 0
# Work on a copy of the raw data: parse BGN_DATE (month/day/year) into a Date
# and derive the calendar year of each event from it.
stormData <- dd %>%
  mutate(
    BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y"),
    YEAR = year(BGN_DATE)
  )

Transforming the damage variables

# Turn the damage exponent codes (PROPDMGEXP / CROPDMGEXP) into numeric
# multipliers stored in PROPDMGFACTOR / CROPDMGFACTOR.

# Normalise case first so that e.g. "k" and "K" are treated alike.
stormData$PROPDMGEXP <- toupper(stormData$PROPDMGEXP)
stormData$CROPDMGEXP <- toupper(stormData$CROPDMGEXP)

# One lookup shared by both columns: blank and symbol codes count as 1,
# a digit d as 10^d, and H / K / M / B as 10^2 / 10^3 / 10^6 / 10^9.
# The two vectors are parallel: dmgExpFactors[i] belongs to dmgExpCodes[i].
dmgExpCodes <- c("", "-", "?", "+", as.character(0:8), "H", "K", "M", "B")
dmgExpFactors <- 10^c(0, 0, 0, 0, 0:8, 2, 3, 6, 9)

# match() builds each full-length column in one step instead of filling a
# not-yet-existing column through a series of masked assignments.
stormData$CROPDMGFACTOR <-
  dmgExpFactors[match(stormData$CROPDMGEXP, dmgExpCodes)]
stormData$PROPDMGFACTOR <-
  dmgExpFactors[match(stormData$PROPDMGEXP, dmgExpCodes)]

# Codes outside the lookup leave the factor as NA; report them rather than
# letting the NA flow silently into later cost calculations.
dmgExpSeen <- unique(c(stormData$CROPDMGEXP, stormData$PROPDMGEXP))
dmgExpUnmapped <- dmgExpSeen[!dmgExpSeen %in% dmgExpCodes]
if (length(dmgExpUnmapped) > 0) {
  warning(
    "Unmapped damage exponent code(s): ",
    paste(dmgExpUnmapped, collapse = ", "),
    call. = FALSE
  )
}



# Add the two impact measures, then keep only events that caused any damage
# or casualties:
#   HEALTHIMP    = fatalities plus injuries
#   ECONOMICCOST = property damage plus crop damage, each scaled by its factor
stormData <- stormData %>%
  mutate(
    HEALTHIMP = FATALITIES + INJURIES,
    ECONOMICCOST = PROPDMG * PROPDMGFACTOR + CROPDMG * CROPDMGFACTOR
  ) %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)


# Upper-case the event type so that spellings differing only in case fall into
# the same group, then total HEALTHIMP per event type.
stormData$EVTYPE <- toupper(stormData$EVTYPE)
healthImpact <- aggregate(HEALTHIMP ~ EVTYPE, data = stormData, FUN = sum)
# Show the event types whose total lies above the 95th percentile of all totals.
healthImpact[
  which(healthImpact$HEALTHIMP > quantile(healthImpact$HEALTHIMP, probs = 0.95)),
]
##                 EVTYPE HEALTHIMP
## 14            BLIZZARD       906
## 50          DUST STORM       462
## 54      EXCESSIVE HEAT      8428
## 65         FLASH FLOOD      2755
## 78               FLOOD      7259
## 90                 FOG       796
## 116               HAIL      1376
## 133               HEAT      3037
## 134          HEAT WAVE       551
## 151         HEAVY SNOW      1148
## 179          HIGH WIND      1385
## 203  HURRICANE/TYPHOON      1339
## 216          ICE STORM      2064
## 233          LIGHTNING      6046
## 276        RIP CURRENT       600
## 277       RIP CURRENTS       501
## 329  THUNDERSTORM WIND      1621
## 346 THUNDERSTORM WINDS       972
## 371            TORNADO     96979
## 386          TSTM WIND      7461
## 431   WILD/FOREST FIRE       557
## 433           WILDFIRE       986
## 441       WINTER STORM      1527

Getting economic cost

# Total ECONOMICCOST per event type.
economicCost <- aggregate(ECONOMICCOST ~ EVTYPE, data = stormData, FUN = sum)
# Show the event types whose total lies above the 95th percentile of all totals.
economicCost[
  which(
    economicCost$ECONOMICCOST >
      quantile(economicCost$ECONOMICCOST, probs = 0.95)
  ),
]
##                         EVTYPE ECONOMICCOST
## 43                     DROUGHT  15018672000
## 65                 FLASH FLOOD  18243991079
## 78                       FLOOD 150319678257
## 116                       HAIL  18761221986
## 140                 HEAVY RAIN   1427647890
## 144  HEAVY RAIN/SEVERE WEATHER   2500000000
## 179                  HIGH WIND   5908617595
## 194                  HURRICANE  14610229010
## 201             HURRICANE OPAL   3191846000
## 203          HURRICANE/TYPHOON  71913712800
## 216                  ICE STORM   8967041360
## 280                RIVER FLOOD  10148404500
## 317                STORM SURGE  43323541000
## 318           STORM SURGE/TIDE   4642038000
## 329          THUNDERSTORM WIND   3897965522
## 346         THUNDERSTORM WINDS   2135245647
## 371                    TORNADO  57362333947
## 377 TORNADOES, TSTM WIND, HAIL   1602500000
## 381             TROPICAL STORM   8382236550
## 386                  TSTM WIND   5038965845
## 431           WILD/FOREST FIRE   3108626330
## 433                   WILDFIRE   5060586800
## 441               WINTER STORM   6715441251

Getting Health impact

# Total HEALTHIMP per event type, sorted from the largest total to the smallest.
healthImpact <- arrange(
  summarise(group_by(stormData, EVTYPE), HEALTHIMP = sum(HEALTHIMP)),
  desc(HEALTHIMP)
)

Plotting the result for health impact:

library(ggplot2)
# Bar chart of the ten event types with the largest HEALTHIMP totals
# (healthImpact is sorted in descending order before this point).
# head() replaces [1:10, ] so that fewer than ten event types do not add
# all-NA rows to the plot data.
g <- ggplot(
  head(healthImpact, 10),
  aes(x = reorder(EVTYPE, -HEALTHIMP), y = HEALTHIMP, color = EVTYPE)
) +
  geom_bar(stat = "identity") +
  # Explicit labels; otherwise the x axis title is the raw reorder() expression.
  labs(
    x = "Event type",
    y = "Fatalities + injuries",
    title = "Ten most harmful event types for population health"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
g

Plotting results for economic impact

# Total ECONOMICCOST per event type, sorted from the largest total to the
# smallest.
economicCost <- arrange(
  summarise(group_by(stormData, EVTYPE), ECONOMICCOST = sum(ECONOMICCOST)),
  desc(ECONOMICCOST)
)

# Bar chart of the ten event types with the largest ECONOMICCOST totals
# (economicCost is sorted in descending order before this point).
# head() replaces [1:10, ] so that fewer than ten event types do not add
# all-NA rows to the plot data.
g1 <- ggplot(
  head(economicCost, 10),
  aes(x = reorder(EVTYPE, -ECONOMICCOST), y = ECONOMICCOST, color = EVTYPE)
) +
  # White fill with a coloured outline per event type.
  geom_bar(stat = "identity", fill = "white") +
  # Explicit labels; otherwise the x axis title is the raw reorder() expression.
  labs(
    x = "Event type",
    y = "Property + crop damage",
    title = "Ten event types with the greatest economic cost"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

g1