library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(ggplot2)
Read Data
stormDataRaw <- read.csv("StormData.csv.bz2")
stormData <- select(stormDataRaw, BGN_DATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP, FATALITIES, INJURIES)
Only use events with either health impact or economic damage
stormData <- filter(stormData, PROPDMG > 0 | CROPDMG > 0 | FATALITIES > 0 | INJURIES > 0)
table(stormData$PROPDMGEXP)
##
## - ? + 0 1 2 3 4 5 6
## 8448 0 0 0 0 0 0 0 0 0 0
## 7 8 B h H K m M
## 0 0 32 0 0 185474 0 7364
table(stormData$CROPDMGEXP)
##
## ? 0 2 B k K m M
## 102767 0 0 0 2 0 96787 0 1762
stormData$PROPDMGEXP <- toupper(stormData$PROPDMGEXP)
stormData$CROPDMGEXP <- toupper(stormData$CROPDMGEXP)
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "")] <- 10^0
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "?")] <- 10^0
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "0")] <- 10^0
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "2")] <- 10^2
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "K")] <- 10^3
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "M")] <- 10^6
stormData$CROPDMGFACTOR[(stormData$CROPDMGEXP == "B")] <- 10^9
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "")] <- 10^0
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "-")] <- 10^0
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "?")] <- 10^0
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "+")] <- 10^0
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "0")] <- 10^0
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "1")] <- 10^1
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "2")] <- 10^2
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "3")] <- 10^3
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "4")] <- 10^4
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "5")] <- 10^5
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "6")] <- 10^6
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "7")] <- 10^7
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "8")] <- 10^8
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "H")] <- 10^2
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "K")] <- 10^3
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "M")] <- 10^6
stormData$PROPDMGFACTOR[(stormData$PROPDMGEXP == "B")] <- 10^9
stormData <- mutate(stormData, HEALTHIMP = FATALITIES + INJURIES)
stormData <- mutate(stormData, ECONOMICCOST = PROPDMG * PROPDMGFACTOR + CROPDMG * CROPDMGFACTOR)
stormData$EVTYPE <- toupper(stormData$EVTYPE)
dim(data.frame(table(stormData$EVTYPE)))
## [1] 186 2
evtypeUnique <- unique(stormData$EVTYPE)
evtypeUnique[grep("THUND", evtypeUnique)]
## [1] "THUNDERSTORM" "THUNDERSTORM WIND (G40)"
## [3] "THUNDERSTORM WIND" "MARINE THUNDERSTORM WIND"
healthImpact <- with(stormData, aggregate(HEALTHIMP ~ EVTYPE, FUN = sum))
subset(healthImpact, HEALTHIMP > quantile(HEALTHIMP, prob = 0.95))
## EVTYPE HEALTHIMP
## 39 EXCESSIVE HEAT 8188
## 46 FLASH FLOOD 2561
## 48 FLOOD 7172
## 69 HEAT 1459
## 88 HURRICANE/TYPHOON 1339
## 107 LIGHTNING 4792
## 146 THUNDERSTORM WIND 1530
## 149 TORNADO 22178
## 153 TSTM WIND 3870
## 182 WINTER STORM 1483
stormData$EVTYPE[(stormData$EVTYPE == "TSTM WIND")] <- "THUNDERSTORM WIND"
stormData$EVTYPE[(stormData$EVTYPE == "HURRICANE/TYPHOON")] <- "HURRICANE (TYPHOON)"
economicCost <- with(stormData, aggregate(ECONOMICCOST ~ EVTYPE, FUN = sum))
subset(economicCost, ECONOMICCOST > quantile(ECONOMICCOST, prob = 0.95))
## EVTYPE ECONOMICCOST
## 32 DROUGHT 14413667000
## 46 FLASH FLOOD 16557105610
## 48 FLOOD 148919611950
## 66 HAIL 17071172870
## 86 HURRICANE 14554229010
## 87 HURRICANE (TYPHOON) 71913712800
## 141 STORM SURGE 43193541000
## 146 THUNDERSTORM WIND 8812957230
## 149 TORNADO 24900370720
## 152 TROPICAL STORM 8320186550
stormData$EVTYPE[(stormData$EVTYPE == "HURRICANE")] <- "HURRICANE (TYPHOON)"
stormData$EVTYPE[(stormData$EVTYPE == "STORM SURGE")] <- "STORM SURGE/TIDE"
healthImpact <- stormData %>%
group_by(EVTYPE) %>%
summarise(HEALTHIMP = sum(HEALTHIMP)) %>%
arrange(desc(HEALTHIMP))
#healthImpact[1:10,]
j1 <- ggplot(healthImpact[1:10,], aes(x=reorder(EVTYPE, -HEALTHIMP),y=HEALTHIMP,color=EVTYPE)) +
geom_bar(stat="identity", fill="white") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Event") + ylab("Number of fatalities and injuries") +
theme(legend.position="none") +
ggtitle("Fatalities and injuries in the US caused by severe weather events")
j1

economicCost <- stormData %>%
group_by(EVTYPE) %>%
summarise(ECONOMICCOST = sum(ECONOMICCOST)) %>%
arrange(desc(ECONOMICCOST))
#economicCost[1:10,]
g1 <- ggplot(economicCost[1:10,], aes(x=reorder(EVTYPE, -ECONOMICCOST),y=ECONOMICCOST,color=EVTYPE)) +
geom_bar(stat="identity", fill="white") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
xlab("Event") + ylab("Economic cost in USD") +
theme(legend.position="none") +
ggtitle("Economic cost in the US caused by severe weather events")
g1
