This research analyses the economic and population health consequences of major storms and weather events in the US. The goal of this research is to explore the NOAA Storm Database and answer two questions: 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? 2. Across the United States, which types of events have the greatest economic consequences? To measure economic consequences, the property and crop damage from each event type was summed (in US dollars). To measure population health consequences, the total numbers of fatalities and injuries for each event type were used (in number of people).
# Load the NOAA storm records from the bzip2-compressed CSV file.
data <- read.csv(file = "repdata-data-StormData.csv.bz2")
library(dplyr)
library(ggplot2)
# Q1: total fatalities and injuries per event type (EVTYPE), plus each event
# type's share of the overall totals in percent, rounded to 2 decimals.
# The chain uses %>% because the old %.% operator is no longer provided by
# dplyr; TRUE is spelled out because T can be reassigned.
health.data <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES, na.rm = TRUE),
    injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  mutate(
    fatalities.p = round(fatalities / sum(fatalities) * 100, 2),
    injuries.p = round(injuries / sum(injuries) * 100, 2)
  )
# Cleaning and summarising data for Q2.

# Convert a damage-exponent code into the power of ten it stands for.
# Letter codes are matched case-insensitively (H = 2, K = 3, M = 6, B = 9),
# the placeholder codes "", "-", "?" and "+" count as 0, and digit codes are
# used as-is. Any other code becomes NA (with R's usual coercion warning), so
# the affected rows drop out of the na.rm sums below.
exponent_power <- function(code) {
  code <- toupper(as.character(code))
  code[code %in% c("", "-", "?", "+")] <- "0"
  letter_power <- c(H = "2", K = "3", M = "6", B = "9")
  is_letter <- code %in% names(letter_power)
  code[is_letter] <- unname(letter_power[code[is_letter]])
  as.numeric(code)
}

# Property damage in USD: mantissa times 10 ^ exponent.
data$PROPDMGEXP <- exponent_power(data$PROPDMGEXP)
data$propertydamage <- data$PROPDMG * 10^data$PROPDMGEXP

# Crop damage in USD, decoded the same way.
data$CROPDMGEXP <- exponent_power(data$CROPDMGEXP)
data$cropdamage <- data$CROPDMG * 10^data$CROPDMGEXP

data$totaldamage <- data$propertydamage + data$cropdamage

# Total damage per event type and its share of all damage, in percent.
# %>% replaces the %.% operator that dplyr no longer provides.
damage.data <- data %>%
  group_by(EVTYPE) %>%
  summarise(damage = sum(totaldamage, na.rm = TRUE)) %>%
  mutate(damage.p = round(damage / sum(damage, na.rm = TRUE) * 100, 2))
# Scatter plot of log(injuries) against log(fatalities) for the ten event
# types with the most fatalities (ties broken by injuries), each point
# labelled with its event type. Written with ggplot() + geom_point() because
# qplot() is deprecated in current ggplot2.
ggplot(
  data = arrange(health.data, desc(fatalities), desc(injuries))[1:10, ],
  aes(x = log(fatalities), y = log(injuries), colour = EVTYPE)
) +
  geom_point(alpha = 0.5, size = 5) +
  labs(title = "Injuries and fatalities from different events") +
  geom_text(aes(label = EVTYPE), size = 2, hjust = 1, vjust = -1, angle = 45)
Top 10 event types by number of fatalities:
# First ten rows after sorting by fatalities (largest first), keeping the
# event type, the fatality count and the fatality percentage.
arrange(health.data, desc(fatalities))[
  seq_len(10),
  c("EVTYPE", "fatalities", "fatalities.p")
]
## Source: local data frame [10 x 3]
##
## EVTYPE fatalities fatalities.p
## 1 TORNADO 5633 37.19
## 2 EXCESSIVE HEAT 1903 12.57
## 3 FLASH FLOOD 978 6.46
## 4 HEAT 937 6.19
## 5 LIGHTNING 816 5.39
## 6 TSTM WIND 504 3.33
## 7 FLOOD 470 3.10
## 8 RIP CURRENT 368 2.43
## 9 HIGH WIND 248 1.64
## 10 AVALANCHE 224 1.48
EVTYPE - type of event, fatalities - total number of fatalities for that event type, fatalities.p - that event type's share of all fatalities, in percent
Top 10 event types by number of injuries:
# First ten rows after sorting by injuries (largest first), keeping the
# event type, the injury count and the injury percentage.
arrange(health.data, desc(injuries))[
  seq_len(10),
  c("EVTYPE", "injuries", "injuries.p")
]
## Source: local data frame [10 x 3]
##
## EVTYPE injuries injuries.p
## 1 TORNADO 91346 65.00
## 2 TSTM WIND 6957 4.95
## 3 FLOOD 6789 4.83
## 4 EXCESSIVE HEAT 6525 4.64
## 5 LIGHTNING 5230 3.72
## 6 HEAT 2100 1.49
## 7 ICE STORM 1975 1.41
## 8 FLASH FLOOD 1777 1.26
## 9 THUNDERSTORM WIND 1488 1.06
## 10 HAIL 1361 0.97
EVTYPE - type of event, injuries - total number of injuries for that event type, injuries.p - that event type's share of all injuries, in percent
# Ten event types with the largest total damage, largest first. Columns are
# selected by name rather than by position.
damage.data2 <- arrange(damage.data, desc(damage))[
  1:10,
  c("EVTYPE", "damage", "damage.p")
]

# Bar chart of total damage per event type, one fill colour per event type,
# with the event name drawn vertically on each bar. The fill legend is hidden
# with guide = "none"; the logical form (guide = F) is deprecated in ggplot2
# and F can be reassigned.
ggplot(data = damage.data2, aes(x = EVTYPE, y = damage)) +
  geom_bar(stat = "identity", aes(fill = EVTYPE)) +
  labs(title = "Top 10 events with most damage", x = "Events", y = "Damage, USD") +
  scale_fill_hue(guide = "none") +
  geom_text(aes(label = EVTYPE), size = 2.5, hjust = 0, vjust = -1, angle = 90)
Top 10 events with most damage:
# Show the damage.data2 table (the ten event types with the largest damage).
damage.data2
## Source: local data frame [10 x 3]
##
## EVTYPE damage damage.p
## 1 FLOOD 1.503e+11 31.49
## 2 HURRICANE/TYPHOON 7.191e+10 15.07
## 3 TORNADO 5.736e+10 12.02
## 4 STORM SURGE 4.332e+10 9.08
## 5 HAIL 1.876e+10 3.93
## 6 FLASH FLOOD 1.824e+10 3.82
## 7 DROUGHT 1.502e+10 3.15
## 8 HURRICANE 1.461e+10 3.06
## 9 RIVER FLOOD 1.015e+10 2.13
## 10 ICE STORM 8.967e+09 1.88
EVTYPE - type of event, damage - total property and crop damage for that event type in USD, damage.p - that event type's share of all damage, in percent