knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This study explores how critical different event types are in terms of the three parameters mentioned above, namely fatalities, injuries and economic damage. It identifies the ten most critical event types for each parameter and compares them with one another.
The analysis identifies tornadoes as the most harmful events for population health, as they cause the highest number of fatalities and injuries. It also shows that floods are the leading cause of economic damage (property and crop damage combined).
The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. You can download the file from the course web site:
There is also some documentation of the database available. Here you will find how some of the variables are constructed/defined.
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
# Load the raw storm data: the bzip2-compressed CSV is read through a
# bzfile() connection.
storm <- "repdata_data_StormData.csv.bz2" %>%
  bzfile() %>%
  read.csv()
# List the column names of the loaded table.
colnames(storm)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Key variables used for the analysis are:
# Keep only the columns used below: the event type, the casualty counts,
# and the damage amounts together with their exponent codes.
storm2 <- storm %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG, PROPDMGEXP, CROPDMGEXP)
# Inspect the distinct raw exponent codes before recoding them.
unique(storm2[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(storm2[["CROPDMGEXP"]])
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Translate damage-exponent codes into the numeric power of ten they denote.
#
# code: character vector of raw exponent codes.
# Returns a numeric vector of the same length.
#
# Letter codes are matched case-insensitively (H = 2, K = 3, M = 6, B = 9).
# Blank, "+", "-" and "?" are treated as exponent 0, i.e. a multiplier of 1.
# Digit codes are used directly as the exponent.
exp_to_power <- function(code) {
  code <- toupper(code)
  code[code %in% c("", "+", "-", "?")] <- "0"
  letter_powers <- c(H = "2", K = "3", M = "6", B = "9")
  is_letter <- code %in% names(letter_powers)
  code[is_letter] <- unname(letter_powers[code[is_letter]])
  as.numeric(code)
}

# Recode both exponent columns with the same rules (previously duplicated).
storm2$PROPDMGEXP <- exp_to_power(storm2$PROPDMGEXP)
storm2$CROPDMGEXP <- exp_to_power(storm2$CROPDMGEXP)

# Damage amount = mantissa * 10^exponent; total damage = property + crop.
storm2$PROPDMGTOTAL <- storm2$PROPDMG * 10^storm2$PROPDMGEXP
storm2$CROPDMGTOTAL <- storm2$CROPDMG * 10^storm2$CROPDMGEXP
storm2$DMGTOTAL <- storm2$PROPDMGTOTAL + storm2$CROPDMGTOTAL
str(storm2)
## 'data.frame': 902297 obs. of 10 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES : num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PROPDMGEXP : num 3 3 3 3 3 3 3 3 3 3 ...
## $ CROPDMGEXP : num 0 0 0 0 0 0 0 0 0 0 ...
## $ PROPDMGTOTAL: num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
## $ CROPDMGTOTAL: num 0 0 0 0 0 0 0 0 0 0 ...
## $ DMGTOTAL : num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
# Aggregate casualties and damage per event type.
#
# EVTYPE is trimmed and upper-cased before grouping: raw labels such as
# " TSTM WIND" carry leading blanks, so grouping on the raw text splits one
# event type across several groups and understates its totals. Upper-casing
# additionally merges labels that differ only by letter case.
SumData <- storm2 %>%
  group_by(EVTYPE = toupper(trimws(EVTYPE))) %>%
  summarize(
    SUMFATALITIES = sum(FATALITIES),
    SUMINJURIES = sum(INJURIES),
    SUMPROPDMG = sum(PROPDMGTOTAL),
    SUMCROPDMG = sum(CROPDMGTOTAL),
    TOTALDMG = sum(DMGTOTAL)
  )
## `summarise()` ungrouping output (override with `.groups` argument)
head(SumData)
## # A tibble: 6 x 6
## EVTYPE SUMFATALITIES SUMINJURIES SUMPROPDMG SUMCROPDMG TOTALDMG
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 " HIGH SURF ADVISO~ 0 0 200000 0 200000
## 2 " COASTAL FLOOD" 0 0 0 0 0
## 3 " FLASH FLOOD" 0 0 50000 0 50000
## 4 " LIGHTNING" 0 0 0 0 0
## 5 " TSTM WIND" 0 0 8100000 0 8100000
## 6 " TSTM WIND (G45)" 0 0 8000 0 8000
# Rank the event types by each outcome (largest total first) and keep the
# first ten rows of every ranking for plotting.
Fatality <- SumData %>% arrange(desc(SUMFATALITIES))
FatalityData <- Fatality[seq_len(10), ]

Injury <- SumData %>% arrange(desc(SUMINJURIES))
InjuryData <- Injury[seq_len(10), ]

Damage <- SumData[with(SumData, order(-TOTALDMG)), ]
DamageData <- Damage[seq_len(10), ]
The plot below shows the ten event types that caused the most fatalities.
# Turn EVTYPE into a factor ordered by descending fatalities so the bars are
# drawn from the deadliest event type downwards.
FatalityData$EVTYPE <- reorder(FatalityData$EVTYPE, -FatalityData$SUMFATALITIES)

# Bar chart of total fatalities per event type, with the count printed
# slightly above each bar and a centred title.
ggplot(
  data = FatalityData,
  mapping = aes(x = EVTYPE, y = SUMFATALITIES, label = SUMFATALITIES)
) +
  geom_col(mapping = aes(color = EVTYPE)) +
  geom_text(nudge_y = 200) +
  labs(x = "Event Type", y = "Total Fatalities", title = "Most Fatal Events") +
  theme(plot.title = element_text(hjust = 0.5))
The plot below shows the ten event types that caused the most injuries.
# Order the event types by descending injury count so the bars are drawn from
# the most injurious event type downwards.
InjuryData$EVTYPE <- with(InjuryData, reorder(EVTYPE, -SUMINJURIES))

# Bar chart of total injuries per event type. This plot previously re-drew
# the fatality data; it now uses InjuryData / SUMINJURIES with matching axis
# label and title.
ggplot(InjuryData, aes(EVTYPE, SUMINJURIES, label = SUMINJURIES)) +
  geom_bar(stat = "identity", aes(color = EVTYPE)) +
  # vjust lifts the label just above the bar independently of the y scale,
  # unlike a fixed nudge expressed in data units.
  geom_text(vjust = -0.5) +
  xlab("Event Type") +
  ylab("Total Injuries") +
  ggtitle("Most Injurious Events") +
  theme(plot.title = element_text(hjust = 0.5))
The plot below shows the ten event types that caused the most total damage (property and crop damage combined).
# Turn EVTYPE into a factor ordered by descending total damage so the bars
# are drawn from the costliest event type downwards.
DamageData$EVTYPE <- reorder(DamageData$EVTYPE, -DamageData$TOTALDMG)

# Bar chart of total damage per event type, with a centred title and the
# legend placed underneath the panel.
ggplot(data = DamageData, mapping = aes(x = EVTYPE, y = TOTALDMG)) +
  geom_col(mapping = aes(color = EVTYPE)) +
  labs(
    x = "Event Type",
    y = "Total Damage",
    title = "Events with Most Damage"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "bottom"
  )