Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.3.0
## v tibble 2.0.1 v dplyr 0.8.0.1
## v tidyr 0.8.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Warning: package 'gridExtra' was built under R version 3.5.3
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Warning: package 'R.utils' was built under R version 3.5.3
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.8.0 successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
Reading the data
# Load the NOAA storm data set.
#
# Column types are declared explicitly for every column this analysis reads
# (plus the two damage-unit columns). Left to itself, readr guesses each type
# from the leading rows only, and for this file that guess made sparsely
# populated text columns such as CROPDMGEXP logical, which would turn any
# non-logical text in them into NA. Columns not listed here are still guessed.
data <- read_csv(
  "StormData.csv",
  col_types = cols(
    BGN_DATE = col_character(),
    EVTYPE = col_character(),
    FATALITIES = col_double(),
    INJURIES = col_double(),
    PROPDMG = col_double(),
    PROPDMGEXP = col_character(),
    CROPDMG = col_double(),
    CROPDMGEXP = col_character()
  )
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## BGN_DATE = col_character(),
## BGN_TIME = col_character(),
## TIME_ZONE = col_character(),
## COUNTYNAME = col_character(),
## STATE = col_character(),
## EVTYPE = col_character(),
## BGN_AZI = col_logical(),
## BGN_LOCATI = col_logical(),
## END_DATE = col_logical(),
## END_TIME = col_logical(),
## COUNTYENDN = col_logical(),
## END_AZI = col_logical(),
## END_LOCATI = col_logical(),
## PROPDMGEXP = col_character(),
## CROPDMGEXP = col_logical(),
## WFO = col_logical(),
## STATEOFFIC = col_logical(),
## ZONENAMES = col_logical(),
## REMARKS = col_logical()
## )
## See spec(...) for full column specifications.
# Tidy the two columns the analysis depends on.
#  * BGN_DATE: drop the " 0:00:00" time suffix, then parse the remaining
#    month/day/year text into Date values.
#  * EVTYPE: fold closely related labels together. The substitutions run in
#    the order written (heat, flash flood, flooding); afterwards every label
#    that mentions "wind" in any letter case is replaced by the single label
#    "WIND".
data <- data %>%
  mutate(
    BGN_DATE = mdy(gsub(" 0:00:00", "", BGN_DATE)),
    EVTYPE = gsub("EXCESSIVE HEAT", "HEAT", EVTYPE),
    EVTYPE = gsub("FLASH FLOOD", "FLOOD", EVTYPE),
    EVTYPE = gsub("FLOODING", "FLOOD", EVTYPE),
    EVTYPE = replace(
      EVTYPE,
      grepl("WIND", EVTYPE, ignore.case = TRUE),
      "WIND"
    )
  )
Calculating total fatalities
# Total fatalities per event type, largest total first.
fatalities <- data %>%
  group_by(EVTYPE) %>%
  summarise(total.fatalities = sum(FATALITIES)) %>%
  arrange(-total.fatalities)

# Show the ten deadliest event types.
head(fatalities, n = 10)
## # A tibble: 10 x 2
## EVTYPE total.fatalities
## <chr> <dbl>
## 1 TORNADO 5633
## 2 HEAT 2840
## 3 FLOOD 1473
## 4 WIND 1451
## 5 LIGHTNING 816
## 6 RIP CURRENT 368
## 7 AVALANCHE 224
## 8 WINTER STORM 206
## 9 RIP CURRENTS 204
## 10 HEAT WAVE 172
# Bar chart of the ten event types with the largest fatality totals.
# `fatalities` is already sorted in descending order, so its first ten rows are
# the top ten; reorder() keeps the bars in that same order along the x axis.
ggplot(
  fatalities[1:10, ],
  aes(
    x = reorder(EVTYPE, -total.fatalities),
    y = total.fatalities,
    fill = total.fatalities
  )
) +
  geom_col(color = "black") +
  scale_fill_gradient(low = "#4D98DB", high = "#3A75AE") +
  labs(
    title = "Top 10 Events with Highest Total Fatalities",
    x = "EVENT TYPE",
    y = "Total Fatalities"
  ) +
  theme(
    # Rotate the event labels so long names do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    # The fill only echoes bar height, so the legend adds nothing.
    legend.position = "none"
  )
# Total injuries per event type, largest total first. Only EVTYPE and INJURIES
# are carried into the grouping step.
injuries <- data %>%
  select(EVTYPE, INJURIES) %>%
  group_by(EVTYPE) %>%
  summarise(total.injuries = sum(INJURIES)) %>%
  arrange(desc(total.injuries))

# Show the ten event types with the most injuries.
head(injuries, n = 10)
## # A tibble: 10 x 2
## EVTYPE total.injuries
## <chr> <dbl>
## 1 TORNADO 91346
## 2 WIND 11498
## 3 HEAT 8625
## 4 FLOOD 8576
## 5 LIGHTNING 5230
## 6 ICE STORM 1975
## 7 HAIL 1361
## 8 WINTER STORM 1321
## 9 HURRICANE/TYPHOON 1275
## 10 HEAVY SNOW 1021
# Bar chart of the ten event types with the largest injury totals.
# `injuries` is already sorted in descending order, so its first ten rows are
# the top ten; reorder() keeps the bars in that same order along the x axis.
ggplot(
  injuries[1:10, ],
  aes(
    x = reorder(EVTYPE, -total.injuries),
    y = total.injuries,
    fill = total.injuries
  )
) +
  geom_col(color = "black") +
  scale_fill_gradient(low = "#4D98DB", high = "#3A75AE") +
  labs(
    title = "Top 10 Events with Highest Total Injuries",
    x = "EVENT TYPE",
    y = "Total Injuries"
  ) +
  theme(
    # Rotate the event labels so long names do not overlap.
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    # The fill only echoes bar height, so the legend adds nothing.
    legend.position = "none"
  )
# Overall damage figures: sum the raw property and crop damage magnitudes,
# then scale the combined total by 1000.
# NOTE(review): the scaling treats every PROPDMG / CROPDMG value as thousands
# of dollars; the PROPDMGEXP / CROPDMGEXP columns are not consulted here --
# confirm that this is intended.
propertyDamage <- sum(data[["PROPDMG"]])
cropDamage <- sum(data[["CROPDMG"]])
totalDamage <- 1000 * (propertyDamage + cropDamage)
Aggregate, sort, and subset the data
# Property damage: total per event type, sorted from largest to smallest, then
# cut down to the first ten rows (the object keeps its "Sub20" name, but it
# holds ten event types).
propDmgType <- aggregate(PROPDMG ~ EVTYPE, data = data, FUN = sum)
propDmgSort <- propDmgType[order(-propDmgType[["PROPDMG"]]), ]
propDmgSub20 <- propDmgSort[1:10, ]
# Freeze the sorted order into the factor levels so plots follow the ranking
# rather than alphabetical order.
propDmgSub20[["EVTYPE"]] <- factor(
  propDmgSub20[["EVTYPE"]],
  levels = unique(propDmgSub20[["EVTYPE"]])
)

# Crop damage: same steps as for property damage.
cropDmgType <- aggregate(CROPDMG ~ EVTYPE, data = data, FUN = sum)
cropDmgSort <- cropDmgType[order(-cropDmgType[["CROPDMG"]]), ]
cropDmgSub20 <- cropDmgSort[1:10, ]
cropDmgSub20[["EVTYPE"]] <- factor(
  cropDmgSub20[["EVTYPE"]],
  levels = unique(cropDmgSub20[["EVTYPE"]])
)
# Dot plots of the ten costliest event types, one panel for property damage
# and one for crop damage, stacked vertically.
#
# Aesthetics are mapped with bare column names rather than `df$column`.
# Referring to the data frame inside aes() bypasses the data attached to the
# plot, which ggplot2 discourages and which breaks if the layer data is ever
# filtered, faceted or swapped.

# Property damage panel. The y-axis limits are the factor levels reversed so
# the event type with the largest total is drawn at the top.
p1 <- ggplot(propDmgSub20, aes(x = PROPDMG, y = EVTYPE)) +
  geom_point(size = 1.6, colour = "blue") +
  scale_y_discrete(limits = rev(levels(propDmgSub20$EVTYPE))) +
  ggtitle("Most Harmful Storm Events\nby Property Damage\n1950 - 2011") +
  theme(plot.title = element_text(size = 8.5, face = "bold")) +
  labs(x = "Property Damage ($K)", y = "Storm Events")

# Crop damage panel, built the same way.
p2 <- ggplot(cropDmgSub20, aes(x = CROPDMG, y = EVTYPE)) +
  geom_point(size = 1.6, colour = "blue") +
  scale_y_discrete(limits = rev(levels(cropDmgSub20$EVTYPE))) +
  ggtitle("Most Harmful Storm Events\nby Crop Damage\n1950 - 2011") +
  theme(plot.title = element_text(size = 8.5, face = "bold")) +
  labs(x = "Crop Damage ($K)", y = "Storm Events")

# Draw both panels in a single figure, one above the other.
grid.arrange(p1, p2, nrow = 2)