Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

## -- Attaching packages ----------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0       v purrr   0.3.0  
## v tibble  2.0.1       v dplyr   0.8.0.1
## v tidyr   0.8.2       v stringr 1.4.0  
## v readr   1.3.1       v forcats 0.4.0
## -- Conflicts -------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## Warning: package 'gridExtra' was built under R version 3.5.3
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
## Warning: package 'R.utils' was built under R version 3.5.3
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.8.0 successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:tidyr':
## 
##     extract
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date

Reading the data

# Read the raw storm data.
# The two damage-exponent columns are declared as character explicitly: with
# pure type guessing CROPDMGEXP is inferred as logical (presumably because the
# rows sampled for guessing are empty), which would turn any text codes found
# later in the file into NA. Every other column is still guessed by readr.
data <- read_csv(
  "StormData.csv",
  col_types = cols(
    PROPDMGEXP = col_character(),
    CROPDMGEXP = col_character()
  )
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   BGN_DATE = col_character(),
##   BGN_TIME = col_character(),
##   TIME_ZONE = col_character(),
##   COUNTYNAME = col_character(),
##   STATE = col_character(),
##   EVTYPE = col_character(),
##   BGN_AZI = col_logical(),
##   BGN_LOCATI = col_logical(),
##   END_DATE = col_logical(),
##   END_TIME = col_logical(),
##   COUNTYENDN = col_logical(),
##   END_AZI = col_logical(),
##   END_LOCATI = col_logical(),
##   PROPDMGEXP = col_character(),
##   CROPDMGEXP = col_logical(),
##   WFO = col_logical(),
##   STATEOFFIC = col_logical(),
##   ZONENAMES = col_logical(),
##   REMARKS = col_logical()
## )
## See spec(...) for full column specifications.

Data Processing

# Drop the constant " 0:00:00" time suffix, then parse the remaining
# month/day/year text into Date values.
data$BGN_DATE <- mdy(
  gsub(" 0:00:00", "", data$BGN_DATE, fixed = TRUE)
)

# Collapse closely related event labels into one category each.
# The substitutions run in order: "FLASH FLOOD" is rewritten before
# "FLOODING" so that "FLASH FLOODING" also ends up as "FLOOD".
data$EVTYPE <- data$EVTYPE %>%
  gsub("EXCESSIVE HEAT", "HEAT", x = .) %>%
  gsub("FLASH FLOOD", "FLOOD", x = .) %>%
  gsub("FLOODING", "FLOOD", x = .)

# Any label mentioning wind (in any letter case) becomes plain "WIND".
data$EVTYPE[grepl("WIND", data$EVTYPE, ignore.case = TRUE)] <- "WIND"

# NOTE(review): the three substitutions above are case-sensitive and leave
# near-duplicates such as "RIP CURRENT" / "RIP CURRENTS" and "HEAT" /
# "HEAT WAVE" as separate categories -- confirm whether that is intended.

Impact on Health

Calculating Total Fatalities

# Total fatalities per event type, largest total first.
fatalities <- data %>%
  group_by(EVTYPE) %>%
  summarise(total.fatalities = sum(FATALITIES)) %>%
  arrange(desc(total.fatalities))

# Show the ten event types with the most fatalities.
head(fatalities, 10)
## # A tibble: 10 x 2
##    EVTYPE       total.fatalities
##    <chr>                   <dbl>
##  1 TORNADO                  5633
##  2 HEAT                     2840
##  3 FLOOD                    1473
##  4 WIND                     1451
##  5 LIGHTNING                 816
##  6 RIP CURRENT               368
##  7 AVALANCHE                 224
##  8 WINTER STORM              206
##  9 RIP CURRENTS              204
## 10 HEAT WAVE                 172
# Bar chart of the first ten rows of `fatalities`, bars ordered from the
# highest total to the lowest; fill shade follows the total and the legend
# is hidden because the y axis already carries that information.
ggplot(
  fatalities[seq_len(10), ],
  aes(
    x = reorder(EVTYPE, -total.fatalities),
    y = total.fatalities,
    fill = total.fatalities
  )
) +
  geom_col(colour = "black") +
  scale_fill_gradient(low = "#4D98DB", high = "#3A75AE") +
  labs(
    title = "Top 10 Events with Highest Total Fatalities",
    x = "EVENT TYPE",
    y = "Total Fatalities"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

Total Injuries

# Total injuries per event type, largest total first.
injuries <- data %>%
  group_by(EVTYPE) %>%
  summarise(total.injuries = sum(INJURIES)) %>%
  arrange(desc(total.injuries))

# Show the ten event types with the most injuries.
head(injuries, 10)
## # A tibble: 10 x 2
##    EVTYPE            total.injuries
##    <chr>                      <dbl>
##  1 TORNADO                    91346
##  2 WIND                       11498
##  3 HEAT                        8625
##  4 FLOOD                       8576
##  5 LIGHTNING                   5230
##  6 ICE STORM                   1975
##  7 HAIL                        1361
##  8 WINTER STORM                1321
##  9 HURRICANE/TYPHOON           1275
## 10 HEAVY SNOW                  1021
# Bar chart of the first ten rows of `injuries`, bars ordered from the
# highest total to the lowest; fill shade follows the total and the legend
# is hidden because the y axis already carries that information.
ggplot(
  injuries[seq_len(10), ],
  aes(
    x = reorder(EVTYPE, -total.injuries),
    y = total.injuries,
    fill = total.injuries
  )
) +
  geom_col(colour = "black") +
  scale_fill_gradient(low = "#4D98DB", high = "#3A75AE") +
  labs(
    title = "Top 10 Events with Highest Total Injuries",
    x = "EVENT TYPE",
    y = "Total Injuries"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
    legend.position = "none"
  )

Economic Impact

# Overall damage figures: sum the raw property and crop damage columns and
# scale the combined figure by 1000.
# NOTE(review): the PROPDMGEXP / CROPDMGEXP columns are not consulted here, so
# every record is treated as if it were recorded in thousands -- confirm this
# is acceptable for the totals.
propertyDamage <- sum(data[["PROPDMG"]])
cropDamage <- sum(data[["CROPDMG"]])
totalDamage <- 1000 * (propertyDamage + cropDamage)

Aggregate, sort, and subset the data

# --- Property damage ---------------------------------------------------------
# Sum PROPDMG per event type, order from largest to smallest, keep the first
# ten rows (despite the "20" in the name, the subset holds ten rows).
propDmgType <- aggregate(PROPDMG ~ EVTYPE, data = data, FUN = sum)
propDmgSort <- propDmgType[order(-propDmgType$PROPDMG), ]
propDmgSub20 <- propDmgSort[seq_len(10), ]
# Freeze the sorted order in the factor levels so plots keep this ranking.
propDmgSub20$EVTYPE <- factor(
  propDmgSub20$EVTYPE,
  levels = unique(propDmgSub20$EVTYPE)
)

# --- Crop damage -------------------------------------------------------------
# Same steps for CROPDMG.
cropDmgType <- aggregate(CROPDMG ~ EVTYPE, data = data, FUN = sum)
cropDmgSort <- cropDmgType[order(-cropDmgType$CROPDMG), ]
cropDmgSub20 <- cropDmgSort[seq_len(10), ]
cropDmgSub20$EVTYPE <- factor(
  cropDmgSub20$EVTYPE,
  levels = unique(cropDmgSub20$EVTYPE)
)
# Dot plot of the ten event types with the largest summed PROPDMG.
# Columns are referenced by bare name inside aes() so ggplot2 looks them up in
# the plot's data; passing `propDmgSub20$PROPDMG` captures a free-standing
# vector instead, which bypasses data masking and is discouraged by ggplot2.
p1 <- ggplot(propDmgSub20, aes(x = PROPDMG, y = EVTYPE)) +
  geom_point(size = 1.6, colour = "blue") +
  # Reverse the levels so the event with the largest total is drawn on top.
  scale_y_discrete(limits = rev(levels(propDmgSub20$EVTYPE))) +
  ggtitle("Most Harmful Storm Events\nby Property Damage\n1950 - 2011") +
  theme(plot.title = element_text(size = 8.5, face = "bold")) +
  labs(x = "Property Damage ($K)", y = "Storm Events")


# Dot plot of the ten event types with the largest summed CROPDMG.
# Columns are referenced by bare name inside aes() so ggplot2 looks them up in
# the plot's data; passing `cropDmgSub20$CROPDMG` captures a free-standing
# vector instead, which bypasses data masking and is discouraged by ggplot2.
p2 <- ggplot(cropDmgSub20, aes(x = CROPDMG, y = EVTYPE)) +
  geom_point(size = 1.6, colour = "blue") +
  # Reverse the levels so the event with the largest total is drawn on top.
  scale_y_discrete(limits = rev(levels(cropDmgSub20$EVTYPE))) +
  ggtitle("Most Harmful Storm Events\nby Crop Damage\n1950 - 2011") +
  theme(plot.title = element_text(size = 8.5, face = "bold")) +
  labs(x = "Crop Damage ($K)", y = "Storm Events")

The top 10 events with the highest total economic damages are shown graphically.

# Stack the property-damage plot above the crop-damage plot.
grid.arrange(p1, p2, nrow = 2)