Synopsis

Storms and other severe weather events can cause both public health and economic problems. We utilized the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database to address some of these issues. Specifically, we found that tornados are the most harmful events for population health, while floods are responsible for the most economic damage.

Data Processing

# Load necessary libraries
  library(dplyr)
## Warning: package 'dplyr' was built under R version 3.4.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
  library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.4.2
# Fetch the compressed storm data into the current working directory, but only
# when a copy is not already present there
  if (!file.exists("repdata_data_StormData.csv.bz2")) {
    download.file(
      url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
      destfile = "repdata_data_StormData.csv.bz2"
    )
  }

# Load the raw storm data; read.csv() reads the compressed file through the
# bzfile() connection. `header = TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned, whereas `TRUE` is a reserved word.
NOAA <- read.csv(
  bzfile("repdata_data_StormData.csv.bz2"),
  sep = ",",
  header = TRUE
)

# Keep only the columns the analysis uses: location, event type, casualty
# counts, and the damage amounts together with their magnitude codes
subNOAA <- NOAA[, c("STATE", "EVTYPE", "FATALITIES", "INJURIES",
                    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Tabulate how many rows carry each PROPDMGEXP magnitude code
subNOAA %>%
  group_by(PROPDMGEXP) %>%
  summarise(no_rows = n())
## Warning: package 'bindrcpp' was built under R version 3.4.2
## # A tibble: 19 x 2
##    PROPDMGEXP no_rows
##        <fctr>   <int>
##  1             465934
##  2          -       1
##  3          ?       8
##  4          +       5
##  5          0     216
##  6          1      25
##  7          2      13
##  8          3       4
##  9          4       4
## 10          5      28
## 11          6       4
## 12          7       5
## 13          8       1
## 14          B      40
## 15          h       1
## 16          H       6
## 17          K  424665
## 18          m       7
## 19          M   11330
# Express property damage (PROPDMG) in one common unit, scaling each row by the
# alpha code in PROPDMGEXP: H/h = hundred, K = thousand, M/m = million,
# B = billion. Rows with any other code keep a value of 0.
subNOAA$PROPDMGV <- with(subNOAA, {
  code_multiplier <- c(H = 10^2, h = 10^2, K = 10^3,
                       M = 10^6, m = 10^6, B = 10^9)
  # Looking up by name yields NA for codes that are not in the table
  row_multiplier <- code_multiplier[as.character(PROPDMGEXP)]
  unname(ifelse(is.na(row_multiplier), 0, PROPDMG * row_multiplier))
})
  
  # Tabulate how many rows carry each CROPDMGEXP magnitude code
  subNOAA %>%
    group_by(CROPDMGEXP) %>%
    summarise(no_rows = n())
## # A tibble: 9 x 2
##   CROPDMGEXP no_rows
##       <fctr>   <int>
## 1             618413
## 2          ?       7
## 3          0      19
## 4          2       1
## 5          B       9
## 6          k      21
## 7          K  281832
## 8          m       1
## 9          M    1994
# Express crop damage (CROPDMG) in one common unit, scaling each row by the
# alpha code in CROPDMGEXP: K/k = thousand, M/m = million, B = billion.
# Rows with any other code keep a value of 0.
#
# Lower-case "k" is listed explicitly: the CROPDMGEXP tabulation shows 21 such
# rows, and they were previously left at 0 because the "K" conversion was
# applied twice instead of once for "K" and once for "k".
subNOAA$CROPDMGV <- with(subNOAA, {
  code_multiplier <- c(K = 10^3, k = 10^3, M = 10^6, m = 10^6, B = 10^9)
  # Looking up by name yields NA for codes that are not in the table
  row_multiplier <- code_multiplier[as.character(CROPDMGEXP)]
  unname(ifelse(is.na(row_multiplier), 0, CROPDMG * row_multiplier))
})

# Carry forward only the event type, the casualty counts and the converted
# damage values
subNOAAExp <- subNOAA[c("EVTYPE", "FATALITIES", "INJURIES",
                        "PROPDMGV", "CROPDMGV")]

knitr::opts_chunk$set(echo = TRUE)

Results

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Casualties per record: fatalities plus injuries
subNOAAExp$health <- with(subNOAAExp, FATALITIES + INJURIES)

# Total casualties for each event type
Harm <- aggregate(health ~ EVTYPE, data = subNOAAExp, FUN = sum)

# Event types sorted from the most to the fewest casualties
ordHarm <- Harm[rev(order(Harm$health)), ]

# The 20 event types with the most casualties
TopHarm <- head(ordHarm, n = 20)

# Bar chart of those 20 event types, tallest bar first
ggplot(TopHarm, aes(x = reorder(EVTYPE, -health), y = health)) +
  geom_col(fill = "purple") +
  labs(
    x = "Event Type",
    y = "Number of injuries and fatalities",
    title = "Top 20 weather events with respect to population health"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

knitr::opts_chunk$set(echo = TRUE)

Tornados are most damaging to population health (defined by the number of injuries and fatalities).

Across the United States, which types of events have the greatest economic consequences?

# Economic cost per record: property damage plus crop damage
subNOAAExp$combined <- with(subNOAAExp, PROPDMGV + CROPDMGV)

# Total cost for each event type
Cost <- aggregate(combined ~ EVTYPE, data = subNOAAExp, FUN = sum)

# Event types sorted from the most to the least costly
ordCost <- Cost[rev(order(Cost$combined)), ]

# The 20 costliest event types
TopCost <- head(ordCost, n = 20)

# Bar chart of those 20 event types, tallest bar first
ggplot(TopCost, aes(x = reorder(EVTYPE, -combined), y = combined)) +
  geom_col(fill = "red") +
  labs(
    x = "Event Type",
    y = "Economic Cost ($)",
    title = "Top 20 weather events for property & crop damages"
  ) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

knitr::opts_chunk$set(echo = TRUE)

Floods are responsible for the most economic damage (defined as property and crop damage).