Synopsis:

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This analysis explores the NOAA Storm Database and answers 2 specific questions about severe weather events: (1) Across the United States, which types of events (as indicated in the `EVTYPE` variable) are most harmful with respect to population health? and (2) Across the United States, which types of events have the greatest economic consequences? This analysis found Tornadoes, Excessive Heat and Flash Floods to be the top 3 contributors to human fatalities, while Tornadoes, TSTM Wind and Flood are the top 3 contributors to human injuries. Floods, Hurricanes and Tornadoes are the top 3 contributors to Property Damage, while Drought, Flood and River Flood are the top contributors to Crop Damage.

Data Processing

Load libraries. Unzip and read the data file.

#load libs
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Read the bzip2-compressed storm data CSV into a data frame
dfStormData <- read.csv(file = bzfile(description = "./data/StormData.csv.bz2"))

Check for NAs in the columns relevant for the analysis. Use dplyr for onward data analysis

# Count the missing values in every column of the raw data
vapply(dfStormData, function(column) sum(is.na(column)), numeric(1))
##    STATE__   BGN_DATE   BGN_TIME  TIME_ZONE     COUNTY COUNTYNAME 
##          0          0          0          0          0          0 
##      STATE     EVTYPE  BGN_RANGE    BGN_AZI BGN_LOCATI   END_DATE 
##          0          0          0          0          0          0 
##   END_TIME COUNTY_END COUNTYENDN  END_RANGE    END_AZI END_LOCATI 
##          0          0     902297          0          0          0 
##     LENGTH      WIDTH          F        MAG FATALITIES   INJURIES 
##          0          0     843563          0          0          0 
##    PROPDMG PROPDMGEXP    CROPDMG CROPDMGEXP        WFO STATEOFFIC 
##          0          0          0          0          0          0 
##  ZONENAMES   LATITUDE  LONGITUDE LATITUDE_E LONGITUDE_    REMARKS 
##          0         47          0         40          0          0 
##     REFNUM 
##          0
# Convert to a tibble so the dplyr verbs below can be used; as_tibble()
# replaces the deprecated tbl_df() wrapper.
dpStorm <- as_tibble(dfStormData)

Subset data to analyze events harmful to health and significant to economy, and deallocate others

# Keep only the columns each question needs: casualty counts for the health
# question, damage amounts and their exponent codes for the economic one.
healthEvents <- dpStorm %>% select(EVTYPE, FATALITIES:INJURIES)
economicEvents <- dpStorm %>% select(EVTYPE, PROPDMG:CROPDMGEXP)

# Drop the full data set now that the two subsets exist
rm(dfStormData, dpStorm)

Compute top 8 events harmful to population health

# Total fatalities and injuries per event type, ranked by injuries
injuryEvents <- healthEvents %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES), injuries = sum(INJURIES)) %>%
  arrange(desc(injuries))
head(injuryEvents, 8)
## # A tibble: 8 × 3
##           EVTYPE fatalities injuries
##           <fctr>      <dbl>    <dbl>
## 1        TORNADO       5633    91346
## 2      TSTM WIND        504     6957
## 3          FLOOD        470     6789
## 4 EXCESSIVE HEAT       1903     6525
## 5      LIGHTNING        816     5230
## 6           HEAT        937     2100
## 7      ICE STORM         89     1975
## 8    FLASH FLOOD        978     1777
# Total fatalities and injuries per event type, ranked by fatalities
fatalEvents <- healthEvents %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES), injuries = sum(INJURIES)) %>%
  arrange(desc(fatalities))
head(fatalEvents, 8)
## # A tibble: 8 × 3
##           EVTYPE fatalities injuries
##           <fctr>      <dbl>    <dbl>
## 1        TORNADO       5633    91346
## 2 EXCESSIVE HEAT       1903     6525
## 3    FLASH FLOOD        978     1777
## 4           HEAT        937     2100
## 5      LIGHTNING        816     5230
## 6      TSTM WIND        504     6957
## 7          FLOOD        470     6789
## 8    RIP CURRENT        368      232

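Compute damage to property and crops. Convert the recorded values to actual damage amounts by multiplying PROPDMG/CROPDMG by the factor encoded in PROPDMGEXP/CROPDMGEXP, applying the user-defined timesX() function across the relevant columns. Exponent codes other than K, M or B (in either case) are given a multiplier of 0.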

#' Convert damage exponent codes to numeric multipliers
#'
#' Maps "K"/"k" to 10^3, "M"/"m" to 10^6 and "B"/"b" to 10^9. Every other
#' value (including empty strings and NA) maps to 0.
#'
#' @param x A vector of exponent codes (character, factor, or anything
#'   coercible with as.character()). May have any length, including zero.
#' @return An unnamed numeric vector of multipliers, the same length as `x`.
timesX <- function(x) {
  multipliers <- c(K = 10^3, M = 10^6, B = 10^9)
  # Look each code up by name; toupper() makes the match case-insensitive and
  # as.character() keeps factor inputs from being indexed by their integer codes.
  factors <- unname(multipliers[toupper(as.character(x))])
  # Codes absent from the table come back as NA; they count as multiplier 0.
  factors[is.na(factors)] <- 0
  factors
}
# Scale each recorded damage figure by the multiplier for its exponent code.
# Columns are addressed by name rather than position, and vapply() walks the
# codes directly instead of coercing the data frame to a matrix via apply().
economicEvents$PROPDMG <- economicEvents$PROPDMG *
  vapply(as.character(economicEvents$PROPDMGEXP), timesX, numeric(1),
         USE.NAMES = FALSE)
economicEvents$CROPDMG <- economicEvents$CROPDMG *
  vapply(as.character(economicEvents$CROPDMGEXP), timesX, numeric(1),
         USE.NAMES = FALSE)

Compute top 8 events harmful to property and crops

# Total property damage per event type, largest first
propEvents <- economicEvents %>%
  group_by(EVTYPE) %>%
  summarise(PROPLOSSES = sum(PROPDMG)) %>%
  arrange(desc(PROPLOSSES))
head(propEvents, 8)
## # A tibble: 8 × 2
##              EVTYPE   PROPLOSSES
##              <fctr>        <dbl>
## 1             FLOOD 144657709800
## 2 HURRICANE/TYPHOON  69305840000
## 3           TORNADO  56937160480
## 4       STORM SURGE  43323536000
## 5       FLASH FLOOD  16140811510
## 6              HAIL  15732266720
## 7         HURRICANE  11868319010
## 8    TROPICAL STORM   7703890550
# Total crop damage per event type, largest first
cropEvents <- economicEvents %>%
  group_by(EVTYPE) %>%
  summarise(CROPLOSSES = sum(CROPDMG)) %>%
  arrange(desc(CROPLOSSES))
head(cropEvents, 8)
## # A tibble: 8 × 2
##              EVTYPE  CROPLOSSES
##              <fctr>       <dbl>
## 1           DROUGHT 13972566000
## 2             FLOOD  5661968450
## 3       RIVER FLOOD  5029459000
## 4         ICE STORM  5022113500
## 5              HAIL  3025954450
## 6         HURRICANE  2741910000
## 7 HURRICANE/TYPHOON  2607872800
## 8       FLASH FLOOD  1421317100

Results

Set common graph parameters

# Plot components shared by all charts below.

# Bars whose heights are taken directly from the y aesthetic
g <- geom_bar(stat = "identity")

# Build a continuous y scale whose tick labels are the raw values divided
# by `unit`.
scaleLabelsIn <- function(unit) {
  force(unit)
  scale_y_continuous(labels = function(x) x / unit)
}
scale1K <- scaleLabelsIn(10^3)
scale1M <- scaleLabelsIn(10^6)
scale1B <- scaleLabelsIn(10^9)

# Tilt the x-axis labels by 30 degrees
t <- theme(axis.text.x = element_text(angle = 30))

Plotting damage to population health, one can see that Tornadoes, Excessive Heat and Flash Floods are the top 3 contributors to human fatalities, while Tornadoes, TSTM Wind and Flood are the top 3 contributors to human injuries.

# Bar charts of the 8 event types with the most fatalities (left) and the
# most injuries (right), bars ordered from largest to smallest.
g1 <- ggplot(
  data = head(fatalEvents, 8),
  aes(x = reorder(EVTYPE, desc(fatalities)), y = fatalities)
) +
  g +
  labs(y = "Fatalities (thousands)", x = "Event") +
  t +
  scale1K

g2 <- ggplot(
  data = head(injuryEvents, 8),
  aes(x = reorder(EVTYPE, desc(injuries)), y = injuries)
) +
  g +
  labs(y = "Injuries (thousands)", x = "Event") +
  t +
  scale1K

grid.arrange(g1, g2, ncol = 2)

Plotting damage to property and crops, one can see that Floods, Hurricanes and Tornadoes are the top 3 contributors to Property Damage, while Drought, Flood and River Flood are the top contributors to Crop Damage.

# Bar charts of the 8 event types with the largest property losses (left) and
# crop losses (right), bars ordered from largest to smallest.
g3 <- ggplot(
  data = head(propEvents, 8),
  aes(x = reorder(EVTYPE, desc(PROPLOSSES)), y = PROPLOSSES)
) +
  g +
  labs(y = "Property damage ($billions)", x = "Event") +
  t +
  scale1B

g4 <- ggplot(
  data = head(cropEvents, 8),
  aes(x = reorder(EVTYPE, desc(CROPLOSSES)), y = CROPLOSSES)
) +
  g +
  labs(y = "Crop damage ($billions)", x = "Event") +
  t +
  scale1B

grid.arrange(g3, g4, ncol = 2)