Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This analysis explores the NOAA Storm Database and answers two specific questions about severe weather events: (1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? and (2) Across the United States, which types of events have the greatest economic consequences? The analysis found that Tornadoes, Excessive Heat and Flash Floods are the top 3 contributors to human fatalities, while Tornadoes, TSTM Wind and Flood are the top 3 contributors to human injuries. Floods, Hurricanes and Tornadoes are the top 3 contributors to Property Damage, while Drought, Flood and River Flood are the top contributors to Crop Damage.
Load libraries. Decompress and read the data file.
#load libs
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Read the bzip2-compressed storm data CSV into a data frame.
dfStormData <- read.csv(file = bzfile(description = "./data/StormData.csv.bz2"))
Check for NAs in the columns relevant to the analysis. Then convert the data to a tibble so that dplyr can be used for the rest of the analysis.
# Count the missing values in every column. The columns used later in this
# analysis are EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and
# CROPDMGEXP; a count of 0 for each means no NA handling is needed below.
colSums(is.na(dfStormData))
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME
## 0 0 0 0 0 0
## STATE EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE
## 0 0 0 0 0 0
## END_TIME COUNTY_END COUNTYENDN END_RANGE END_AZI END_LOCATI
## 0 0 902297 0 0 0
## LENGTH WIDTH F MAG FATALITIES INJURIES
## 0 0 843563 0 0 0
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC
## 0 0 0 0 0 0
## ZONENAMES LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS
## 0 47 0 40 0 0
## REFNUM
## 0
# Convert the data frame to a tibble for use with the dplyr verbs below.
# as_tibble() replaces tbl_df(), which is deprecated as of dplyr 1.0.0.
dpStorm <- as_tibble(dfStormData)
Subset the data into the columns needed to analyze events harmful to population health and the columns needed to analyze economic impact, then remove the full data sets to free memory.
# Keep only the columns each analysis needs: event type plus the casualty
# counts for health, event type plus the damage amounts/exponents for economy.
healthEvents <- dpStorm %>%
  select(EVTYPE, FATALITIES, INJURIES)
economicEvents <- dpStorm %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Drop the full-size objects now that the subsets exist.
rm(dfStormData, dpStorm)
Compute top 8 events harmful to population health
# Total fatalities and injuries per event type, ranked by injuries (highest
# first); show the top 8.
injuryEvents <- healthEvents %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES), injuries = sum(INJURIES)) %>%
  arrange(desc(injuries))
head(injuryEvents, 8)
## # A tibble: 8 × 3
## EVTYPE fatalities injuries
## <fctr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 TSTM WIND 504 6957
## 3 FLOOD 470 6789
## 4 EXCESSIVE HEAT 1903 6525
## 5 LIGHTNING 816 5230
## 6 HEAT 937 2100
## 7 ICE STORM 89 1975
## 8 FLASH FLOOD 978 1777
# Total fatalities and injuries per event type, ranked by fatalities (highest
# first); show the top 8.
fatalEvents <- healthEvents %>%
  group_by(EVTYPE) %>%
  summarise(fatalities = sum(FATALITIES), injuries = sum(INJURIES)) %>%
  arrange(desc(fatalities))
head(fatalEvents, 8)
## # A tibble: 8 × 3
## EVTYPE fatalities injuries
## <fctr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
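Compute damage to property and crops. The recorded amounts in PROPDMG and CROPDMG are converted to actual dollar values by multiplying them by the factor encoded in PROPDMGEXP and CROPDMGEXP, respectively. The user-defined function timesX() translates each exponent code into its numeric factor (K = 10^3, M = 10^6, B = 10^9; any other code yields 0) and is applied across the relevant columns.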
# Translate damage exponent codes into numeric multipliers.
#
# Args:
#   x: exponent code(s) as a character vector or factor (e.g. values taken
#      from PROPDMGEXP or CROPDMGEXP). May be a single value or a whole column.
# Returns:
#   A numeric vector the same length as x: 10^3 for "k"/"K", 10^6 for "m"/"M",
#   10^9 for "b"/"B", and 0 for every other value (including "" and NA).
timesX <- function(x) {
  codes <- c("k", "K", "m", "M", "b", "B")
  multipliers <- c(10^3, 10^3, 10^6, 10^6, 10^9, 10^9)
  # match() is vectorised, so a whole column can be converted in one call
  # instead of testing one element at a time with if/else.
  result <- multipliers[match(as.character(x), codes)]
  # Unrecognised codes do not match and come back as NA; they map to 0.
  result[is.na(result)] <- 0
  result
}
# Scale the recorded damage figures by the multiplier encoded in their
# exponent columns. The exponent columns are addressed by name rather than by
# position, and vapply() walks the exponent vector directly (instead of
# apply() coercing the data frame to a matrix) while checking that timesX()
# yields exactly one numeric multiplier per row.
economicEvents$PROPDMG <- economicEvents$PROPDMG *
  vapply(as.character(economicEvents$PROPDMGEXP), timesX, numeric(1),
         USE.NAMES = FALSE)
economicEvents$CROPDMG <- economicEvents$CROPDMG *
  vapply(as.character(economicEvents$CROPDMGEXP), timesX, numeric(1),
         USE.NAMES = FALSE)
Compute top 8 events harmful to property and crops
# Total property damage per event type, largest first; show the top 8.
propEvents <- economicEvents %>%
  group_by(EVTYPE) %>%
  summarise(PROPLOSSES = sum(PROPDMG)) %>%
  arrange(desc(PROPLOSSES))
head(propEvents, 8)
## # A tibble: 8 × 2
## EVTYPE PROPLOSSES
## <fctr> <dbl>
## 1 FLOOD 144657709800
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56937160480
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16140811510
## 6 HAIL 15732266720
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
# Total crop damage per event type, largest first; show the top 8.
cropEvents <- economicEvents %>%
  group_by(EVTYPE) %>%
  summarise(CROPLOSSES = sum(CROPDMG)) %>%
  arrange(desc(CROPLOSSES))
head(cropEvents, 8)
## # A tibble: 8 × 2
## EVTYPE CROPLOSSES
## <fctr> <dbl>
## 1 DROUGHT 13972566000
## 2 FLOOD 5661968450
## 3 RIVER FLOOD 5029459000
## 4 ICE STORM 5022113500
## 5 HAIL 3025954450
## 6 HURRICANE 2741910000
## 7 HURRICANE/TYPHOON 2607872800
## 8 FLASH FLOOD 1421317100
Set common graph parameters
# Shared ggplot2 components reused by the charts below.

# Bar layer whose heights are taken directly from the y aesthetic.
g <- geom_bar(stat = "identity")

# y-axis scales that label ticks in thousands, millions and billions.
scale1K <- scale_y_continuous(labels = function(breaks) breaks / 10^3)
scale1M <- scale_y_continuous(labels = function(breaks) breaks / 10^6)
scale1B <- scale_y_continuous(labels = function(breaks) breaks / 10^9)

# Tilt the x-axis labels by 30 degrees.
t <- theme(axis.text.x = element_text(angle = 30))
Plotting damage to population health, one can see that Tornadoes, Excessive Heat and Flash Floods are the top 3 contributors to human fatalities, while Tornadoes, TSTM Wind and Flood are the top 3 contributors to human injuries.
# Bar chart of the 8 event types with the most fatalities, bars ordered from
# highest to lowest.
g1 <- ggplot(
  data = head(fatalEvents, 8),
  aes(x = reorder(EVTYPE, desc(fatalities)), y = fatalities)
) +
  g +
  ylab("Fatalities (thousands)") +
  xlab("Event") +
  t +
  scale1K

# Bar chart of the 8 event types with the most injuries, bars ordered from
# highest to lowest.
g2 <- ggplot(
  data = head(injuryEvents, 8),
  aes(x = reorder(EVTYPE, desc(injuries)), y = injuries)
) +
  g +
  ylab("Injuries (thousands)") +
  xlab("Event") +
  t +
  scale1K

# Show both charts side by side.
grid.arrange(g1, g2, ncol = 2)
Plotting damage to property and crops, one can see that Floods, Hurricanes and Tornadoes are the top 3 contributors to Property Damage, while Drought, Flood and River Flood are the top contributors to Crop Damage.
# Bar chart of the 8 event types with the largest property losses, bars
# ordered from highest to lowest.
g3 <- ggplot(
  data = head(propEvents, 8),
  aes(x = reorder(EVTYPE, desc(PROPLOSSES)), y = PROPLOSSES)
) +
  g +
  ylab("Property damage ($billions)") +
  xlab("Event") +
  t +
  scale1B

# Bar chart of the 8 event types with the largest crop losses, bars ordered
# from highest to lowest.
g4 <- ggplot(
  data = head(cropEvents, 8),
  aes(x = reorder(EVTYPE, desc(CROPLOSSES)), y = CROPLOSSES)
) +
  g +
  ylab("Crop damage ($billions)") +
  xlab("Event") +
  t +
  scale1B

# Show both charts side by side.
grid.arrange(g3, g4, ncol = 2)