Synopsis

Using data from the US National Oceanic and Atmospheric Administration’s (NOAA) storm database, this report summarises the damage caused by various types of weather event. Damage is measured in terms of population health (recorded as fatalities and injuries) and economic cost (recorded as property damage and crop damage, in US dollars). The process for obtaining and processing the data is shown below. The findings suggest that tornadoes are the most harmful events for population health. Events associated with cold weather caused the most economic damage, in the form of crop damage, while wind and tornado events caused the most property damage (although this is orders of magnitude smaller than the crop damage in terms of cost).

knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.3
library(reshape2)

Data Processing

Download and load data

# Fetch the compressed storm data and read it into `data`.
# The download is skipped when the archive is already present in the working
# directory, so repeated runs do not fetch the file again. Delete the local
# copy to force a fresh download.
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destFile <- "StormData.csv.bz2"
if (!file.exists(destFile)) {
  download.file(fileURL, destfile = destFile)
}
# The .bz2 archive is passed straight to read.csv(), as in the original code.
data <- read.csv(destFile)

Sort event types into broader categories. Viewing the frequency of each event type with the table function (sort(table(data$EVTYPE), decreasing = TRUE)) lets us decide which events are worth looking at; these events are then placed into general categories so that the data can be viewed more easily.

# Keyword fragments used to group raw EVTYPE strings into broader categories.
# Each object holds the fragments for one category; vectors with several
# fragments are joined with "|" into one pattern when the data are matched.
cold <- c(
  "HAIL", "SNOW", "WINT", "BLIZZARD",
  "ICE", "COLD", "FREEZ", "WINDCHILL"
)
tstm <- c("TSTM", "THUNDERSTORM", "LIGHTNING", "HEAVY RAIN", "MICROBURST")
flood <- c("FLOOD", "SURGE", "FLD")
wind <- "WIND"
hurricane <- "HURRICANE"
marine <- "MARINE"
landslide <- c("LANDSLIDE", "AVALANCHE")
ripcur <- c("RIP CURRENT", "SURF")
astrotide <- "ASTRONOMICAL"
fog <- "FOG"
hot <- c("DROUGHT", "HEAT", "WARM")
fire <- "FIRE"
dust <- "DUST"
tornado <- c("TORNADO", "SPOUT", "FUNNEL CLOUD")

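The data can now be separated into these categories by creating a new factor variable, CATEGORY. An event type that matches more than one keyword set is assigned a single category according to the order of the rules in the code below.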

# Label every record with one broad CATEGORY based on keyword matches in
# EVTYPE (case-sensitive regular-expression matching via grepl()).
#
# The rules are listed from highest to lowest priority: case_when() keeps the
# first rule that matches, so a record matching several keyword sets receives
# the category listed earliest here. Records matching no keyword set are
# labelled "OTHER".
#
# NOTE(review): the WIND rule outranks THUNDERSTORM and COLD WEATHER, so an
# EVTYPE such as "TSTM WIND" is labelled "WIND", and the "WINDCHILL" keyword
# in `cold` never decides a category on its own. Confirm this is intended.
has_keyword <- function(text, keywords) {
  grepl(paste(keywords, collapse = "|"), text)
}

data <- data %>%
  mutate(
    CATEGORY = case_when(
      has_keyword(EVTYPE, tornado) ~ "TORNADO",
      has_keyword(EVTYPE, dust) ~ "DUST",
      has_keyword(EVTYPE, fire) ~ "FIRE",
      has_keyword(EVTYPE, hot) ~ "HOT WEATHER",
      has_keyword(EVTYPE, fog) ~ "FOGGY WEATHER",
      has_keyword(EVTYPE, astrotide) ~ "ASTRONOMICAL LOW/HIGH TIDE",
      has_keyword(EVTYPE, ripcur) ~ "CURRENTS",
      has_keyword(EVTYPE, landslide) ~ "LANDSLIDE",
      has_keyword(EVTYPE, marine) ~ "MARINE EVENTS",
      has_keyword(EVTYPE, hurricane) ~ "HURRICANE",
      has_keyword(EVTYPE, wind) ~ "WIND",
      has_keyword(EVTYPE, flood) ~ "FLOOD",
      has_keyword(EVTYPE, tstm) ~ "THUNDERSTORM",
      has_keyword(EVTYPE, cold) ~ "COLD WEATHER",
      TRUE ~ "OTHER"
    ),
    # Stored as a factor so later steps can aggregate by level.
    CATEGORY = factor(CATEGORY)
  )

The relevant columns in the data table for population health are FATALITIES and INJURIES.

The relevant columns for economic consequences are PROPDMG and CROPDMG, which are modified by PROPDMGEXP and CROPDMGEXP respectively. The true numbers for the data can be determined after deciphering what the values in the EXP columns signify.

k or K is 1,000 (thousand), m or M is 1,000,000 (million), and B is 1,000,000,000 (billion).

H corresponds to 100 (hundred). This can be verified at https://www.ncdc.noaa.gov/stormevents/: use the “Select State or Area” search, then pick a time frame and event type corresponding to a row in the data. For example, a time frame containing 7/14/1995 19:23 for thunderstorm winds lets you find entry 216476 in Sherman, NE, which is listed on the site with 0.50K property damage and appears in the data set as 5 H (5 × 100 = 500).

# Convert the damage figures to dollars by applying the exponent codes.
#
# Bug fixed: the previous version recomputed PropertyDamage / CropDamage from
# the raw PROPDMG / CROPDMG value in every mutate() call, so each call threw
# away the result of the one before it and only the last multiplier ("H" for
# property, "K"/"k" for crops) was ever applied. All codes are now resolved
# in a single step per column. Matching is case-insensitive, which also
# covers the "h" and "k" codes that the old property lines missed.
#
# The plots and the synopsis figures derived from these columns must be
# regenerated after this change.

# Map exponent codes to numeric multipliers: H = 1e2, K = 1e3, M = 1e6,
# B = 1e9. Any other code (blank, digits, symbols) gets a multiplier of 1,
# i.e. the raw value is kept unchanged, as before.
exp_multiplier <- function(exp_code) {
  lookup <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(lookup[toupper(as.character(exp_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

data <- data %>%
  mutate(
    PropertyDamage = PROPDMG * exp_multiplier(PROPDMGEXP),
    CropDamage = CROPDMG * exp_multiplier(CROPDMGEXP)
  )

Results

Load sums of damage associated with various categories into a new data frame

# Total each harm measure within every event category.
# tapply() yields one sum per CATEGORY level, named by that level.
sum_by_category <- function(values) {
  tapply(values, data$CATEGORY, sum)
}

injuries <- sum_by_category(data$INJURIES)
fatalities <- sum_by_category(data$FATALITIES)
propdmg <- sum_by_category(data$PropertyDamage)
cropdmg <- sum_by_category(data$CropDamage)

# The category labels come from the names of the summed vectors, so they
# line up with the totals row by row.
category <- names(injuries)
damage <- data.frame(
  category = category,
  injuries = injuries,
  fatalities = fatalities,
  propdmg = propdmg,
  cropdmg = cropdmg
)

Reshape data for graphing

# Build one table per damage type, then melt each to long format
# (one row per category/measure pair) for plotting.
healthdamage <- data.frame(
  category = category,
  injuries = injuries,
  fatalities = fatalities
)
econdamage <- data.frame(
  category = category,
  propdmg = propdmg,
  cropdmg = cropdmg
)

# "category" is kept as the identifier; the remaining columns are stacked
# into melt()'s default `variable` / `value` columns.
healthmelt <- melt(healthdamage, id.vars = "category")
econmelt <- melt(econdamage, id.vars = "category")

Graph the data with bar plots

# Horizontal, side-by-side bars comparing injuries and fatalities for each
# weather category. geom_col() plots the values as given.
ggplot(healthmelt, aes(x = category, y = value, fill = variable)) +
  geom_col(position = "dodge", alpha = 0.5) +
  coord_flip() +
  labs(
    title = "Injuries/Fatalities by Weather Category, 1950-2011",
    x = "Category",
    y = "Value"
  )

# Horizontal bars of total property damage for each weather category.
ggplot(damage, aes(x = category, y = propdmg)) +
  geom_col(fill = "blue", alpha = 0.5) +
  coord_flip() +
  labs(
    title = "Property Damage by Weather Category",
    x = "Category",
    y = "Damage ($)"
  )

# Horizontal bars of total crop damage for each weather category.
ggplot(damage, aes(x = category, y = cropdmg)) +
  geom_col(fill = "red", alpha = 0.5) +
  coord_flip() +
  labs(
    title = "Crop Damage by Weather Category",
    x = "Category",
    y = "Damage ($)"
  )