What Are the Most Dangerous Weather Events for Public Health and the Economy?

Synopsis

The purpose of this document is to show which weather events pose the greatest threat to public health and to the economy. The data comes from the US NOAA storm database and includes fatalities, injuries, and economic damage for each recorded event. By analysing this data, we can identify which weather events are the most dangerous to the general public.

Data Load and Data Processing

Download the data (if it is not already present locally) and load it into memory. The dataset is stored in weatherData.

# Where the raw storm data lives remotely and where it is cached locally.
dataDirector <- "./data"
fileName <- file.path(dataDirector, "StormData.csv.bz2")
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# The cache directory has to exist before anything can be downloaded into it.
if (!dir.exists(dataDirector)) {
    dir.create(dataDirector)
}

# Skip the download when a local copy is already present.
if (!file.exists(fileName)) {
    download.file(url = fileURL, destfile = fileName)
}

# The compressed file path is handed to read.csv as-is.
weatherData <- read.csv(file = fileName)

To handle the data effectively, I selected only the columns needed and created additional columns that combine the economic damage, which is originally split across two separate columns (a value and its exponent), into a single numeric amount.

Data Modification Justification

The original data has separate columns for the fiscal value and its exponent. To calculate the damage correctly, I added new columns that hold the economic damage as a single numeric value.

# Damage per events
# Keep only the event type, the casualty counts and the damage columns.
variableSelection <- c("EVTYPE", "FATALITIES", "INJURIES",
                       "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
damagePerEvents <- weatherData[variableSelection]

## Health Damage
# Total and mean fatalities / injuries per event type. Each result is a data
# frame with the columns EVTYPES (the group) and x (the aggregated value).
totalFatalitiesByEvtypes <- aggregate(damagePerEvents$FATALITIES, by = list(EVTYPES = damagePerEvents$EVTYPE), FUN = sum)
avgFatalitiesByEvtypes <- aggregate(damagePerEvents$FATALITIES, by = list(EVTYPES = damagePerEvents$EVTYPE), FUN = mean)
totalInjuriesByEvtypes <- aggregate(damagePerEvents$INJURIES, by = list(EVTYPES = damagePerEvents$EVTYPE), FUN = sum)
avgInjuriesByEvtypes <- aggregate(damagePerEvents$INJURIES, by = list(EVTYPES = damagePerEvents$EVTYPE), FUN = mean)

## Economical Damage
# Convert a damage figure and its exponent code into a single numeric amount.
#
#   amount       numeric vector of damage figures (PROPDMG or CROPDMG)
#   exponentCode vector of the same length holding the exponent code
#                (PROPDMGEXP or CROPDMGEXP); character or factor
#
# Returns a numeric vector aligned row by row with the inputs. Only the codes
# "K", "M" and "B" are scaled (by 1e3, 1e6 and 1e9); any other code, or a
# missing amount, yields 0, matching the earlier is.na() back-fill.
#
# Why this exists: the earlier code assigned the whole, unsubsetted column
# into the matching rows, e.g. `amt[exp == "K"] <- dmg * 1000`. R then filled
# the selected rows with the first values of the column instead of each row's
# own value, and warned "number of items to replace is not a multiple of
# replacement length". Looking the multiplier up per row keeps every amount
# paired with its own exponent.
expandDamage <- function(amount, exponentCode) {
    multiplierByCode <- c(K = 1e3, M = 1e6, B = 1e9)
    # as.character() so that factor codes are matched by label, not by level index
    multiplier <- multiplierByCode[match(as.character(exponentCode), names(multiplierByCode))]
    scaled <- unname(amount * multiplier)
    # Unrecognised exponent codes (and missing amounts) count as no damage.
    scaled[is.na(scaled)] <- 0
    scaled
}

damagePerEvents$PROPDMGAMT <- expandDamage(damagePerEvents$PROPDMG, damagePerEvents$PROPDMGEXP)
damagePerEvents$CROPDMGAMT <- expandDamage(damagePerEvents$CROPDMG, damagePerEvents$CROPDMGEXP)
damagePerEvents$TOTALDMGAMT <- damagePerEvents$PROPDMGAMT + damagePerEvents$CROPDMGAMT

# Total and mean combined (property + crop) damage per event type.
totalEconomicDamage <- aggregate(damagePerEvents$TOTALDMGAMT, by = list(EVTYPES = damagePerEvents$EVTYPE), FUN = sum)
avgEconomicDamage <- aggregate(damagePerEvents$TOTALDMGAMT, by = list(EVTYPES = damagePerEvents$EVTYPE), FUN = mean)

Results

The following tables list the most dangerous weather events, ranked both by total and by average impact per recorded event.

Biggest Threat to Public Health

Fatalities

library(plyr)

# Five event types with the highest total number of fatalities.
head(arrange(totalFatalitiesByEvtypes, desc(x)), 5)
##          EVTYPES    x
## 1        TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3    FLASH FLOOD  978
## 4           HEAT  937
## 5      LIGHTNING  816
# Five event types with the highest mean fatalities per recorded event.
head(arrange(avgFatalitiesByEvtypes, desc(x)), 5)
##                      EVTYPES         x
## 1 TORNADOES, TSTM WIND, HAIL 25.000000
## 2              COLD AND SNOW 14.000000
## 3      TROPICAL STORM GORDON  8.000000
## 4      RECORD/EXCESSIVE HEAT  5.666667
## 5               EXTREME HEAT  4.363636

Injuries

library(plyr)

# Five event types with the highest total number of injuries.
head(arrange(totalInjuriesByEvtypes, desc(x)), 5)
##          EVTYPES     x
## 1        TORNADO 91346
## 2      TSTM WIND  6957
## 3          FLOOD  6789
## 4 EXCESSIVE HEAT  6525
## 5      LIGHTNING  5230
# Five event types with the highest mean injuries per recorded event.
head(arrange(avgInjuriesByEvtypes, desc(x)), 5)
##                 EVTYPES    x
## 1             Heat Wave 70.0
## 2 TROPICAL STORM GORDON 43.0
## 3            WILD FIRES 37.5
## 4         THUNDERSTORMW 27.0
## 5    HIGH WIND AND SEAS 20.0
# Scatter plot of fatalities against injuries, one point per recorded event.
with(damagePerEvents, plot(INJURIES, FATALITIES, xlab = "Injuries", ylab = "Fatalities"))

Economical Damage

library(plyr)

# NOTE(review): the figures printed below come from a run in which the
# damage-amount step raised "number of items to replace is not a multiple of
# replacement length" warnings, so re-knit the report before relying on them.

# Five event types with the highest total (property + crop) damage.
head(arrange(totalEconomicDamage, desc(x)), 5)
##             EVTYPES            x
## 1           TORNADO 357194783780
## 2    TROPICAL STORM 251815199640
## 3 HURRICANE/TYPHOON 233182532500
## 4             FLOOD  70186438740
## 5       STORM SURGE  50826645060
# Five event types with the highest mean damage per recorded event.
head(arrange(avgEconomicDamage, desc(x)), 5)
##                      EVTYPES          x
## 1             HURRICANE OPAL 3055555556
## 2          HURRICANE/TYPHOON 2649801506
## 3  HURRICANE OPAL/HIGH WINDS 2500000000
## 4 TORNADOES, TSTM WIND, HAIL 2500000000
## 5  HEAVY RAIN/SEVERE WEATHER 1250000000