The purpose of this document is to show which weather events pose the greatest threat to public health and to the economy. The data comes from the US NOAA and includes fatalities, injuries, and economic damage. By analysing this data, we should be able to identify the weather events that are most dangerous to the general public.
Download the data and load it into memory. The dataset is named weatherData.
# Locations: remote archive, local cache directory, and the cached file itself.
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
dataDirector <- "./data"
fileName <- "./data/StormData.csv.bz2"

# Make sure the cache directory exists before anything is written into it.
if (!dir.exists(dataDirector)) {
  dir.create(dataDirector)
}

# Download only when no local copy is present, so repeated runs reuse the file.
if (!file.exists(fileName)) {
  download.file(url = fileURL, destfile = fileName)
}

# Load the whole data set into memory; the .bz2 archive is read as-is.
weatherData <- read.csv(file = fileName)
To handle the data effectively, I selected only the columns needed and created additional columns that aggregate the economic damage, which is originally split across two separate columns.
The original data has separate columns for the monetary value and its exponent. To calculate the damage correctly, I added new columns that hold the economic damage as a single numeric value.
# Damage per events
# Keep only the columns this analysis reads: the event type, the two casualty
# counts, and the property/crop damage figures with their exponent codes.
variableSelection <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
damagePerEvents <- weatherData[, variableSelection, drop = FALSE]
## Health Damage
# Each result is a data frame with one row per event type: the grouping column
# is named EVTYPES and the aggregated value is in column x.

# Fatalities: sum and per-record mean for every event type.
totalFatalitiesByEvtypes <- with(
  damagePerEvents,
  aggregate(FATALITIES, by = list(EVTYPES = EVTYPE), FUN = sum)
)
avgFatalitiesByEvtypes <- with(
  damagePerEvents,
  aggregate(FATALITIES, by = list(EVTYPES = EVTYPE), FUN = mean)
)

# Injuries: sum and per-record mean for every event type.
totalInjuriesByEvtypes <- with(
  damagePerEvents,
  aggregate(INJURIES, by = list(EVTYPES = EVTYPE), FUN = sum)
)
avgInjuriesByEvtypes <- with(
  damagePerEvents,
  aggregate(INJURIES, by = list(EVTYPES = EVTYPE), FUN = mean)
)
## Economical Damage
# Multipliers for the exponent codes found in PROPDMGEXP / CROPDMGEXP.
damageMultipliers <- c(K = 1000, M = 1000000, B = 1000000000)

# Combine a damage figure with its exponent code into one numeric amount.
#   value    - numeric vector of damage figures (PROPDMG or CROPDMG)
#   exponent - exponent codes, one per element of `value`
# Returns a numeric vector the same length as `value`. Codes K, M and B are
# matched case-insensitively; any other code (blank, digit, symbol, NA) and any
# missing figure yields 0.
damageAmount <- function(value, exponent) {
  stopifnot(length(value) == length(exponent))
  # as.character() makes the lookup go by label even when the column is a
  # factor; indexing with a factor would silently use its integer codes.
  codes <- toupper(as.character(exponent))
  multiplier <- unname(damageMultipliers[codes])
  # Each row is scaled by its own multiplier. Assigning a full-length column
  # into a masked subset instead would recycle values from unrelated rows.
  amount <- value * multiplier
  amount[is.na(amount)] <- 0
  amount
}

damagePerEvents$PROPDMGAMT <- damageAmount(
  damagePerEvents$PROPDMG,
  damagePerEvents$PROPDMGEXP
)
damagePerEvents$CROPDMGAMT <- damageAmount(
  damagePerEvents$CROPDMG,
  damagePerEvents$CROPDMGEXP
)
damagePerEvents$TOTALDMGAMT <- damagePerEvents$PROPDMGAMT +
  damagePerEvents$CROPDMGAMT

# Sum and per-record mean of the combined damage for every event type.
# NOTE(review): figures printed from an earlier run of this section should be
# regenerated, because the per-row amounts feed directly into these aggregates.
totalEconomicDamage <- aggregate(
  damagePerEvents$TOTALDMGAMT,
  by = list(EVTYPES = damagePerEvents$EVTYPE),
  FUN = sum
)
avgEconomicDamage <- aggregate(
  damagePerEvents$TOTALDMGAMT,
  by = list(EVTYPES = damagePerEvents$EVTYPE),
  FUN = mean
)
The following are the most dangerous weather events, ranked by total and by average.
library(plyr)

# Top five event types by total fatalities (column x holds the sum).
head(
  arrange(totalFatalitiesByEvtypes, desc(x)),
  n = 5
)
## EVTYPES x
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816

# Top five event types by mean fatalities per record (column x holds the mean).
head(
  arrange(avgFatalitiesByEvtypes, desc(x)),
  n = 5
)
## EVTYPES x
## 1 TORNADOES, TSTM WIND, HAIL 25.000000
## 2 COLD AND SNOW 14.000000
## 3 TROPICAL STORM GORDON 8.000000
## 4 RECORD/EXCESSIVE HEAT 5.666667
## 5 EXTREME HEAT 4.363636
library(plyr)

# Top five event types by total injuries (column x holds the sum).
head(
  arrange(totalInjuriesByEvtypes, desc(x)),
  n = 5
)
## EVTYPES x
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230

# Top five event types by mean injuries per record (column x holds the mean).
head(
  arrange(avgInjuriesByEvtypes, desc(x)),
  n = 5
)
## EVTYPES x
## 1 Heat Wave 70.0
## 2 TROPICAL STORM GORDON 43.0
## 3 WILD FIRES 37.5
## 4 THUNDERSTORMW 27.0
## 5 HIGH WIND AND SEAS 20.0
# Scatter plot of every record: injuries on the x axis, fatalities on the y axis.
with(
  damagePerEvents,
  plot(x = INJURIES, y = FATALITIES, xlab = "Injuries", ylab = "Fatalities")
)
library(plyr)

# NOTE(review): the figures printed below were captured from a run in which the
# damage amounts were assigned with "number of items to replace" warnings;
# treat them as unverified and re-knit to refresh them.

# Top five event types by total economic damage (column x holds the sum).
head(
  arrange(totalEconomicDamage, desc(x)),
  n = 5
)
## EVTYPES x
## 1 TORNADO 357194783780
## 2 TROPICAL STORM 251815199640
## 3 HURRICANE/TYPHOON 233182532500
## 4 FLOOD 70186438740
## 5 STORM SURGE 50826645060

# Top five event types by mean economic damage per record (column x holds the mean).
head(
  arrange(avgEconomicDamage, desc(x)),
  n = 5
)
## EVTYPES x
## 1 HURRICANE OPAL 3055555556
## 2 HURRICANE/TYPHOON 2649801506
## 3 HURRICANE OPAL/HIGH WINDS 2500000000
## 4 TORNADOES, TSTM WIND, HAIL 2500000000
## 5 HEAVY RAIN/SEVERE WEATHER 1250000000