The analysis began by processing the raw data into a readable format. An exploratory analysis was then conducted to understand which kinds of severe weather conditions result in the most devastating consequences, including fatalities, injuries, and economic losses. The analysis focuses on the types of events that are most harmful with respect to population health and the types of events that have the greatest economic consequences. According to the analysis, Tornado, Excessive Heat, Flash Flood, and Lightning resulted in the most fatalities and injuries. Flash flood, drought, and thunderstorm resulted in the greatest economic losses, including property losses and crop losses.
# Load the storm data set from the bzip2-compressed CSV in the working
# directory. Text columns are kept as character vectors: under R < 4.0
# read.csv() would otherwise turn them into factors, and as.numeric() on a
# factor yields the internal level code instead of the value its label spells.
WData <- read.csv(bzfile("WData.csv.bz2"), stringsAsFactors = FALSE)
{echo=TRUE}
# Count how many distinct event-type labels occur in the EVTYPE column.
length(unique(WData[["EVTYPE"]]))
## [1] 985
{echo=TRUE}
library(plyr)
# Total fatalities and total injuries for every event type.
HarmfulEvents <- ddply(
  WData, .(EVTYPE), summarize,
  fatalities = sum(FATALITIES),
  injuries = sum(INJURIES)
)

# Ten event types with the highest totals for each outcome. `decreasing` is
# written as TRUE because `T` is an ordinary variable that can be reassigned.
Fatal_Cases <- head(
  HarmfulEvents[order(HarmfulEvents$fatalities, decreasing = TRUE), ],
  10
)
Injury_Cases <- head(
  HarmfulEvents[order(HarmfulEvents$injuries, decreasing = TRUE), ],
  10
)

# Show the fatality ranking: event type alongside its total fatalities.
Fatal_Cases[, c("EVTYPE", "fatalities")]
## EVTYPE fatalities
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
# Show the injury ranking: event type alongside its total injuries.
Injury_Cases[c("EVTYPE", "injuries")]
## EVTYPE injuries
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
{echo=TRUE}
#' Convert damage-exponent codes into powers of ten.
#'
#' Mapping: h/H -> 2 (hundred), k/K -> 3 (thousand), m/M -> 6 (million),
#' b/B -> 9 (billion); a numeric string is used as the exponent itself;
#' "", "-", "?" and "+" map to 0.
#'
#' @param e An exponent code, or a vector of codes (character or factor).
#' @return A numeric vector of exponents, the same length as `e`.
#' Stops with "Invalid exponent value." if any code is not recognised
#' (including `NA`).
exp_transform <- function(e) {
  # Always work on the label text. Calling as.numeric() directly on a factor
  # returns its internal level code, which would silently turn codes such as
  # "", "+" or "5" into arbitrary exponents.
  txt <- as.character(e)

  # Letter codes.
  powers <- c(h = 2, H = 2, k = 3, K = 3, m = 6, M = 6, b = 9, B = 9)
  out <- unname(powers[txt])

  # Numeric strings: anything still unresolved that parses as a number.
  # The coercion warning for non-numeric text is expected and handled below,
  # so it is silenced for this single call only.
  unresolved <- is.na(out)
  out[unresolved] <- suppressWarnings(as.numeric(txt[unresolved]))

  # Placeholder symbols carry no magnitude.
  out[is.na(out) & txt %in% c("", "-", "?", "+")] <- 0

  if (anyNA(out)) {
    stop("Invalid exponent value.")
  }
  out
}
# Turn each exponent code into a power of ten and scale the raw damage figures.
# The codes are coerced to character first so the helper sees the label text
# even when the column was read as a factor. vapply() guarantees a plain
# numeric vector, whereas sapply() returns a list for zero-row input.
prop_dmg_exp <- vapply(as.character(WData$PROPDMGEXP), exp_transform,
                       FUN.VALUE = numeric(1), USE.NAMES = FALSE)
WData$prop_dmg <- WData$PROPDMG * 10^prop_dmg_exp
crop_dmg_exp <- vapply(as.character(WData$CROPDMGEXP), exp_transform,
                       FUN.VALUE = numeric(1), USE.NAMES = FALSE)
WData$crop_dmg <- WData$CROPDMG * 10^crop_dmg_exp
library(plyr)
# Total property and crop damage for every event type.
econ_loss <- ddply(
  WData, .(EVTYPE), summarize,
  prop_dmg = sum(prop_dmg),
  crop_dmg = sum(crop_dmg)
)

# Keep only event types that caused some damage of either kind.
econ_loss <- econ_loss[(econ_loss$prop_dmg > 0 | econ_loss$crop_dmg > 0), ]

# Ten costliest event types for each damage category. `decreasing` is written
# as TRUE because `T` is an ordinary variable that can be reassigned.
prop_dmg_events <- head(
  econ_loss[order(econ_loss$prop_dmg, decreasing = TRUE), ],
  10
)
crop_dmg_events <- head(
  econ_loss[order(econ_loss$crop_dmg, decreasing = TRUE), ],
  10
)

# Show the property-damage ranking.
prop_dmg_events[, c("EVTYPE", "prop_dmg")]
## EVTYPE prop_dmg
## 153 FLASH FLOOD 6.820237e+13
## 786 THUNDERSTORM WINDS 2.086532e+13
## 834 TORNADO 1.078951e+12
## 244 HAIL 3.157558e+11
## 464 LIGHTNING 1.729433e+11
## 170 FLOOD 1.446577e+11
## 411 HURRICANE/TYPHOON 6.930584e+10
## 185 FLOODING 5.920826e+10
## 670 STORM SURGE 4.332354e+10
## 310 HEAVY SNOW 1.793259e+10
# Show the crop-damage ranking: event type alongside its total crop damage.
crop_dmg_events[c("EVTYPE", "crop_dmg")]
## EVTYPE crop_dmg
## 95 DROUGHT 13972566000
## 170 FLOOD 5661968450
## 590 RIVER FLOOD 5029459000
## 427 ICE STORM 5022113500
## 244 HAIL 3025974480
## 402 HURRICANE 2741910000
## 411 HURRICANE/TYPHOON 2607872800
## 153 FLASH FLOOD 1421317100
## 140 EXTREME COLD 1292973000
## 212 FROST/FREEZE 1094086000
{echo=TRUE}
# Draw one horizontal bar chart for a top-ten ranking.
#   values - numeric bar lengths, one per event type
#   labels - event-type labels, one per bar
#   title  - plot title
#   xlab   - optional label for the value axis
# Returns the bar midpoints invisibly, as barplot() does.
plot_top_events <- function(values, labels, title, xlab = NULL) {
  # Widen the left margin so the long event names fit, and put the previous
  # margins back once the chart is drawn.
  old_par <- par(mai = c(1, 2, 1, 1))
  on.exit(par(old_par), add = TRUE)
  bar_mids <- barplot(values, names.arg = as.character(labels), horiz = TRUE,
                      cex.names = 0.9, las = 1, main = title, xlab = xlab)
  invisible(bar_mids)
}

# Columns are selected by name: in Injury_Cases and crop_dmg_events the second
# column holds fatalities and property damage respectively, so positional
# indexing would chart the wrong measure under the "Injuries" and "Crop Losses"
# titles.
plot_top_events(Fatal_Cases$fatalities, Fatal_Cases$EVTYPE,
                "Fatalities with Respect to Type of Event")
plot_top_events(Injury_Cases$injuries, Injury_Cases$EVTYPE,
                "Injuries with Respect to Type of Event")

# Damage totals span several orders of magnitude, so they are drawn on a
# log10 scale; the axis label says so explicitly.
plot_top_events(log10(prop_dmg_events$prop_dmg), prop_dmg_events$EVTYPE,
                "Property Losses with Respect to Type of Event",
                xlab = "log10(total property losses)")
plot_top_events(log10(crop_dmg_events$crop_dmg), crop_dmg_events$EVTYPE,
                "Crop Losses with Respect to Type of Event",
                xlab = "log10(total crop losses)")
{echo=TRUE}
According to the results of the data visualization, Tornado, Excessive Heat, Flash Flood, and Lightning resulted in the most fatalities and injuries. Flash flood, drought, and thunderstorm resulted in the greatest economic losses, including property losses and crop losses.