Synopsis

The analysis starts by processing the raw data into a readable format. An exploratory analysis is then conducted to understand which kinds of severe weather conditions result in the most devastating consequences, including fatalities, injuries, and economic losses. The analysis focuses on the types of events that are most harmful with respect to population health and the types of events that have the greatest economic consequences. According to the analysis, Tornado, Excessive Heat, Flash Flood, and Lightning resulted in the most fatalities and injuries. Flash flood, drought, and thunderstorm resulted in the greatest economic losses, including property losses and crop losses.

Data Processing

# Read the bzip2-compressed CSV through a bzfile() connection.
WData <- read.csv(file = bzfile(description = "WData.csv.bz2"))
{echo=TRUE}

Number of Different Severe Weather Types

# Count the distinct labels found in the EVTYPE column.
length(unique(WData[["EVTYPE"]]))
## [1] 985
{echo=TRUE}

Exploratory Analysis:

First: Aggregating Data to Find Most Harmful Events

library(plyr)

# Sum fatalities and injuries per event type.
HarmfulEvents <- ddply(WData, .(EVTYPE), summarize,
                    fatalities = sum(FATALITIES),
                    injuries = sum(INJURIES))

# Keep the ten event types with the largest total for each measure.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Fatal_Cases <- head(HarmfulEvents[order(HarmfulEvents$fatalities, decreasing = TRUE), ], 10)
Injury_Cases <- head(HarmfulEvents[order(HarmfulEvents$injuries, decreasing = TRUE), ], 10)
Fatal_Cases[, c("EVTYPE", "fatalities")]
##             EVTYPE fatalities
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504
## 170          FLOOD        470
## 585    RIP CURRENT        368
## 359      HIGH WIND        248
## 19       AVALANCHE        224
Injury_Cases[, c("EVTYPE", "injuries")]
##                EVTYPE injuries
## 834           TORNADO    91346
## 856         TSTM WIND     6957
## 170             FLOOD     6789
## 130    EXCESSIVE HEAT     6525
## 464         LIGHTNING     5230
## 275              HEAT     2100
## 427         ICE STORM     1975
## 153       FLASH FLOOD     1777
## 760 THUNDERSTORM WIND     1488
## 244              HAIL     1361
{echo=TRUE}

Second: Aggregating Data to Find Economic Loss

# Convert one damage-exponent code into the power of ten it stands for.
#
# Args:
#   e: A single exponent code, either a character string or a one-element
#      factor.
#
# Returns:
#   One number: 2 for "h"/"H", 3 for "k"/"K", 6 for "m"/"M", 9 for "b"/"B",
#   the numeric value of a code made only of digits, and 0 for "", "-", "?"
#   or "+".
#
# Stops with "Invalid exponent value." for any other code, including NA.
exp_transform <- function(e) {
    # Work on the label itself. as.numeric() on a factor returns the internal
    # level index, not the digit shown, which would yield a wrong exponent.
    e <- as.character(e)

    if (e %in% c("h", "H")) {
        2
    } else if (e %in% c("k", "K")) {
        3
    } else if (e %in% c("m", "M")) {
        6
    } else if (e %in% c("b", "B")) {
        9
    } else if (grepl("^[0-9]+$", e)) {
        # Testing the pattern first avoids the coercion warning that
        # as.numeric() raises on codes such as "-", "?" and "+".
        as.numeric(e)
    } else if (e %in% c("", "-", "?", "+")) {
        0
    } else {
        stop("Invalid exponent value.")
    }
}
library(plyr)

# Turn each exponent code into a power of ten and scale the damage figures.
# vapply() guarantees one number per row; USE.NAMES = FALSE keeps the codes
# from being attached as names when the column is character.
prop_dmg_exp <- vapply(WData$PROPDMGEXP, exp_transform, numeric(1),
                       USE.NAMES = FALSE)
WData$prop_dmg <- WData$PROPDMG * 10^prop_dmg_exp
crop_dmg_exp <- vapply(WData$CROPDMGEXP, exp_transform, numeric(1),
                       USE.NAMES = FALSE)
WData$crop_dmg <- WData$CROPDMG * 10^crop_dmg_exp

# Sum property and crop damage per event type, then drop event types that
# caused no damage of either kind.
econ_loss <- ddply(WData, .(EVTYPE), summarize,
                   prop_dmg = sum(prop_dmg),
                   crop_dmg = sum(crop_dmg))
econ_loss <- econ_loss[(econ_loss$prop_dmg > 0 | econ_loss$crop_dmg > 0), ]

# Keep the ten event types with the largest total for each kind of damage.
prop_dmg_events <- head(econ_loss[order(econ_loss$prop_dmg, decreasing = TRUE), ], 10)
crop_dmg_events <- head(econ_loss[order(econ_loss$crop_dmg, decreasing = TRUE), ], 10)
prop_dmg_events[, c("EVTYPE", "prop_dmg")]
##                 EVTYPE     prop_dmg
## 153        FLASH FLOOD 6.820237e+13
## 786 THUNDERSTORM WINDS 2.086532e+13
## 834            TORNADO 1.078951e+12
## 244               HAIL 3.157558e+11
## 464          LIGHTNING 1.729433e+11
## 170              FLOOD 1.446577e+11
## 411  HURRICANE/TYPHOON 6.930584e+10
## 185           FLOODING 5.920826e+10
## 670        STORM SURGE 4.332354e+10
## 310         HEAVY SNOW 1.793259e+10
crop_dmg_events[, c("EVTYPE", "crop_dmg")]
##                EVTYPE    crop_dmg
## 95            DROUGHT 13972566000
## 170             FLOOD  5661968450
## 590       RIVER FLOOD  5029459000
## 427         ICE STORM  5022113500
## 244              HAIL  3025974480
## 402         HURRICANE  2741910000
## 411 HURRICANE/TYPHOON  2607872800
## 153       FLASH FLOOD  1421317100
## 140      EXTREME COLD  1292973000
## 212      FROST/FREEZE  1094086000
{echo=TRUE}

RESULTS: Visualizing Exploratory Analysis

# One horizontal bar chart per top-ten table. Values are picked by column
# name: in every table column 2 is the first summed measure (fatalities or
# prop_dmg), so positional indexing would plot the wrong measure for the
# injury and crop-loss charts.

# Widen the left margin so the event-type labels fit.
par(mai = c(1, 2, 1, 1))
barplot(Fatal_Cases$fatalities, names.arg = Fatal_Cases$EVTYPE,
        horiz = TRUE, cex.names = 0.9, las = 1,
        main = "Fatalities with Respect to Type of Event")

par(mai = c(1, 2, 1, 1))
barplot(Injury_Cases$injuries, names.arg = Injury_Cases$EVTYPE,
        horiz = TRUE, cex.names = 0.9, las = 1,
        main = "Injuries with Respect to Type of Event")

# The two loss charts are drawn on a log10 scale; the axis label says so.
par(mai = c(1, 2, 1, 1))
barplot(log10(prop_dmg_events$prop_dmg), names.arg = prop_dmg_events$EVTYPE,
        horiz = TRUE, cex.names = 0.9, las = 1,
        xlab = "log10(property loss)",
        main = "Property Losses with Respect to Type of Event")

par(mai = c(1, 2, 1, 1))
barplot(log10(crop_dmg_events$crop_dmg), names.arg = crop_dmg_events$EVTYPE,
        horiz = TRUE, cex.names = 0.9, las = 1,
        xlab = "log10(crop loss)",
        main = "Crop Losses with Respect to Type of Event")

{echo=TRUE}

Summary of Analysis

According to the results of the data visualization, Tornado, Excessive Heat, Flash Flood, and Lightning resulted in the most fatalities and injuries. Flash flood, drought, and thunderstorm resulted in the greatest economic losses, including property losses and crop losses.