Synopsis

Loading add-on packages

library(ggplot2)
library(reshape2)
library(plyr)

Loading the data

#setwd("./RepData_PeerAssessment2")
# Fetch the compressed storm data archive only when no local copy exists, so
# re-running the analysis does not download the file again.
file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
local_file <- "StormData.bz2"
if (!file.exists(local_file)) {
  # mode = "wb" requests a binary transfer; the archive is not a text file.
  download.file(file_url, local_file, mode = "wb")
}
# bzfile() opens the bzip2 archive as a connection that read.csv() parses.
raw_data <- read.csv(bzfile(local_file))

Data Processing

  • Calculate impact on population health

First, I analyzed the number of injuries and fatalities per event type. The results show that tornadoes caused by far the most injuries across the US.

# Total injuries and fatalities per event type.
sum_of_victim <- aggregate(
  raw_data[c("INJURIES", "FATALITIES")],
  by = raw_data["EVTYPE"],
  FUN = sum
)
# Rank event types by injuries, largest first, and keep at most five rows.
# head() is used instead of indexing with 1:5, which would produce all-NA
# rows if fewer than five event types were present.
sortlist_INJ <- order(sum_of_victim$INJURIES, decreasing = TRUE)
TOP5_INJ <- head(sum_of_victim[sortlist_INJ, ], 5)
TOP5_INJ
##             EVTYPE INJURIES FATALITIES
## 826        TORNADO    91346       5633
## 846      TSTM WIND     6957        504
## 167          FLOOD     6789        470
## 124 EXCESSIVE HEAT     6525       1903
## 453      LIGHTNING     5230        816

Tornadoes also caused the most fatalities.

# Rank event types by fatalities, largest first, and keep at most five rows.
# head() is used instead of indexing with 1:5, which would produce all-NA
# rows if fewer than five event types were present.
sortlist_FAT <- order(sum_of_victim$FATALITIES, decreasing = TRUE)
TOP5_FAT <- head(sum_of_victim[sortlist_FAT, ], 5)
TOP5_FAT
##             EVTYPE INJURIES FATALITIES
## 826        TORNADO    91346       5633
## 124 EXCESSIVE HEAT     6525       1903
## 151    FLASH FLOOD     1777        978
## 271           HEAT     2100        937
## 453      LIGHTNING     5230        816

This indicates that tornadoes were the most harmful event type with respect to population health.

# Reshape the top-five injury table to long format: one row per event type
# and victim category, with the count in `number_of_victims`.
melt_TOP5_INJ <- melt(
  TOP5_INJ,
  id.vars = "EVTYPE",
  variable.name = "variable",
  value.name = "number_of_victims"
)
a <- ggplot(
  melt_TOP5_INJ,
  aes(x = EVTYPE, y = number_of_victims, label = EVTYPE)
)
# stat = "identity" draws the counts as given; fill separates the categories.
a +
  geom_bar(aes(fill = variable), stat = "identity") +
  labs(title = "THE MOST HARMFUL EVENT TO POPULATION HEALTH")

  • Calculate impact on the economy

I investigate the damage to property and to crops separately to find the events with the greatest economic consequences. First, I identify the five worst events for property damage. Second, I identify the five worst events for crop damage.

I use the following columns:

PROPDMG The amount of property damage

PROPDMGEXP a multiplier for property damage

CROPDMG The amount of crop damage

CROPDMGEXP a multiplier for crop damage

I check unique elements of PROPDMGEXP and CROPDMGEXP.

# Distinct property-damage exponent codes present in the data.
unique(raw_data[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  + - 0 1 2 3 4 5 6 7 8 ? B H K M h m
# Distinct crop-damage exponent codes present in the data.
unique(raw_data[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  0 2 ? B K M k m

From Storm Data Documentation Section 2.7: Alphabetical characters used to signify magnitude include “H” for hundreds, “K” for thousands, “M” for millions, and “B” for billions.

# Translate damage exponent codes into numeric powers of ten.
#   exp_code: factor or character vector of exponent codes.
#   returns:  numeric vector of the same length as exp_code.
# Letters map to H = 2, K = 3, M = 6, B = 9 (case-insensitive), a single digit
# is used as the exponent itself, and blank or unrecognised codes
# ("", "+", "-", "?") map to 0 so the damage amount is taken as-is.
damage_exponent <- function(exp_code) {
  # Convert to character first: as.numeric() on a factor returns the internal
  # level codes, not the labels, which would give wrong exponents.
  code <- toupper(as.character(exp_code))
  letter_exponent <- c(H = 2, K = 3, M = 6, B = 9)
  exponent <- unname(letter_exponent[code])
  is_digit <- code %in% as.character(0:9)
  exponent[is_digit] <- as.numeric(code[is_digit])
  # Anything still unmatched (blank, symbols, NA) gets exponent 0.
  exponent[is.na(exponent)] <- 0
  exponent
}

data_ECODMG <- raw_data

data_ECODMG$PROPex <- damage_exponent(data_ECODMG$PROPDMGEXP)
data_ECODMG$CROPex <- damage_exponent(data_ECODMG$CROPDMGEXP)

# Total damage = reported amount scaled by its power-of-ten multiplier.
data_ECODMG$TOTALPROPDMG <- data_ECODMG$PROPDMG * (10^data_ECODMG$PROPex)
data_ECODMG$TOTALCROPDMG <- data_ECODMG$CROPDMG * (10^data_ECODMG$CROPex)
  • The five worst events for property damage
# Total property damage per event type.
data_PRPDMG <- aggregate(
  data_ECODMG["TOTALPROPDMG"],
  by = data_ECODMG["EVTYPE"],
  FUN = sum
)
# Rank event types by property damage, largest first, and keep at most five
# rows. head() is used instead of indexing with 1:5, which would produce
# all-NA rows if fewer than five event types were present.
sortlist_PRPDMG <- order(data_PRPDMG$TOTALPROPDMG, decreasing = TRUE)
TOP5_PRPDMG <- head(data_PRPDMG[sortlist_PRPDMG, ], 5)
TOP5_PRPDMG
##                EVTYPE TOTALPROPDMG
## 167             FLOOD 1.446577e+13
## 393 HURRICANE/TYPHOON 6.930584e+12
## 826           TORNADO 5.694738e+12
## 656       STORM SURGE 4.332354e+12
## 151       FLASH FLOOD 1.682267e+12

The graph below indicates that FLOOD had the largest impact on property.

# Reshape the top-five property table to long format, with the damage value
# in the `PROPDMG` column.
melt_TOP5_PRPDMG <- melt(
  TOP5_PRPDMG,
  id.vars = "EVTYPE",
  value.name = "PROPDMG"
)
b <- ggplot(
  melt_TOP5_PRPDMG,
  aes(x = EVTYPE, y = PROPDMG, label = EVTYPE)
)
# stat = "identity" draws the totals as given; x labels are rotated 90 degrees.
b +
  geom_bar(aes(fill = variable), stat = "identity") +
  labs(title = "TOP5 ECONOMIC DAMAGES IN PROPERTIES") +
  theme(axis.text.x = element_text(angle = 90))

  • The five worst events for crop damage
# Total crop damage per event type.
data_CROPDMG <- aggregate(
  data_ECODMG["TOTALCROPDMG"],
  by = data_ECODMG["EVTYPE"],
  FUN = sum
)
# Rank event types by crop damage, largest first, and keep at most five rows.
# head() is used instead of indexing with 1:5, which would produce all-NA
# rows if fewer than five event types were present.
sortlist_CROPDMG <- order(data_CROPDMG$TOTALCROPDMG, decreasing = TRUE)
TOP5_CROPDMG <- head(data_CROPDMG[sortlist_CROPDMG, ], 5)
TOP5_CROPDMG
##          EVTYPE TOTALCROPDMG
## 241        HAIL  60161277300
## 167       FLOOD  21753275000
## 151 FLASH FLOOD  19039070000
## 91      DROUGHT  14595735000
## 846   TSTM WIND  11320985000

The graph below indicates that HAIL had the largest impact on crops.

# Reshape the top-five crop table to long format, with the damage value in
# the `CROPDMG` column.
melt_TOP5_CROPDMG <- melt(
  TOP5_CROPDMG,
  id.vars = "EVTYPE",
  value.name = "CROPDMG"
)
c <- ggplot(
  melt_TOP5_CROPDMG,
  aes(x = EVTYPE, y = CROPDMG, label = EVTYPE)
)
# stat = "identity" draws the totals as given; x labels are rotated 90 degrees.
c +
  geom_bar(aes(fill = variable), stat = "identity") +
  labs(title = "TOP5 ECONOMIC DAMAGES IN CROPS") +
  theme(axis.text.x = element_text(angle = 90))

Finally, I merged these two data sets.

# Combine property and crop damage for every event type that appears in
# either top-five list. The full per-event totals are merged rather than the
# two top-five tables: merging only the top-five tables leaves NA for events
# absent from the other list, and zero-filling those would report zero damage
# for events that did cause damage.
top_events <- union(
  as.character(TOP5_PRPDMG$EVTYPE),
  as.character(TOP5_CROPDMG$EVTYPE)
)
merged_ECODMG <- merge(
  data_PRPDMG[data_PRPDMG$EVTYPE %in% top_events, ],
  data_CROPDMG[data_CROPDMG$EVTYPE %in% top_events, ],
  by = "EVTYPE",
  all = TRUE
)
merged_ECODMG[is.na(merged_ECODMG)] <- 0    # safeguard: fill any remaining NA with 0
merged_ECODMG
##              EVTYPE TOTALPROPDMG TOTALCROPDMG
## 1           DROUGHT 0.000000e+00  14595735000
## 2       FLASH FLOOD 1.682267e+12  19039070000
## 3             FLOOD 1.446577e+13  21753275000
## 4              HAIL 0.000000e+00  60161277300
## 5 HURRICANE/TYPHOON 6.930584e+12            0
## 6       STORM SURGE 4.332354e+12            0
## 7           TORNADO 5.694738e+12            0
## 8         TSTM WIND 0.000000e+00  11320985000

The graph below indicates that FLOOD had the largest overall economic impact.

# Reshape the merged table to long format: one row per event type and damage
# category, with the amount in the `ECO_DMG` column.
melt_merged_ECODMG <- melt(
  merged_ECODMG,
  id.vars = "EVTYPE",
  value.name = "ECO_DMG"
)
c <- ggplot(
  melt_merged_ECODMG,
  aes(x = EVTYPE, y = ECO_DMG, label = EVTYPE)
)
# stat = "identity" draws the totals as given; fill separates the categories.
c +
  geom_bar(aes(fill = variable), stat = "identity") +
  labs(title = "TOTAL ECONOMIC DAMAGES") +
  theme(axis.text.x = element_text(angle = 90))

Result

  • This report indicates that TORNADO was the most harmful event type with respect to population health.
  • This report indicates that FLOOD had the largest impact on property.
  • This report indicates that HAIL had the largest impact on crops.