library(ggplot2)
library(reshape2)
# Load the storm data set straight from its bzip2-compressed CSV archive.
RawData <- read.csv(
  file = bzfile(description = "repdata-data-StormData.csv.bz2")
)
First, we investigate the five event types that caused the most injuries. It shows that injuries due to tornadoes were by far the largest across the US.
# Total injuries and fatalities per event type: one row per distinct EVTYPE,
# with the INJURIES and FATALITIES columns summed within each type.
victim <- aggregate(
  RawData[c("INJURIES", "FATALITIES")],
  by = RawData["EVTYPE"],
  FUN = sum
)

# Row indices of `victim`, ranked from most to fewest injuries.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
sortlist_INJ <- order(victim$INJURIES, decreasing = TRUE)

# head() returns at most five indices, so a table with fewer than five event
# types does not get padded with all-NA rows (which `[1:5]` would produce).
TOP5_INJ <- victim[head(sortlist_INJ, 5), ]
TOP5_INJ
## EVTYPE INJURIES FATALITIES
## 834 TORNADO 91346 5633
## 856 TSTM WIND 6957 504
## 170 FLOOD 6789 470
## 130 EXCESSIVE HEAT 6525 1903
## 464 LIGHTNING 5230 816
The barplot is created from the tidy data.
# Reshape to long form: one row per (EVTYPE, measure) pair, with the measure
# name in `variable` and its total in `value`, so injuries and fatalities can
# share one bar per event type.
melt_TOP5_INJ <- melt(
  TOP5_INJ,
  id.vars = "EVTYPE",
  variable.name = "variable",
  value.name = "value"
)

# Stacked bar chart of the pre-computed totals, coloured by measure.
# Built with ggplot() + geom_bar(stat = "identity") rather than
# qplot(..., stat = "identity"): the `stat` argument of qplot() is deprecated,
# and without it a bar geom would try to count rows instead of using `value`.
ggplot(melt_TOP5_INJ, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(stat = "identity")
We can also check the five event types that caused the most fatalities. It shows that fatalities due to tornadoes were the largest across the US.
# Row indices of `victim`, ranked from most to fewest fatalities.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
sortlist_FAT <- order(victim$FATALITIES, decreasing = TRUE)

# head() returns at most five indices, so a table with fewer than five event
# types does not get padded with all-NA rows (which `[1:5]` would produce).
TOP5_FAT <- victim[head(sortlist_FAT, 5), ]
TOP5_FAT
## EVTYPE INJURIES FATALITIES
## 834 TORNADO 91346 5633
## 130 EXCESSIVE HEAT 6525 1903
## 153 FLASH FLOOD 1777 978
## 275 HEAT 2100 937
## 464 LIGHTNING 5230 816
I investigate the impact on property and on crops separately to find the event types with the greatest economic consequences. First, I investigate the worst 5 events for property damage. Second, I investigate the worst 5 events for crop damage. It shows that hail had the largest impact on crops. However, the magnitude of crop damage is smaller than that of property damage. Therefore, we can see that tornadoes had the greatest economic consequences.
# Total of the PROPDMG column per event type (one row per distinct EVTYPE).
# NOTE(review): PROPDMG is summed as-is. If the data set stores a separate
# magnitude multiplier for each record (e.g. thousands vs. millions), these
# totals are not comparable dollar amounts -- confirm against the data
# documentation before drawing conclusions.
data_PRPDMG <- aggregate(
  RawData["PROPDMG"],
  by = RawData["EVTYPE"],
  FUN = sum
)

# Row indices of `data_PRPDMG`, ranked from largest to smallest total.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
sortlist_PRPDMG <- order(data_PRPDMG$PROPDMG, decreasing = TRUE)

# head() returns at most five indices, so a table with fewer than five event
# types does not get padded with all-NA rows (which `[1:5]` would produce).
TOP5_PRPDMG <- data_PRPDMG[head(sortlist_PRPDMG, 5), ]
TOP5_PRPDMG
## EVTYPE PROPDMG
## 834 TORNADO 3212258.2
## 153 FLASH FLOOD 1420124.6
## 856 TSTM WIND 1335965.6
## 170 FLOOD 899938.5
## 760 THUNDERSTORM WIND 876844.2
# Long-form copy of the top-five table: EVTYPE stays as the id column and the
# damage totals are stored under the column name "PROPDMG".
melt_TOP5_PRPDMG <- melt(
  TOP5_PRPDMG,
  id.vars = "EVTYPE",
  value.name = "PROPDMG"
)

# Bar chart of the pre-computed property-damage totals per event type.
# Built with ggplot() + geom_bar(stat = "identity") rather than
# qplot(..., stat = "identity"): the `stat` argument of qplot() is deprecated,
# and without it a bar geom would try to count rows instead of using PROPDMG.
ggplot(melt_TOP5_PRPDMG, aes(x = EVTYPE, y = PROPDMG)) +
  geom_bar(stat = "identity") +
  ggtitle("TOTAL ECONOMIC DAMAGES IN PROPERTIES")
# Total of the CROPDMG column per event type (one row per distinct EVTYPE).
# NOTE(review): CROPDMG is summed as-is. If the data set stores a separate
# magnitude multiplier for each record (e.g. thousands vs. millions), these
# totals are not comparable dollar amounts -- confirm against the data
# documentation before drawing conclusions.
data_CROPDMG <- aggregate(
  RawData["CROPDMG"],
  by = RawData["EVTYPE"],
  FUN = sum
)

# Row indices of `data_CROPDMG`, ranked from largest to smallest total.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
sortlist_CROPDMG <- order(data_CROPDMG$CROPDMG, decreasing = TRUE)

# head() returns at most five indices, so a table with fewer than five event
# types does not get padded with all-NA rows (which `[1:5]` would produce).
TOP5_CROPDMG <- data_CROPDMG[head(sortlist_CROPDMG, 5), ]
TOP5_CROPDMG
## EVTYPE CROPDMG
## 244 HAIL 579596.3
## 153 FLASH FLOOD 179200.5
## 170 FLOOD 168037.9
## 856 TSTM WIND 109202.6
## 834 TORNADO 100018.5
# Long-form copy of the top-five table: EVTYPE stays as the id column and the
# damage totals are stored under the column name "CROPDMG".
melt_TOP5_CROPDMG <- melt(
  TOP5_CROPDMG,
  id.vars = "EVTYPE",
  value.name = "CROPDMG"
)

# Bar chart of the pre-computed crop-damage totals per event type.
# Built with ggplot() + geom_bar(stat = "identity") rather than
# qplot(..., stat = "identity"): the `stat` argument of qplot() is deprecated,
# and without it a bar geom would try to count rows instead of using CROPDMG.
ggplot(melt_TOP5_CROPDMG, aes(x = EVTYPE, y = CROPDMG)) +
  geom_bar(stat = "identity") +
  ggtitle("TOTAL ECONOMIC DAMAGES IN CROPS")