library(ggplot2)
library(reshape2)
library(plyr)
#setwd("./RepData_PeerAssessment2")
# Fetch the compressed storm data archive and load it into a data frame.
# The archive is downloaded only when no local copy exists, so re-running
# the script does not transfer the file again.
file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists("StormData.bz2")) {
  # mode = "wb": the archive is binary and must not be written in text mode.
  download.file(file_url, "StormData.bz2", mode = "wb")
}
# bzfile() opens the bzip2 archive as a connection that read.csv() parses.
raw_data <- read.csv(bzfile("StormData.bz2"))
First, I analyzed the number of injuries and fatalities by event type. The results show that tornadoes caused the largest number of injuries across the US.
# Total injuries and fatalities per event type (EVTYPE).
sum_of_victim <- aggregate(
  raw_data[c("INJURIES", "FATALITIES")],
  by = raw_data["EVTYPE"],
  FUN = sum
)
# Row indices ordered from most to fewest injuries.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
sortlist_INJ <- order(sum_of_victim$INJURIES, decreasing = TRUE)
# head() keeps at most five rows; indexing with [1:5] would append all-NA
# rows if fewer than five event types were present.
TOP5_INJ <- sum_of_victim[head(sortlist_INJ, 5), ]
TOP5_INJ
## EVTYPE INJURIES FATALITIES
## 826 TORNADO 91346 5633
## 846 TSTM WIND 6957 504
## 167 FLOOD 6789 470
## 124 EXCESSIVE HEAT 6525 1903
## 453 LIGHTNING 5230 816
Tornadoes also caused the largest number of fatalities.
# Row indices of sum_of_victim ordered from most to fewest fatalities.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
sortlist_FAT <- order(sum_of_victim$FATALITIES, decreasing = TRUE)
# head() keeps at most five rows; indexing with [1:5] would append all-NA
# rows if fewer than five event types were present.
TOP5_FAT <- sum_of_victim[head(sortlist_FAT, 5), ]
TOP5_FAT
## EVTYPE INJURIES FATALITIES
## 826 TORNADO 91346 5633
## 124 EXCESSIVE HEAT 6525 1903
## 151 FLASH FLOOD 1777 978
## 271 HEAT 2100 937
## 453 LIGHTNING 5230 816
This indicates that tornadoes were the most harmful event type with respect to population health.
# Reshape to long format: one row per event type and measure
# (INJURIES / FATALITIES), with the count in number_of_victims.
melt_TOP5_INJ <- melt(
  TOP5_INJ,
  id.vars = "EVTYPE",
  variable.name = "variable",
  value.name = "number_of_victims"
)
# Base plot: event type on the x-axis, victim count on the y-axis.
a <- ggplot(
  data = melt_TOP5_INJ,
  mapping = aes(x = EVTYPE, y = number_of_victims, label = EVTYPE)
)
# Bars are filled by measure so injuries and fatalities can be told apart.
a +
  geom_bar(mapping = aes(fill = variable), stat = "identity") +
  labs(title = "THE MOST HARMFUL EVENT TO POPULATION HEALTH")
To find the events with the greatest economic consequences, I investigate the damage to property and to crops separately. First, I identify the five event types that caused the most property damage. Second, I identify the five event types that caused the most crop damage.
I use the following columns:
PROPDMG The amount of property damage
PROPDMGEXP a multiplier for property damage
CROPDMG The amount of crop damage
CROPDMGEXP a multiplier for crop damage
I check the unique values of PROPDMGEXP and CROPDMGEXP.
# List the distinct magnitude codes found in each damage-exponent column.
unique(raw_data[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: + - 0 1 2 3 4 5 6 7 8 ? B H K M h m
unique(raw_data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: 0 2 ? B K M k m
From Storm Data Documentation Section 2.7: Alphabetical characters used to signify magnitude include “H” for hundreds, “K” for thousands, “M” for millions, and “B” for billions.
# Convert the damage-magnitude codes into powers of ten and compute the
# property and crop damage amounts.
#
# Fix: the exponent columns are converted with as.character() before any
# recoding. When read.csv() loads them as factors (as the recorded
# "Levels:" output for these columns indicates), as.numeric() on a factor
# returns the internal level index rather than the number shown in the
# label, so the exponents and every total derived from them were wrong.
# The "##" result listings recorded elsewhere in this file were produced by
# the factor-based conversion and need to be regenerated.
data_ECODMG <- raw_data

# Property damage: letters map to their power of ten (H = 2, K = 3, M = 6,
# B = 9, case-insensitive); "+", "-", "?" and the empty string map to 0
# (a multiplier of 1); digit codes are used as the exponent unchanged.
data_ECODMG$PROPex <- as.character(data_ECODMG$PROPDMGEXP)
data_ECODMG$PROPex <- revalue(
  data_ECODMG$PROPex,
  c("K" = "3", "M" = "6", "m" = "6", "B" = "9", "+" = "0",
    "h" = "2", "H" = "2", "-" = "0", "?" = "0")
)
# %in% never yields NA, so the assignment is safe even with missing codes.
data_ECODMG$PROPex[data_ECODMG$PROPex %in% ""] <- "0"
data_ECODMG$PROPex <- as.numeric(data_ECODMG$PROPex)

# Crop damage: same scheme, using the codes that occur in CROPDMGEXP.
data_ECODMG$CROPex <- as.character(data_ECODMG$CROPDMGEXP)
data_ECODMG$CROPex <- revalue(
  data_ECODMG$CROPex,
  c("K" = "3", "k" = "3", "M" = "6", "m" = "6", "B" = "9", "?" = "0")
)
data_ECODMG$CROPex[data_ECODMG$CROPex %in% ""] <- "0"
data_ECODMG$CROPex <- as.numeric(data_ECODMG$CROPex)

# Damage amount = reported figure * 10^exponent.
data_ECODMG$TOTALPROPDMG <- data_ECODMG$PROPDMG * (10^data_ECODMG$PROPex)
data_ECODMG$TOTALCROPDMG <- data_ECODMG$CROPDMG * (10^data_ECODMG$CROPex)
# Total property damage per event type (EVTYPE).
data_PRPDMG <- aggregate(
  data_ECODMG["TOTALPROPDMG"],
  by = data_ECODMG["EVTYPE"],
  FUN = sum
)
# Row indices ordered from largest to smallest total.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
sortlist_PRPDMG <- order(data_PRPDMG$TOTALPROPDMG, decreasing = TRUE)
# head() keeps at most five rows; indexing with [1:5] would append all-NA
# rows if fewer than five event types were present.
TOP5_PRPDMG <- data_PRPDMG[head(sortlist_PRPDMG, 5), ]
TOP5_PRPDMG
## EVTYPE TOTALPROPDMG
## 167 FLOOD 1.446577e+13
## 393 HURRICANE/TYPHOON 6.930584e+12
## 826 TORNADO 5.694738e+12
## 656 STORM SURGE 4.332354e+12
## 151 FLASH FLOOD 1.682267e+12
The following graph indicates that FLOOD had the largest impact on property.
# Reshape to long format: EVTYPE plus a variable / PROPDMG pair per row.
melt_TOP5_PRPDMG <- melt(
  TOP5_PRPDMG,
  id.vars = "EVTYPE",
  value.name = "PROPDMG"
)
# Base plot: event type on the x-axis, property damage on the y-axis.
b <- ggplot(
  data = melt_TOP5_PRPDMG,
  mapping = aes(x = EVTYPE, y = PROPDMG, label = EVTYPE)
)
# The x-axis labels are rotated by 90 degrees.
b +
  geom_bar(mapping = aes(fill = variable), stat = "identity") +
  labs(title = "TOP5 ECONOMIC DAMAGES IN PROPERTIES") +
  theme(axis.text.x = element_text(angle = 90))
# Total crop damage per event type (EVTYPE).
data_CROPDMG <- aggregate(
  data_ECODMG["TOTALCROPDMG"],
  by = data_ECODMG["EVTYPE"],
  FUN = sum
)
# Row indices ordered from largest to smallest total.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
sortlist_CROPDMG <- order(data_CROPDMG$TOTALCROPDMG, decreasing = TRUE)
# head() keeps at most five rows; indexing with [1:5] would append all-NA
# rows if fewer than five event types were present.
TOP5_CROPDMG <- data_CROPDMG[head(sortlist_CROPDMG, 5), ]
TOP5_CROPDMG
## EVTYPE TOTALCROPDMG
## 241 HAIL 60161277300
## 167 FLOOD 21753275000
## 151 FLASH FLOOD 19039070000
## 91 DROUGHT 14595735000
## 846 TSTM WIND 11320985000
The following graph indicates that HAIL had the largest impact on crops.
# Reshape to long format: EVTYPE plus a variable / CROPDMG pair per row.
melt_TOP5_CROPDMG <- melt(
  TOP5_CROPDMG,
  id.vars = "EVTYPE",
  value.name = "CROPDMG"
)
# The plot object is no longer stored in a variable named `c`: that name
# masks base::c wherever it is used as a value (e.g. do.call(c, ...)).
plot_CROPDMG <- ggplot(
  data = melt_TOP5_CROPDMG,
  mapping = aes(x = EVTYPE, y = CROPDMG, label = EVTYPE)
)
# The x-axis labels are rotated by 90 degrees.
plot_CROPDMG +
  geom_bar(mapping = aes(fill = variable), stat = "identity") +
  labs(title = "TOP5 ECONOMIC DAMAGES IN CROPS") +
  theme(axis.text.x = element_text(angle = 90))
Finally, I merged these two data sets.
# Combine property and crop damage for every event type that appears in
# either top-5 list.
#
# Fix: the totals are taken from the full per-event aggregates
# (data_PRPDMG / data_CROPDMG) instead of merging the two top-5 tables.
# Merging the top-5 tables left NA for an event missing from the other
# table, and replacing that NA with 0 reported its damage as zero even
# though the real total was merely outside the other top five.
# The "##" listing recorded after this code predates this change.
top_EVTYPE <- union(
  as.character(TOP5_PRPDMG$EVTYPE),
  as.character(TOP5_CROPDMG$EVTYPE)
)
merged_ECODMG <- merge(data_PRPDMG, data_CROPDMG, by = "EVTYPE", all = TRUE)
merged_ECODMG <- merged_ECODMG[
  as.character(merged_ECODMG$EVTYPE) %in% top_EVTYPE,
]
# Renumber the rows 1..n, as a fresh merge() result would be.
rownames(merged_ECODMG) <- NULL
# Fill NA with 0 in the damage columns only (an event type absent from one
# of the aggregates has no recorded damage of that kind).
damage_cols <- c("TOTALPROPDMG", "TOTALCROPDMG")
merged_ECODMG[damage_cols][is.na(merged_ECODMG[damage_cols])] <- 0
merged_ECODMG
## EVTYPE TOTALPROPDMG TOTALCROPDMG
## 1 DROUGHT 0.000000e+00 14595735000
## 2 FLASH FLOOD 1.682267e+12 19039070000
## 3 FLOOD 1.446577e+13 21753275000
## 4 HAIL 0.000000e+00 60161277300
## 5 HURRICANE/TYPHOON 6.930584e+12 0
## 6 STORM SURGE 4.332354e+12 0
## 7 TORNADO 5.694738e+12 0
## 8 TSTM WIND 0.000000e+00 11320985000
The following graph indicates that FLOOD had the largest overall economic impact.
# Reshape to long format: one row per event type and damage kind
# (TOTALPROPDMG / TOTALCROPDMG), with the amount in ECO_DMG.
melt_merged_ECODMG <- melt(
  merged_ECODMG,
  id.vars = "EVTYPE",
  value.name = "ECO_DMG"
)
# The plot object is no longer stored in a variable named `c`: that name
# masks base::c wherever it is used as a value (e.g. do.call(c, ...)).
plot_ECODMG <- ggplot(
  data = melt_merged_ECODMG,
  mapping = aes(x = EVTYPE, y = ECO_DMG, label = EVTYPE)
)
# Bars are filled by damage kind; the x-axis labels are rotated by 90 degrees.
plot_ECODMG +
  geom_bar(mapping = aes(fill = variable), stat = "identity") +
  labs(title = "TOTAL ECONOMIC DAMAGES") +
  theme(axis.text.x = element_text(angle = 90))