The NOAA storm database was downloaded and read into an R data frame called “stormdata”. The subset of columns containing the population-health and economic-loss information was named “sdata”. The data were cleaned further before analysis. Fatality and injury counts were grouped by event type, and the ten event types with the highest fatalities and the ten with the highest injuries were plotted separately using ggplot2. The economic consequences comprise two parts: property damage and crop damage. These two damages were calculated separately and then combined. The total economic loss was then analyzed by event type, and the ten event types with the greatest economic consequences were plotted with ggplot2.
# Read the storm database; the first row of the file holds the column names.
stormdata <- read.csv("stormdata.csv.bz2", header = TRUE)

# Keep column 8 and columns 23 to 28 only. The rest of the script reads
# EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP
# from this subset (assumes those are the columns at these positions --
# TODO confirm against the file header).
sdata <- stormdata[, c(8, 23:28)]

# Normalise the event-type labels: strip leading whitespace, then upper-case,
# so that labels differing only in case or leading blanks group together.
# Trailing whitespace is left untouched.
sdata$EVTYPE <- toupper(trimws(sdata$EVTYPE, which = "left"))
# Total fatalities per event type, ranked from highest to lowest.
# The result has two columns: `Fcount` (the total) and `type` (the event
# label, taken from the row names that data.frame() derives from the
# tapply() result).
Fcount <- data.frame(Fcount = tapply(sdata$FATALITIES, sdata$EVTYPE, sum))
Fcount$type <- rownames(Fcount)
rownames(Fcount) <- NULL
Fcount <- Fcount[order(Fcount$Fcount, decreasing = TRUE), ]

# Total injuries per event type, built and ranked the same way.
# Columns: `Icount` (the total) and `type` (the event label).
Icount <- data.frame(Icount = tapply(sdata$INJURIES, sdata$EVTYPE, sum))
Icount$type <- rownames(Icount)
rownames(Icount) <- NULL
Icount <- Icount[order(Icount$Icount, decreasing = TRUE), ]
# Tabulate the raw property-damage exponent codes to see which symbols need
# translating. The recorded output is shown below; the first (unlabelled)
# count belongs to the empty code "".
table(sdata[["PROPDMGEXP"]])
##
##             -      ?      +      0      1      2      3      4      5
## 465934      1      8      5    216     25     13      4      4     28
##      6      7      8      B      h      H      K      m      M
##      4      5      1     40      1      6 424665      7  11330

# Same inspection for the crop-damage exponent codes; again the first
# (unlabelled) count belongs to the empty code "".
table(sdata[["CROPDMGEXP"]])
##
##             ?      0      2      B      k      K      m      M
## 618413      7     19      1      9     21 281832      1   1994
# Translate damage-exponent codes into numeric multipliers.
#
# Every code is looked up exactly once against its ORIGINAL value. Recoding
# the column in place, one code at a time, is unsafe here: a value that has
# already been rewritten to "1" (from "-", "?", "+" or "0") would be matched
# again by the rule for the code "1" and end up as 10.
#
# Mapping (letters are matched case-insensitively):
#   "", "-", "?", "+", "0"  -> 1
#   "1" ... "8"             -> 10^1 ... 10^8
#   "H" -> 1e2, "K" -> 1e3, "M" -> 1e6, "B" -> 1e9
#
# @param codes Character (or factor) vector of exponent codes.
# @return Numeric vector of multipliers, the same length as `codes`;
#   codes that are not in the table (and NA) map to NA.
exp_to_multiplier <- function(codes) {
  known_codes <- c(
    "", "-", "?", "+", "0",
    "1", "2", "3", "4", "5", "6", "7", "8",
    "H", "K", "M", "B"
  )
  multipliers <- c(
    1, 1, 1, 1, 1,
    1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8,
    1e2, 1e3, 1e6, 1e9
  )
  # match() is used rather than indexing a named vector because the empty
  # string "" can never be matched as a name.
  multipliers[match(toupper(as.character(codes)), known_codes)]
}

# Replace the codes with their multipliers. The columns become numeric, so
# the later as.numeric() conversions keep working unchanged.
sdata$PROPDMGEXP <- exp_to_multiplier(sdata$PROPDMGEXP)
sdata$CROPDMGEXP <- exp_to_multiplier(sdata$CROPDMGEXP)
# Damage per record = reported amount x exponent multiplier.
# as.numeric() is applied to both factors because the multiplier columns may
# be stored as character.
sdata[["TDprop"]] <- as.numeric(sdata[["PROPDMG"]]) *
  as.numeric(sdata[["PROPDMGEXP"]])
sdata[["TDcrop"]] <- as.numeric(sdata[["CROPDMG"]]) *
  as.numeric(sdata[["CROPDMGEXP"]])

# Total economic loss per record: property damage plus crop damage.
sdata[["TDeconomy"]] <- with(sdata, TDprop + TDcrop)

# Total economic loss per event type, ranked from highest to lowest.
# Columns: `TDeco` (the total) and `type` (the event label).
TDeco <- data.frame(TDeco = tapply(sdata$TDeconomy, sdata$EVTYPE, sum))
TDeco$type <- rownames(TDeco)
rownames(TDeco) <- NULL
TDeco <- TDeco[order(TDeco$TDeco, decreasing = TRUE), ]
library(ggplot2)
# For each ranking, fix the factor levels of `type` to the current
# (descending) row order so the bars are drawn from largest to smallest
# instead of alphabetically, then plot the first ten rows as a bar chart.

# Top ten event types by fatalities.
Fcount$type <- factor(Fcount$type, levels = Fcount$type)
ggplot(data = Fcount[seq_len(10), ], aes(x = type, y = Fcount)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top ten events of Fatalities Across US",
    x = "Event Type",
    y = "Fatality Number"
  ) +
  theme(axis.text.x = element_text(size = 5))

# Top ten event types by injuries.
Icount$type <- factor(Icount$type, levels = Icount$type)
ggplot(data = Icount[seq_len(10), ], aes(x = type, y = Icount)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top ten events of Injuries Across US",
    x = "Event Type",
    y = "Injury Number"
  ) +
  theme(axis.text.x = element_text(size = 5))

# Top ten event types by total economic loss.
TDeco$type <- factor(TDeco$type, levels = TDeco$type)
ggplot(data = TDeco[seq_len(10), ], aes(x = type, y = TDeco)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top ten events in Storm causing Economic Consequences Across US",
    x = "Event Type",
    y = "Total Economic Loss"
  ) +
  theme(axis.text.x = element_text(size = 5))