Title: Analysis of NOAA Storm Data: Effects on Human Health and the Economy in the US

synopsis:

The NOAA storm database was downloaded and read into an R data frame called “stormdata”. The subset of columns containing the population health and economic loss information was named “sdata”. The data were cleaned further before analysis. Fatalities and injuries were each totalled by event type, and the ten event types with the highest fatality and injury counts were plotted separately using ggplot2. The economic consequences comprise two parts: property damage and crop damage. These two damages were calculated separately and then added together. The total economic loss was then analyzed by event type, and the ten event types with the greatest economic consequences were plotted with ggplot2.

Data Processing:

  1. Read data into R:
# Load the raw NOAA storm database from the bzip2-compressed CSV file.
stormdata <- read.csv("stormdata.csv.bz2", header = TRUE)
# Keep only the columns the analysis uses: event type, casualty counts, and
# the damage amounts with their magnitude codes. Selecting by name instead of
# by position (8 and 23:28) keeps the subset correct even if the column order
# of the input file changes.
analysis_cols <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
sdata <- stormdata[, analysis_cols]
  1. Data cleaning: to group records of the same event type together, leading whitespace was removed from each event name and all letters were converted to uppercase.
# Normalise the event labels so that spelling variants of one event type
# aggregate together: strip leading whitespace, then convert to upper case.
sdata[["EVTYPE"]] <- toupper(trimws(sdata[["EVTYPE"]], which = "left"))
  1. The fatality data was grouped by event type:
# Total fatalities per event type, stored as a data frame with the columns
# `Fcount` (total) and `type` (event name), sorted from the highest total down.
fatalities_by_type <- tapply(sdata$FATALITIES, sdata$EVTYPE, sum)
Fcount <- data.frame(
  Fcount = as.vector(fatalities_by_type),
  type = names(fatalities_by_type),
  stringsAsFactors = FALSE
)
# Spell out TRUE: the shorthand `T` is an ordinary variable that can be
# reassigned, which would silently flip the sort order.
Fcount <- Fcount[order(Fcount$Fcount, decreasing = TRUE), ]
  1. The Injury data was grouped by event type:
# Total injuries per event type, stored as a data frame with the columns
# `Icount` (total) and `type` (event name), sorted from the highest total down.
injuries_by_type <- tapply(sdata$INJURIES, sdata$EVTYPE, sum)
Icount <- data.frame(
  Icount = as.vector(injuries_by_type),
  type = names(injuries_by_type),
  stringsAsFactors = FALSE
)
# Spell out TRUE: the shorthand `T` is an ordinary variable that can be
# reassigned, which would silently flip the sort order.
Icount <- Icount[order(Icount$Icount, decreasing = TRUE), ]
  1. Change columns “PROPDMGEXP” and “CROPDMGEXP” values:
    According to the Storm Data Documentation (section 2.7, page 12), the letters and digits in these columns encode the magnitude of the damage figures, so they must be replaced by the corresponding numeric multipliers before the losses can be calculated.
# Count how often each distinct magnitude code occurs in the property-damage
# exponent column, to see which codes the recoding step has to handle.
table(sdata$PROPDMGEXP)
## 
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
# Same tabulation for the crop-damage exponent column.
table(sdata$CROPDMGEXP)
## 
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994
# Translate the magnitude codes in PROPDMGEXP / CROPDMGEXP into numeric
# multipliers.
#
# Every code is looked up in a single pass. Rewriting the column one code at a
# time is unsafe: once "-", "?", "+" and "0" have been replaced by 1 they are
# stored as the string "1", so a later `== 1` replacement turns them into 10
# as well.
exp_multipliers <- c(
  "-" = 1e0, "?" = 1e0, "+" = 1e0,
  "0" = 1e0, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
)

# Map a vector of magnitude codes to their multipliers.
#
# codes: character (or factor) vector of raw codes.
# Returns a numeric vector of the same length. Letter codes are matched
# case-insensitively, an empty code means "no scaling" (1), and any code that
# is not in the lookup table yields NA.
decode_exp <- function(codes) {
  codes <- toupper(as.character(codes))
  multipliers <- unname(exp_multipliers[codes])
  # "" never matches a name in the lookup table, so handle it explicitly.
  multipliers[!nzchar(codes)] <- 1
  multipliers
}

# The columns now hold numeric multipliers; calling as.numeric() on them later
# leaves the values unchanged.
sdata$PROPDMGEXP <- decode_exp(sdata$PROPDMGEXP)
sdata$CROPDMGEXP <- decode_exp(sdata$CROPDMGEXP)
  1. Calculate the economic consequence of damage:
    Total economic loss = property damage + crop damage.
# Scale each raw damage figure by its magnitude multiplier, then add the
# property and crop losses to obtain the total economic loss per record.
sdata[["TDprop"]] <- as.numeric(sdata[["PROPDMG"]]) *
  as.numeric(sdata[["PROPDMGEXP"]])
sdata[["TDcrop"]] <- as.numeric(sdata[["CROPDMG"]]) *
  as.numeric(sdata[["CROPDMGEXP"]])
sdata[["TDeconomy"]] <- sdata[["TDprop"]] + sdata[["TDcrop"]]
  1. The total economic loss data was grouped by event type:
# Total economic loss per event type, stored as a data frame with the columns
# `TDeco` (total loss) and `type` (event name), sorted from the largest loss
# down.
loss_by_type <- tapply(sdata$TDeconomy, sdata$EVTYPE, sum)
TDeco <- data.frame(
  TDeco = as.vector(loss_by_type),
  type = names(loss_by_type),
  stringsAsFactors = FALSE
)
# Spell out TRUE: the shorthand `T` is an ordinary variable that can be
# reassigned, which would silently flip the sort order.
TDeco <- TDeco[order(TDeco$TDeco, decreasing = TRUE), ]

Results:

  1. Across the United States, which types of events are most harmful with respect to population health?
    A plot of the ten event types most harmful to population health clearly shows that TORNADO is the most harmful event!
library(ggplot2)
# Fix the factor levels in the current (descending) row order so the bars are
# drawn from the highest fatality count to the lowest.
Fcount[["type"]] <- factor(Fcount[["type"]], levels = Fcount[["type"]])
# Bar chart of the ten event types with the most fatalities.
ggplot(data = Fcount[seq_len(10), ], mapping = aes(x = type, y = Fcount)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top ten events of Fatalities Across US",
    x = "Event Type",
    y = "Fatality Number"
  ) +
  theme(axis.text.x = element_text(size = 5))

# Fix the factor levels in the current (descending) row order so the bars are
# drawn from the highest injury count to the lowest.
Icount[["type"]] <- factor(Icount[["type"]], levels = Icount[["type"]])
# Bar chart of the ten event types with the most injuries.
ggplot(data = Icount[seq_len(10), ], mapping = aes(x = type, y = Icount)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top ten events of Injuries Across US",
    x = "Event Type",
    y = "Injury Number"
  ) +
  theme(axis.text.x = element_text(size = 5))

  1. Across the United States, which types of events have the greatest economic consequences?
    After combining property and crop damage, a plot of the top ten event types shows that FLOOD is the storm event with the greatest economic consequences!
# Fix the factor levels in the current (descending) row order so the bars are
# drawn from the largest total loss to the smallest.
TDeco[["type"]] <- factor(TDeco[["type"]], levels = TDeco[["type"]])
# Bar chart of the ten event types with the largest combined property and
# crop damage.
ggplot(data = TDeco[seq_len(10), ], mapping = aes(x = type, y = TDeco)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top ten events in Storm causing Economic Consequences Across US",
    x = "Event Type",
    y = "Total Economic Loss"
  ) +
  theme(axis.text.x = element_text(size = 5))