Synopsis

The purpose of this assignment is to examine the impact of severe weather events on people and on the economy by exploring the NOAA Storm Database. In particular, the analysis focuses on the number of injuries and fatalities and on the amount of property damage and crop damage. Hence, we can answer two basic questions: which types of weather events are most harmful to population health, and which types of weather events have the greatest economic consequences?

Data Processing

The first step is to read the data and keep only the variables we consider: event type (EVTYPE), number of fatalities (FATALITIES), number of injuries (INJURIES), property damage (PROPDMG), exponent of property damage (PROPDMGEXP), crop damage (CROPDMG), and exponent of crop damage (CROPDMGEXP).

# Columns kept for the analysis: event type, casualty counts, and the damage
# amounts together with their exponent codes
var <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
# Read the storm data file and immediately drop every other column
data <- read.csv("repdata_data_StormData.csv")[var]
# Preview the first five rows
head(data, n = 5)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0

Results

1. Health Effects

1.1 Calculate the total number of injuries and fatalities for each event type across the US. Then extract the top 5 most harmful weather events.

# Sum fatalities and injuries separately for every event type
fatal <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
injury <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
# Rank the event types by descending total and keep the first five rows
fataltop <- fatal[order(-fatal$FATALITIES)[1:5], ]
injurytop <- injury[order(-injury$INJURIES)[1:5], ]

fataltop
##             EVTYPE FATALITIES
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
injurytop
##             EVTYPE INJURIES
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230

1.2 Plot the results

library(ggplot2)
library(gridExtra)

# Horizontal bar chart of the five event types with the most injuries.
# reorder() sorts the bars by their total, so the chart shows the ranking
# instead of following the alphabetical order of the event names.
plot1 <- ggplot(injurytop, aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Total number of injuries",
    title = "Injuries by weather events in the US - Top 5"
  )

# Same chart for fatalities, again ordered by total
plot2 <- ggplot(
  fataltop,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Total number of fatalities",
    title = "Fatalities by weather events in the US - Top 5"
  )

# Stack the two charts vertically in a single figure
grid.arrange(plot1, plot2, nrow = 2)

2. Economic Effects

2.1 Proper value of damage

Since the value of damage is recorded as 2 variables, the number and the exponent, it is necessary to produce a proper variable for the estimated property damage and crop damage. This value is generated by multiplying the damage number by its exponent. Moreover, the exponent variable, which is recorded as a letter or could not be recorded properly, needs to be converted to a usable number. In the end, 2 more variables are added to the data: property exponent (PROPEXP) and crop exponent (CROPEXP). The property damage and crop damage can then be calculated as property damage value (PROPDMGVAL) and crop damage value (CROPDMGVAL).

# Convert a vector of damage-exponent codes into numeric multipliers.
#
# The mapping is written out explicitly (code -> multiplier) rather than
# being tied to the order in which unique() happens to return the codes, so
# it stays correct if the rows are reordered or the data file changes.
#   blank            -> 1
#   "+", "-", "?"    -> 0
#   "H"/"h"          -> 100
#   "K"/"k"          -> 1e3
#   "M"/"m"          -> 1e6
#   "B"/"b"          -> 1e9
#   digit d ("0"-"9") -> 10^d
# Any other code yields NA so that unexpected values are visible downstream.
#
# code: character (or factor) vector of exponent codes.
# Returns a numeric vector of the same length as `code`.
exponent_multiplier <- function(code) {
  code <- toupper(as.character(code))
  # match() is used instead of a named vector because a blank ("") name can
  # never be matched by character indexing
  known_codes <- c("", "+", "-", "?", "H", "K", "M", "B", as.character(0:9))
  multipliers <- c(1, 0, 0, 0, 1e2, 1e3, 1e6, 1e9, 10^(0:9))
  multipliers[match(code, known_codes)]
}

# Store the property exponent codes as character
data$PROPDMGEXP <- as.character(data$PROPDMGEXP)

# Property damage: numeric multiplier, then damage value
data$PROPEXP <- exponent_multiplier(data$PROPDMGEXP)
data$PROPDMGVAL <- data$PROPDMG * data$PROPEXP

# Crop damage: numeric multiplier, then damage value
data$CROPEXP <- exponent_multiplier(data$CROPDMGEXP)
data$CROPDMGVAL <- data$CROPDMG * data$CROPEXP

head(data, 5)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
##   PROPEXP PROPDMGVAL CROPEXP CROPDMGVAL
## 1    1000      25000       1          0
## 2    1000       2500       1          0
## 3    1000      25000       1          0
## 4    1000       2500       1          0
## 5    1000       2500       1          0

2.2 Calculate the total damage value for each event type across the US. Select the top 5 most damaging weather events for property and for crops.

# Sum the property and crop damage values separately for every event type
propdmg <- aggregate(PROPDMGVAL ~ EVTYPE, data = data, FUN = sum)
cropdmg <- aggregate(CROPDMGVAL ~ EVTYPE, data = data, FUN = sum)

# Rank the event types by descending total and keep the first five rows
propdmgtop <- propdmg[order(-propdmg$PROPDMGVAL)[1:5], ]
cropdmgtop <- cropdmg[order(-cropdmg$CROPDMGVAL)[1:5], ]

propdmgtop
##                EVTYPE   PROPDMGVAL
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380617
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673979
cropdmgtop
##          EVTYPE  CROPDMGVAL
## 95      DROUGHT 13972566000
## 170       FLOOD  5661968450
## 590 RIVER FLOOD  5029459000
## 427   ICE STORM  5022113500
## 244        HAIL  3025954473

2.3 Plot the results

# Horizontal bar chart of the five event types with the largest property
# damage. reorder() sorts the bars by their total, so the chart shows the
# ranking instead of following the alphabetical order of the event names.
plot1 <- ggplot(
  propdmgtop,
  aes(x = reorder(EVTYPE, PROPDMGVAL), y = PROPDMGVAL)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Total value of property damage",
    title = "Property damaged by weather events in the US - Top 5"
  )

# Same chart for crop damage, again ordered by total
plot2 <- ggplot(
  cropdmgtop,
  aes(x = reorder(EVTYPE, CROPDMGVAL), y = CROPDMGVAL)
) +
  geom_col(fill = "firebrick") +
  coord_flip() +
  labs(
    x = "Event type",
    y = "Total value of crop damage",
    title = "Crop damaged by weather events in the US - Top 5"
  )

# Stack the two charts vertically in a single figure
grid.arrange(plot1, plot2, nrow = 2)

Conclusion

In conclusion, tornadoes have the worst impact on population health in terms of both the number of injuries and the number of fatalities. On the economic side, floods have the worst impact on property, while drought is the major cause of crop damage.