1. Synopsis

Storms and other severe weather events can threaten public health and can also cause economic problems. The goal of this assignment is to identify the 10 most harmful types of severe weather event in each of these two areas. For public health, we explore the data on injuries and fatalities; for economic problems, we use the data on property and crop damage. The data are extracted from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.

2. Data Processing

2.1 Download and read data.

# Remote location of the compressed storm data and the name of the local copy.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
localFile <- "repdata%2Fdata%2FStormData.csv.bz2"

# Leave the download method at R's platform default rather than requiring an
# external curl binary, and transfer in binary mode so the bzip2 archive is
# not altered on platforms that distinguish text from binary files.
download.file(fileUrl, destfile = localFile, mode = "wb")

# Record when the data were fetched.
dataDownloaded <- date()

# read.csv() reads the bzip2-compressed file directly.
data <- read.csv(localFile, stringsAsFactors = FALSE)

2.2 Subset the data to the columns we focus on:
“EVTYPE”: the event type
“FATALITIES”: the number of fatalities in each observation
“INJURIES”: the number of injuries in each observation
“PROPDMG”: the value of property damage in each observation
“PROPDMGEXP”: the exponent (magnitude code) of the property damage value
“CROPDMG”: the value of crop damage in each observation
“CROPDMGEXP”: the exponent (magnitude code) of the crop damage value

# Keep only the event type, the casualty counts and the damage columns.
data <- data[, c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]

# Treat the event type as a categorical variable.
data[["EVTYPE"]] <- factor(data[["EVTYPE"]])

2.3 Keep only observations with a positive value in at least one of “FATALITIES”, “INJURIES”, “PROPDMG”, or “CROPDMG”.

# Keep a row when any casualty or damage figure is positive. which() drops
# rows whose condition is NA, matching how subset() treats them.
data <- data[which(
  data$FATALITIES > 0 | data$INJURIES > 0 |
    data$PROPDMG > 0 | data$CROPDMG > 0
), ]

2.4 Deal with the messy format of “PROPDMGEXP” and “CROPDMGEXP”. Add two new columns, totalPROPDMG and totalCROPDMG, which hold the full value of the property damage and the crop damage.

# Change all exponent codes to upper case so that e.g. "k" and "K" are
# treated alike.
data$PROPDMGEXP <- toupper(data$PROPDMGEXP)
data$CROPDMGEXP <- toupper(data$CROPDMGEXP)

# Scale damage amounts by the multiplier that belongs to their exponent code.
#   amount       numeric damage figures (PROPDMG or CROPDMG)
#   code         exponent code of each figure, same length as amount
#   codes        the exponent codes that are recognised
#   multipliers  the factor for each entry of codes, in the same order
#   label        column name used in the warning message
# Returns a numeric vector the same length as amount. Figures whose code is
# not recognised are valued at 0; a warning is raised when that discards a
# positive amount, so the loss is not silent.
scaleDamage <- function(amount, code, codes, multipliers, label) {
  multiplier <- multipliers[match(code, codes)]
  recognised <- !is.na(multiplier)

  dropped <- !recognised & !is.na(amount) & amount > 0
  if (any(dropped)) {
    warning(
      sum(dropped), " positive ", label,
      " value(s) have an unrecognised exponent code and are counted as 0: ",
      paste(unique(code[dropped]), collapse = ", "),
      call. = FALSE
    )
  }

  total <- numeric(length(amount))
  total[recognised] <- amount[recognised] * multiplier[recognised]
  total
}

# Calculate the value of totalPROPDMG in each observation. Blank, "+", "-"
# and "0" leave the amount unchanged; letters are hundreds, thousands,
# millions and billions; a digit d between 2 and 7 means 10^d.
data$totalPROPDMG <- scaleDamage(
  data$PROPDMG, data$PROPDMGEXP,
  codes = c(
    "", "+", "-", "0", "H", "2", "K", "3",
    "4", "5", "M", "6", "7", "B"
  ),
  multipliers = c(
    1, 1, 1, 1, 1e2, 1e2, 1e3, 1e3,
    1e4, 1e5, 1e6, 1e6, 1e7, 1e9
  ),
  label = "PROPDMG"
)

# Calculate the value of totalCROPDMG in each observation. Blank, "?" and
# "0" leave the amount unchanged.
data$totalCROPDMG <- scaleDamage(
  data$CROPDMG, data$CROPDMGEXP,
  codes = c("", "?", "0", "K", "M", "B"),
  multipliers = c(1, 1, 1, 1e3, 1e6, 1e9),
  label = "CROPDMG"
)

2.5 Calculate the sum of INJURIES and FATALITIES in each event type.

# Total injuries per event type, sorted from most to fewest.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
injuries <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
injuries <- injuries[order(injuries$INJURIES, decreasing = TRUE), ]

# Total fatalities per event type, sorted from most to fewest.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
fatalities <- fatalities[order(fatalities$FATALITIES, decreasing = TRUE), ]

2.6 Select the ten most harmful event types by number of injuries and by number of fatalities.

# Both tables are already sorted in decreasing order, so the first ten rows
# are the ten largest totals.
top10injuries <- head(injuries, n = 10L)
top10fatalities <- head(fatalities, n = 10L)

2.7 Calculate the sum of Property damages and Crops damages in each event type.

# Total property damage per event type, sorted from largest to smallest and
# then expressed in billions.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
properties <- aggregate(totalPROPDMG ~ EVTYPE, data = data, FUN = sum)
properties <- properties[order(properties$totalPROPDMG, decreasing = TRUE), ]
properties$totalPROPDMG <- properties$totalPROPDMG / 1e9

# Total crop damage per event type, sorted from largest to smallest and then
# expressed in billions.
crops <- aggregate(totalCROPDMG ~ EVTYPE, data = data, FUN = sum)
crops <- crops[order(crops$totalCROPDMG, decreasing = TRUE), ]
crops$totalCROPDMG <- crops$totalCROPDMG / 1e9

2.8 Ten most harmful events with the highest Property damages and Crops damages number

# Both tables are already sorted in decreasing order, so the first ten rows
# are the ten largest damage totals.
top10properties <- head(properties, n = 10L)
top10crops <- head(crops, n = 10L)

3. Results

3.1 The ten event types that caused the most injuries are:

# Show the ten rows of `injuries` with the largest INJURIES totals.
top10injuries
##                EVTYPE INJURIES
## 407           TORNADO    91346
## 423         TSTM WIND     6957
## 86              FLOOD     6789
## 61     EXCESSIVE HEAT     6525
## 258         LIGHTNING     5230
## 151              HEAT     2100
## 238         ICE STORM     1975
## 73        FLASH FLOOD     1777
## 364 THUNDERSTORM WIND     1488
## 134              HAIL     1361

3.2 The ten event types that caused the most fatalities are:

# Show the ten rows of `fatalities` with the largest FATALITIES totals.
top10fatalities
##             EVTYPE FATALITIES
## 407        TORNADO       5633
## 61  EXCESSIVE HEAT       1903
## 73     FLASH FLOOD        978
## 151           HEAT        937
## 258      LIGHTNING        816
## 423      TSTM WIND        504
## 86           FLOOD        470
## 306    RIP CURRENT        368
## 200      HIGH WIND        248
## 11       AVALANCHE        224

Since the plot for top10injuries would look similar to the one for top10fatalities, we only show the top10fatalities plot below:

library(ggplot2)
# Bar chart of the ten event types with the most fatalities. reorder() sorts
# the bars from most to fewest fatalities; without it they appear in the
# alphabetical order of the EVTYPE factor levels.
ggplot(
  top10fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  # Rotate the event names so that long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Fatalities Number",
    title = "Top 10 Events With The Highest Fatalities Number"
  )

3.3 The ten event types that caused the most property damage are (totalPROPDMG is in billions of US dollars):

# Show the ten rows of `properties` with the largest totalPROPDMG values.
top10properties
##                EVTYPE totalPROPDMG
## 86              FLOOD   144.657710
## 224 HURRICANE/TYPHOON    69.305840
## 407           TORNADO    56.947381
## 350       STORM SURGE    43.323536
## 73        FLASH FLOOD    16.822674
## 134              HAIL    15.735268
## 215         HURRICANE    11.868319
## 417    TROPICAL STORM     7.703891
## 481      WINTER STORM     6.688497
## 200         HIGH WIND     5.270046

3.4 The ten event types that caused the most crop damage are (totalCROPDMG is in billions of US dollars):

# Show the ten rows of `crops` with the largest totalCROPDMG values.
top10crops
##                EVTYPE totalCROPDMG
## 49            DROUGHT    13.972566
## 86              FLOOD     5.661968
## 310       RIVER FLOOD     5.029459
## 238         ICE STORM     5.022113
## 134              HAIL     3.025954
## 215         HURRICANE     2.741910
## 224 HURRICANE/TYPHOON     2.607873
## 73        FLASH FLOOD     1.421317
## 67       EXTREME COLD     1.292973
## 114      FROST/FREEZE     1.094086

Similarly, we only show the plot for top10properties here; the plot for top10crops would look similar.

# Bar chart of the ten event types with the greatest property damage.
# reorder() sorts the bars from largest to smallest damage; without it they
# appear in the alphabetical order of the EVTYPE factor levels.
ggplot(
  top10properties,
  aes(x = reorder(EVTYPE, -totalPROPDMG), y = totalPROPDMG)
) +
  geom_bar(stat = "identity") +
  # Rotate the event names so that long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Total Property Damage (in Billions)",
    title = "Top 10 Events With The Greatest Property Damage"
  )