Storms and other severe weather events can not only threaten public health but also cause economic problems. The goal of this assignment is to identify the 10 most harmful types of severe weather event in each of these two areas. For public health, we examine the data on injuries and fatalities; for economic consequences, we examine the data on property and crop damage. The data are taken from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.
2.1 Download and read data.
# Source archive for the storm data and the local file name it is cached under.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destFile <- "repdata%2Fdata%2FStormData.csv.bz2"

# Download only when no local copy exists, so re-running the analysis does not
# fetch the archive again. mode = "wb" requests a binary transfer (this matters
# on Windows), and the default download method is used so the script does not
# depend on an external curl executable being installed.
if (!file.exists(destFile)) {
  download.file(fileUrl, destfile = destFile, mode = "wb")
}

# Record when the local copy was written (character string, laid out like the
# output of date()).
dataDownloaded <- format(file.mtime(destFile), "%a %b %d %H:%M:%S %Y")

# Read straight from the compressed file; keep text columns as character.
data <- read.csv(destFile, stringsAsFactors = FALSE)
2.2 Subset the data to the columns we focus on:
“EVTYPE”: event types
“FATALITIES”: the number of fatalities in each observation
“INJURIES”: the number of injuries in each observation
“PROPDMG”: the property damage amount in each observation (before the exponent is applied)
“PROPDMGEXP”: the exponent code (magnitude multiplier) for the property damage amount
“CROPDMG”: the crop damage amount in each observation (before the exponent is applied)
“CROPDMGEXP”: the exponent code (magnitude multiplier) for the crop damage amount
# Retain only the event type plus the health and damage fields used below.
keptColumns <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
data <- data[keptColumns]

# Event type is the grouping variable for every summary, so store it as a factor.
data[["EVTYPE"]] <- as.factor(data[["EVTYPE"]])
2.3 Keep only the observations with a positive value in at least one of “FATALITIES”, “INJURIES”, “PROPDMG”, or “CROPDMG”.
# Keep a row when at least one of the four measures is positive. which() drops
# rows where the test evaluates to NA, which is also what subset() does.
hasImpact <- with(
  data,
  FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0
)
data <- data[which(hasImpact), ]
2.4 Deal with the messy format of “PROPDMGEXP” and “CROPDMGEXP”. Add two new columns, totalPROPDMG and totalCROPDMG, which hold the full value of the property damage and crop damage after the exponent has been applied.
# Normalise both exponent columns to upper case so that, for example, "k" and
# "K" are handled by the same rule.
data$PROPDMGEXP <- toupper(data$PROPDMGEXP)
data$CROPDMGEXP <- toupper(data$CROPDMGEXP)

# The new columns start at 0; rows whose exponent code is not listed in the
# tables below keep that 0.
data$totalPROPDMG <- 0
data$totalCROPDMG <- 0

# Property damage: recognised exponent codes and the multiplier used for each.
# The two vectors are parallel (same position = same rule). match() is used
# rather than name-based indexing because the empty string is one of the codes.
propCodes <- c(
  "", "+", "-", "H", "K", "M", "B",
  "0", "2", "3", "4", "5", "6", "7"
)
propFactor <- c(
  1, 1, 1, 1e2, 1e3, 1e6, 1e9,
  1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7
)
propRule <- match(data$PROPDMGEXP, propCodes)
propRows <- which(!is.na(propRule))
data$totalPROPDMG[propRows] <-
  data$PROPDMG[propRows] * propFactor[propRule[propRows]]

# Crop damage: same approach with its own, shorter table of codes.
cropCodes <- c("", "?", "K", "M", "B", "0")
cropFactor <- c(1, 1, 1e3, 1e6, 1e9, 1)
cropRule <- match(data$CROPDMGEXP, cropCodes)
cropRows <- which(!is.na(cropRule))
data$totalCROPDMG[cropRows] <-
  data$CROPDMG[cropRows] * cropFactor[cropRule[cropRows]]
2.5 Calculate the total number of injuries and of fatalities for each event type.
# Total injuries per event type, sorted from most to fewest.
# decreasing = TRUE is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
injuries <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)
injuries <- injuries[order(injuries[["INJURIES"]], decreasing = TRUE), ]

# Total fatalities per event type, sorted from most to fewest.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
fatalities <- fatalities[order(fatalities[["FATALITIES"]], decreasing = TRUE), ]
2.6 The ten most harmful event types, ranked by number of injuries and by number of fatalities.
# The tables are already sorted in decreasing order, so the leading rows (at
# most ten) are the most harmful event types.
top10injuries <- injuries[seq_len(min(10L, nrow(injuries))), , drop = FALSE]
top10fatalities <- fatalities[seq_len(min(10L, nrow(fatalities))), , drop = FALSE]
2.7 Calculate the total property damage and the total crop damage for each event type, expressed in billions.
# Total property damage per event type, sorted from largest to smallest and
# then rescaled to billions.
# decreasing = TRUE is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
properties <- aggregate(totalPROPDMG ~ EVTYPE, data = data, FUN = sum)
properties <- properties[order(properties[["totalPROPDMG"]], decreasing = TRUE), ]
properties$totalPROPDMG <- properties$totalPROPDMG / 1e9

# Total crop damage per event type, sorted and rescaled the same way.
crops <- aggregate(totalCROPDMG ~ EVTYPE, data = data, FUN = sum)
crops <- crops[order(crops[["totalCROPDMG"]], decreasing = TRUE), ]
crops$totalCROPDMG <- crops$totalCROPDMG / 1e9
2.8 The ten most harmful event types, ranked by property damage and by crop damage.
# The tables are already sorted in decreasing order, so the leading rows (at
# most ten) are the costliest event types.
top10properties <- properties[seq_len(min(10L, nrow(properties))), , drop = FALSE]
top10crops <- crops[seq_len(min(10L, nrow(crops))), , drop = FALSE]
3.1 The ten event types that caused the most injuries are:
# Auto-print the injuries ranking; the "##" lines that follow are its output.
top10injuries
## EVTYPE INJURIES
## 407 TORNADO 91346
## 423 TSTM WIND 6957
## 86 FLOOD 6789
## 61 EXCESSIVE HEAT 6525
## 258 LIGHTNING 5230
## 151 HEAT 2100
## 238 ICE STORM 1975
## 73 FLASH FLOOD 1777
## 364 THUNDERSTORM WIND 1488
## 134 HAIL 1361
3.2 The ten event types that caused the most fatalities are:
# Auto-print the fatalities ranking; the "##" lines that follow are its output.
top10fatalities
## EVTYPE FATALITIES
## 407 TORNADO 5633
## 61 EXCESSIVE HEAT 1903
## 73 FLASH FLOOD 978
## 151 HEAT 937
## 258 LIGHTNING 816
## 423 TSTM WIND 504
## 86 FLOOD 470
## 306 RIP CURRENT 368
## 200 HIGH WIND 248
## 11 AVALANCHE 224
Since the plot for top10injuries would look similar to the one for top10fatalities, we show only the top10fatalities plot below:
library(ggplot2)

# Bar chart of the event types in top10fatalities. reorder() arranges the bars
# from the highest to the lowest fatality count; without it the x axis follows
# the factor-level order of EVTYPE, which hides the ranking the chart is meant
# to show. The axis label is set explicitly in labs(), so wrapping EVTYPE in
# reorder() does not change it.
ggplot(
  top10fatalities,
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  # Rotate the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Fatalities Number",
    title = "Top 10 Events With The Highest Fatalities Number"
  )
3.3 The ten event types that caused the most property damage are (totalPROPDMG is in billions):
# Auto-print the property-damage ranking; the "##" lines that follow are its output.
top10properties
## EVTYPE totalPROPDMG
## 86 FLOOD 144.657710
## 224 HURRICANE/TYPHOON 69.305840
## 407 TORNADO 56.947381
## 350 STORM SURGE 43.323536
## 73 FLASH FLOOD 16.822674
## 134 HAIL 15.735268
## 215 HURRICANE 11.868319
## 417 TROPICAL STORM 7.703891
## 481 WINTER STORM 6.688497
## 200 HIGH WIND 5.270046
3.4 The ten event types that caused the most crop damage are (totalCROPDMG is in billions):
# Auto-print the crop-damage ranking; the "##" lines that follow are its output.
top10crops
## EVTYPE totalCROPDMG
## 49 DROUGHT 13.972566
## 86 FLOOD 5.661968
## 310 RIVER FLOOD 5.029459
## 238 ICE STORM 5.022113
## 134 HAIL 3.025954
## 215 HURRICANE 2.741910
## 224 HURRICANE/TYPHOON 2.607873
## 73 FLASH FLOOD 1.421317
## 67 EXTREME COLD 1.292973
## 114 FROST/FREEZE 1.094086
Similarly, we show only the plot for top10properties here; the plot for top10crops would look similar.
# Bar chart of the event types in top10properties. reorder() arranges the bars
# from the largest to the smallest damage total; without it the x axis follows
# the factor-level order of EVTYPE, which hides the ranking the chart is meant
# to show. The axis label is set explicitly in labs(), so wrapping EVTYPE in
# reorder() does not change it.
ggplot(
  top10properties,
  aes(x = reorder(EVTYPE, -totalPROPDMG), y = totalPROPDMG)
) +
  geom_bar(stat = "identity") +
  # Rotate the event-type labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Total Property Damage (in Billions)",
    title = "Top 10 Events With The Greatest Property Damage"
  )