Synopsis: The purpose of this report is to analyse the data collected in the NOAA Storm Database and to identify which types of events are most harmful with respect to population health and which have the greatest economic consequences.
Data Download
## Location of the NOAA storm data set (a bzip2-compressed CSV file)
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
## Local copy of the archive. The file keeps its historical ".csv" name even
## though its content is bzip2-compressed; read.csv() opens it through a file
## connection, which detects the compression and decompresses while reading.
destfile <- "./stormdata.csv"
## Download only when no local copy exists, so re-running the report does not
## fetch the whole archive again. mode = "wb" keeps the transfer binary on
## every platform. The status code returned by download.file() is not stored
## in `storm`, which is reserved for the data itself.
if (!file.exists(destfile)) {
  download.file(url, destfile, mode = "wb")
}
## Load the complete data set into a data frame
storm <- read.csv(destfile)
Data Processing
In order to evaluate the health impact, the total fatalities and injuries for each event type are calculated with the following code
## Total fatalities per event type; keep the five types with the highest totals
fa <- setNames(
  aggregate(storm$FATALITIES, by = list(storm$EVTYPE), FUN = sum),
  c("EVTYPE", "FATALITIES")
)
orderfa <- fa[order(fa$FATALITIES, decreasing = TRUE)[seq_len(5)], ]
## Total injuries per event type; keep the five types with the highest totals
inj <- setNames(
  aggregate(storm$INJURIES, by = list(storm$EVTYPE), FUN = sum),
  c("EVTYPE", "INJURIES")
)
orderinj <- inj[order(inj$INJURIES, decreasing = TRUE)[seq_len(5)], ]
Results
## Show the five event types with the highest fatality totals.
## NOTE(review): the "##" lines that follow each statement appear to be the
## rendered console output of the report (row name, EVTYPE, total), not code.
orderfa
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## Show the five event types with the highest injury totals.
orderinj
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
library(ggplot2)
## Bar chart of the five event types with the most fatalities.
## reorder() sorts the bars by their total instead of alphabetically, so the
## ranking can be read directly off the plot. geom_col() draws bars whose
## height is the y value (the idiomatic form of geom_bar(stat = "identity")).
gfa <- ggplot() +
  geom_col(
    data = orderfa,
    aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES),
    show.legend = FALSE
  ) +
  xlab("EVTYPE")
gfa + ggtitle("Top 5 events with highest fatalities")
## Bar chart of the five event types with the most injuries, sorted the same way
ginj <- ggplot() +
  geom_col(
    data = orderinj,
    aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES),
    show.legend = FALSE
  ) +
  xlab("EVTYPE")
ginj + ggtitle("Top 5 events with highest injuries")
The event type with the most fatalities is tornado, followed by excessive heat, flash flood, heat, and lightning. The event type with the most injuries is also tornado, followed by tstm wind, flood, excessive heat, and lightning.
Two kinds of damage result from storms. The first is property damage, recorded as PROPDMG. PROPDMGEXP is an alphabetical character signifying the magnitude of the number, including “K” for thousands, “M” for millions, and “B” for billions. The second is crop damage, recorded as CROPDMG with its magnitude in CROPDMGEXP.
## select the information we need from the original dataset
## Keep every event that caused property damage OR crop damage. Requiring both
## to be non-zero would drop all events that damaged only one of the two and
## so understate the totals of each kind.
## NOTE(review): this filter and the exponent decoding below change the damage
## totals compared with earlier runs; re-check the written conclusions after
## re-running the report.
new <- subset(
  storm,
  PROPDMG != 0 | CROPDMG != 0,
  select = c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
)

## Translate a vector of magnitude codes into powers of ten.
##   code: character (or factor) vector of exponent codes
##   returns: numeric vector of the same length, never NA
## Letters are matched case-insensitively: H = 2 (hundreds), K = 3 (thousands),
## M = 6 (millions), B = 9 (billions). A single digit stands for itself.
## Every other value (blank, "+", "-", "?", ...) maps to 0, i.e. the recorded
## number is taken as is, so an unknown code cannot turn a total into NA.
decode_exponent <- function(code) {
  code <- toupper(trimws(as.character(code)))
  power <- rep(0, length(code))
  letter_power <- c(H = 2, K = 3, M = 6, B = 9)
  is_letter <- code %in% names(letter_power)
  power[is_letter] <- unname(letter_power[code[is_letter]])
  is_digit <- grepl("^[0-9]$", code)
  power[is_digit] <- as.numeric(code[is_digit])
  power
}

## substitute the alphabetic letter with numeric number for property damage
new$PROPDMGEXP <- decode_exponent(new$PROPDMGEXP)
## substitute the alphabetic letter with numeric number for crop damage
new$CROPDMGEXP <- decode_exponent(new$CROPDMGEXP)
## Derive the final damage figures: PROPNEW and CROPNEW are the recorded
## amounts multiplied by ten raised to their exponent columns
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
new <- new %>%
  mutate(
    PROPNEW = PROPDMG * 10^PROPDMGEXP,
    CROPNEW = CROPDMG * 10^CROPDMGEXP
  )
Results
## Total property damage per event type, with descriptive column names
prop <- setNames(
  aggregate(new$PROPNEW, by = list(new$EVTYPE), FUN = sum),
  c("EVTYPE", "TOTALPROP")
)
## Ten event types with the largest total property damage, largest first
totalprop <- prop[order(prop$TOTALPROP, decreasing = TRUE)[seq_len(10)], ]
## Total crop damage per event type, with descriptive column names
crop <- setNames(
  aggregate(new$CROPNEW, by = list(new$EVTYPE), FUN = sum),
  c("EVTYPE", "TOTALCROP")
)
## Ten event types with the largest total crop damage, largest first
totalcrop <- crop[order(crop$TOTALCROP, decreasing = TRUE)[seq_len(10)], ]
Graph the property and crop damage
## Horizontal bar chart of the ten event types with the largest crop damage.
## reorder() sorts the bars by their total (largest at the top after
## coord_flip()) instead of alphabetically; geom_col() draws bars whose length
## is the y value.
ggplot() +
  geom_col(
    data = totalcrop,
    aes(x = reorder(EVTYPE, TOTALCROP), y = TOTALCROP),
    show.legend = FALSE
  ) +
  xlab("EVTYPE") +
  ggtitle("Total crop damage due to weather events") +
  coord_flip()
## Horizontal bar chart of the ten event types with the largest property
## damage, sorted the same way
ggplot() +
  geom_col(
    data = totalprop,
    aes(x = reorder(EVTYPE, TOTALPROP), y = TOTALPROP),
    show.legend = FALSE
  ) +
  xlab("EVTYPE") +
  ggtitle("Total prop damage due to weather events") +
  coord_flip()
Flood creates the most damage for both property and crop.