Peer review Assignment 2: Explore the NOAA Storm Database

Synopsis

Data from the NOAA Storm Database were analyzed to understand how weather affects people and property in the United States. We find that tornadoes are the most harmful weather events for people (considering both deaths and injuries), whereas floods pose the most serious risk to property and crops, with about 150 billion dollars lost over a long period of time (1950-2011).

### Data Processing

Let’s download the bz2 file.

# Location of the compressed NOAA storm data set
fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Download the archive only when no local copy exists, so that re-running
# the analysis does not fetch the same file again.
if (!file.exists("data.bz2")) {
  download.file(fileURL, destfile = "data.bz2", method = "curl")
}

Now let’s read the file and select just the information relevant to our purpose. The events in the database start in 1950 and end in November 2011; however, we do not need the date information for this analysis.

# Load the storm records straight from the compressed archive
data <- read.csv("data.bz2")
# Retain only the event type, the casualty counts and the damage figures
# (amount plus exponent code for property and for crops)
data <- data[c("EVTYPE", "FATALITIES", "INJURIES",
               "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
# Show the structure of the reduced data frame
str(data)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...

Let’s organize the data on fatalities and injuries: first we sum these values for each type of weather event, and then we select the top 10 event types in each group.

# Sum one casualty column per event type and keep the n largest totals.
#   df      - data frame holding EVTYPE and the column named by `measure`
#   measure - name of the numeric column to total, e.g. "FATALITIES"
#   label   - value written to the `condition` column to tag the rows
#   n       - maximum number of event types to return
# Returns a data frame (EVTYPE, <measure>, condition) sorted by decreasing
# total.
top_events <- function(df, measure, label, n = 10) {
  totals <- aggregate(reformulate("EVTYPE", response = measure),
                      data = df, FUN = sum)
  totals$condition <- rep(label, nrow(totals))
  ranked <- totals[order(-totals[[measure]]), ]
  # head() returns at most n rows and, unlike [1:n, ], never pads the result
  # with NA rows when fewer than n event types are present
  head(ranked, n)
}

# Top 10 event types by number of fatalities
fatalEvents <- top_events(data, "FATALITIES", "fatality")
# Top 10 event types by number of injuries
injuriesEvents <- top_events(data, "INJURIES", "injuries")
# Just to see the head of each group
head(fatalEvents)
##             EVTYPE FATALITIES condition
## 834        TORNADO       5633  fatality
## 130 EXCESSIVE HEAT       1903  fatality
## 153    FLASH FLOOD        978  fatality
## 275           HEAT        937  fatality
## 464      LIGHTNING        816  fatality
## 856      TSTM WIND        504  fatality
head(injuriesEvents)
##             EVTYPE INJURIES condition
## 834        TORNADO    91346  injuries
## 856      TSTM WIND     6957  injuries
## 170          FLOOD     6789  injuries
## 130 EXCESSIVE HEAT     6525  injuries
## 464      LIGHTNING     5230  injuries
## 275           HEAT     2100  injuries
# Stack both tables into one object. rbind() needs matching column names, so
# the injuries table is relabelled only inside this call; injuriesEvents
# itself keeps its INJURIES column. In `damage` the count column is named
# FATALITIES for both groups and `condition` tells them apart.
damage <- rbind(fatalEvents, setNames(injuriesEvents, names(fatalEvents)))
# Drop event types without any recorded casualties
damage <- damage[!is.na(damage$FATALITIES) & damage$FATALITIES != 0, ]

Now, let’s edit the exponential values for crop and properties (CROPDMGEXP and PROPDMGEXP). Remember that “K” (or “k”) stands for kilo (10^3), “M” (or “m”) stands for mega (10^6), and so on.

# Lookup table from letter exponent codes to powers of ten. It is built
# directly inside data.frame() so that the built-in `letters` constant is
# not overwritten by a global variable of the same name.
matriz <- data.frame(
  letters = c("k", "K", "m", "M", "B", "b", "H", "h"),
  values = c(3, 3, 6, 6, 9, 9, 2, 2)
)

# Convert a vector of damage exponent codes into numeric multipliers.
#   codes  - vector of exponent codes (e.g. "K", "m", "5", "", "?")
#   lookup - data frame with columns `letters` (code) and `values` (power)
# Letter codes found in `lookup` give 10^value, codes made only of digits
# give 10^digits, and every other code (empty, "+", "-", "?", NA) gives 1.
# Returns a numeric vector of the same length as `codes`.
exponent_multiplier <- function(codes, lookup = matriz) {
  codes <- as.character(codes)
  power <- lookup$values[match(codes, lookup$letters)]
  # Convert only the purely numeric codes, so that as.numeric() is never
  # applied to symbols and no coercion warnings are raised
  is_digit <- grepl("^[0-9]+$", codes)
  power[is_digit] <- as.numeric(codes[is_digit])
  ifelse(is.na(power), 1, 10^power)
}

# Replace the exponent codes by their multipliers
data$PROPDMGEXP <- exponent_multiplier(data$PROPDMGEXP)
data$CROPDMGEXP <- exponent_multiplier(data$CROPDMGEXP)
# Add a column with the total value of property and crop damage
data$result <- data$PROPDMG * data$PROPDMGEXP + data$CROPDMG * data$CROPDMGEXP
# Just to see our data
head(data)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP result
## 1 TORNADO          0       15    25.0       1000       0          1  25000
## 2 TORNADO          0        0     2.5       1000       0          1   2500
## 3 TORNADO          0        2    25.0       1000       0          1  25000
## 4 TORNADO          0        2     2.5       1000       0          1   2500
## 5 TORNADO          0        2     2.5       1000       0          1   2500
## 6 TORNADO          0        6     2.5       1000       0          1   2500

Results

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

According to our results, tornadoes are the weather event most harmful to population health.

# Total economic damage per event type: sum of `result`, the combined
# property and crop damage computed for each record
total <- aggregate(result ~ EVTYPE, data = data, FUN = sum)
# Keep the ten costliest event types, largest first. head() returns at most
# ten rows and never pads with NA rows the way [1:10, ] would.
total <- head(total[order(-total$result), ], 10)
head(total)
##                EVTYPE       result
## 170             FLOOD 150319678257
## 411 HURRICANE/TYPHOON  71913712800
## 834           TORNADO  57362333946
## 670       STORM SURGE  43323541000
## 244              HAIL  18761221986
## 153       FLASH FLOOD  18243991078
# Bar chart of the top event types, one panel per condition (fatality /
# injuries), each panel with its own y scale
library("ggplot2")
g <- ggplot(damage, aes(x = EVTYPE, y = FATALITIES))
# The x axis carries the event types and the y axis the casualty counts, so
# the axis labels are assigned accordingly
g +
  geom_bar(stat = "identity") +
  facet_grid(condition ~ ., scales = "free_y") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Type of event") +
  ylab("Number of affected people")

Across the United States, which types of events have the greatest economic consequences?

According to our analysis, floods are the weather event with the greatest economic consequences.

# Bar chart of the event types in `total` against their combined property
# and crop damage
g <- ggplot(total, aes(x = EVTYPE, y = result))
g +
  geom_bar(stat = "identity", fill = "green") +
  labs(
    title = "Top 10 Weather Events damaging properties and crop",
    x = "Type of event",
    y = "Total damage ($)"
  ) +
  # Rotate the event names so that long labels do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))