The intention of this document is to enumerate the main weather events in the USA. For that purpose we analyze data from the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The database covers the period from 1950 to November 2011.
This document summarizes the data on both human and economic damage.
The Data Processing section shows how the data were loaded, transformed and aggregated in order to produce the results.
In the Results section, two figures and two tables are presented with the top five event types for human consequences (injuries and fatalities) and for economic consequences (property damage and agricultural (crop) damage).
Loading data:
# Read the compressed storm CSV, skipping the columns this analysis never uses.
fileLocal <- "repdata_data_StormData.csv.bz2"
filetoUnzip <- bzfile(description = fileLocal, open = "r")

# colClasses mask for the 37 columns of the file: "NULL" tells read.csv to
# skip a column, NA lets it pick the column type itself.
# Only positions 8 and 23-28 are kept (presumably the event type, casualty
# and damage columns used further down -- confirm against the CSV header).
columnSelection <- rep("NULL", 37)
columnSelection[c(8, 23:28)] <- NA

stormData <- read.csv(
  file = filetoUnzip,
  sep = ",",
  colClasses = columnSelection
)

# The connection was opened explicitly above, so release it explicitly.
close(filetoUnzip)
Damage amounts are stored in a compact form: each amount comes with a separate scale code (for example K, M or B), so every amount has to be multiplied by the corresponding power of ten.
#Getting Multipliers
# Convert damage scale codes into base-10 exponents.
#
# codes: vector of scale codes (character or factor).
# Returns a numeric vector of exponents, one per element of `codes`:
#   "H" -> 2, "K" -> 3, "M" -> 6, "B" -> 9 (matched case-insensitively);
#   "", "+", "-", "?" and missing values -> 0;
#   strings that parse as a number (e.g. "5") -> that number.
# Any other code triggers a warning and gets exponent 0, so that a single
# unexpected code cannot turn a whole per-event total into NA when the
# damages are summed later.
damageExponent <- function(codes) {
  rawCodes <- as.character(codes)
  rawCodes[is.na(rawCodes)] <- ""
  upperCodes <- toupper(rawCodes)

  # Numeric strings are used as the exponent directly. Everything else
  # becomes NA here (coercion warning silenced on purpose) and is resolved
  # by the lookups below.
  exponent <- suppressWarnings(as.numeric(rawCodes))

  letterExponent <- c(H = 2, K = 3, M = 6, B = 9)
  isLetter <- upperCodes %in% names(letterExponent)
  exponent[isLetter] <- unname(letterExponent[upperCodes[isLetter]])

  # Placeholder symbols carry no scale information: multiply by 10^0.
  exponent[upperCodes %in% c("", "+", "-", "?")] <- 0

  isUnknown <- is.na(exponent)
  if (any(isUnknown)) {
    warning(
      "Unrecognised damage exponent code(s) treated as 10^0: ",
      paste(unique(rawCodes[isUnknown]), collapse = ", "),
      call. = FALSE
    )
    exponent[isUnknown] <- 0
  }
  exponent
}

# For Property Damages
# The code column is overwritten with the exponent (kept as character),
# then the amount is scaled in place.
stormData$PROPDMGEXP <- as.character(damageExponent(stormData$PROPDMGEXP))
stormData$PROPDMG <- stormData$PROPDMG * 10^as.numeric(stormData$PROPDMGEXP)

# For Crop Damages
stormData$CROPDMGEXP <- as.character(damageExponent(stormData$CROPDMGEXP))
stormData$CROPDMG <- stormData$CROPDMG * 10^as.numeric(stormData$CROPDMGEXP)
Now we prepare data for plotting:
# plyr provides arrange() and desc(), used for the rankings at the end of
# this chunk.
library(plyr)

# Total fatalities and injuries per event type.
peopleDamage <- aggregate(
  with(stormData, cbind(FATALITIES, INJURIES)),
  by = list(stormData$EVTYPE),
  FUN = "sum"
)
names(peopleDamage) <- c("Event", "Fatalities", "Injuries")

# Total property and crop damage per event type.
economyDamage <- aggregate(
  with(stormData, cbind(PROPDMG, CROPDMG)),
  by = list(stormData$EVTYPE),
  FUN = "sum"
)
names(economyDamage) <- c("Event", "Property", "Crops")

# Top events for plotting: the five event types with the most fatalities
# and the five with the highest property damage.
peopleDamageExtract <- arrange(peopleDamage, desc(Fatalities))[seq_len(5), ]
economyDamageExtract <- arrange(economyDamage, desc(Property))[seq_len(5), ]
The following figure and tables show the top five event types in terms of harm to people:
# Plotting results
# Figure 1: two bar charts side by side (fatalities on the left, injuries on
# the right), one bar colour per event type.
colors <- c("blue", "red", "green", "orange", "brown")
par(mfrow = c(1, 2), mar = c(4, 4, 2, 2), oma = c(1, 0, 2, 0))

# Left panel: uses peopleDamageExtract as it stands on entry to this chunk.
barplot(
  peopleDamageExtract[["Fatalities"]],
  names.arg = peopleDamageExtract[["Event"]],
  ylab = "Fatalities",
  col = colors
)
legend(
  "topright",
  legend = peopleDamageExtract[["Event"]],
  fill = colors,
  cex = 0.8
)

# Right panel: re-rank by injuries before drawing.
peopleDamageExtract <- arrange(peopleDamage, desc(Injuries))[seq_len(5), ]
barplot(
  peopleDamageExtract[["Injuries"]],
  names.arg = peopleDamageExtract[["Event"]],
  ylab = "Injuries",
  col = colors
)
legend(
  "topright",
  legend = peopleDamageExtract[["Event"]],
  fill = colors,
  cex = 0.8
)

# Shared title (top outer margin) and x-axis label (bottom outer margin).
mtext("Most harmful events to population health", outer = TRUE)
mtext("Event", side = 1, outer = TRUE)
People fatalities and injuries:
# Table: the five event types with the most fatalities.
print(arrange(peopleDamage, desc(Fatalities))[seq_len(5), c(1, 2)])
## Event Fatalities
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
# Table: the five event types with the most injuries.
print(arrange(peopleDamage, desc(Injuries))[seq_len(5), c(1, 3)])
## Event Injuries
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
Now the data related to economic issues.
# Figure 2: two bar charts side by side (property damage on the left, crop
# damage on the right), one bar colour per event type.
colors <- c("blue", "red", "green", "orange", "brown")
par(mfrow = c(1, 2), mar = c(4, 4, 2, 2), oma = c(1, 0, 2, 0))

# Left panel: uses economyDamageExtract as it stands on entry to this chunk.
barplot(
  economyDamageExtract[["Property"]],
  names.arg = economyDamageExtract[["Event"]],
  ylab = "Property",
  col = colors
)
legend(
  "topright",
  legend = economyDamageExtract[["Event"]],
  fill = colors,
  cex = 0.6
)

# Right panel: re-rank by crop damage before drawing.
economyDamageExtract <- arrange(economyDamage, desc(Crops))[seq_len(5), ]
barplot(
  economyDamageExtract[["Crops"]],
  names.arg = economyDamageExtract[["Event"]],
  ylab = "Crops",
  col = colors
)
legend(
  "topright",
  legend = economyDamageExtract[["Event"]],
  fill = colors,
  cex = 0.6
)

# Shared two-line title (top outer margin) and x-axis label (bottom outer
# margin).
mtext("Top events with greatest economic consequences \n USD", outer = TRUE)
mtext("Event", side = 1, outer = TRUE)
Economic loss.
# Table: the five event types with the highest property damage.
print(arrange(economyDamage, desc(Property))[seq_len(5), c(1, 2)])
## Event Property
## 1 FLOOD 1.447e+11
## 2 HURRICANE/TYPHOON 6.931e+10
## 3 TORNADO 5.695e+10
## 4 STORM SURGE 4.332e+10
## 5 FLASH FLOOD 1.682e+10
# Table: the five event types with the highest crop damage.
print(arrange(economyDamage, desc(Crops))[seq_len(5), c(1, 3)])
## Event Crops
## 1 DROUGHT 1.397e+10
## 2 FLOOD 5.662e+09
## 3 RIVER FLOOD 5.029e+09
## 4 ICE STORM 5.022e+09
## 5 HAIL 3.026e+09