The goal of this survey is to analyse the NOAA Storm Database to understand the impact of severe weather events on population health and the economy of the United States.
The Pre-processing section shows the procedure used to prepare the data for analysis. The steps of the procedure include:
The outputs of the survey are made up of three plots:
The raw data used in this survey can be downloaded from here: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.6.1 (2014-01-04) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.18.0 (2014-02-22) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
##
## R.utils v1.32.4 (2014-05-14) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(reshape)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following objects are masked from 'package:reshape':
##
## colsplit, melt, recast
# Strongly prefer fixed notation over scientific notation when printing
# numbers, so large totals stay readable.
options(scipen = 999)
# unzip bz2 file
# Decompress only when the CSV is not already on disk: bunzip2() stops with
# "File already exists" if the destination is present, which would abort
# every re-run of this script.
if (!file.exists("repdata-data-StormData.csv")) {
  bunzip2("repdata-data-StormData.csv.bz2")
}
# load data
# Strings stay as character vectors (no factors); only the literal "NA" is
# read as a missing value.
df <- read.csv(
  "repdata-data-StormData.csv",
  stringsAsFactors = FALSE,
  na.strings = "NA",
  header = TRUE
)
#set data type
df$PROPDMG <- as.numeric(df$PROPDMG)
df$INJURIES <- as.numeric(df$INJURIES)
df$FATALITIES <- as.numeric(df$FATALITIES)
# Upper-case the event type so differently-cased spellings of the same event
# aggregate together.
df$EVTYPE <- toupper(df$EVTYPE)
#standardise property damage to $k
# The exponent code is matched case-insensitively (like EVTYPE above) so that
# a lower-case "m" or "b" is scaled the same way as "M" or "B".
# PROPDMG is addressed by name rather than by column position, and %in% is
# used instead of == so that a missing exponent yields FALSE rather than an
# NA subscript (which is an error in data-frame assignment).
# NOTE(review): every other exponent code is left unscaled, i.e. treated as
# thousands -- confirm this is intended for blank / non-K codes.
df$PROPDMGEXP <- toupper(df$PROPDMGEXP)
df$PROPDMG[df$PROPDMGEXP %in% "M"] <-
  df$PROPDMG[df$PROPDMGEXP %in% "M"] * 1000
df$PROPDMG[df$PROPDMGEXP %in% "B"] <-
  df$PROPDMG[df$PROPDMGEXP %in% "B"] * 1000000
#select subset of fields
columns <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG")
df <- df[columns]
id <- rownames(df)
# add id column for each row
df <- cbind(id = id, df)
#create a second data frame to be used for outputs
# Long format: one row per (id, EVTYPE, variable) with the measure in `value`.
# id.vars is spelled out in full instead of relying on partial matching.
df2 <- melt(df, id.vars = c("id", "EVTYPE"))
# Plot total fatalities by weather event ----
# Sum the FATALITIES rows of the long-format frame for each event type.
totFat <- dcast(
  df2[df2[["variable"]] == "FATALITIES", ],
  EVTYPE ~ variable,
  fun.aggregate = sum
)
# Drop event types with no fatalities, then keep only those whose total is
# strictly above the 90th percentile of the remaining totals.
totFat <- totFat[totFat[["FATALITIES"]] > 0, ]
perc <- quantile(totFat[["FATALITIES"]], probs = 0.90)
totFat <- totFat[totFat[["FATALITIES"]] > perc, ]
# Bar heights, named by event type.
fatChart <- with(totFat, tapply(FATALITIES, EVTYPE, sum))
# las = 3 draws axis labels vertically; the bottom margin is enlarged to
# leave room for the event names.
par(las = 3, mar = c(9, 4, 4, 2) + 0.1)
barplot(
  fatChart,
  main = "Total fatalities per weather event",
  xlab = "",
  ylab = "Fatalities"
)
# The x-axis title is added separately so its position can be set via mgp.
title(xlab = "Weather Event", mgp = c(0, 0, 0))
# Plot total injuries by weather event ----
# Sum the INJURIES rows of the long-format frame for each event type.
totInj <- dcast(
  df2[df2[["variable"]] == "INJURIES", ],
  EVTYPE ~ variable,
  fun.aggregate = sum
)
# Drop event types with no injuries, then keep only those whose total is
# strictly above the 90th percentile of the remaining totals.
totInj <- totInj[totInj[["INJURIES"]] > 0, ]
perc2 <- quantile(totInj[["INJURIES"]], probs = 0.90)
totInj <- totInj[totInj[["INJURIES"]] > perc2, ]
# Bar heights, named by event type.
injChart <- with(totInj, tapply(INJURIES, EVTYPE, sum))
# las = 3 draws axis labels vertically; the bottom margin is enlarged to
# leave room for the event names.
par(las = 3, mar = c(9, 4, 4, 2) + 0.1)
barplot(
  injChart,
  main = "Total injuries per weather event",
  xlab = "",
  ylab = "Injuries"
)
# The x-axis title is added separately so its position can be set via mgp.
title(xlab = "Weather Event", mgp = c(0, 0, 0))
# Plot total property damage by weather type ----
# Sum the PROPDMG rows of the long-format frame for each event type.
totDam <- dcast(
  df2[df2[["variable"]] == "PROPDMG", ],
  EVTYPE ~ variable,
  fun.aggregate = sum
)
# Drop event types with no recorded damage, then keep only those whose total
# is strictly above the 90th percentile of the remaining totals.
totDam <- totDam[totDam[["PROPDMG"]] > 0, ]
perc3 <- quantile(totDam[["PROPDMG"]], probs = 0.90)
totDam <- totDam[totDam[["PROPDMG"]] > perc3, ]
# Bar heights, named by event type.
damChart <- with(totDam, tapply(PROPDMG, EVTYPE, sum))
# las = 3 draws axis labels vertically; the bottom margin is enlarged to
# leave room for the event names.
par(las = 3, mar = c(9, 4, 4, 2) + 0.1)
barplot(
  damChart,
  main = "Total property damage per weather event",
  xlab = "",
  ylab = "Property Damage"
)
# The x-axis title is added separately so its position can be set via mgp.
title(xlab = "Weather Event", mgp = c(0, 0, 0))