Peer Assessment 2

This report analyzes data on U.S. weather events to identify which types of events are most harmful to population health and which have the greatest economic consequences. The data was collected between January 1950 and November 2011 by the National Oceanic and Atmospheric Administration (NOAA); the analysis below uses only events that began in 2001 or later. The database tracks characteristics of major storms and weather events in the U.S., including when and where they occur, as well as estimates of fatalities, injuries, and property damage. The data is available at https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. Documentation for the data is available at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf. The National Climatic Data Center Storm Events FAQ is available at https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf.

Data Processing

The following is a high-level overview of the steps in data processing.

  1. Load the data
  2. Reclassify EVTYPE values
  3. Adjust crop and property damage values
  4. Group and summarize the data
  5. Gather information for top weather events causing fatalities
  6. Gather information for top weather events causing injuries
  7. Gather information for top weather events causing crop damages
  8. Gather information for top weather events causing property damages
  9. Plot graphs

Load the data

library(plyr)
## Warning: package 'plyr' was built under R version 3.1.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.2
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)
## Warning: package 'stringr' was built under R version 3.1.2
# Load the NOAA storm data relative to the current working directory rather
# than a machine-specific setwd() path, so the analysis runs on any machine.
# If the extracted CSV is not present, fall back to the compressed archive,
# downloading it first when necessary (read.csv reads .bz2 files directly).
storm_csv <- "repdata-data-StormData.csv"
storm_bz2 <- "repdata-data-StormData.csv.bz2"
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if (file.exists(storm_csv)) {
  storm.data <- read.csv(storm_csv)
} else {
  if (!file.exists(storm_bz2)) {
    download.file(storm_url, destfile = storm_bz2, mode = "wb")
  }
  storm.data <- read.csv(storm_bz2)
}

# Calendar year in which each event began, parsed from BGN_DATE.
storm.data$year <- as.numeric(format(as.Date(storm.data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"), "%Y"))

# Keep only events that began after 2000; rows whose year is NA are dropped.
storm.data1 <- subset(storm.data, year > 2000)
dim(storm.data1)
## [1] 488692     38

Reclassify EVTYPE values

# Row indices that have already been claimed by an event group.
evt_aggr <- integer()

# Return the entries of `set1` that are not yet recorded in the global
# `evt_aggr`, then record every entry of `set1` there so that later calls
# skip them.
exclude <- function(set1) {
  unclaimed <- setdiff(set1, evt_aggr)
  evt_aggr <<- union(evt_aggr, set1)
  unclaimed
}

# Working copy of the event type with surrounding whitespace removed. The loop
# below overwrites it with a group label wherever a group's pattern matches;
# rows that match no group keep their trimmed EVTYPE.
storm.data1$EVTYPE_GRP <- str_trim(storm.data1$EVTYPE)

# One row per event group: the label followed by up to nine regex terms
# (unused slots are NA). Row order is the matching priority, because a row
# claimed by an earlier group is never reassigned to a later one.
evt_groups <- data.frame(group=character(), term1=character(), term2=character(), term3=character(), term4=character(),
                         term5=character(), term6=character(), term7=character(), term8=character(), term9=character(), stringsAsFactors=FALSE)
evt_groups[1,] <- c("HEAT (GRP)", "HEAT", "HOT", "WARMTH", "WARM", rep(NA,5))
evt_groups[2,] <- c("TORNADO (GRP)", "TORNADO", "WATERSPOUT", "WAYTERSPOUT", "TORNDAO", "GUSTNADO", rep(NA,4))
evt_groups[3,] <- c("COLD (GRP)", "COLD", "HYPOTHERMIA", "EXPOSURE", "WIND CHILL", "LOW.*TEMP", "UNSEASON.*COOL", rep(NA,3))
evt_groups[4,] <- c("WIND (GRP)", "WIND", rep(NA,8))
evt_groups[5,] <- c("RIP CURRENT (GRP)", "RIP CURRENT", rep(NA,8))
evt_groups[6,] <- c("WINTER (GRP)", "WINTER", "WINTRY", "SNOW", "BLIZZARD", "ICE", "ICY", "FROST", "FREEZE", "FREEZING")
evt_groups[7,] <- c("HURRICANE (GRP)", "HURRICANE", "TYPHOON", rep(NA,7))
evt_groups[8,] <- c("FLOOD (GRP)", "FLOOD", "FLDG", rep(NA,7))
evt_groups[9,] <- c("FIRE (GRP)", "FIRE", rep(NA,8))
evt_groups[10,] <- c("HAIL (GRP)", "HAIL", rep(NA,8))
evt_groups[11,] <- c("TROP. STORM (GRP)", "TROPICAL STORM", rep(NA,8))
evt_groups[12,] <- c("T-STORM (GRP)", "THUNDERSTORM", "TSTM", rep(NA,7))
evt_groups[13,] <- c("LIGHTNING (GRP)", "LIGHTNING","LIGNTNING","LIGHTING", rep(NA,6))
evt_groups[14,] <- c("MUD SLIDE (GRP)", "MUD.*SLIDE", rep(NA,8))

# Reset the claimed-row tracker that exclude() reads and updates.
evt_aggr <- integer()
# Row indices assigned to each group, keyed by group label.
evt_groups_data <- list()
for (i in seq_len(nrow(evt_groups))) {
  group_label <- evt_groups$group[i]
  non_na_cols <- which(!is.na(evt_groups[i, 2:10])) + 1
  terms <- unlist(evt_groups[i, non_na_cols], use.names = FALSE)
  # Build an alternation such as "(HEAT)|(HOT)|(WARMTH)|(WARM)".
  pattern <- paste0("(", terms, ")", collapse = "|")
  x <- grep(pattern, storm.data1$EVTYPE_GRP, ignore.case = TRUE)
  # Drop rows already claimed by a higher-priority group.
  x <- exclude(x)
  evt_groups_data[[group_label]] <- x
  # Assign through the column so that a group claiming zero rows is a no-op;
  # `storm.data1[x, ]$EVTYPE_GRP <- ...` raises an error when `x` is empty.
  storm.data1$EVTYPE_GRP[x] <- group_label
}

Adjust crop and property damage values

# Translate damage exponent codes into dollar multipliers.
#
# exp_code: vector (character or factor) of exponent codes.
# Returns a numeric vector of the same length: "K"/"M"/"B" in either case map
# to 1e3/1e6/1e9; every other code, including blank or NA, maps to 0, so the
# corresponding damage amount contributes nothing to the adjusted totals.
damage_multiplier <- function(exp_code) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  # Lookup by name; codes without an entry come back as NA.
  factor_value <- unname(multipliers[toupper(as.character(exp_code))])
  factor_value[is.na(factor_value)] <- 0
  factor_value
}

# A vectorized lookup is used instead of `df[mask, ]$col = value`, which raises
# an error whenever no row carries a given exponent code.
storm.data1$CROPDMGFACTOR <- damage_multiplier(storm.data1$CROPDMGEXP)
storm.data1$CROPDMGADJ <- storm.data1$CROPDMG * storm.data1$CROPDMGFACTOR

storm.data1$PROPDMGFACTOR <- damage_multiplier(storm.data1$PROPDMGEXP)
storm.data1$PROPDMGADJ <- storm.data1$PROPDMG * storm.data1$PROPDMGFACTOR

Group and summarize the data

# Per event group: total fatalities, total injuries, and total adjusted crop
# and property damage.
storm.data1.summary <- storm.data1 %>%
  group_by(EVTYPE_GRP) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    CROPDMG = sum(CROPDMGADJ),
    PROPDMG = sum(PROPDMGADJ)
  )

Gather information for top weather events causing fatalities

# Event groups with at least one fatality, ranked by total fatalities; keep
# the 20 highest.
top_fatalities <- storm.data1.summary %>%
  filter(FATALITIES > 0) %>%
  arrange(desc(FATALITIES)) %>%
  select(EVTYPE_GRP, FATALITIES) %>%
  head(20)

# Fatalities across all events, and the part accounted for by the top groups.
total_fatalities <- sum(storm.data1$FATALITIES)
total_fatalities_top_causes <- sum(top_fatalities$FATALITIES)

Gather information for top weather events causing injuries

# Event groups with at least one injury, ranked by total injuries; keep the
# 20 highest.
top_injuries <- storm.data1.summary %>%
  filter(INJURIES > 0) %>%
  arrange(desc(INJURIES)) %>%
  select(EVTYPE_GRP, INJURIES) %>%
  head(20)

# Injuries across all events, and the part accounted for by the top groups.
total_injuries <- sum(storm.data1$INJURIES)
total_injuries_top_causes <- sum(top_injuries$INJURIES)

Gather information for top weather events causing crop damages

# Event groups with non-zero adjusted crop damage, ranked by that damage;
# keep the 20 highest.
top_cropdmg <- storm.data1.summary %>%
  filter(CROPDMG > 0) %>%
  arrange(desc(CROPDMG)) %>%
  select(EVTYPE_GRP, CROPDMG) %>%
  head(20)

# Crop damage across all groups, and the part accounted for by the top groups.
total_cropdmg <- sum(storm.data1.summary$CROPDMG)
total_cropdmg_top_causes <- sum(top_cropdmg$CROPDMG)

Gather information for top weather events causing property damages

# Event groups with non-zero adjusted property damage, ranked by that damage;
# keep the 20 highest.
top_propdmg <- storm.data1.summary %>%
  filter(PROPDMG > 0) %>%
  arrange(desc(PROPDMG)) %>%
  select(EVTYPE_GRP, PROPDMG) %>%
  head(20)

# Property damage across all groups, and the part accounted for by the top
# groups.
total_propdmg <- sum(storm.data1.summary$PROPDMG)
total_propdmg_top_causes <- sum(top_propdmg$PROPDMG)

Plot graphs

# Figure 1: event groups with the most fatalities (left) and injuries (right).
# las = 2 draws axis labels perpendicular to the axis so the long group names
# fit; the enlarged bottom margin leaves room for them.
par(las = 2, mfrow = c(1, 2), mar = c(12, 3, 3, 3))
barplot(top_fatalities$FATALITIES, names.arg = top_fatalities$EVTYPE_GRP, cex.names = 0.95, main = "Top Weather-Related Causes of Fatality")
barplot(top_injuries$INJURIES, names.arg = top_injuries$EVTYPE_GRP, cex.names = 0.95, main = "Top Weather-Related Causes of Injury")
par(las = 1)

# Figure 2: event groups with the most crop damage (left) and property damage
# (right). Bars are labelled from EVTYPE_GRP, the group column these tables
# actually carry.
par(las = 2, mfrow = c(1, 2), mar = c(14, 7, 3, 3))
barplot(top_cropdmg$CROPDMG, names.arg = top_cropdmg$EVTYPE_GRP, cex.names = 0.95, main = "Top Weather-Related Causes of Crop Damage")
barplot(top_propdmg$PROPDMG, names.arg = top_propdmg$EVTYPE_GRP, cex.names = 0.95, main = "Top Weather-Related Causes of Property Damage")
# Switch back to horizontal text before writing the shared caption.
par(las = 1)
mtext("Weather events causing the most economic damage", side = 3, line = -35, outer = TRUE)