APPENDIX: Additional Metrics

Data wrangling

# Session setup: fix the working directory and attach the packages used below.
# NOTE(review): setwd() hard-codes a path on the author's machine; every
# relative path further down ("StormData.csv", "StormData.csv.bz2") is resolved
# against it, so the script will not run elsewhere without editing this line.
setwd("~/Google Drive/DataScienceClasses/Reproducible Research/Assignment2")
# Packages are attached in this order. NOTE(review): later packages can mask
# same-named functions from earlier ones, so keep the order unless verified.
library(data.table)  # fread() for reading the CSV
library(dplyr)       # %>%, select(), filter(), rename(), mutate()
library(ggplot2)     # not used in this section; presumably for later plots
library(lubridate)   # mdy_hms() for parsing BGN_DATE
library(R.utils)     # bunzip2() for decompressing the download
library(googleVis)   # not used in this section; presumably for later maps
# Download the compressed storm data and decompress it, but only when the
# decompressed CSV is not already present in the working directory.
# Wrapped in local() so the helper names below do not leak into the workspace.
local({
  csv_file <- "StormData.csv"
  archive_file <- "StormData.csv.bz2"
  source_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

  if (!file.exists(csv_file)) {
    download.file(source_url, destfile = archive_file)
    # bunzip2() writes the archive's contents next to it, dropping ".bz2"
    bunzip2(archive_file)
  }
})

# Read the CSV with data.table's fread() (faster than read.csv), keep only the
# columns needed for the analysis, and drop every event that caused no harm at
# all (no fatalities, injuries, property damage or crop damage) to shrink the
# data early.
dat <- fread("StormData.csv") %>%
  select(BGN_DATE, STATE, EVTYPE, FATALITIES:CROPDMGEXP) %>%
  filter(
    FATALITIES > 0 |
      INJURIES > 0 |
      PROPDMG > 0 |
      CROPDMG > 0
  )

# Convert the event start date from text (month/day/year h:m:s) to date-time;
# only the appendix needs this.
dat$BGN_DATE <- mdy_hms(dat$BGN_DATE)

# Build the lookup table that turns the damage "exponent" symbols found in
# CROPDMGEXP / PROPDMGEXP into numeric multipliers.
#
# The multiplier is derived from each symbol itself rather than from a
# hand-typed vector that has to line up with the order unique() happens to
# return: a positional vector silently assigns wrong multipliers (or makes
# data.frame() fail on a length mismatch) as soon as the data or the filter
# above changes.
#
# Rules (the same family of multipliers the previous positional vector held):
#   H/h -> 10^2, K/k -> 10^3, M/m -> 10^6, B/b -> 10^9
#   single digit d -> 10^d
#   anything else ("", "?", "+", "-", ...) -> 10^0, i.e. the value is kept as is
# NOTE(review): the symbol-by-symbol assignment of the old vector cannot be
# checked without the data; confirm these rules match the intended decoding.
exp_symbols <- unique(c(dat$CROPDMGEXP, dat$PROPDMGEXP))
exp_power <- rep(0, length(exp_symbols))

is_digit <- grepl("^[0-9]$", exp_symbols)
exp_power[is_digit] <- as.numeric(exp_symbols[is_digit])

letter_power <- c(H = 2, K = 3, M = 6, B = 9)
is_letter <- toupper(exp_symbols) %in% names(letter_power)
exp_power[is_letter] <- letter_power[toupper(exp_symbols[is_letter])]

# Column names `symb` and `decode` are relied on by the merge further down.
decode <- data.frame(symb = exp_symbols,
                     decode = 10^exp_power,
                     stringsAsFactors = FALSE)

# Attach the numeric multiplier for each exponent symbol and turn the raw
# damage figures into absolute values:
#   1. join the lookup table on PROPDMGEXP (multiplier arrives as PropExp)
#   2. join it again on CROPDMGEXP (multiplier arrives as CropExp)
#   3. PropDam / CropDam = raw value * multiplier
#   4. keep only the descriptive columns plus the two computed damages
# The lookup column is renamed before each join so both multipliers can
# coexist without a name clash.
dat <- dat %>%
  merge(y = rename(decode, PropExp = decode),
        by.x = "PROPDMGEXP", by.y = "symb") %>%
  merge(y = rename(decode, CropExp = decode),
        by.x = "CROPDMGEXP", by.y = "symb") %>%
  mutate(PropDam = PROPDMG * PropExp,
         CropDam = CROPDMG * CropExp) %>%
  select(BGN_DATE:INJURIES, PropDam, CropDam)

APPENDIX: Additional Metrics

CA

January 23, 2016

Synopsis

Data wrangling

Trend over time

Spatial Distribution

Fatalities Map

Injuries Map

Damage Map