The analysis below extends the analysis of US natural hazards by plotting fatalities and damages over time, as well as their spatial distribution across the US. This may help inform the allocation of research and emergency funding.
# Work inside the assignment folder when it exists on this machine; otherwise
# stay in the current working directory so the script still runs elsewhere.
local({
  project_dir <- "~/Google Drive/DataScienceClasses/Reproducible Research/Assignment2"
  if (dir.exists(project_dir)) {
    setwd(project_dir)
  } else {
    message("Project folder not found; staying in ", getwd())
  }
})
library(data.table)
library(dplyr)
library(ggplot2)
library(lubridate)
library(R.utils)
library(googleVis)
# Download the compressed storm data and unzip it.
# Skipped entirely when StormData.csv is already present.
if (!file.exists("StormData.csv")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "StormData.csv.bz2",
    mode = "wb" # the archive is binary; a text-mode transfer can corrupt it
  )
  # bunzip2() is provided by R.utils; it decompresses the archive to
  # StormData.csv, the file the guard above checks for.
  bunzip2("StormData.csv.bz2")
}
# Load the storm data with fread (data.table package, for faster processing),
# keep only the relevant columns, drop events with no harm in any of the four
# harm columns to reduce data strain, and parse the begin date (the parsed
# date is only needed for the appendix).
dat <- fread("StormData.csv") %>%
  select(BGN_DATE, STATE, EVTYPE, FATALITIES:CROPDMGEXP) %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0) %>%
  mutate(BGN_DATE = mdy_hms(BGN_DATE))
# Build the decoding table for the damage-exponent symbols.
#
# `decode` has one row per distinct symbol found in CROPDMGEXP / PROPDMGEXP:
#   symb   - the symbol exactly as it appears in the data
#   decode - the multiplier to apply to the matching damage figure
#
# Each symbol is mapped by name rather than by its position in unique(), so
# the table stays correct if the data, the filter or the row order changes:
#   h/H -> 10^2, k/K -> 10^3, m/M -> 10^6, b/B -> 10^9,
#   a single digit d -> 10^d,
#   anything else (blank, "?", "+", "-", ...) -> 10^0 = 1.
decode <- local({
  symbols <- unique(c(unique(dat$CROPDMGEXP), unique(dat$PROPDMGEXP)))
  power_by_symbol <- c(h = 2, k = 3, m = 6, b = 9, setNames(0:9, 0:9))
  # Case-insensitive lookup; symbols missing from the lookup come back as NA
  powers <- unname(power_by_symbol[tolower(as.character(symbols))])
  powers[is.na(powers)] <- 0
  data.frame(symb = symbols, decode = 10^powers, stringsAsFactors = FALSE)
})
# Attach the multiplier for each exponent symbol, convert the raw damage
# figures into absolute values, and keep only the columns from BGN_DATE
# through INJURIES plus the two computed damage columns.
dat <- local({
  # Look up the property-damage multiplier; it arrives in a column named `decode`
  joined <- merge(dat, y = decode, by.x = "PROPDMGEXP", by.y = "symb")
  joined <- rename(joined, PropExp = decode)
  # Same lookup again, this time keyed on the crop-damage symbol
  joined <- merge(joined, y = decode, by.x = "CROPDMGEXP", by.y = "symb")
  joined <- rename(joined, CropExp = decode)
  # Damage value = reported figure * multiplier
  joined <- mutate(
    joined,
    PropDam = PROPDMG * PropExp,
    CropDam = CROPDMG * CropExp
  )
  select(joined, BGN_DATE:INJURIES, PropDam, CropDam)
})