The following libraries were used to create this report.
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(stringr)
library(tidyr)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
A file of US Census regions was downloaded from the census website and cleaned via the following code. This was used to plot the data by region. The US Census is a generally accepted source for population and region reporting.
# Build a state-number -> census division/region lookup table from the
# US Census "reg_div.txt" listing.
#
# Returns a data frame with one row per line of the listing that contains a
# number below 100 and is not itself a 'Division' or 'REGION' heading. Columns:
#   Description - the raw line of text
#   DIVISION    - text after the colon of the closest preceding 'Division' line
#   REGION      - text after the colon of the closest preceding 'REGION' line
#   STATENUM    - first run of digits found in the line, as an integer
#
# Side effect: writes "reg_div.txt" to the working directory if it is not
# already there.
getRegions <- function() {
  regionFile <- "reg_div.txt"
  # Only hit the network when there is no local copy yet
  if (!file.exists(regionFile)) {
    download.file("http://www2.census.gov/geo/docs/maps-data/maps/reg_div.txt", regionFile)
  }
  # read.csv consumes the first line of the file as a header; the column is
  # renamed immediately afterwards
  reg_div <- read.csv(regionFile)
  names(reg_div) <- c('Description')

  # For heading lines containing `keyword`, take the text after the first colon,
  # then carry that label forward onto the lines that follow the heading.
  headingLabel <- function(keyword) {
    label <- ifelse(str_detect(reg_div$Description, keyword), str_extract(reg_div$Description, ":.+"), NA)
    label <- str_trim(str_replace(label, ':', ''))
    na.locf(label, na.rm = FALSE)
  }

  #Pull out the division
  reg_div$DIVISION <- headingLabel('Division')
  #Pull out the regions
  reg_div$REGION <- headingLabel('REGION')

  #Pull out state numbers
  reg_div$STATENUM <- as.integer(str_extract(reg_div$Description, "[:digit:]+"))

  # Keep only the numbered, non-heading lines; returned visibly
  reg_div %>%
    filter(!is.na(STATENUM) & STATENUM < 100 & !str_detect(Description, 'Division') & !str_detect(Description, 'REGION'))
}
The storm data was downloaded from the Coursera website and decompressed with bzip2. The resulting CSV file was read into R via the code below.
# Read the storm CSV, keep the columns used by this report, attach the census
# division/region of each state, and put property and crop damage on a common
# scale (thousands).
#
# Expects "repdata-data-StormData.csv" in the working directory and calls
# getRegions() for the region lookup.
# Returns a tbl_df holding STATENUM (renamed from STATE__), STATE, EVTYPE
# (upper-cased factor), INJURIES, FATALITIES, PROPDMG, PROPDMGEXP, CROPDMG,
# CROPDMGEXP and the columns contributed by getRegions().
readData <- function() {
  # Scale an amount to thousands according to its exponent code:
  #   K -> unchanged, M -> x 1,000, B -> x 1,000,000,
  #   any other code -> unchanged. Codes are matched case-insensitively.
  # The code column is tested here; the original tested the numeric amount
  # against 'B', so billions were never scaled.
  toThousands <- function(amount, exponentCode) {
    code <- toupper(as.character(exponentCode))
    multiplier <- ifelse(code == 'M', 1000, ifelse(code == 'B', 1000000, 1))
    amount * multiplier
  }

  storm <- read.csv("repdata-data-StormData.csv")
  storm <- storm %>%
    select(STATE__, STATE, EVTYPE, INJURIES, FATALITIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>%
    rename(STATENUM = STATE__)
  storm <- tbl_df(storm)

  #Clean up EVTYPE
  storm$EVTYPE <- factor(str_to_upper(as.character(storm$EVTYPE)))

  #Add Regions
  # Left join: storm rows without a matching region row are kept
  reg <- getRegions()
  storm <- merge(storm, reg, all.x = TRUE)

  #Calculate Property and Crop damage
  storm$PROPDMG <- toThousands(storm$PROPDMG, storm$PROPDMGEXP)
  storm$CROPDMG <- toThousands(storm$CROPDMG, storm$CROPDMGEXP)

  tbl_df(storm)
}
# Read and prepare the storm data set.
storm <- readData()
The following functions were used to group the data before creating plots.
# Summarise the storm table into Tornado-vs-Other totals in long format.
#
# storm: table with EVTYPE, INJURIES, FATALITIES, PROPDMG and CROPDMG columns.
# Returns one row per (tornado, injuryType) pair with the total in `val` and a
# `property` flag: 1 for the propdmg/cropdmg measures, 0 for the others.
tornadoGrouping <- function( storm ) {
  # Totals per event type, ordered by injuries then fatalities (descending)
  perEvent <- storm %>%
    group_by( EVTYPE ) %>%
    summarize( injuries = sum(INJURIES), fatalities = sum(FATALITIES), propdmg = sum(PROPDMG), cropdmg = sum(CROPDMG) ) %>%
    arrange( desc(injuries), desc(fatalities) )

  # Label each event type as the tornado itself or anything else
  perEvent$tornado <- factor( ifelse( perEvent$EVTYPE == 'TORNADO', 'Tornado', 'Other') )

  # Collapse to the two labels, then stack the four measures into key/value rows
  longTotals <- perEvent %>%
    group_by( tornado ) %>%
    summarize( injuries = sum(injuries), fatalities = sum(fatalities), propdmg = sum(propdmg), cropdmg = sum(cropdmg) ) %>%
    gather( injuryType, val, c( injuries, fatalities, propdmg, cropdmg) )

  # Flag the economic measures so plots can separate them from the casualty ones
  longTotals$property <- ifelse( longTotals$injuryType %in% c( 'propdmg', 'cropdmg') , 1, 0 )
  longTotals
}
# Same summary as tornadoGrouping, but kept separate for each REGION.
#
# storm: table with REGION, EVTYPE, INJURIES, FATALITIES, PROPDMG and CROPDMG.
# Returns one row per (REGION, tornado, injuryType) combination with the total
# in `val` and a `property` flag: 1 for propdmg/cropdmg, 0 for the others.
tornadoByRegion <- function( storm ) {
  # Totals per region and event type, ordered by injuries then fatalities
  perRegionEvent <- storm %>%
    group_by( REGION, EVTYPE ) %>%
    summarize( injuries = sum(INJURIES), fatalities = sum(FATALITIES), propdmg = sum(PROPDMG), cropdmg = sum(CROPDMG) ) %>%
    arrange( desc(injuries), desc(fatalities) )

  # Label each event type as the tornado itself or anything else
  perRegionEvent$tornado <- factor( ifelse( perRegionEvent$EVTYPE == 'TORNADO', 'Tornado', 'Other') )

  # Collapse to region x label, then stack the four measures into key/value rows
  longTotals <- perRegionEvent %>%
    group_by( REGION, tornado ) %>%
    summarize( injuries = sum(injuries), fatalities = sum(fatalities), propdmg = sum(propdmg), cropdmg = sum(cropdmg) ) %>%
    gather( injuryType, val, c( injuries, fatalities, propdmg, cropdmg) )

  # Flag the economic measures so plots can separate them from the casualty ones
  longTotals$property <- ifelse( longTotals$injuryType %in% c( 'propdmg', 'cropdmg') , 1, 0 )
  longTotals
}
The three plots used in this analysis were created by the functions below.
# Bar chart comparing the `tornado` categories, one facet per injuryType.
#
# tornadoData: long-format totals with tornado, injuryType, val and property
# columns; only the rows with property == 0 are drawn.
# Returns the ggplot object.
tornadoBars <- function( tornadoData ) {
  casualtyRows <- filter( tornadoData, property == 0 )
  chart <- ggplot( casualtyRows, aes( x = tornado, y = val))
  chart <- chart + geom_bar( stat = 'identity')
  chart + facet_wrap( ~ injuryType )
}
# Build the two per-region bar charts, faceted by injuryType and tornado.
#
# tornadoData: long-format totals with REGION, tornado, injuryType, val and
# property columns.
# Returns a list of two ggplot objects: first the chart of the property == 0
# rows, then the chart of the property == 1 rows.
regionBars <- function( tornadoData ) {
  # Chart for the rows not flagged as property measures
  casualtyRows <- filter( tornadoData, property == 0)
  casualtyChart <- ggplot( casualtyRows, aes( x = REGION, y = val)) +
    geom_bar( stat = 'identity') +
    facet_wrap( injuryType ~ tornado )

  # Chart for the rows flagged as property measures
  damageRows <- filter( tornadoData, property == 1)
  damageChart <- ggplot( damageRows, aes( x = REGION, y = val)) +
    geom_bar( stat = 'identity') +
    facet_wrap( injuryType ~ tornado )

  list( casualtyChart, damageChart )
}
The first plot compares injuries/fatalities due to tornadoes versus other weather events. Tornadoes account for more injuries than all other weather events combined.
# `storm` was already loaded by the earlier readData() call; reuse it rather
# than repeating the download and the CSV parse.
tornadoData <- tornadoGrouping( storm )
tornadoBars( tornadoData )
The next plot compares injuries/fatalities due to tornadoes versus other events, but also separates them by region.
# Re-aggregate by census region (this overwrites the earlier tornadoData),
# then build both regional plots in one go.
tornadoData <- tornadoByRegion( storm )
regionPlots <- regionBars( tornadoData )
# First element: the plot built from the property == 0 rows (injuries and fatalities).
regionPlots[[1]]
The last plot shows property and crop damage by region. Tornadoes cause a significant proportion of property damage, but not a significant portion of crop damage. Property and crop damage values are expressed in thousands.
# Second element: the plot built from the property == 1 rows (property and crop damage).
regionPlots[[2]]
Tornadoes account for the largest number of injuries and fatalities and the largest amount of property damage. When all other events are combined and compared with tornadoes on injuries, fatalities, and property damage, tornadoes still overwhelm them. Tornadoes are the most harmful to population health. Tornadoes also have the greatest economic impact – though their impact is much more significant for property damage than for crop damage.