library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(knitr)
library(ggplot2)
# Load the raw storm data set from the project's data/ directory.
#
# The project directory below is machine-specific. It is only made the working
# directory when it actually exists, so on any other machine the script no
# longer aborts inside setwd(); it resolves data/StormData.csv against the
# current working directory instead.
project_dir <- "C:/Users/jgpolanc/Desktop/Coursera/c5p2/StormData_PeerAssessment2"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# file.path() joins the components with "/" on every platform.
dat <- read.csv(file.path(getwd(), "data", "StormData.csv"))
# ---- Casualties (fatalities + injuries) per event type ----

# Keep only the event type and the two casualty counts.
event_dat <- select(dat, EVTYPE, FATALITIES, INJURIES)

# One row per EVTYPE with summed FATALITIES and INJURIES, plus their TOTAL,
# ordered from the largest TOTAL to the smallest.
# The sums are spelled out with summarise() rather than the deprecated
# summarise_each(funs(sum)); the resulting columns are the same.
sum_event <- event_dat %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  ) %>%
  mutate(TOTAL = FATALITIES + INJURIES) %>%
  arrange(desc(TOTAL))

# Keep the event types with more than 1500 total casualties. filter() keeps
# the descending order established above, so no further sorting is needed.
clean_event <- sum_event %>%
  filter(TOTAL > 1500)

# Fix the factor levels to the current row order so that plots draw the event
# types in TOTAL order instead of alphabetically.
clean_event$EVTYPE <- factor(clean_event$EVTYPE, levels = clean_event$EVTYPE)

# Long format: one row per (EVTYPE, IJ_TYPE) pair with its COUNT, where
# IJ_TYPE is "FATALITIES" or "INJURIES". Used for the stacked bar chart.
clean_event_breakout <- clean_event %>%
  select(EVTYPE, FATALITIES, INJURIES) %>%
  gather(IJ_TYPE, COUNT, FATALITIES:INJURIES)
# ---- Damage columns and the exponent codes they use ----

# Event type plus every column from PROPDMG through CROPDMGEXP.
event_dat2 <- select(dat, EVTYPE, PROPDMG:CROPDMGEXP)

# Distinct exponent codes present in the property and crop damage columns.
PROP_EXP <- event_dat2 %>%
  select(PROPDMG, PROPDMGEXP) %>%
  distinct(PROPDMGEXP)
CROP_EXP <- event_dat2 %>%
  select(CROPDMG, CROPDMGEXP) %>%
  distinct(CROPDMGEXP)

# Show both code tables. Each needs its own statement: two expressions on a
# single line are a parse error in R.
PROP_EXP
CROP_EXP
# Lookup table from an exponent code to the numeric factor the damage amount
# is multiplied by. Upper- and lower-case letter codes share the same value
# (H/h = 100, K/k = 1e3, M/m = 1e6, B/b = 1e9).
# NOTE(review): the values for "+", "-", "?", " " and the digit codes are kept
# exactly as they were; confirm them against the data set's documentation.
exp_values <- c(
  "H" = 100, "h" = 100,
  "K" = 1000, "k" = 1000,
  "M" = 1000000, "m" = 1000000,
  "B" = 1000000000, "b" = 1000000000,
  "+" = 1, "-" = 0, "?" = 0, " " = 0,
  "0" = 10, "1" = 10, "2" = 10, "3" = 10, "4" = 10,
  "5" = 10, "6" = 10, "7" = 10, "8" = 10, "9" = 10
)

# Two-column data frame: `factor` holds the code (as a factor) and
# `multiplier` holds its numeric value; the row names are the codes as well.
multiplier <- data.frame(
  factor = factor(names(exp_values)),
  multiplier = unname(exp_values),
  row.names = names(exp_values)
)
# ---- Turn exponent codes into numbers and compute damage amounts ----

# Row of the lookup table matching each record's exponent code. Codes that
# are not in the table give NA here.
prop_rows <- match(event_dat2$PROPDMGEXP, multiplier$factor)
crop_rows <- match(event_dat2$CROPDMGEXP, multiplier$factor)

# Overwrite the code columns with the numeric value from column 2 of the
# lookup table.
event_dat2$PROPDMGEXP <- multiplier[prop_rows, 2]
event_dat2$CROPDMGEXP <- multiplier[crop_rows, 2]

# Every NA anywhere in event_dat2 becomes 0, so unmatched codes contribute
# zero damage.
event_dat2[is.na(event_dat2)] <- 0

# Damage per record = amount * multiplier; keep only the event type and the
# two computed damage columns.
event_dat_final <- event_dat2 %>%
  mutate(
    CROP_DAMAGE = CROPDMGEXP * CROPDMG,
    PROP_DAMAGE = PROPDMGEXP * PROPDMG
  ) %>%
  select(EVTYPE, PROP_DAMAGE, CROP_DAMAGE)
# ---- Property and crop damage per event type ----

# One row per EVTYPE with summed PROP_DAMAGE and CROP_DAMAGE, plus their
# TOTAL_DAMAGE, ordered from the largest TOTAL_DAMAGE to the smallest.
# The sums are spelled out with summarise() rather than the deprecated
# summarise_each(funs(sum)); the resulting columns are the same.
sum_event_damage <- event_dat_final %>%
  group_by(EVTYPE) %>%
  summarise(
    PROP_DAMAGE = sum(PROP_DAMAGE),
    CROP_DAMAGE = sum(CROP_DAMAGE)
  ) %>%
  mutate(TOTAL_DAMAGE = PROP_DAMAGE + CROP_DAMAGE) %>%
  arrange(desc(TOTAL_DAMAGE))

# First ten rows, i.e. the ten event types with the highest total damage.
# The rows are already in descending order, so no further sorting is needed.
clean_event_damage <- sum_event_damage %>%
  slice(1:10)

# Fix the factor levels to the current row order so that plots draw the event
# types in TOTAL_DAMAGE order instead of alphabetically.
clean_event_damage$EVTYPE <- factor(
  clean_event_damage$EVTYPE,
  levels = clean_event_damage$EVTYPE
)

# Long format: one row per (EVTYPE, DAMAGE_TYPE) pair with its TOTAL, where
# DAMAGE_TYPE is "PROP_DAMAGE" or "CROP_DAMAGE". Used for the stacked bar
# chart.
clean_event_damage_breakout <- clean_event_damage %>%
  select(EVTYPE, PROP_DAMAGE, CROP_DAMAGE) %>%
  gather(DAMAGE_TYPE, TOTAL, PROP_DAMAGE:CROP_DAMAGE)
# ---- Plots ----

# Casualties per event type as horizontal stacked bars, filled by IJ_TYPE.
ggplot(
  data = clean_event_breakout,
  aes(x = EVTYPE, y = COUNT, fill = IJ_TYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip()

# Print the damage table for the ten highest-damage event types.
clean_event_damage

# Total damage per event type as horizontal bars.
# No `group` argument is passed: ggplot() ignores extra arguments given
# outside aes(), so it had no effect on the plot.
ggplot(
  data = clean_event_damage,
  aes(x = EVTYPE, y = TOTAL_DAMAGE)
) +
  geom_bar(stat = "identity") +
  coord_flip()

# Damage per event type as horizontal stacked bars, filled by DAMAGE_TYPE.
ggplot(
  data = clean_event_damage_breakout,
  aes(x = EVTYPE, y = TOTAL, fill = DAMAGE_TYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip()