Synopsis

This analysis uses the storm data to answer two questions. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? Across the United States, which types of events have the greatest economic consequences?

data processing

# Fetch the storm data once and load it into `storm`.
# The file is stored relative to the current working directory rather than a
# hard-coded personal path, so the script runs on any machine.
data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# The local name is kept as-is; the content is still bzip2-compressed and
# read.csv() decompresses it when opening the file.
data_file <- "StormData.csv"
if (!file.exists(data_file)) {
  # Binary mode prevents the compressed archive from being corrupted on Windows.
  download.file(data_url, data_file, mode = "wb")
}
storm <- read.csv(data_file)

Convert all event types to lower case

# Lower-cased copy of the EVTYPE column.
event_types <- tolower(storm[["EVTYPE"]])

Replace each blank or punctuation character with a space

# Substitute a space for every blank or punctuation character, then write the
# cleaned labels back to the EVTYPE column.
# NOTE(review): the `+` sits inside the bracket expression, so it is matched as
# a literal plus sign rather than acting as a quantifier; each character is
# replaced individually and runs of separators are not collapsed.
storm$EVTYPE <- event_types <- gsub(
  pattern = "[[:blank:][:punct:]+]",
  replacement = " ",
  x = event_types
)
library(plyr)

Find the ten event types with the most fatalities and the ten with the most injuries

# Total fatalities and injuries per event type.
casualties <- ddply(storm, .(EVTYPE), summarize,
                    fatal = sum(FATALITIES),
                    injury = sum(INJURIES))
# Ten rows with the largest totals for each measure. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
fatal <- head(casualties[order(casualties$fatal, decreasing = TRUE), ], 10)
injury <- head(casualties[order(casualties$injury, decreasing = TRUE), ], 10)

plot

# Bar chart of the fatality totals in `fatal`; the legend is taken from the
# first column of `fatal`.
barplot(
  height = fatal$fatal,
  col = rainbow(10),
  xlab = "top 10 fatality",
  ylab = "fatality",
  legend.text = fatal[1:10, 1]
)

Injuries can be shown with a similar bar plot:

barplot(injury$injury,ylab="injury",xlab="top 10 injury",col=rainbow(10),legend.text=injury[1:10,1])

Economic damage: define a function that converts a damage exponent code into a power of ten

# Translate a single damage-exponent code into a power of ten.
#
# x: one exponent code, as a character string or a one-element factor.
# Returns 2 for "h"/"H", 3 for "k"/"K", 6 for "m"/"M", 9 for "b"/"B",
# 0 for "", "-", "?" or "+", and the numeric value itself for any other
# code that parses as a number. Any other input (including NA) raises an
# error with the message "invalid".
exp_tr <- function(x) {
  # Work on the label: as.numeric() on a factor yields the internal level
  # code, not the digit the level displays.
  x <- as.character(x)
  if (x %in% c("h", "H")) {
    return(2)
  }
  if (x %in% c("k", "K")) {
    return(3)
  }
  if (x %in% c("m", "M")) {
    return(6)
  }
  if (x %in% c("b", "B")) {
    return(9)
  }
  # Checked before the numeric conversion so these known symbols never reach
  # as.numeric() and trigger a coercion warning.
  if (x %in% c("", "-", "?", "+")) {
    return(0)
  }
  # Only this one conversion is silenced; a failed parse is reported through
  # the error below instead of a warning.
  value <- suppressWarnings(as.numeric(x))
  if (!is.na(value)) {
    return(value)
  }
  stop("invalid")
}
# Translate a column of exponent codes into powers of ten.
# exp_tr() is called once per distinct code instead of once per row, and
# vapply() guarantees a numeric result even for an empty column.
damage_exponents <- function(codes) {
  codes <- as.character(codes)
  distinct <- unique(codes)
  powers <- vapply(distinct, exp_tr, numeric(1), USE.NAMES = FALSE)
  powers[match(codes, distinct)]
}

# Damage in dollars = reported figure * 10^exponent.
prop_dmg_exp <- damage_exponents(storm$PROPDMGEXP)
storm$prop_dmg <- storm$PROPDMG * 10^prop_dmg_exp
crop_dmg_exp <- damage_exponents(storm$CROPDMGEXP)
storm$crop_dmg <- storm$CROPDMG * 10^crop_dmg_exp

Results

Compute the economic loss by event type

library(plyr)
# Sum the property and crop damage columns within each event type.
econ_loss <- ddply(
  storm,
  "EVTYPE",
  summarize,
  prop_dmg = sum(prop_dmg),
  crop_dmg = sum(crop_dmg)
)

filter out events that caused no economic loss

# Keep only event types with some property or crop damage.
econ_loss <- econ_loss[(econ_loss$prop_dmg > 0 | econ_loss$crop_dmg > 0), ]
# Ten rows with the largest totals for each damage type. `TRUE` is spelled out
# because `T` is an ordinary variable that can be reassigned.
prop_dmg_events <- head(econ_loss[order(econ_loss$prop_dmg, decreasing = TRUE), ], 10)
crop_dmg_events <- head(econ_loss[order(econ_loss$crop_dmg, decreasing = TRUE), ], 10)

Show the top 10 event types ranked by property damage, then by crop damage

# Print the event type and property damage columns of the property ranking.
prop_dmg_events[c("EVTYPE", "prop_dmg")]
##                 EVTYPE     prop_dmg
## 138        flash flood 6.820237e+13
## 697 thunderstorm winds 2.086532e+13
## 741            tornado 1.078951e+12
## 209               hail 3.157558e+11
## 410          lightning 1.729433e+11
## 154              flood 1.446577e+11
## 366  hurricane typhoon 6.930584e+10
## 166           flooding 5.920826e+10
## 585        storm surge 4.332354e+10
## 270         heavy snow 1.793259e+10
# Print the crop damage column for the crop ranking (the earlier call selected
# "prop_dmg", so the table showed property damage instead).
# NOTE(review): the captured output that follows this call predates the fix and
# still lists prop_dmg values; re-run the report to refresh it.
crop_dmg_events[, c("EVTYPE", "crop_dmg")]
##                EVTYPE     prop_dmg
## 84            drought 1.046106e+09
## 154             flood 1.446577e+11
## 519       river flood 5.118946e+09
## 382         ice storm 3.949928e+09
## 209              hail 3.157558e+11
## 357         hurricane 1.186832e+10
## 366 hurricane typhoon 6.930584e+10
## 138       flash flood 6.820237e+13
## 125      extreme cold 6.773740e+07
## 185      frost freeze 1.098000e+07

plot

# Bar chart of the property damage totals; the legend is taken from the first
# column of `prop_dmg_events`.
barplot(
  height = prop_dmg_events$prop_dmg,
  col = rainbow(10),
  xlab = "top 10 property damage",
  ylab = "prop damage",
  legend.text = prop_dmg_events[1:10, 1]
)

# Bar chart of the crop damage totals; the legend is taken from the first
# column of `crop_dmg_events`.
barplot(
  height = crop_dmg_events$crop_dmg,
  col = rainbow(10),
  xlab = "top 10 crop damage",
  ylab = "crop damage",
  legend.text = crop_dmg_events[1:10, 1]
)