Analyse the storm data to answer two questions. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? Across the United States, which types of events have the greatest economic consequences?
# NOTE(review): this working directory is specific to one machine; adjust it
# (or run from the project directory) before executing the script elsewhere.
setwd("C:\\Users\\suman\\Desktop\\datasciencecoursera\\course5\\ass2")
# The source is a bzip2 archive: fetch it only when it is not already present,
# and in binary mode so the compressed bytes are not altered by text-mode
# newline translation on Windows.
if (!file.exists("StormData.csv")) {
  download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                "StormData.csv", mode = "wb")
}
# read.csv() opens the file through a connection that detects the bzip2
# compression, so no explicit decompression step is needed.
storm <- read.csv("StormData.csv")
Convert all event types to lower case.
# Lower-cased copy of the EVTYPE column.
event_types <- tolower(storm[["EVTYPE"]])
Replace all punctuation characters with a space.
# Collapse every run of blanks/punctuation into one space. The quantifier has
# to sit outside the bracket expression: inside it, "+" is just another literal
# character, so runs were replaced character by character and variants such as
# "a  b" and "a b" stayed distinct event types.
event_types <- gsub("[[:blank:][:punct:]]+", " ", event_types)
# Drop the single leading/trailing space that a collapsed run can leave behind.
event_types <- gsub("^ | $", "", event_types)
storm$EVTYPE <- event_types
library(plyr)
Find the event types that cause the most fatalities and the most injuries.
# Total fatalities and injuries per event type.
casualties <- ddply(storm, .(EVTYPE), summarize,
                    fatal = sum(FATALITIES),
                    injury = sum(INJURIES))
# Ten event types with the highest totals, largest first. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
fatal <- head(casualties[order(casualties$fatal, decreasing = TRUE), ], 10)
injury <- head(casualties[order(casualties$injury, decreasing = TRUE), ], 10)
Plot the ten event types with the most fatalities.
# Bar chart of the ten highest fatality totals, one colour per bar; the legend
# shows the first column of `fatal` for those ten rows.
barplot(
  height = fatal[["fatal"]],
  col = rainbow(10),
  xlab = "top 10 fatality",
  ylab = "fatality",
  legend.text = fatal[1:10, 1]
)
Injuries can be shown in the same way with the bar plot below:
barplot(injury$injury,ylab="injury",xlab="top 10 injury",col=rainbow(10),legend.text=injury[1:10,1])
Economic damage: define a function that converts a damage-exponent code into a power of ten.
# Translate one damage-exponent code into the power of ten it stands for.
#
# Args:
#   x: a single code, given as a character string, a one-element factor or a
#      number.
#
# Returns:
#   2, 3, 6 or 9 for h/k/m/b (either case); the numeric value of x when it
#   parses as a number; 0 for "", "-", "?" and "+".
#
# Stops with "invalid" for any other value (including NA).
exp_tr <- function(x) {
  # Work on the label. as.numeric() applied to a factor yields the internal
  # level index rather than the digit that was read from the file, which
  # silently produced wrong exponents when the column was a factor.
  code <- as.character(x)
  if (code %in% c("h", "H")) {
    return(2)
  }
  if (code %in% c("k", "K")) {
    return(3)
  }
  if (code %in% c("m", "M")) {
    return(6)
  }
  if (code %in% c("b", "B")) {
    return(9)
  }
  # Codes such as "-" or "?" are not numbers; the coercion warning is expected
  # for them and only the NA result matters.
  value <- suppressWarnings(as.numeric(code))
  if (!is.na(value)) {
    return(value)
  }
  if (code %in% c("", "-", "?", "+")) {
    return(0)
  }
  stop("invalid")
}
# Map a whole column of exponent codes to powers of ten. exp_tr() is evaluated
# once per distinct code and the results are spread back over the rows instead
# of being called for every row; vapply() enforces one numeric value per code.
damage_exponents <- function(codes) {
  distinct <- unique(codes)
  powers <- vapply(distinct, exp_tr, numeric(1), USE.NAMES = FALSE)
  powers[match(codes, distinct)]
}
# Damage in dollars = mantissa column * 10 ^ exponent.
prop_dmg_exp <- damage_exponents(storm$PROPDMGEXP)
storm$prop_dmg <- storm$PROPDMG * 10^prop_dmg_exp
crop_dmg_exp <- damage_exponents(storm$CROPDMGEXP)
storm$crop_dmg <- storm$CROPDMG * 10^crop_dmg_exp
Compute the economic loss by event type.
library(plyr)
# Sum the property and crop damage within each event type.
econ_loss <- ddply(
  .data = storm,
  .variables = .(EVTYPE),
  .fun = summarize,
  prop_dmg = sum(prop_dmg),
  crop_dmg = sum(crop_dmg)
)
Filter out events that caused no economic loss.
# Keep only event types with a positive property or crop loss.
econ_loss <- econ_loss[econ_loss$prop_dmg > 0 | econ_loss$crop_dmg > 0, ]
# Ten costliest event types for each kind of damage, largest first. TRUE is
# spelled out because T is an ordinary variable that can be reassigned.
prop_dmg_events <- head(econ_loss[order(econ_loss$prop_dmg, decreasing = TRUE), ], 10)
crop_dmg_events <- head(econ_loss[order(econ_loss$crop_dmg, decreasing = TRUE), ], 10)
Show the top 10 event types by damage.
# Display the event type and property-damage total of the selected rows.
prop_dmg_events[c("EVTYPE", "prop_dmg")]
## EVTYPE prop_dmg
## 138 flash flood 6.820237e+13
## 697 thunderstorm winds 2.086532e+13
## 741 tornado 1.078951e+12
## 209 hail 3.157558e+11
## 410 lightning 1.729433e+11
## 154 flood 1.446577e+11
## 366 hurricane typhoon 6.930584e+10
## 166 flooding 5.920826e+10
## 585 storm surge 4.332354e+10
## 270 heavy snow 1.793259e+10
# Show the crop-damage totals this ranking is based on (the property-damage
# column was displayed here by mistake).
crop_dmg_events[, c("EVTYPE", "crop_dmg")]
## EVTYPE prop_dmg
## 84 drought 1.046106e+09
## 154 flood 1.446577e+11
## 519 river flood 5.118946e+09
## 382 ice storm 3.949928e+09
## 209 hail 3.157558e+11
## 357 hurricane 1.186832e+10
## 366 hurricane typhoon 6.930584e+10
## 138 flash flood 6.820237e+13
## 125 extreme cold 6.773740e+07
## 185 frost freeze 1.098000e+07
Plot the top 10 event types by property damage and by crop damage.
# Bar chart of the ten largest property-damage totals; the legend shows the
# first column of `prop_dmg_events` for those rows.
barplot(
  height = prop_dmg_events[["prop_dmg"]],
  col = rainbow(10),
  xlab = "top 10 property damage",
  ylab = "prop damage",
  legend.text = prop_dmg_events[1:10, 1]
)
# Same chart for the ten largest crop-damage totals.
barplot(
  height = crop_dmg_events[["crop_dmg"]],
  col = rainbow(10),
  xlab = "top 10 crop damage",
  ylab = "crop damage",
  legend.text = crop_dmg_events[1:10, 1]
)