# Read the bzip2-compressed storm CSV. The first row supplies the column names
# and text columns are kept as character vectors instead of factors.
rawdata <- read.csv(
  bzfile("repdata-data-StormData.csv.bz2"),
  header = TRUE,
  stringsAsFactors = FALSE
)

# Show which columns are available (recorded output follows).
names(rawdata)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"

# Work only with the event type plus the casualty and damage columns.
variables <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
data <- rawdata[, variables, drop = FALSE]
# Sum fatalities and injuries separately for every event type.
fatal <- aggregate(FATALITIES ~ EVTYPE, data, sum)
injury <- aggregate(INJURIES ~ EVTYPE, data, sum)

# Rank event types from largest to smallest total and keep rows 1 to 10 of
# each ranking.
fatal10 <- fatal[order(-fatal$FATALITIES)[1:10], ]
injury10 <- injury[order(-injury$INJURIES)[1:10], ]
unique(data$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"

# Translate every PROPDMGEXP code into a numeric multiplier through a single
# lookup. The two vectors are parallel: multiplier[i] belongs to code[i].
# The symbols "+", "?" and "-" map to 0, so those rows contribute no damage;
# a code that is not listed yields NA.
data$PROPEXP <- local({
  code <- c(
    "K", "M", "", "B", "m",
    "+", "0", "5", "6", "?",
    "4", "2", "3", "h", "7",
    "H", "-", "1", "8"
  )
  multiplier <- c(
    1000, 10^6, 1, 10^9, 10^6,
    0, 1, 10^5, 10^6, 0,
    10000, 100, 1000, 100, 10^7,
    100, 0, 10, 10^8
  )
  multiplier[match(data$PROPDMGEXP, code)]
})

# Scale the reported figure by its multiplier to get the full damage amount.
data$PROPDMGVAL <- data$PROPDMG * data$PROPEXP
unique(data$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"

# Translate every CROPDMGEXP code into a numeric multiplier through a single
# lookup. The two vectors are parallel: multiplier[i] belongs to code[i].
# "?" maps to 0, so those rows contribute no damage; a code that is not listed
# yields NA.
#
# Fix: "m" and "B" previously had their multipliers swapped (m = 10^9,
# B = 10^6). They now agree with "M" here and with the property-damage
# mapping: m = 10^6 (millions), B = 10^9 (billions). The swap understated
# billion-coded crop losses by a factor of 1000.
data$CROPEXP <- local({
  code <- c("", "M", "K", "m", "B", "?", "0", "k", "2")
  multiplier <- c(1, 10^6, 1000, 10^6, 10^9, 0, 1, 1000, 100)
  multiplier[match(data$CROPDMGEXP, code)]
})

# Scale the reported figure by its multiplier to get the full damage amount.
data$CROPDMGVAL <- data$CROPDMG * data$CROPEXP
# Total the scaled property and crop damage for every event type.
propdmg <- aggregate(PROPDMGVAL ~ EVTYPE, data, sum)
cropdmg <- aggregate(CROPDMGVAL ~ EVTYPE, data, sum)

# Rank event types from largest to smallest total and keep rows 1 to 10 of
# each ranking.
propdmg10 <- propdmg[order(-propdmg$PROPDMGVAL)[1:10], ]
cropdmg10 <- cropdmg[order(-cropdmg$CROPDMGVAL)[1:10], ]
# Two side-by-side bar charts: top-10 fatalities and top-10 injuries.
# The large bottom margin and las = 3 leave room for vertical event labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), las = 3, cex = 0.8)

# The y-axis upper limit keeps the original fixed value as a floor but widens
# to the tallest bar, so a total above the fixed cap no longer runs past the
# axis.
barplot(
  fatal10$FATALITIES,
  names.arg = fatal10$EVTYPE,
  ylim = c(0, max(7000, fatal10$FATALITIES, na.rm = TRUE)),
  col = heat.colors(10),
  ylab = "Number of Fatalities",
  main = " Top 10 Events with Highest Fatalities"
)
barplot(
  injury10$INJURIES,
  names.arg = injury10$EVTYPE,
  ylim = c(0, max(10000, injury10$INJURIES, na.rm = TRUE)),
  col = terrain.colors(10),
  ylab = "Number of Injuries",
  main = " Top 10 Events with Highest Injuries"
)
# Two side-by-side bar charts: top-10 property damage and top-10 crop damage.
# Same layout as a 1 x 2 grid with a tall bottom margin for vertical labels,
# plus slightly smaller titles.
par(
  mfrow = c(1, 2),
  mar = c(12, 4, 3, 2),
  mgp = c(3, 1, 0),
  las = 3,
  cex = 0.8,
  cex.main = 0.9
)

# Totals are divided by 1e9 so the y-axis reads in billions.
barplot(
  propdmg10$PROPDMGVAL / 1e9,
  names.arg = propdmg10$EVTYPE,
  col = heat.colors(10, alpha = 1),
  ylab = " Cost of Property Damage($ billions)",
  main = "Top 10 Events Causing Highest Property Damage"
)
barplot(
  cropdmg10$CROPDMGVAL / 1e9,
  names.arg = cropdmg10$EVTYPE,
  col = terrain.colors(10, alpha = 1),
  ylab = " Cost of Crop Damage($ billions)",
  main = "Top 10 Events Causing Highest Crop Damage"
)