Analysing the fatalities, injuries and damage caused by the natural events
# Read the bzip2-compressed storm data CSV into a data frame.
fatdata <- read.csv(file = "repdata_data_StormData.csv.bz2")
# Inspect the size and the column types of the raw dataset.
dim(fatdata)
## [1] 902297 37
str(object = fatdata)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
head(fatdata)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Only a few columns matter for this analysis:
#   EVTYPE     : the type of natural event that occurred
#   FATALITIES : deaths caused by the event
#   INJURIES   : number of injuries caused by the event
#   PROPDMG    : value of property damaged
#   PROPDMGEXP : the exponent (magnitude code) that applies to PROPDMG
#   CROPDMG    : value of crops damaged
#   CROPDMGEXP : the exponent (magnitude code) that applies to CROPDMG
calamity <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
workingfatdata <- fatdata[, calamity, drop = FALSE]
# Have a look at our main working dataset
dim(workingfatdata)
## [1] 902297 7
str(workingfatdata)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
head(workingfatdata)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
# Total fatalities per event type.
fatal <- aggregate(FATALITIES ~ EVTYPE, workingfatdata, sum)
head(fatal, n = 6L)
## EVTYPE FATALITIES
## 1 HIGH SURF ADVISORY 0
## 2 COASTAL FLOOD 0
## 3 FLASH FLOOD 0
## 4 LIGHTNING 0
## 5 TSTM WIND 0
## 6 TSTM WIND (G45) 0
# Total injuries per event type.
injury <- aggregate(INJURIES ~ EVTYPE, workingfatdata, sum)
head(injury, n = 6L)
## EVTYPE INJURIES
## 1 HIGH SURF ADVISORY 0
## 2 COASTAL FLOOD 0
## 3 FLASH FLOOD 0
## 4 LIGHTNING 0
## 5 TSTM WIND 0
## 6 TSTM WIND (G45) 0
# Ten event types with the most fatalities, highest first.
# head() returns only the rows that exist, so the result is not padded with
# all-NA rows when there are fewer than ten event types.
topfatal <- head(fatal[order(-fatal$FATALITIES), ], 10)
topfatal
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
# Ten event types with the most injuries, highest first.
# head() returns only the rows that exist, so the result is not padded with
# all-NA rows when there are fewer than ten event types.
topinjury <- head(injury[order(-injury$INJURIES), ], 10)
topinjury
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
# Now look at the exponent codes used for property and crop damage
unique(workingfatdata$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(workingfatdata$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Translate each PROPDMGEXP code into a numeric multiplier, using the
# mapping taken from the dataset's documentation.
# codes[i] pairs with multipliers[i]; a code that is not listed yields NA.
workingfatdata$PROPEXP <- local({
  codes <- c(
    "K", "M", "", "B", "m",
    "+", "0", "5", "6", "?",
    "4", "2", "3", "h", "7",
    "H", "-", "1", "8"
  )
  multipliers <- c(
    1e3, 1e6, 1, 1e9, 1e6,
    0, 1, 1e5, 1e6, 0,
    1e4, 1e2, 1e3, 1e2, 1e7,
    1e2, 0, 10, 1e8
  )
  # match() is used instead of name-based indexing because the empty string
  # cannot be looked up as an element name.
  multipliers[match(workingfatdata$PROPDMGEXP, codes)]
})
# Translate each CROPDMGEXP code into a numeric multiplier.
# codes[i] pairs with multipliers[i]; a code that is not listed yields NA.
#
# Fix: "m" and "B" were previously swapped ("m" -> 1e9, "B" -> 1e6). They now
# agree with the property mapping in this file: "M"/"m" = millions and
# "B" = billions. With the old values every billion-dollar crop loss was
# counted as millions.
# NOTE: damage totals echoed further down in this file were produced with the
# old multipliers and will change when the script is re-run.
workingfatdata$CROPEXP <- local({
  codes <- c("", "M", "K", "m", "B", "?", "0", "k", "2")
  multipliers <- c(1, 1e6, 1e3, 1e6, 1e9, 0, 1, 1e3, 1e2)
  # match() is used instead of name-based indexing because the empty string
  # cannot be looked up as an element name.
  multipliers[match(workingfatdata$CROPDMGEXP, codes)]
})
# Scale the coded damage figures by their multipliers, then add property and
# crop damage into one combined value per record.
workingfatdata$PROPDMGVAL <- with(workingfatdata, PROPDMG * PROPEXP)
workingfatdata$CROPDMGVAL <- with(workingfatdata, CROPDMG * CROPEXP)
workingfatdata$ALLDMGVAL <- with(workingfatdata, PROPDMGVAL + CROPDMGVAL)
dim(workingfatdata)
## [1] 902297 12
str(workingfatdata)
## 'data.frame': 902297 obs. of 12 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ PROPEXP : num 1000 1000 1000 1000 1000 1000 1000 1000 1000 1000 ...
## $ CROPEXP : num 1 1 1 1 1 1 1 1 1 1 ...
## $ PROPDMGVAL: num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
## $ CROPDMGVAL: num 0 0 0 0 0 0 0 0 0 0 ...
## $ ALLDMGVAL : num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
head(workingfatdata)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP PROPEXP
## 1 TORNADO 0 15 25.0 K 0 1000
## 2 TORNADO 0 0 2.5 K 0 1000
## 3 TORNADO 0 2 25.0 K 0 1000
## 4 TORNADO 0 2 2.5 K 0 1000
## 5 TORNADO 0 2 2.5 K 0 1000
## 6 TORNADO 0 6 2.5 K 0 1000
## CROPEXP PROPDMGVAL CROPDMGVAL ALLDMGVAL
## 1 1 25000 0 25000
## 2 1 2500 0 2500
## 3 1 25000 0 25000
## 4 1 2500 0 2500
## 5 1 2500 0 2500
## 6 1 2500 0 2500
# Total combined (property + crop) damage per event type.
propcropdmg <- aggregate(ALLDMGVAL ~ EVTYPE, data = workingfatdata, FUN = sum)
# Ten costliest event types, highest first. head() returns only the rows that
# exist, so the result is not padded with all-NA rows when there are fewer
# than ten event types.
toppropcropdmg <- head(propcropdmg[order(-propcropdmg$ALLDMGVAL), ], 10)
toppropcropdmg
## EVTYPE ALLDMGVAL
## 170 FLOOD 150319678257
## 411 HURRICANE/TYPHOON 70405222800
## 834 TORNADO 57362333887
## 670 STORM SURGE 43323541000
## 244 HAIL 18761221986
## 153 FLASH FLOOD 18243991079
## 402 HURRICANE 14610229010
## 95 DROUGHT 13520172000
## 409 HURRICANE OPAL 13181846000
## 848 TROPICAL STORM 8382236550
(echo = TRUE)
## [1] TRUE
Results
# Side-by-side bar charts: top ten event types by fatalities (left) and by
# injuries (right). The large bottom margin together with las = 3 leaves room
# for the vertically written event-type labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), las = 3, cex = 0.8)
barplot(
  topfatal$FATALITIES,
  names.arg = topfatal$EVTYPE,
  ylim = c(0, 10000), # bars are drawn from 0, so the axis starts there too
  col = "red",
  ylab = "Number of Fatalities",
  main = " Top 10 Events with Highest Fatalities"
)
barplot(
  topinjury$INJURIES,
  names.arg = topinjury$EVTYPE,
  ylim = c(0, 100000),
  col = "green",
  ylab = "Number of Injuries",
  main = " Top 10 Events with Highest Injuries"
)

# Bar chart of the ten event types with the highest combined property and
# crop damage, shown in billions.
# mfrow is reset explicitly: the two-panel layout set for the earlier charts
# would otherwise persist in the same session and squeeze this chart into
# half of the device.
par(mfrow = c(1, 1), mar = c(12, 4, 3, 2), las = 3, cex = 0.8, cex.main = 0.8)
barplot(
  toppropcropdmg$ALLDMGVAL / 1e9,
  names.arg = toppropcropdmg$EVTYPE,
  col = "orange",
  # The plotted value is property plus crop damage, so the label says so.
  ylab = "Cost of Property and Crop Damage ($ billions)",
  main = "Top 10 Events Causing Highest Property/Crop Damage Value"
)
(echo = TRUE)
## [1] TRUE
