In this report we aim to summarize the fatality, injury, property damage, and crop damage data by storm event types in the United States between the years 1950 and 2011. Our overall goal is to address the following two questions: (1) Which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? and (2) Which types of events have the greatest economic consequences? The results indicate that Tornado is the weather event most harmful to population health, but Flood and Drought are the weather events most damaging to properties and crop, respectively.
library(data.table)
data0 <- fread("repdata_data_StormData.csv", sep=",", header="auto", na.strings="NA")
## Warning in fread("repdata_data_StormData.csv", sep = ",", header =
## "auto", : Read less rows (902297) than were allocated (967216). Run again
## with verbose=TRUE and please report.
The warning message can be ignored. Some data rows have large amount of text in the REMARKS column, which contain commas in them and contribute to the extra estimated and allocated rows.
str(data0)
## Classes 'data.table' and 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : chr "3" "2" "2" "2" ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, ".internal.selfref")=<externalptr>
length(unique(data0$EVTYPE))
## [1] 985
According to National Weather Service Instruction documentation, the units for the PROPDMG and CROPDMG columns are in the PROPDMGEXP and CROPDMGEXP columns, respectively. The value “B”, “M”, and “K” correspond to unit in Billion, Million, and Thousand, respectively.
However, there are some values other than “B”, “M”, “K” in PROPDMGEXP and CROPDMGEXP columns.
library(plyr)
count(data0, 'PROPDMGEXP')
## PROPDMGEXP freq
## 1 465934
## 2 - 1
## 3 ? 8
## 4 + 5
## 5 0 216
## 6 1 25
## 7 2 13
## 8 3 4
## 9 4 4
## 10 5 28
## 11 6 4
## 12 7 5
## 13 8 1
## 14 B 40
## 15 h 1
## 16 H 6
## 17 K 424665
## 18 m 7
## 19 M 11330
count(data0, 'CROPDMGEXP')
## CROPDMGEXP freq
## 1 618413
## 2 ? 7
## 3 0 19
## 4 2 1
## 5 B 9
## 6 k 21
## 7 K 281832
## 8 m 1
## 9 M 1994
The fraction of those undefined units is relatively small and we will ignore them and treat their amount as 0 in this analysis.
normalized_value = function(dmg, dmgexp) {
if(dmgexp %in% c("B", "b", "M", "m", "K", "k")) {
if (dmgexp %in% c("B", "b"))
dmg * 1000
else if (dmgexp %in% c("M", "m"))
dmg
else
dmg * 0.001
}
else
0
}
data1 = data0[, PropDmgM:=mapply(normalized_value, PROPDMG, PROPDMGEXP)]
data2 = data1[, CropDmgM:=mapply(normalized_value, CROPDMG, CROPDMGEXP)]
dim(data2)
## [1] 902297 39
Dmg_Evtype <- aggregate(cbind(PropDmgM, CropDmgM) ~ EVTYPE, data = data2, sum)
Dmg_Evtype["TotalDmgM"] <- rowSums(Dmg_Evtype[,c("PropDmgM","CropDmgM")])
Health_Evtype <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE, data = data2, sum)
Health_Evtype["TotalCasualties"] <- rowSums(Health_Evtype[,c("FATALITIES","INJURIES")])
TopFatalityEvts <- Health_Evtype[order(Health_Evtype$FATALITIES,decreasing=TRUE),][1:20,c("EVTYPE","FATALITIES")]
TopFatalityEvts
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
## 972 WINTER STORM 206
## 586 RIP CURRENTS 204
## 278 HEAT WAVE 172
## 140 EXTREME COLD 160
## 760 THUNDERSTORM WIND 133
## 310 HEAVY SNOW 127
## 141 EXTREME COLD/WIND CHILL 125
## 676 STRONG WIND 103
## 30 BLIZZARD 101
## 350 HIGH SURF 101
TopInjuryEvts <- Health_Evtype[order(Health_Evtype$INJURIES,decreasing=TRUE),][1:20,c("EVTYPE","INJURIES")]
TopInjuryEvts
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
## 972 WINTER STORM 1321
## 411 HURRICANE/TYPHOON 1275
## 359 HIGH WIND 1137
## 310 HEAVY SNOW 1021
## 957 WILDFIRE 911
## 786 THUNDERSTORM WINDS 908
## 30 BLIZZARD 805
## 188 FOG 734
## 955 WILD/FOREST FIRE 545
## 117 DUST STORM 440
TopCasualtyEvts <- Health_Evtype[order(Health_Evtype$TotalCasualties,decreasing=TRUE),][1:20,c("EVTYPE","TotalCasualties")]
TopCasualtyEvts
## EVTYPE TotalCasualties
## 834 TORNADO 96979
## 130 EXCESSIVE HEAT 8428
## 856 TSTM WIND 7461
## 170 FLOOD 7259
## 464 LIGHTNING 6046
## 275 HEAT 3037
## 153 FLASH FLOOD 2755
## 427 ICE STORM 2064
## 760 THUNDERSTORM WIND 1621
## 972 WINTER STORM 1527
## 359 HIGH WIND 1385
## 244 HAIL 1376
## 411 HURRICANE/TYPHOON 1339
## 310 HEAVY SNOW 1148
## 957 WILDFIRE 986
## 786 THUNDERSTORM WINDS 972
## 30 BLIZZARD 906
## 188 FOG 796
## 585 RIP CURRENT 600
## 955 WILD/FOREST FIRE 557
TopPropDmgEvts <- Dmg_Evtype[order(Dmg_Evtype$PropDmgM,decreasing=TRUE),][1:20,c("EVTYPE","PropDmgM")]
TopPropDmgEvts
## EVTYPE PropDmgM
## 170 FLOOD 144657.710
## 411 HURRICANE/TYPHOON 69305.840
## 834 TORNADO 56937.160
## 670 STORM SURGE 43323.536
## 153 FLASH FLOOD 16140.812
## 244 HAIL 15732.267
## 402 HURRICANE 11868.319
## 848 TROPICAL STORM 7703.891
## 972 WINTER STORM 6688.497
## 359 HIGH WIND 5270.046
## 590 RIVER FLOOD 5118.945
## 957 WILDFIRE 4765.114
## 671 STORM SURGE/TIDE 4641.188
## 856 TSTM WIND 4484.928
## 427 ICE STORM 3944.928
## 760 THUNDERSTORM WIND 3483.121
## 409 HURRICANE OPAL 3172.846
## 955 WILD/FOREST FIRE 3001.829
## 298 HEAVY RAIN/SEVERE WEATHER 2500.000
## 786 THUNDERSTORM WINDS 1735.953
TopCropDmgEvts <- Dmg_Evtype[order(Dmg_Evtype$CropDmgM,decreasing=TRUE),][1:20,c("EVTYPE","CropDmgM")]
TopCropDmgEvts
## EVTYPE CropDmgM
## 95 DROUGHT 13972.5660
## 170 FLOOD 5661.9685
## 590 RIVER FLOOD 5029.4590
## 427 ICE STORM 5022.1135
## 244 HAIL 3025.9545
## 402 HURRICANE 2741.9100
## 411 HURRICANE/TYPHOON 2607.8728
## 153 FLASH FLOOD 1421.3171
## 140 EXTREME COLD 1292.9730
## 212 FROST/FREEZE 1094.0860
## 290 HEAVY RAIN 733.3998
## 848 TROPICAL STORM 678.3460
## 359 HIGH WIND 638.5713
## 856 TSTM WIND 554.0073
## 130 EXCESSIVE HEAT 492.4020
## 192 FREEZE 446.2250
## 834 TORNADO 414.9531
## 760 THUNDERSTORM WIND 414.8431
## 275 HEAT 401.4615
## 957 WILDFIRE 295.4728
TopTotalDmgEvts <- Dmg_Evtype[order(Dmg_Evtype$TotalDmgM,decreasing=TRUE),][1:20,c("EVTYPE","TotalDmgM")]
TopTotalDmgEvts
## EVTYPE TotalDmgM
## 170 FLOOD 150319.678
## 411 HURRICANE/TYPHOON 71913.713
## 834 TORNADO 57352.114
## 670 STORM SURGE 43323.541
## 244 HAIL 18758.221
## 153 FLASH FLOOD 17562.129
## 95 DROUGHT 15018.672
## 402 HURRICANE 14610.229
## 590 RIVER FLOOD 10148.405
## 427 ICE STORM 8967.041
## 848 TROPICAL STORM 8382.237
## 972 WINTER STORM 6715.441
## 359 HIGH WIND 5908.618
## 957 WILDFIRE 5060.587
## 856 TSTM WIND 5038.936
## 671 STORM SURGE/TIDE 4642.038
## 760 THUNDERSTORM WIND 3897.964
## 409 HURRICANE OPAL 3191.846
## 955 WILD/FOREST FIRE 3108.626
## 298 HEAVY RAIN/SEVERE WEATHER 2500.000
FatalityPlotSet <- TopFatalityEvts[order(TopFatalityEvts$FATALITIES,decreasing=FALSE),][11:20,]
InjuryPlotSet <- TopInjuryEvts[order(TopInjuryEvts$INJURIES,decreasing=FALSE),][11:20,]
CasualtyPlotSet <- TopCasualtyEvts[order(TopCasualtyEvts$TotalCasualties,decreasing=FALSE),][11:20,]
par(mfrow=c(3,1))
par(mar=c(4,9.5,1.5,2))
barplot(FatalityPlotSet$FATALITIES, main="Top 10 Total Fatality Event Type in US from 1950 to 2011 by", horiz=TRUE,xlim=c(0,6000), names.arg=FatalityPlotSet$EVTYPE,cex.names=0.8,las=1)
barplot(InjuryPlotSet$INJURIES, main="Top 10 Total Injury Event Type in US from 1950 to 2011", horiz=TRUE,xlim=c(0,92000), names.arg=InjuryPlotSet$EVTYPE,cex.names=0.8,las=1,xaxp=c(0,95000,19))
barplot(CasualtyPlotSet$TotalCasualties, main="Top 10 Total Casualty Event Type in US from 1950 to 2011",xlab="Number of Persons", horiz=TRUE,xlim=c(0,98000), names.arg=CasualtyPlotSet$EVTYPE,cex.names=0.8,las=1,xaxp=c(0,100000,20))
PropDmgPlotSet <- TopPropDmgEvts[order(TopPropDmgEvts$PropDmgM,decreasing=FALSE),][11:20,]
CropDmgPlotSet <- TopCropDmgEvts[order(TopCropDmgEvts$CropDmgM,decreasing=FALSE),][11:20,]
TotalDmgPlotSet <- TopTotalDmgEvts[order(TopTotalDmgEvts$TotalDmgM,decreasing=FALSE),][11:20,]
par(mfrow=c(3,1))
par(mar=c(4.2,9.5,1.5,2.5))
barplot(PropDmgPlotSet$PropDmgM, main="Top 10 Total Property Damage Event Type in US from 1950 to 2011", horiz=TRUE,xlim=c(0,145000), names.arg=PropDmgPlotSet$EVTYPE,cex.names=0.8,las=1,xaxp=c(0,145000,29))
barplot(CropDmgPlotSet$CropDmgM, main="Top 10 Total Crop Damage Event Type in US from 1950 to 2011", horiz=TRUE,xlim=c(0,15000), names.arg=CropDmgPlotSet$EVTYPE,cex.names=0.8,las=1,xaxp=c(0,15000,15))
barplot(TotalDmgPlotSet$TotalDmgM, main="Top 10 Total Damage Event Type in US from 1950 to 2011",xlab="Damage Cost (in Millions)", horiz=TRUE,xlim=c(0,151000), names.arg=TotalDmgPlotSet$EVTYPE,cex.names=0.8,las=1,xaxp=c(0,150000,15))