Created by Jianfa Chen on August 14, 2016.
All rights reserved.
I analyzed data from the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database (1950-2011).
* The five event types most harmful to population health are Tornado, Thunderstorm Wind, Excessive Heat, Flood, and Lightning.
* The five event types with the greatest economic consequences are Flood, Hurricane, Tornado, Storm Surge/Tide, and Hail.
# Fetch the compressed NOAA storm data and load it into `weatherData`.
# The archive is only downloaded when it is not already on disk, so repeated
# runs do not fetch it again. Empty fields are read as NA.
stormFile <- "./stormData.csv.bz2"
if (!file.exists(stormFile)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    stormFile,
    mode = "wb" # the bz2 archive is binary; request a binary-mode transfer
  )
}
weatherData <- read.csv(stormFile, header = TRUE, na.strings = "")
The data set contains many variables, but only some of them are used in our analysis. We keep only those variables for further analysis.
# List the column names of the raw data set.
names(weatherData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep only the event type, the casualty counts and the property/crop damage
# amounts together with their exponent codes.
weatherData <- weatherData[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
# Preview the first rows of the reduced data frame.
head(weatherData)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0 <NA>
## 2 TORNADO 0 0 2.5 K 0 <NA>
## 3 TORNADO 0 2 25.0 K 0 <NA>
## 4 TORNADO 0 2 2.5 K 0 <NA>
## 5 TORNADO 0 2 2.5 K 0 <NA>
## 6 TORNADO 0 6 2.5 K 0 <NA>
# Show the column types and leading values of the reduced data frame.
str(weatherData)
## 'data.frame': 902297 obs. of 7 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 18 levels "-","?","+","0",..: 16 16 16 16 16 16 16 16 16 16 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 8 levels "?","0","2","B",..: NA NA NA NA NA NA NA NA NA NA ...
# Print a per-column summary of the reduced data frame.
summary(weatherData)
## EVTYPE FATALITIES INJURIES
## HAIL :288661 Min. : 0.0000 Min. : 0.0000
## TSTM WIND :219940 1st Qu.: 0.0000 1st Qu.: 0.0000
## THUNDERSTORM WIND: 82563 Median : 0.0000 Median : 0.0000
## TORNADO : 60652 Mean : 0.0168 Mean : 0.1557
## FLASH FLOOD : 54277 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## FLOOD : 25326 Max. :583.0000 Max. :1700.0000
## (Other) :170878
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 K :424665 Min. : 0.000 K :281832
## 1st Qu.: 0.00 M : 11330 1st Qu.: 0.000 M : 1994
## Median : 0.00 0 : 216 Median : 0.000 k : 21
## Mean : 12.06 B : 40 Mean : 1.527 0 : 19
## 3rd Qu.: 0.50 5 : 28 3rd Qu.: 0.000 B : 9
## Max. :5000.00 (Other): 84 Max. :990.000 (Other): 9
## NA's :465934 NA's :618413
Looking at the data in detail, we find several problems: - The 985 event types in the data set should be categorized into the 48 groups listed in the storm data event table; - PROPDMG should be combined with PROPDMGEXP for further analysis, and likewise CROPDMG with CROPDMGEXP; - Population health damage should be the sum of FATALITIES and INJURIES; - Economic consequence should be evaluated as the sum of PROPDMG and CROPDMG.
Reduce the number of event types from 985 to the 48 standard types.
# Drop the rows whose EVTYPE contains "Summary" (summary records, not events).
# A logical mask is used instead of negative grep() indices: when nothing
# matches, -grep() evaluates to -integer(0) and would select zero rows.
weatherData <- weatherData[!grepl("Summary", weatherData$EVTYPE), ]
# Collapse the raw EVTYPE labels into the standard storm event categories.
#
# `eventRules` is an ordered table: each element's name is the replacement
# label and its value is the regular expression that triggers it. The rules
# are applied one after another with sub(), so ORDER MATTERS -- a later rule
# sees the labels already rewritten by earlier rules (e.g. "Freezing Fog" is
# assigned before the generic FOG rule, "Tropical Depression" before the
# generic TROPICAL rule).
eventRules <- c(
  "Avalanche" = ".*AVALANCHE.*|^AVALANCE.*", # 1
  "Debris Flow" = ".*DEBRIS.*", # 2
  "Waterspout" = ".*WATER.*SPOUT.*", # 3
  "Volcanic Ash" = ".*VOLCANIC.*|.*Volcanic.*", # 4
  "Wildfire" = ".*WILD.*|.*FIRE.*|.*Fire.*|.*fire.*", # 5
  "Drought" = ".*DROUGHT.*|.*DRY.*|.*Dry.*|.*dry.*", # 6
  "Tsunami" = "TSUNAMI", # 7
  "Tornado" = ".*TORN.*", # 8
  "Hurricane" = ".*HURR.*|.*Hurr.*|.*TYPHOON.*", # 9
  "Ice Storm" = ".*ICE.*STORM.*|Icestorm.*", # 10
  "Funnel Cloud" = ".*FUNNEL.*|.*Funnel.*", # 11
  "Freezing Fog" = ".*Freezing Fog.*|.*FREEZING FOG.*|.*Ice Fog.*|Freezing Spray", # 12
  "Dense Fog" = ".*FOG.*", # 13
  "Dense Smoke" = ".*SMOKE.*", # 14
  "Dust Storm" = ".*DUST.*STORM.*", # 15
  "Dust Devil" = ".*DUST.*DEVIL.*|.*DUST.*DEVEL.*|.*SAHARAN.*DUST.*|.*Saharan.*Dust.*|Dust Devil|BLOWING DUST", # 16
  "Seiche" = "SEICHE", # 17
  "Sleet" = ".*SLEET.*|.*Sleet.*|.*sleet.*", # 18
  "Rip Current" = ".*RIP.*", # 19
  "Storm Surge/Tide" = ".*SURGE.*|.*HIGH.*TIDE.*|.*BLOW.*TIDE.*", # 20
  "High Surf" = ".*SURF.*|.*Surf.*|.*surf.*|HEAVY SEAS|HIGH WAVES", # 21
  "Tropical Depression" = ".*TROPICAL DEPRESSION.*", # 22
  "Tropical Storm" = ".*TROPICAL.*", # 23
  "Blizzard" = ".*BLIZZ.*|.*Blizz.*", # 24
  "Astronomical Low Tide" = ".*LOW TIDE.*", # 25
  "Excessive Heat" = ".*EX.*HEAT.*|Record Heat|RECORD HEAT.*|RECORD HIGH.*|HIGH.*RECORD.*", # 26
  "Heat" = ".*HEAT.*|Heatburst|Heat Wave|.*HOT.*|.*hot.*|.*HIGH.*SWELL.*", # 27
  "Marine Hail" = "MARINE HAIL", # 28
  "Marine High Wind" = "MARINE HIGH WIND", # 29
  "Marine Strong Wind" = "MARINE STRONG WIND", # 30
  "Marine Thunderstorm Wind" = "MARINE THUNDERSTORM.*|MARINE TSTM.*", # 31
  "Lake-Effect Snow" = ".*LAKE.*SNOW.*|.*Lake.*Snow.*", # 32
  "Lakeshore Flood" = ".*LAKE.*FLOOD.*", # 33
  "Coastal Flood" = ".*COAS.*FLOOD.*|.*Coas.*Flood.*|.*coas.*flood.*|.*COAS.*EROS.*|Tidal Flooding|CSTL FLOODING|TIDAL FLOODING", # 34
  "Hail" = ".*HAIL.*|^Hail.*|.*hail.*|.*PRECIPATATION.*|Heavy Precipitation", # 35
  "Frost/Freeze" = ".*FROST.*|.*Frost.*|.*frost.*|.*FREEZE.*|.*Freez.*drizzle.*|.*Freez.*rain.*|.*Freez.*Rain.*|.*ICY.*|.*ICE.*|GLAZE", # 36
  # 37: the label written here is "Extreme Cold"; the bare "Cold" alternative
  # of rule 38 then rewrites that substring, which is what produces the final
  # "Extreme Cold/Wind Chill" category.
  "Extreme Cold" = ".*EX.*COLD.*|.*EX.*WIND.*|.*RECORD.*CODE.*|.*Ex.*Cold.*|.*UN.*COLD.*|.*PRO.*COLD.*|.*Pro.*Cold.*|.*SEVERE.*COLD.*|.*Un.*Cold.*|.*Record.*Cold.*|.*BITTER.*CHILL.*",
  "Cold/Wind Chill" = ".*COLD.*|Cold|.*CHILL.*|RECORD LOW", # 38
  "Thunderstorm Wind" = ".*THUNDER.*|.*TSTM.*|^Thun.*|.*Tstm.*", # 39
  "Strong Wind" = ".*STRONG.*WIND.*|^Strong.*Wind.*|.*Strong.*wind.*", # 40
  "Lightning" = ".*LIGHTNING.*|LIGHTING|LIGNTNING", # 41
  "Flash Flood" = ".*FLASH.*|.*Flash.*", # 42
  "Flood" = ".*FLOOD.*|.*flood.*|.*River.*Flood.*|.*Urban.*Flood.*", # 43
  "Heavy Rain" = ".*RAIN.*|.*Heavy.*Rain.*|.*Heavy.*rain.*", # 44
  "Heavy Snow" = ".*HEAVY.*SNOW|.*Heavy.*Snow.*|.*SNOW.*|.*snow.*|.*Record.*Snow.*", # 45
  "High Wind" = ".*WIND.*|.*wind.*|.*Winds.*|.*Wind.*Dama.*|.*Gusty.*Wind.*|WND", # 46
  "Winter Storm" = ".*WINTER.*.*STORM.*", # 47
  "Winter Weather" = ".*WINTER.*|^Snow.*" # 48
)

# Apply every rule, in table order, to the whole label vector.
event <- weatherData$EVTYPE
for (ruleIndex in seq_along(eventRules)) {
  event <- sub(eventRules[[ruleIndex]], names(eventRules)[ruleIndex], event)
}

# The 48 standard category names; any label that is still not one of these
# after the rewrite is pooled into "Others".
standardEvent <- c(
  "Astronomical Low Tide", "Avalanche", "Blizzard", "Coastal Flood",
  "Cold/Wind Chill", "Debris Flow", "Dense Fog", "Dense Smoke", "Drought",
  "Dust Devil", "Dust Storm", "Excessive Heat", "Extreme Cold/Wind Chill",
  "Flash Flood", "Flood", "Frost/Freeze", "Funnel Cloud", "Freezing Fog",
  "Hail", "Heat", "Heavy Rain", "Heavy Snow", "High Surf", "High Wind",
  "Hurricane", "Ice Storm", "Lake-Effect Snow", "Lakeshore Flood",
  "Lightning", "Marine Hail", "Marine High Wind", "Marine Strong Wind",
  "Marine Thunderstorm Wind", "Rip Current", "Seiche", "Sleet",
  "Storm Surge/Tide", "Strong Wind", "Thunderstorm Wind", "Tornado",
  "Tropical Depression", "Tropical Storm", "Tsunami", "Volcanic Ash",
  "Waterspout", "Wildfire", "Winter Storm", "Winter Weather"
)
event[is.na(match(event, standardEvent))] <- "Others"
weatherData$EVTYPE <- event
Clean PROPDMGEXP and CROPDMGEXP, and compute the economic damage and population health damage.
# Translate damage-exponent codes into numeric multipliers.
#
# code: vector (factor or character) of exponent codes.
# Returns a numeric vector of the same length: 1e3 for codes containing K/k,
# 1e6 for M/m, 1e9 for B/b, and 1 for everything else (digits, symbols, NA).
# If a code contained several of these letters, K takes precedence over M,
# and M over B.
exponentMultiplier <- function(code) {
  code <- as.character(code)
  multiplier <- rep(1, length(code))
  # Assigned in reverse precedence so that the later assignments win.
  multiplier[grepl("[Bb]", code)] <- 1e9
  multiplier[grepl("[Mm]", code)] <- 1e6
  multiplier[grepl("[Kk]", code)] <- 1e3
  multiplier
}

# Replace the exponent codes by their multipliers for both damage kinds.
weatherData$PROPDMGEXP <- exponentMultiplier(weatherData$PROPDMGEXP)
weatherData$CROPDMGEXP <- exponentMultiplier(weatherData$CROPDMGEXP)

# Economic damage = property damage + crop damage, each scaled by its
# multiplier.
weatherData$EconomicDamage <-
  weatherData$PROPDMG * weatherData$PROPDMGEXP +
  weatherData$CROPDMG * weatherData$CROPDMGEXP
# Health damage = fatalities + injuries.
weatherData$HealthDamage <- weatherData$FATALITIES + weatherData$INJURIES
Population health damage and economic consequence caused by each event type.
# Total health damage (fatalities + injuries) per event type, largest first.
healthDamageTable <- sort(
  tapply(weatherData$HealthDamage, weatherData$EVTYPE, sum),
  decreasing = TRUE
)
healthDamageTable
## Tornado Thunderstorm Wind Excessive Heat
## 97022 10122 8748
## Flood Lightning Heat
## 7278 6048 3594
## Flash Flood Ice Storm High Wind
## 2835 2081 1903
## Wildfire Winter Storm Hail
## 1698 1554 1487
## Hurricane Heavy Snow Dense Fog
## 1468 1290 1158
## Rip Current Blizzard Winter Weather
## 1106 906 602
## Extreme Cold/Wind Chill Dust Storm Frost/Freeze
## 567 462 451
## Tropical Storm Others Heavy Rain
## 449 440 414
## Strong Wind High Surf Avalanche
## 412 411 396
## Cold/Wind Chill Tsunami Drought
## 222 162 86
## Waterspout Storm Surge/Tide Marine Thunderstorm Wind
## 78 67 53
## Dust Devil Marine Strong Wind Coastal Flood
## 45 36 14
## Funnel Cloud Marine High Wind Sleet
## 3 2 2
## Freezing Fog Astronomical Low Tide Debris Flow
## 1 0 0
## Dense Smoke Lake-Effect Snow Lakeshore Flood
## 0 0 0
## Marine Hail Seiche Tropical Depression
## 0 0 0
## Volcanic Ash
## 0
# Bar chart of the five event types with the largest health damage totals.
barplot(
  healthDamageTable[seq_len(5)],
  main = "Top five events most harmful to population health"
)
# Total economic damage per event type, largest first.
EconomicDamageTable <- sort(
  tapply(weatherData$EconomicDamage, weatherData$EVTYPE, sum),
  decreasing = TRUE
)
EconomicDamageTable
## Flood Hurricane Tornado
## 161002698635 90872527810 58959394049
## Storm Surge/Tide Hail Flash Flood
## 47975504150 19131699945 18439375337
## Drought Thunderstorm Wind Ice Storm
## 15025675380 12132326792 8968141360
## Wildfire Tropical Storm Winter Storm
## 8904910130 8409286550 6716441251
## High Wind Heavy Rain Frost/Freeze
## 6638663443 4040700990 2005965550
## Extreme Cold/Wind Chill Heavy Snow Lightning
## 1437455900 1095815802 945824537
## Blizzard Others Excessive Heat
## 771973950 628688900 505270700
## Coastal Flood Heat Strong Wind
## 433401060 419273550 249327740
## Cold/Wind Chill Tsunami High Surf
## 243694000 144082000 116525000
## Waterspout Winter Weather Lake-Effect Snow
## 60730700 42418000 40682000
## Dense Fog Dust Storm Avalanche
## 22829500 9199000 8721800
## Lakeshore Flood Marine Thunderstorm Wind Freezing Fog
## 7570000 5907400 2182000
## Tropical Depression Sleet Marine High Wind
## 1737000 1500000 1297010
## Seiche Dust Devil Volcanic Ash
## 980000 738630 500000
## Marine Strong Wind Astronomical Low Tide Funnel Cloud
## 418330 320000 199600
## Rip Current Dense Smoke Marine Hail
## 163000 100000 4000
## Debris Flow
## 0
# Bar chart of the five event types with the largest economic damage totals.
barplot(
  EconomicDamageTable[seq_len(5)],
  main = "Five event type caused greatest economic consequence"
)
# Print system information for the machine that produced this report.
Sys.info()
## sysname
## "Darwin"
## release
## "13.4.0"
## version
## "Darwin Kernel Version 13.4.0: Wed Mar 18 16:20:14 PDT 2015; root:xnu-2422.115.14~1/RELEASE_X86_64"
## nodename
## "Jianfas-MacBook-Air.local"
## machine
## "x86_64"
## login
## "jianfa"
## user
## "jianfa"
## effective_user
## "jianfa"