Additionally, records that did not cause a fatality, an injury, or an economic loss are excluded from this analysis.
## Filter data starting 1996, as data prior is not available for all EVTYPES
## mdy_hms() parses the timestamps into UTC by default.
d1$BGN_DATE <- mdy_hms(d1$BGN_DATE)
## Build the cutoff as an explicit UTC instant. Comparing a date-time with a
## bare string converts the string in the session's local time zone, which
## shifts the boundary by the local UTC offset and can drop (or admit) events
## stamped at midnight on the cutoff day.
start_1996 <- as.POSIXct("1996-01-01 00:00:00", tz = "UTC")
d1 <- filter(d1, BGN_DATE >= start_1996)
## Keep only rows that report at least one Fatality, Injury, Prop Damage or
## Crop Damage
d1 <- filter(d1, FATALITIES != 0 | INJURIES != 0 | PROPDMG != 0 | CROPDMG != 0)
## Select only the 7 columns that are necessary for further processing
d1 <- select(
  d1,
  EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)
Adjust crop and property losses into standard numeric values.
## Scale a damage amount by its exponent code.
##   amount   - numeric vector of damage figures
##   exponent - vector of exponent codes, same length as amount
## "K" multiplies by 1e3, "M" by 1e6 and "B" by 1e9. Codes are compared
## case-insensitively and ignoring surrounding whitespace. Any other code
## (including empty or missing) leaves the amount unchanged.
## Returns a numeric vector the same length as amount.
scale_by_exponent <- function(amount, exponent) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(trimws(as.character(exponent)))
  ## Lookup by name yields NA for unknown, empty or missing codes
  scale <- unname(multipliers[code])
  scale[is.na(scale)] <- 1
  amount * scale
}
d1$PROPDMG <- scale_by_exponent(d1$PROPDMG, d1$PROPDMGEXP)
d1$CROPDMG <- scale_by_exponent(d1$CROPDMG, d1$CROPDMGEXP)
## The exponent columns are dropped once the amounts have been scaled, so the
## scaling cannot be applied a second time by accident
d1 <- select(d1, EVTYPE, FATALITIES, INJURIES, PROPDMG, CROPDMG)
The data contain many errors with regard to EVTYPE coding. The steps below adjust for various errors, with the end result of matching most records to the appropriate official EVTYPE.
## Convert all to lower case and strip leading/trailing whitespace, so that
## stray padding does not count against the edit-distance match further down
d1$EVTYPE <- trimws(tolower(d1$EVTYPE))
## The 48 official EVTYPES, as outlined in National Weather Service
## Instruction 10-1605 (dated Aug2007). Entries are grouped by initial letter;
## the order is the same as in the instruction's listing used so far.
officialEVTYPE <- c(
  "Astronomical Low Tide", "Avalanche",
  "Blizzard",
  "Coastal Flood", "Cold/Wind Chill",
  "Debris Flow", "Dense Fog", "Dense Smoke", "Drought",
  "Dust Devil", "Dust Storm",
  "Excessive Heat", "Extreme Cold/Wind Chill",
  "Flash Flood", "Flood", "Freezing Fog", "Frost/Freeze", "Funnel Cloud",
  "Hail", "Heat", "Heavy Rain", "Heavy Snow",
  "High Surf", "High Wind", "Hurricane/Typhoon",
  "Ice Storm",
  "Lakeshore Flood", "Lake-Effect Snow", "Lightning",
  "Marine Hail", "Marine High Wind", "Marine Strong Wind",
  "Marine Thunderstorm Wind",
  "Rip Current",
  "Seiche", "Sleet", "Storm Tide", "Strong Wind",
  "Thunderstorm Wind", "Tornado", "Tropical Depression", "Tropical Storm",
  "Tsunami",
  "Volcanic Ash",
  "Waterspout", "Wildfire", "Winter Storm", "Winter Weather"
)
## Clean EVTYPE to conform to the official EVTYPE list.
##
## Each rule is "regex pattern" = "replacement". Rules run in order, and each
## one sees the results of the rules before it. Two safeguards apply:
##   * A value that already equals an official name (lower-cased) is never
##     rewritten, so broad patterns such as "tide", "snow", "hail", "cold" or
##     "thunderstorm" cannot collapse "astronomical low tide",
##     "lake-effect snow", "marine hail", "cold/wind chill" or
##     "marine thunderstorm wind" into a different category.
##   * Every replacement is spelled exactly like the lower-cased official
##     name, so the later fuzzy match resolves it at distance zero instead of
##     depending on the edit-distance tolerance.
official_lower <- tolower(officialEVTYPE)
evtype_rules <- c(
  "hurricane|typhoon" = "hurricane/typhoon",
  ## "tstm" abbreviates thunderstorm; the marine variant must be handled first
  ## so it is not absorbed by the generic rule below
  "marine tstm wind" = "marine thunderstorm wind",
  "tstm wind" = "thunderstorm wind",
  "gusty wind" = "high wind",
  "thunderstorm" = "thunderstorm wind",
  "frost|freeze|freezing|cold" = "frost/freeze",
  "surf" = "high surf",
  "fire" = "wildfire",
  "tide" = "storm tide",
  "flash/flood|flash flood" = "flash flood",
  "stream fld|river flood|river flooding|unseasonal rain|dam break" = "flood",
  "tidal flooding|cstl flood|coastal flooding|flooding/erosion|beach erosion|coastal erosion" = "coastal flood",
  "snow" = "heavy snow",
  "heat wave" = "excessive heat",
  "landslide|mudslide|mud slide|rock slide" = "debris flow",
  "hail" = "hail",
  "extreme windchill" = "extreme cold/wind chill"
)
for (i in seq_along(evtype_rules)) {
  ## Recompute the "already official" guard on every pass: earlier rules turn
  ## values into official names, which later rules must then leave alone
  hit <- grepl(names(evtype_rules)[i], d1$EVTYPE) &
    !(d1$EVTYPE %in% official_lower)
  d1$EVTYPE[hit] <- evtype_rules[[i]]
}
## Approximate-match every cleaned EVTYPE against the lower-cased official
## names; an index of NA means nothing was found within edit distance 4
matchEVTYPE <- amatch(
  x = d1$EVTYPE,
  table = tolower(officialEVTYPE),
  maxDist = 4
)
## Look the matched index up in the properly-cased official list
d1 <- mutate(d1, correctedEVTYPE = officialEVTYPE[matchEVTYPE])
## Share of rows that could not be placed on the official list. This is a
## fraction between 0 and 1, not a percentage.
unmatched <- is.na(d1$correctedEVTYPE)
m1 <- mean(unmatched)
Unable to match approximately 0.083% of the data (a proportion of $8.3452137 \times 10^{-4}$) with an appropriate official EVTYPE.