Tornados are the most damaging weather phenomenon in terms of monetary damage and in terms of lives lost. Heat is the second most dangerous phenomenon in terms of lives taken; however doens’t even register in the top ten for property damage. Winter weather is deadly but not nearly as deadly as heat although winter weather is much costlier in terms of property damage. Thunderstorms are a weather even that are very costly in terms of cost; however much lest costly in terms of lives lost. This particular analysis coud be improved with a better understaning of the costs associated with preventing property damage or mortality with the different weather events assuming the goal is better allocation of resources to prevent fatalaties and monetary damage. In addition there could be better aggregation of weather events with better domain knowledge.
url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(url,destfile = "storm.csv",method = "curl")
library(data.table)
storm = data.table(read.csv("storm.csv",sep = ","))
library(plyr)
options(scipen = 999)
#
event.summary = ddply(.data = storm,
.variables = .(EVTYPE),
summarize,
count = sum(!(is.na(EVTYPE))),
fatality_sum = as.numeric(sum(FATALITIES,na.rm = TRUE)),
injuries_sum = as.numeric(sum(INJURIES,na.rm = TRUE)),
fatal_and_injury = as.numeric(sum(FATALITIES,na.rm = TRUE) +
sum(INJURIES,na.rm = TRUE))
)
library(data.table)
storm.merge = data.table(subset(storm,
select = c('EVTYPE','FATALITIES','INJURIES',
'PROPDMG','PROPDMGEXP',
'CROPDMG','CROPDMGEXP')))
#Merge Heat related events together
x = grep("HEAT",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: HEAT
## 2: EXTREME HEAT
## 3: EXCESSIVE HEAT
## 4: RECORD HEAT
## 5: HEAT WAVE
## 6: DROUGHT/EXCESSIVE HEAT
## 7: RECORD HEAT WAVE
## 8: RECORD/EXCESSIVE HEAT
## 9: HEAT WAVES
## 10: HEAT WAVE DROUGHT
## 11: HEAT/DROUGHT
## 12: HEAT DROUGHT
## 13: EXCESSIVE HEAT/DROUGHT
storm.merge[x,EVTYPE := "HEAT"]
x = grep("THUNDERSTORM",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: THUNDERSTORM WINDS
## 2: THUNDERSTORM WIND
## 3: THUNDERSTORM WINS
## 4: THUNDERSTORM WINDS LIGHTNING
## 5: THUNDERSTORM WINDS/HAIL
## 6: THUNDERSTORM WINDS HAIL
## 7: FLASH FLOODING/THUNDERSTORM WI
## 8: THUNDERSTORM
## 9: THUNDERSTORM WINDS/FUNNEL CLOU
## 10: SEVERE THUNDERSTORM
## 11: SEVERE THUNDERSTORMS
## 12: SEVERE THUNDERSTORM WINDS
## 13: THUNDERSTORMS WINDS
## 14: THUNDERSTORMS
## 15: LIGHTNING THUNDERSTORM WINDSS
## 16: THUNDERSTORM WINDS 60
## 17: THUNDERSTORM WINDSS
## 18: LIGHTNING THUNDERSTORM WINDS
## 19: LIGHTNING AND THUNDERSTORM WIN
## 20: THUNDERSTORM WINDS53
## 21: THUNDERSTORM WINDS 13
## 22: THUNDERSTORM WINDS URBAN FLOOD
## 23: THUNDERSTORM WINDS SMALL STREA
## 24: THUNDERSTORM WINDS 2
## 25: THUNDERSTORM WINDS 61
## 26: THUNDERSTORM DAMAGE
## 27: THUNDERSTORMW 50
## 28: THUNDERSTORMS WIND
## 29: THUNDERSTORM WINDS
## 30: THUNDERSTORM WINDS/ HAIL
## 31: THUNDERSTORM WIND/LIGHTNING
## 32: THUNDERSTORM WIND G50
## 33: THUNDERSTORM WINDS/HEAVY RAIN
## 34: THUNDERSTORM WINDS LE CEN
## 35: THUNDERSTORM WINDS G
## 36: THUNDERSTORM WIND G60
## 37: THUNDERSTORM WINDS.
## 38: THUNDERSTORM WIND G55
## 39: THUNDERSTORM WINDS G60
## 40: THUNDERSTORM WINDS FUNNEL CLOU
## 41: THUNDERSTORM WINDS 62
## 42: THUNDERSTORM WINDS/FLASH FLOOD
## 43: THUNDERSTORM WINDS 53
## 44: THUNDERSTORM WIND 59
## 45: THUNDERSTORM WIND 52
## 46: THUNDERSTORM WIND 69
## 47: THUNDERSTORMW WINDS
## 48: THUNDERSTORM WIND 60 MPH
## 49: THUNDERSTORM WIND 65MPH
## 50: THUNDERSTORM WIND/ TREES
## 51: THUNDERSTORM WIND/AWNING
## 52: THUNDERSTORM WIND 98 MPH
## 53: THUNDERSTORM WIND TREES
## 54: THUNDERSTORM WIND 59 MPH
## 55: THUNDERSTORM WINDS 63 MPH
## 56: THUNDERSTORM WIND/ TREE
## 57: THUNDERSTORM DAMAGE TO
## 58: THUNDERSTORM WIND 65 MPH
## 59: THUNDERSTORM WIND.
## 60: THUNDERSTORM WIND 59 MPH.
## 61: THUNDERSTORM HAIL
## 62: THUNDERSTORM WINDSHAIL
## 63: THUNDERSTORM WINDS AND
## 64: THUNDERSTORM WINDS 50
## 65: THUNDERSTORM WIND G52
## 66: THUNDERSTORM WINDS 52
## 67: THUNDERSTORM WIND G51
## 68: THUNDERSTORM WIND G61
## 69: THUNDERSTORM WINDS/FLOODING
## 70: THUNDERSTORM W INDS
## 71: THUNDERSTORM WIND 50
## 72: THUNDERSTORM WIND 56
## 73: THUNDERSTORM WIND/HAIL
## 74: THUNDERSTORMW
## 75: THUNDERSTORM WINDS/ FLOOD
## 76: THUNDERSTORMWINDS
## 77: THUNDERSTORM WINDS HEAVY RAIN
## 78: THUNDERSTORM WIND (G40)
## 79: GUSTY THUNDERSTORM WINDS
## 80: GUSTY THUNDERSTORM WIND
## 81: MARINE THUNDERSTORM WIND
## EVTYPE
storm.merge[x,EVTYPE := "THUNDERSTORMS"]
x = grep("LIGHTNING",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: LIGHTNING
## 2: LIGHTNING AND HEAVY RAIN
## 3: HEAVY RAIN/LIGHTNING
## 4: LIGHTNING/HEAVY RAIN
## 5: LIGHTNING INJURY
## 6: LIGHTNING.
## 7: LIGHTNING FIRE
## 8: LIGHTNING DAMAGE
## 9: LIGHTNING AND WINDS
## 10: LIGHTNING WAUSEON
## 11: TSTM WIND AND LIGHTNING
## 12: LIGHTNING
storm.merge[x,EVTYPE := "LIGHTNING"]
x = grep("COLD",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: RECORD COLD
## 2: COLD
## 3: EXTREME COLD
## 4: RECORD COLD AND HIGH WIND
## 5: UNSEASONABLY COLD
## 6: EXTREME/RECORD COLD
## 7: SEVERE COLD
## 8: COLD WAVE
## 9: COLD AND WET CONDITIONS
## 10: COLD AIR FUNNEL
## 11: COLD AIR FUNNELS
## 12: COLD AIR TORNADO
## 13: PROLONG COLD
## 14: FOG AND COLD TEMPERATURES
## 15: SNOW/COLD
## 16: RECORD COLD/FROST
## 17: RECORD SNOW/COLD
## 18: HIGH WINDS/COLD
## 19: COLD/WINDS
## 20: SNOW/ BITTER COLD
## 21: COLD WEATHER
## 22: SNOW AND COLD
## 23: PROLONG COLD/SNOW
## 24: SNOW\\COLD
## 25: COLD AND SNOW
## 26: COLD AND FROST
## 27: COLD TEMPERATURES
## 28: COLD WIND CHILL TEMPERATURES
## 29: RECORD COLD
## 30: UNUSUALLY COLD
## 31: EXTREME COLD/WIND CHILL
## 32: COLD/WIND CHILL
## EVTYPE
storm.merge[x,EVTYPE := "COLD"]
x = grep("HURRICANE|TYPHOON",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: HURRICANE OPAL/HIGH WINDS
## 2: HURRICANE ERIN
## 3: HURRICANE OPAL
## 4: HURRICANE
## 5: HURRICANE-GENERATED SWELLS
## 6: HURRICANE EMILY
## 7: HURRICANE GORDON
## 8: HURRICANE FELIX
## 9: TYPHOON
## 10: HURRICANE/TYPHOON
storm.merge[x,EVTYPE := "HURRICANE/TYPHOON"]
x = grep("CURRENT",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: RIP CURRENT
## 2: RIP CURRENTS HEAVY SURF
## 3: RIP CURRENTS/HEAVY SURF
## 4: RIP CURRENTS
storm.merge[x,EVTYPE := "CURRENT"]
x = grep("SNOW|BLIZZARD|WINTER",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: SNOW
## 2: SNOW/ICE
## 3: WINTER STORM
## 4: BLIZZARD
## 5: BLIZZARD WEATHER
## ---
## 117: SNOW ADVISORY
## 118: UNUSUALLY LATE SNOW
## 119: ACCUMULATED SNOWFALL
## 120: FALLING SNOW/ICE
## 121: WINTER WEATHER/MIX
storm.merge[x,EVTYPE := "WINTER WEATHER"]
x = grep("WIND",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: TSTM WIND
## 2: HIGH WINDS
## 3: WIND
## 4: HIGH WIND
## 5: WIND CHILL
## ---
## 110: NON TSTM WIND
## 111: MARINE TSTM WIND
## 112: WHIRLWIND
## 113: MARINE HIGH WIND
## 114: MARINE STRONG WIND
storm.merge[x,EVTYPE := "WIND"]
x = grep("FIRE",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: WILD FIRES
## 2: WILDFIRE
## 3: WILD/FOREST FIRE
## 4: GRASS FIRES
## 5: FOREST FIRES
## 6: WILDFIRES
## 7: WILD/FOREST FIRES
## 8: BRUSH FIRES
## 9: BRUSH FIRE
## 10: RED FLAG FIRE WX
storm.merge[x,EVTYPE := "FIRE"]
x = grep("FLOOD",storm.merge$EVTYPE)
unique(subset(storm.merge[x,],select = c("EVTYPE")))
## EVTYPE
## 1: ICE STORM/FLASH FLOOD
## 2: FLASH FLOOD
## 3: FLASH FLOODING
## 4: FLOODING
## 5: FLOOD
## 6: BREAKUP FLOODING
## 7: RIVER FLOOD
## 8: COASTAL FLOOD
## 9: FLOOD WATCH/
## 10: FLASH FLOODS
## 11: FLOODING/HEAVY RAIN
## 12: HEAVY SURF COASTAL FLOODING
## 13: URBAN FLOODING
## 14: URBAN/SMALL FLOODING
## 15: LOCAL FLOOD
## 16: FLOOD/FLASH FLOOD
## 17: URBAN/SMALL STREAM FLOODING
## 18: STREAM FLOODING
## 19: FLASH FLOOD/
## 20: SMALL STREAM URBAN FLOOD
## 21: URBAN FLOOD
## 22: HEAVY RAIN/FLOODING
## 23: COASTAL FLOODING
## 24: URBAN/SMALL STREAM FLOOD
## 25: MINOR FLOODING
## 26: URBAN/SMALL STREAM FLOOD
## 27: URBAN AND SMALL STREAM FLOOD
## 28: SMALL STREAM FLOODING
## 29: FLOODS
## 30: SMALL STREAM AND URBAN FLOODIN
## 31: SMALL STREAM/URBAN FLOOD
## 32: SMALL STREAM AND URBAN FLOOD
## 33: RURAL FLOOD
## 34: MAJOR FLOOD
## 35: ICE JAM FLOODING
## 36: STREET FLOOD
## 37: SMALL STREAM FLOOD
## 38: LAKE FLOOD
## 39: URBAN AND SMALL STREAM FLOODIN
## 40: RIVER AND STREAM FLOOD
## 41: MINOR FLOOD
## 42: RIVER FLOODING
## 43: FLOOD/RIVER FLOOD
## 44: MUD SLIDES URBAN FLOODING
## 45: HAIL FLOODING
## 46: HEAVY RAIN AND FLOOD
## 47: LOCAL FLASH FLOOD
## 48: FLOOD/FLASH FLOODING
## 49: COASTAL/TIDAL FLOOD
## 50: FLASH FLOOD/FLOOD
## 51: FLASH FLOOD FROM ICE JAMS
## 52: FLASH FLOOD - HEAVY RAIN
## 53: FLASH FLOOD/ STREET
## 54: FLASH FLOOD/HEAVY RAIN
## 55: FLOOD FLASH
## 56: FLOOD FLOOD/FLASH
## 57: TIDAL FLOOD
## 58: FLOOD/FLASH
## 59: HEAVY RAINS/FLOODING
## 60: HIGHWAY FLOODING
## 61: FLASH FLOOD/ FLOOD
## 62: HEAVY RAIN/MUDSLIDES/FLOOD
## 63: BEACH EROSION/COASTAL FLOOD
## 64: FLASH FLOODING/FLOOD
## 65: BEACH FLOOD
## 66: FLOOD & HEAVY RAIN
## 67: FLOOD/FLASHFLOOD
## 68: URBAN SMALL STREAM FLOOD
## 69: URBAN FLOOD LANDSLIDE
## 70: URBAN FLOODS
## 71: HEAVY RAIN/URBAN FLOOD
## 72: FLASH FLOOD/LANDSLIDE
## 73: LANDSLIDE/URBAN FLOOD
## 74: FLASH FLOOD LANDSLIDES
## 75: COASTALFLOOD
## 76: STREET FLOODING
## 77: TIDAL FLOODING
## 78: COASTAL FLOOD
## 79: COASTAL FLOODING/EROSION
## 80: URBAN/STREET FLOODING
## 81: COASTAL FLOODING/EROSION
## 82: FLOOD/FLASH/FLOOD
## 83: FLASH FLOOD
## 84: CSTL FLOODING/EROSION
## 85: LAKESHORE FLOOD
## EVTYPE
storm.merge[x,EVTYPE := "FLOOD"]
library(plyr)
options(scipen = 999)
#Review the new groups
event.summary2 = ddply(.data = storm.merge,
.variables = .(EVTYPE),
summarize,
count = sum(!(is.na(EVTYPE))),
fatality_sum = as.numeric(sum(FATALITIES,na.rm = TRUE)),
injuries_sum = as.numeric(sum(INJURIES,na.rm = TRUE)),
fatal_inj_total = as.numeric(sum(FATALITIES,na.rm = TRUE) +
sum(INJURIES,na.rm = TRUE)),
cost_total = as.numeric(sum(PROPDMG,na.rm = TRUE)) +
as.numeric(sum(CROPDMG,na.rm = TRUE))
)
summary(event.summary2)
## EVTYPE count fatality_sum
## HIGH SURF ADVISORY: 1 Min. : 1 Min. : 0.00
## WATERSPOUT : 1 1st Qu.: 1 1st Qu.: 0.00
## ? : 1 Median : 2 Median : 0.00
## ABNORMAL WARMTH : 1 Mean : 1759 Mean : 29.52
## ABNORMALLY DRY : 1 3rd Qu.: 5 3rd Qu.: 0.00
## ABNORMALLY WET : 1 Max. :288661 Max. :5633.00
## (Other) :507
## injuries_sum fatal_inj_total cost_total
## Min. : 0.0 Min. : 0.0 Min. : 0
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0
## Median : 0.0 Median : 0.0 Median : 0
## Mean : 273.9 Mean : 303.5 Mean : 23903
## 3rd Qu.: 0.0 3rd Qu.: 0.0 3rd Qu.: 16
## Max. :91346.0 Max. :96979.0 Max. :3312277
##
#Get top 10 events with 200 or more casualties
fatality.top10 = subset(event.summary2,fatality_sum > 200)
#Create bar graph of events
library(ggplot2)
ggplot(fatality.top10,
aes(x=reorder(EVTYPE,fatality_sum), y=fatality_sum)) +
coord_flip() +
geom_bar(stat='identity') +
xlab("Event") +
ylab("Fatalities") +
ggtitle("Weather Event Fatality totals")
#Get top 10 events with 200 or more casualties
cost.top10 = subset(event.summary2,cost_total > 50000)
#Create bar graph of events
library(ggplot2)
ggplot(cost.top10,
aes(x=reorder(EVTYPE,cost_total), y=cost_total)) +
coord_flip() +
geom_bar(stat='identity') +
xlab("Event") +
ylab("Cost") +
ggtitle("Weather Event Cost totals (thousands)")