Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The objective is to establish which type of event group have the most cost in damage, fatalities and injuries.
From 1950 to 2011 a total of 902,297 events were reported, which the 50% (448,877) are related to events of Wind, Funnel, Tornados the 37% (335,609) of the events are related to cold weather such as Hail, Heavy Snow, Winter storm/Blizzards and the remaining 13% are events related to rain, heat weather and sea (like heavy rains, wildfire and hurricanes).
The group of events that has higher cost accumulated in damage during the period of 61 years has been the Rain and floods related events with an average of 5 million of dollars per event and the most damaging per event has been the sea related events (hurricanes for example) with an average of 94 millions of dollar in damage.
Related to fatalities reported, the events related with Winds/Tornados have the higher sum of deaths in the same period with 7,909 cases with an average of 2.2 deaths per event, following by the heat related events with 3,072 deaths but with an average of almost 4 deaths.
According to injuries, as fatalities, the wind event group also has the higher number of injuries reported with 108,209 cases with an average of 7 injuries per event. The heat related events has an average of 18 injuries per event and the group of rain and floods has an average of 13 injuries per event.
In conclusion, the events related to Sea and coast has higher cost per event, but overall, since are more frequent the rain and floods has the most accumulated direct damages in cost; the most deadly group of events are the wind related but per event, the high temperature events cause more deaths and finally the injuries also the wind related events affects more than the others event group.
A limitation of this analysis is the re-clasification of the event type since in the dataset the event type is an open field type, to overcome this problem, a new re coded variable was created and also a group of event type with 6 groups.
Bellow see the code and the result graph to enrich the previous summary.
db_mod <-db %>%
select(
state,state_2,
evtype,
bgn_date,
end_date,
length,
width,
mag,
fatalities,
injuries,
propdmg,
propdmgexp,
cropdmg,
cropdmgexp) %>%
mutate(
bgn_date2=as.Date(mdy_hms(bgn_date)),
year_evt=year(bgn_date2),
propdmgexp=toupper(propdmgexp),
real_value=as.numeric(case_when(propdmgexp=="H" & propdmg>0~propdmg*100,
propdmgexp=="K" & propdmg>0~propdmg*1000,
propdmgexp=="M" & propdmg>0~propdmg*1000000,
propdmgexp=="B" & propdmg>0~propdmg*1000000000,
propdmg==0~0,
TRUE~0)),
evtype=toupper(evtype),
evtype_recode=case_when((str_detect(evtype, "WIND")|
str_detect(evtype, "TURBULENCE")|
str_detect(evtype, "TORN")|
str_detect(evtype, "GUSTNADO")|
str_detect(evtype, "THUNDERSTORM")|
str_detect(evtype, "APACHE")|
str_detect(evtype, "LIGHT")|
str_detect(evtype, "LIGNTNING")|
str_detect(evtype, "MICROBURST")|
str_detect(evtype, "LANDSPOUT")|
str_detect(evtype, "FUNNEL"))~"WIND, FUNNEL AND TORNADO \n RELATED EVENTS",
(str_detect(evtype, "HURRICANE")|
str_detect(evtype, "TROPICAL")|
str_detect(evtype, "MARINE")|
str_detect(evtype, "RIP CURRENT")|
str_detect(evtype, "TSUNAMI")|
str_detect(evtype, "WATER")|
str_detect(evtype, "WAVE")|
str_detect(evtype, "BEACH")|
str_detect(evtype, "DAM ")|
str_detect(evtype, "STORM SURGE")|
str_detect(evtype, "COASTAL")|
str_detect(evtype, "LANDSLIDE")|
str_detect(evtype, "SEAS")|
str_detect(evtype, "SWELL")|
str_detect(evtype, "SEICHE")|
str_detect(evtype, "TIDE")|
str_detect(evtype, "SURF")|
str_detect(evtype, "DROWNING")|
str_detect(evtype, "TYPHOON"))~"SEA, LAKE, TIDE, TROPICAL STORM, \n TYPHON OR HURRICANE RELATED EVENTS",
(str_detect(evtype, "FLOO")|
str_detect(evtype, "RAIN")|
str_detect(evtype, "PRECIP")|
str_detect(evtype, "DOWNBURST")|
str_detect(evtype, "LIGHTNING")|
str_detect(evtype, "RAINFALL")|
str_detect(evtype, "HEAVY MIX")|
str_detect(evtype, "RAINSTORM")|
str_detect(evtype, "STREAM")|
str_detect(evtype, "MUDSLIDE")|
str_detect(evtype, "MUD SLIDE")|
str_detect(evtype, "LANDSLUMP")|
str_detect(evtype, "ROCK")|
str_detect(evtype, "URBAN")|
str_detect(evtype, "SHOWER"))~"RAIN AND FLOODS \n RELATED EVENTS",
(str_detect(evtype, "HEAT")|
str_detect(evtype, "DROUGHT")|
str_detect(evtype, "DRY")|
str_detect(evtype, "VOLCANIC")|
str_detect(evtype, "HYPERTHERMIA")|
str_detect(evtype, "HIGH TEMPERATURE")|
str_detect(evtype, "HOT")|
str_detect(evtype, "SMOKE")|
str_detect(evtype, "FIRE")|
str_detect(evtype, "DUST"))~"HEAT WEATHER, FIRE, SMOKE \n OR VOLCANO RELATED EVENTS",
(str_detect(evtype, "COLD")|
str_detect(evtype, "COOL")|
str_detect(evtype, "ICY")|
str_detect(evtype, "SNOW")|
str_detect(evtype, "FROST")|
str_detect(evtype, "FREEZ")|
str_detect(evtype, "SNOW")|
str_detect(evtype, "BLIZZARD")|
str_detect(evtype, "GLAZE")|
str_detect(evtype, "WINTRY MIX")|
str_detect(evtype, "HEAVY MIX")|
str_detect(evtype, "ICE")|
str_detect(evtype,"LOW TEMPERATURE")|
str_detect(evtype, "WINTER")|
str_detect(evtype, "FOG")|
str_detect(evtype, "SLEET")|
str_detect(evtype, "HYPOTHERMIA")|
str_detect(evtype, "HAIL")|
str_detect(evtype, "AVALA"))~"COLD WEATHER \n RELATED EVENTS",
TRUE~"OTHERS"))
#total of events per group
event_clasification=db_mod %>%
tabyl(evtype_recode)
total_events=nrow(db_mod)
maxyear=max(db_mod$year_evt)
minyear=min(db_mod$year_evt)
events_with_cost=nrow(db_mod %>% filter(real_value>0))
events_with_fatalities=nrow(db_mod %>% filter(fatalities>0))
events_with_injuries=nrow(db_mod %>% filter(injuries>0))
#Cost
dba=db_mod %>%
group_by(evtype_recode) %>%
filter(real_value>0) %>%
summarise(tot_event=n(),
tot_cost=sum(real_value),
mean_cost=mean(real_value,na.rm=T),
min_cost=min(real_value, na.rm=T),
max_cost=max(real_value, na.rm=T),
.groups = "drop") %>%
left_join(event_clasification, by="evtype_recode") %>%
mutate(label=paste0(evtype_recode,"\n (",comma(tot_event)," of ",comma(n),")"))
ggplot(dba , aes(y=tot_cost, x=reorder(as.factor(label), desc(tot_cost))))+
geom_col(color="black", fill="lightblue")+
scale_y_continuous(label=dollar)+
geom_text(aes(y=tot_cost, x=reorder(as.factor(label), desc(tot_cost)),
label=paste0(dollar(mean_cost),"\n","(Min=",dollar(min_cost),", Max=",dollar(max_cost),")")),
vjust=-0.2)+
labs(y="Total Cost in US dollars",
x="Event groups (n=Total of events)",
title = paste0("Estimated cost in damages per weather events type from ",minyear," to ",maxyear),
subtitle = paste0("Average cost per event in each group (with min and max cost) of ",comma(events_with_cost),
" events with reported damage cost of ",
comma(total_events)," total events"))+
theme_classic()+
theme(axis.text=element_text(face="bold"))
#fatalities
dbb=db_mod %>%
group_by(evtype_recode) %>%
filter(fatalities>0) %>%
summarise(tot_event=n(),
tot_fatalities=sum(fatalities,na.rm = T),
mean_fat=mean(fatalities,na.rm=T),
min_fat=min(fatalities, na.rm=T),
max_fat=max(fatalities, na.rm=T),
.groups = "drop") %>%
left_join(event_clasification, by="evtype_recode") %>%
mutate(label=paste0(evtype_recode,"\n (",comma(tot_event)," of ",comma(n),")"))
ggplot(dbb , aes(y=tot_fatalities, x=reorder(as.factor(label), desc(tot_fatalities))))+
geom_col(color="black", fill="lightgreen")+
scale_y_continuous(label=comma)+
geom_text(aes(y=tot_fatalities, x=reorder(as.factor(label), desc(tot_fatalities)),
label=paste0(comma(mean_fat),"\n","(Min=",comma(min_fat),", Max=",comma(max_fat),")")),
vjust=-0.2)+
labs(y="Total Fatalities registered",
x="Event groups (n=Total of events)",
title = paste0("Fatalities reported by Weather event group from ",minyear," to ",maxyear),
subtitle = paste0("Average fatalities in each group (with min and max deaths) of events ",comma(events_with_fatalities), " events with reported deaths of ", comma(total_events)," total events"))+
theme_classic()+
theme(axis.text=element_text(face="bold"))
dbc=db_mod %>%
group_by(evtype_recode) %>%
filter(injuries>0) %>%
summarise(tot_event=n(),
tot_injuries=sum(injuries,na.rm = T),
mean_fat=mean(injuries,na.rm=T),
min_fat=min(injuries, na.rm=T),
max_fat=max(injuries, na.rm=T),
.groups = "drop") %>%
left_join(event_clasification, by="evtype_recode") %>%
mutate(label=paste0(evtype_recode,"\n (",comma(tot_event)," of ",comma(n),")"))
ggplot(dbc , aes(y=tot_injuries, x=reorder(as.factor(label), desc(tot_injuries))))+
geom_col(color="black", fill="brown1")+
scale_y_continuous(label=comma)+
geom_text(aes(y=tot_injuries, x=reorder(as.factor(label), desc(tot_injuries)),
label=paste0(comma(mean_fat),"\n","(Min=",comma(min_fat),", Max=",comma(max_fat),")")),
vjust=-0.2)+
labs(y="Total injuries registered",
x="Event groups (n=Total of events)",
title = paste0("injuries reported by Weather event group from ",minyear," to ",maxyear),
subtitle = paste0("Average injuries in each group (with min and max injuries) of events ",comma(events_with_injuries), " events with reported injuries of ", comma(total_events)," total events"))+
theme_classic()+
theme(axis.text=element_text(face="bold"))