Synopsis

The NOAA Storm Database was explored to answer some basic questions about severe weather events and other natural occurrences, specifically which ones have the most negative impact on public health, and the economy (in dollars expended to recover from property and agricultural damage). Natural disasters, extreme weather events, and adverse conditions can have a significant effect on public health, safety, and agriculture. Understanding which events are most likely to cause negative effects are key to planning and preparation. Tornadoes have the greatest impact on public safety, with over 150 combined injuries and fatalities resulting per tornado rated as Severe or Catastrophic (F4 or F5 on the Fujita Intensity Scale); nearly twice as many casualties as the next most frequent cause (excessive heat/hyperthermia). Floods of various types have the greatest collective economic impacts, with most of the damage being to property (but significant agricultural effects can be seen when occurring in farm areas), followed by storm surges/tides, and drought being the most negatively impactful to crop growth.

Data Processing

if (!file.exists("storm_data.csv")){
  download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", destfile = "storm_data.csv")}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 3.6.2
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(data.table)
## Warning: package 'data.table' was built under R version 3.6.2
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
storm_data <- read.csv("storm_data.csv", stringsAsFactors = FALSE)
colnames(storm_data)[21] <- "FUJITA"
storm_data$BGN_DATE <- as.Date(strptime(storm_data$BGN_DATE, "%m/%d/%Y"))
storm_data <- storm_data %>% filter(BGN_DATE >= "1990-01-01")
storm_data <- mutate(storm_data, HAIL.SIZE = MAG/100)
storm_data <- data.table(filter(storm_data, EVTYPE != "?"))
colnames(storm_data)[22] <- "WIND.SPEED(KNOTS)"
health_effects <- storm_data %>%
select(BGN_DATE, BGN_TIME, EVTYPE, FUJITA, `WIND.SPEED(KNOTS)`, HAIL.SIZE, FATALITIES, INJURIES, WFO, STATEOFFIC, REMARKS)

economic_effects <- storm_data %>%
select(BGN_DATE, BGN_TIME, EVTYPE, FUJITA, `WIND.SPEED(KNOTS)`, HAIL.SIZE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP, WFO, STATEOFFIC, REMARKS)

Further preparing the “health effects” table

  • The total number of casualties (defined here as fatalities + injuries) may be of interest, so that column was created.

    • Additionally, when reporting on the overall effect of weather events on public health, only significant events should be considered. Therefore, this data set was restricted to only events resulting in at least 1 fatality OR 5 total casualties.
  • Furthermore, even some of the valid event types, as cleaned up before, are still redundant in the information they give (e.g. “EXCESSIVE HEAT and”HEAT"), so more consolidation is needed.

  • Finally, the data can be grouped, summarized, and sorted to get the total of each casualty type for each event type. And the Top 15 event types should paint a stark enough picture to get the point across

health_effects$TOTAL.CASUALTIES <- health_effects$FATALITIES + health_effects$INJURIES
health_effects <- filter(health_effects, FATALITIES >=1|TOTAL.CASUALTIES >=5)

health_effects <- health_effects %>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "EXCESSIVE HEAT/DROUGHT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "EXTREME HEAT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "UNSEASONABLY WARM","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAT WAVES","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "UNSEASONABLY WARM AND DRY","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "RECORD HEAT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "RECORD/EXCESSIVE HEAT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE THUNDERSTORM WIND","THUNDERSTORM WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "THUNDERSTORM WIND","DAMAGING WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "STROND WIND","DAMAGING WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "STRONG WIND","DAMAGING WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE STRONG WIND","DAMAGING WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HIGH WIND","DAMAGING WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "TORNADO F2","TORNADO"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "Heat Wave","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "FLASH FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "COASTAL FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "TROPICAL STORM GORDON","TROPICAL STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "TORNADOES, TSTM WIND, HAIL","TORNADO"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "STORM SURGE","STORM SURGE/TIDE"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAVY SNOW","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "SLEET","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "FROST/FREEZE","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "COLD/WIND CHILL","EXTREME COLD/WIND CHILL"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HYPERTHERMIA/EXPOSURE","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "WINTER WEATHER MIX","WINTER WEATHER"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAT WAVE DROUGHT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE HIGH WIND","DAMAGING WIND"))

combined_health_effects <- group_by(health_effects,EVTYPE)
combined_health_effects <- combined_health_effects %>% summarize(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), TOTAL.CASUALTIES = sum(TOTAL.CASUALTIES))

combined_health_effects <- combined_health_effects%>%
  arrange(desc(combined_health_effects$TOTAL.CASUALTIES))

top_15_health_effects <- data.table(combined_health_effects[1:15,])
EVTYPE <- rep(top_15_health_effects$EVTYPE, 3)
CASUALTY.TYPE <- c(rep("TOTAL.CASUALTIES",15),rep("INJURIES",15),rep("FATALITIES",15))
VICTIMS <- c(top_15_health_effects$TOTAL.CASUALTIES,top_15_health_effects$INJURIES,top_15_health_effects$FATALITIES)
top_15_health_effects <- data.table(cbind(EVTYPE, CASUALTY.TYPE, VICTIMS))
top_15_health_effects$CASUALTY.TYPE <- as.factor(top_15_health_effects$CASUALTY.TYPE)
top_15_health_effects$EVTYPE <- as.factor(top_15_health_effects$EVTYPE)
top_15_health_effects$VICTIMS <- as.numeric(top_15_health_effects$VICTIMS)
  • An additional examination of the leading threat to public health and safety (tornadoes, specifically) was done, showing the relationship between tornado intensity and the casualties they cause.
tornado_effects <- filter(health_effects, EVTYPE == "TORNADO")
fujitacount <- count(tornado_effects, FUJITA)
tornado_effects <- group_by(tornado_effects,FUJITA)
tornado_effects <- tornado_effects %>% summarize(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES), TOTAL.CASUALTIES = sum(TOTAL.CASUALTIES))
tornado_effects <- data.table(cbind(fujitacount$n,tornado_effects))
tornado_effects <- data.table(tornado_effects[1:6,])
tornado_effects$AVG.FATALITIES <- tornado_effects$FATALITIES/tornado_effects$`fujitacount$n`
tornado_effects$AVG.INJURIES <- tornado_effects$INJURIES/tornado_effects$`fujitacount$n`
tornado_effects$AVG.TOTAL.CASUALTIES <- tornado_effects$TOTAL.CASUALTIES/tornado_effects$`fujitacount$n`
FUJITA <- rep(tornado_effects$FUJITA,3)
CASUALTY.TYPE <- c(rep("AVG.TOTAL.CASUALTIES",6),rep("AVG.INJURIES",6),rep("AVG.FATALITIES",6))
AVG.VICTIMS <- c(tornado_effects$AVG.TOTAL.CASUALTIES,tornado_effects$AVG.INJURIES,tornado_effects$AVG.FATALITIES)
tornado_effects <- data.table(cbind(FUJITA, CASUALTY.TYPE, AVG.VICTIMS))
tornado_effects$CASUALTY.TYPE <- as.factor(tornado_effects$CASUALTY.TYPE)
tornado_effects$FUJITA <- as.factor(tornado_effects$FUJITA)
tornado_effects$AVG.VICTIMS <- as.numeric(tornado_effects$AVG.VICTIMS)

tornado_effects <- tornado_effects%>%
  arrange(desc(CASUALTY.TYPE),desc(FUJITA))
  • Finally, a separate subset of the “storm_data” database was prepared to convey the adverse economic impacts of different natural events and disasters, specifically for dmages to property and crops. As was the case with the health impacts dataset, additional cleaning and binning (specifically, of the “EVTYPE”, “PROPDMG”, “PROPDMGEXP”,“CROPDMG”, and “CROPDMGEXP” columns) was necessary here with the economic impacts dataset.

  • From there, three tables branching from the “economic effects” branch were created, to more easily handle property and agricultural damage on their own, as well as one to easily show the total damage. They will all be combined into one data set for easier graphing.

economic_effects <- filter(economic_effects, economic_effects$PROPDMG != 0 | economic_effects$CROPDMG != 0)
economic_effects <- filter(economic_effects, economic_effects$PROPDMGEXP == "M"| economic_effects$PROPDMGEXP == "K" | economic_effects$PROPDMGEXP == "B"| economic_effects$PROPDMGEXP == "m"| economic_effects$CROPDMG != 0)
property_table <- data.table(select(economic_effects,EVTYPE,FUJITA,'WIND.SPEED(KNOTS)',HAIL.SIZE,PROPDMG,PROPDMGEXP,REMARKS))
prop_billions <- property_table %>%
filter(PROPDMGEXP == "B")%>%
mutate(DAMAGE.DOLLARS = PROPDMG * 1000000000)

prop_millions <- property_table %>%
filter(PROPDMGEXP == "M" | PROPDMGEXP == "m")%>%
mutate(DAMAGE.DOLLARS = PROPDMG * 1000000)

prop_thousands <- property_table %>%
filter(PROPDMGEXP == "K")%>%
mutate(DAMAGE.DOLLARS = PROPDMG * 1000)

property_table <- data.table(rbind(prop_billions,prop_millions,prop_thousands))
property_table <- arrange(property_table, property_table$DAMAGE.DOLLARS)
property_table <- data.table(filter(property_table, DAMAGE.DOLLARS != 0))

property_table <- property_table %>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "THUNDERSTORM WIND","HIGH WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "FLASH FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE STRONG WIND","STRONG WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE HIGH WIND","STRONG WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAVY SNOW","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "COASTAL FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "ICE STORM","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "TROPICAL DEPRESSION","TROPICAL STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE HAIL","HAIL"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "FREEZING FOG","WINTER WEATHER CONDITIONS"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "LAKE-EFFECT SNOW","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "LAKESHORE FLOOD","FLOOD"))

property_effects <- group_by(property_table,EVTYPE)
property_effects <- property_effects %>% summarize(DAMAGE.DOLLARS = sum(DAMAGE.DOLLARS))
property_effects <- arrange(property_effects,desc(DAMAGE.DOLLARS))
property_effects <- mutate(property_effects,DAMAGE.BILLIONS = DAMAGE.DOLLARS/1000000000)
property_effects[30,1] <- "FROST/FREEZE"
property_effects <- group_by(property_effects,EVTYPE)
property_effects <- property_effects %>% summarize(DAMAGE.DOLLARS = sum(DAMAGE.DOLLARS))
property_effects <- mutate(property_effects,TYPE = rep("PROPERTY",length(EVTYPE)))
property_effects <- arrange(property_effects,desc(DAMAGE.DOLLARS))
property_effects <- mutate(property_effects,DAMAGE.BILLIONS = DAMAGE.DOLLARS/1000000000)

crop_table <- data.table(select(economic_effects,EVTYPE,FUJITA,'WIND.SPEED(KNOTS)',HAIL.SIZE,CROPDMG,CROPDMGEXP,REMARKS))

crop_billions <- crop_table %>%
filter(CROPDMGEXP == "B")%>%
mutate(DAMAGE.DOLLARS = CROPDMG * 1000000000)

crop_millions <- crop_table %>%
filter(CROPDMGEXP == "M" | CROPDMGEXP == "m")%>%
mutate(DAMAGE.DOLLARS = CROPDMG * 1000000)

crop_thousands <- crop_table %>%
filter(CROPDMGEXP == "K" | CROPDMGEXP == "k")%>%
mutate(DAMAGE.DOLLARS = CROPDMG * 1000)

crop_table <- data.table(rbind(crop_billions,crop_millions,crop_thousands))
crop_table <- arrange(crop_table, crop_table$DAMAGE.DOLLARS)
crop_table <- data.table(filter(crop_table, DAMAGE.DOLLARS != 0))

crop_table <- crop_table %>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "THUNDERSTORM WIND","HIGH WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "FLASH FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE STRONG WIND","STRONG WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE HIGH WIND","STRONG WIND"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAVY SNOW","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "COASTAL FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "ICE STORM","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "TROPICAL DEPRESSION","TROPICAL STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "MARINE HAIL","HAIL"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "FREEZING FOG","WINTER WEATHER CONDITIONS"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "HEAT","EXCESSIVE HEAT"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "LAKE-EFFECT SNOW","WINTER STORM"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "LAKESHORE FLOOD","FLOOD"))%>%
mutate(EVTYPE = replace(EVTYPE, EVTYPE == "EXTREME COLD/WIND CHILL","COLD/WIND CHILL"))
  
crop_effects <- group_by(crop_table,EVTYPE)
crop_effects <- crop_effects %>% summarize(DAMAGE.DOLLARS = sum(DAMAGE.DOLLARS))
crop_effects <- arrange(crop_effects,desc(DAMAGE.DOLLARS))
crop_effects <- mutate (crop_effects,DAMAGE.BILLIONS = DAMAGE.DOLLARS/1000000000)
crop_effects <- mutate(crop_effects,TYPE = rep("CROP",length(EVTYPE)))

combined_effects <- data.table(rbind(property_effects,crop_effects))
combined_effects <- group_by(combined_effects, EVTYPE)
combined_effects <- combined_effects %>% summarize(DAMAGE.DOLLARS = sum(DAMAGE.DOLLARS))
combined_effects <- arrange(combined_effects,desc(DAMAGE.DOLLARS))
combined_effects <- mutate(combined_effects,DAMAGE.BILLIONS = DAMAGE.DOLLARS/1000000000)
combined_effects <- mutate(combined_effects,TYPE = rep("COMBINED",length(EVTYPE)))

all_effects <- data.frame(rbind(property_effects,crop_effects,combined_effects))
all_effects <- arrange(all_effects, desc(DAMAGE.BILLIONS))
all_effects <-  filter(all_effects, EVTYPE == "FLOOD" | EVTYPE == "HURRICANE(TYPHOON)"|EVTYPE == "STORM SURGE/TIDE" |EVTYPE == "TORNADO" |EVTYPE == "HAIL" |EVTYPE == "HIGH WIND" |EVTYPE == "WINTER STORM" |EVTYPE == "DROUGHT" |EVTYPE == "WILDFIRE" |EVTYPE == "TROPICAL STORM" |EVTYPE == "HEAVY RAIN" |EVTYPE == "FROST/FREEZE" |EVTYPE == "COLD/WIND CHILL")

Results

dodge <- position_dodge(width = 0.4)

healthplot1 <- ggplot(data = top_15_health_effects, aes(x = reorder(EVTYPE,-VICTIMS), y = VICTIMS, fill = CASUALTY.TYPE))+
  geom_bar(stat = "identity", position = dodge, width = 0.4)+
    labs(title = "Figure 1 - Injuries and Fatalities Caused By Various\nNatural Disasters or Weather Events - Since 1990", x = "Natural Disaster/Weather Event Type", y = "Number of Casualties, By Type")+
  theme(axis.text.x=element_text(face = "bold", size = 4.25,angle = 0,vjust = 0.5),axis.text.y=element_text(size = 8,angle = 45))+
  scale_fill_manual(values=c("#009900", "#FF3300", "#003366"),
                    name = "Casualty Type",
                    labels = c("Fatalities", "Injuries", "Total Casualties\n(Injuries + Fatalities)"))+
  scale_x_discrete(labels = c("Tornadoes\n(All)","Extreme\nCold","Flood","Damaging\nWinds","Lightning","Winter\nStorm","Ice\nStorm","Hurricane\n(Typhoon)","Wildfire","Dense\nFog","Hail","Rip\nCurrent","Blizzard","Winter\nWeather","Excess.\nHeat"))
print(healthplot1)

fit <- lm(log(tornado_effects$AVG.VICTIMS[1:6]) ~ tornado_effects$FUJITA[1:6], data = tornado_effects[1:6,])

tornadoplot <- ggplot(data = tornado_effects, aes(x = reorder(FUJITA,AVG.VICTIMS), y = AVG.VICTIMS, fill = CASUALTY.TYPE))+
  geom_bar(stat = "identity", position = "dodge")+
  stat_function(fun = function(x) exp(fit$coefficients[1] + x*fit$coefficients[2]))+
  labs(title = "Injuries and Fatalities Caused By Tornadoes\nBy Fujita Scale Intensity, Per Event - Since 1990", x = "Fujita Scale Intensity", y = "Average Casualties Per Tornado Occurrence, By Type")+
  scale_fill_discrete(name = "Average...",labels = c("Fatalities", "Injuries", "Total Casualties"))+
  theme(axis.text.x=element_text(face = "bold", size=8,angle = 0,vjust = 0.65))
print(tornadoplot)

dodge2 <- position_dodge(width = 0.3)

prop_ag_plot <- ggplot(data = all_effects, aes(x = reorder(EVTYPE,-DAMAGE.BILLIONS), y = DAMAGE.BILLIONS, fill = TYPE))+
  geom_bar(stat = "identity", position = dodge2, width = 0.4)+
  labs(title = "Figure 3 - Property and Agricultural Damage Caused\nNatural Disasters or Weather Events - Since 1990", x = "Natural Disaster/Weather Event Type", y = "Damages(Billions of U.S.Dollars)")+
  scale_fill_manual(values=c("#009900", "#FF3300", "#003366"),
                  name = "Type of Damage",
                  labels = c("Combined", "Crop", "Property"))+
  theme(axis.text.x=element_text(size = 4,angle = 0,vjust = 0.5),axis.text.y=element_text(size = 8,angle = 45))+
  scale_x_discrete(labels = c("Flood","Storm Surge/\nTide","Tornado","Hail","High\nWind","Winter\nStorm","Drought","Wildfire","Tropical\nStorm","Heavy\nRain","Frost\nFreeze","Extreme\nCold"))
  print(prop_ag_plot)