US Public Health and Economic Impacts of Severe Weather Events

Synopsis

Storms and other severe weather events can cause both public health and economic problems. These events can result in a variety of damages - in terms of fatalities and injuries or in terms of crop and property damage. However, it is also important to recognize that some events happen more frequently than others. When we make our cost-benefit analysis on which storms to be most prepared for, or how much of an issue we should take each event, we should be cognizant of both their total impact but also of the average impact each storm brings.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

The data for this examination was taken from this website. Data was read in, and event types were cleaned and collapsed to a more interpretable form. We gathered the mean damages, total amount of events, total amount of damages, as well as their standard errors for each of the 29 analyzed events - 10 were removed after the first clean due to only happening once in the dataset.

setwd("~/Desktop")
library(ggplot2)
library(readr)
library(plyr)
library(dplyr)
library(RColorBrewer)
library(cowplot)
gsubbing <- function(data){
  data$EVTYPE <- gsub('.*flood.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*urban.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*stream.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*floooding.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*water.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*wayter.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*urban.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*dam.*', 'flood', data$EVTYPE)
  data$EVTYPE <- gsub('.*wind.*', 'wind', data$EVTYPE)
  data$EVTYPE <- gsub('.*torn.*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*tornado.*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*-tornado.*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*/tornado.*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*gustnado.*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*wall cloud*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*landspout*', 'tornado', data$EVTYPE)
  data$EVTYPE <- gsub('.*hurricane.*', 'hurricane', data$EVTYPE)
  data$EVTYPE <- gsub('.*remnants of floyd*', 'hurricane', data$EVTYPE)
  data$EVTYPE <- gsub('.*rain.*', 'rain', data$EVTYPE)
  data$EVTYPE <- gsub('.*precipitation.*', 'rain', data$EVTYPE)
  data$EVTYPE <- gsub('.*precip.*', 'rain', data$EVTYPE)
  data$EVTYPE <- gsub('tstm wnd', 'thunderstorm', data$EVTYPE)
  data$EVTYPE <- gsub('.*thunderstorm.*', 'thunderstorm', data$EVTYPE)
  data$EVTYPE <- gsub('.*thunderstormw.*', 'thunderstorm', data$EVTYPE)
  data$EVTYPE <- gsub('tstm', 'thunderstorm', data$EVTYPE)
  data$EVTYPE <- gsub('.*downburst.*', 'thunderstorm', data$EVTYPE)
  data$EVTYPE <- gsub('.*fire.*', 'fire', data$EVTYPE)
  data$EVTYPE <- gsub('[0-9][0-9]$', '', data$EVTYPE)
  data$EVTYPE <- gsub('unseasonably ', '', data$EVTYPE)
  data$EVTYPE <- gsub('unusally ', '', data$EVTYPE)
  data$EVTYPE <- gsub('unusual', '', data$EVTYPE)
  data$EVTYPE <- gsub('abnormally ', '', data$EVTYPE)
  data$EVTYPE <- gsub('abnormal ', '', data$EVTYPE)
  data$EVTYPE <- gsub('astronomical ', '', data$EVTYPE)
  data$EVTYPE <- gsub('^ ', '', data$EVTYPE)
  data$EVTYPE <- gsub('.*wet.*', 'wet', data$EVTYPE)
  data$EVTYPE <- gsub('wnd', 'wind', data$EVTYPE)
  data$EVTYPE <- gsub('.*lighting.*', 'lightning', data$EVTYPE)
  data$EVTYPE <- gsub('.*lightning.*', 'lightning', data$EVTYPE)
  data$EVTYPE <- gsub('.*ligntning.*', 'lightning', data$EVTYPE)
  data$EVTYPE <- gsub('.*dry.*', 'dryness', data$EVTYPE)
  data$EVTYPE <- gsub('.*driest.*', 'dryness', data$EVTYPE)
  data$EVTYPE <- gsub('.*winter.*', 'winter storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*wintry.*', 'winter storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*blizzard.*', 'winter storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*snow.*', 'winter storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*hail.*', 'winter storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*snow.*', 'winter storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*freeze.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*freezing.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*freeze.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*ice.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*icy.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*sleet.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*cold.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*low.*temper.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*frost.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*glaze.*', 'freeze', data$EVTYPE)
  data$EVTYPE <- gsub('.*high.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*tide.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*rough.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*heavy.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*current.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*marine.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*coastalstorm.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*coastal.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*beach.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*wave.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*surf.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*seiche.*', 'seas', data$EVTYPE)
  data$EVTYPE <- gsub('.*temperature.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*record.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*hot.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*hyperthermia/exposure.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*high.*temper.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*heat.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*warm.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*hypothermia/exposure.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*hypothermia.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*hot.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*heat.*', 'temperature', data$EVTYPE)
  data$EVTYPE <- gsub('.*cool.*', 'temperature', data$EVTYPE)
  data$EVTYPE <-gsub('.*volcanic.*', 'volcano', data$EVTYPE)
  data$EVTYPE <- gsub('.*funnel.*', 'funnel', data$EVTYPE)
  data$EVTYPE <- gsub('.*avalance.*', 'avalanche', data$EVTYPE)
  data$EVTYPE <- gsub('.*thunderstorm.*', 'thunderstorm', data$EVTYPE)
  data$EVTYPE <- gsub('.*tropical.*', 'tropical storm', data$EVTYPE)
  data$EVTYPE <- gsub('.*slide.*', 'mud or rock slide', data$EVTYPE)
  data$EVTYPE <- gsub('.*dust.*', 'dust', data$EVTYPE)
  data <- data[!(data$EVTYPE=="no severe weather"),]
  data <- data[!(data$EVTYPE=="none"),]
  data <- data[!(data$EVTYPE=="apache county"),]
  data <- data[!(data$EVTYPE=="?"),]
  
data
}

data <- readr::read_csv("repdata%2Fdata%2FStormData.csv")

data$EVTYPE <- stringr::str_to_lower(data$EVTYPE)
data$isUSA <- mapply( function(st) st %in% state.abb, data$STATE)
#Here we subset only to those events in the United States.
data <- data[data$isUSA==TRUE,]
data <- subset(data, !grepl("summary", data$EVTYPE))
length(unique(data$EVTYPE))
## [1] 793
#This function is just a ton of different gsubs, to bring down our
#different categories (793) to something more reasonable (39).
data <- gsubbing(data)
data$EVTYPE <- as.factor(data$EVTYPE)
length(unique(data$EVTYPE))
## [1] 39
levels(data$EVTYPE)
##  [1] "avalanche"         "dense fog"         "dense smoke"      
##  [4] "drowning"          "dryness"           "dust"             
##  [7] "excessive"         "fire"              "flood"            
## [10] "fog"               "freeze"            "funnel"           
## [13] "hurricane"         "landslump"         "lightning"        
## [16] "metro storm, may " "microburst"        "mild pattern"     
## [19] "mud or rock slide" "northern lights"   "other"            
## [22] "patchy dense fog"  "rain"              "red flag criteria"
## [25] "seas"              "severe turbulence" "smoke"            
## [28] "southeast"         "storm surge"       "temperature"      
## [31] "thunderstorm"      "tornado"           "tropical storm"   
## [34] "tsunami"           "vog"               "volcano"          
## [37] "wet"               "wind"              "winter storm"

The first question we may be interested in is:

Question 1: Which types of events are most harmful with respect to population health?

And we define that as both injuries and fatalities.

#sum() doesn't actually give the total number, but instead, actually gives the number of times the cell appeared. You can test it, if you're curious, by calling data %>% count(EVTYPE) and find the exact same numbers. We also can see this is true because n_injuries=n_fatalities for all events, even though that's not true. So, if we're interested in total, we need to manually calculate that from the N*Mean.

#Below, first we collapse the data by EVTYPE and get some useful information for graphing.
sum_data <- plyr::ddply(data, c("EVTYPE"), summarise,
                     n_injuries      = sum(!is.na(INJURIES)),
                     mean_injuries   = mean(INJURIES, na.rm=TRUE),
                     sd_injuries     = sd(INJURIES, na.rm=TRUE),
                     se_injuries     = sd_injuries / sqrt(n_injuries),
                     tot_injuries    = (mean_injuries * n_injuries),
                     n_fatalities    = sum(!is.na(FATALITIES)),
                     mean_fatalities = mean(FATALITIES, na.rm=TRUE),
                     sd_fatalities   = sd(FATALITIES, na.rm=TRUE),
                     se_fatalities   = sd_fatalities/sqrt(n_fatalities),
                     tot_fatalities  = (mean_fatalities * n_fatalities)
)

#I then split the data between injury only and fatality only, looking at the top 10 events for each. We also break it down by total and average. We finally also reduce our dataset to events that can at least provide us a standard error - so no event that only happens once. 
sum_data <- sum_data[sum_data$n_injuries>2,]
injury_data <- head(sum_data[order(desc(sum_data$tot_injuries)),],10)
injury_data <-   injury_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
fatal_data <- head(sum_data[order(desc(sum_data$tot_fatalities)),],10)
fatal_data <-   fatal_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
sum_data<- sum_data[order(desc(sum_data$tot_injuries)),]
sum_data <-   sum_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
injury_mean_data <- head(sum_data[order(desc(sum_data$mean_injuries)),],10)
injury_mean_data <-   injury_mean_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
fatal_mean_data <- head(sum_data[order(desc(sum_data$mean_fatalities)),],10)
fatal_mean_data <-   fatal_mean_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
#First, we assign each event a different color that will remain constant throughout all graphs. 
colors <- colorRampPalette(brewer.pal(9,"Set1"))(29)
names(colors) <- levels(sum_data$EVTYPE)

#Next, we plot all four graphs next to each other using cowplot.
g<- ggplot2::ggplot(data = injury_data, aes(x = EVTYPE, y = tot_injuries, fill = EVTYPE)) 
injury_graph <- g + geom_bar(stat="identity") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none", plot.margin = unit(c(0, 0, -1, 0), "cm"))+
  scale_fill_manual(name = "EVTYPE",values = colors)+
  labs(y = "Injuries",  x="", title = "Total Injuries\nPer Event")

g2 <- ggplot2::ggplot(data = fatal_data, aes(x = EVTYPE, y = tot_fatalities,fill = EVTYPE)) 
fatal_graph <- g2 +  geom_bar(stat="identity") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none", plot.margin =  unit(c(0, 0, -1, 0), "cm"))+
  scale_fill_manual(name = "EVTYPE",values = colors)+
  labs(y = "Fatalities", x="",  title = "Total Fatalities\nPer Event")

g3 <- ggplot2::ggplot(data=injury_mean_data, aes(x=EVTYPE, y=mean_injuries, fill=EVTYPE))
injury_mean_graph <- g3+ geom_bar(stat="identity", position="dodge", colour="black")+
  scale_fill_manual(name = "EVTYPE", values = colors)+
  labs(y="Injuries", x="", title="Average Injuries\nPer Event")+
  theme(legend.position="none", plot.margin =  unit(c(0, 0, -.5, 0), "cm"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_errorbar(aes(ymin=mean_injuries-se_injuries, ymax=mean_injuries+se_injuries),
                width=.3,                    # Width of the error bars
                position=position_dodge(.9))

g4 <- ggplot2::ggplot(data=fatal_mean_data, aes(x=EVTYPE, y=mean_fatalities, fill=EVTYPE))
fatal_mean_graph <- g4+ geom_bar(stat="identity", position="dodge", colour="black")+
  scale_fill_manual(name = "EVTYPE", values = colors)+
  labs(y="Fatalities", x="",  title="Average Fatalities\nPer Event")+
  theme(legend.position="none", plot.margin =  unit(c(0, 0, -.5, 0), "cm"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_errorbar(aes(ymin=mean_fatalities-se_fatalities, ymax=mean_fatalities+se_fatalities),
                width=.3,                    # Width of the error bars
                position=position_dodge(.9))
cowplot::plot_grid(injury_graph, fatal_graph, injury_mean_graph, fatal_mean_graph, align = "h", ncol = 2, nrow=2, labels = c("A", "B", "C","D"))

Figure 1 Results : What events are most damaging?

Figure 1 shows us something very interesting. While tornadoes are clearly causing the most total damage in terms of both injuries and deaths (A & B), it may be because there are MANY tornadoes that occur. If, instead, we look at the average number of injuries and fatalities each time an event occurs, we actually find that hurricanes and high temperatures are even more dangerous (C & D).

Yet, things happen that aren’t just human-related, but instead, focus on the economic damages of a given event. A second question may be:

Across the United States, which types of events have the greatest economic consequences?

#Overall, we repeat the same steps for question 2. We add this step prior to make sense of the property damage and crop damage number, which have multipliers we need to account for. When we summarise, we're also reducing it down to millions of dollars in damages to make the graphs easier to read.

data$multiplier_p <- 1
data$multiplier_p[data$PROPDMGEXP =="H"] <- 100
data$multiplier_p[data$PROPDMGEXP =="K"] <- 1000
data$multiplier_p[data$PROPDMGEXP =="M"] <- 1000000
data$multiplier_p[data$PROPDMGEXP =="B"] <- 1000000000

data$multiplier_c <- 1
data$multiplier_c[data$CROPDMGEXP =="H"] <- 100
data$multiplier_c[data$CROPDMGEXP =="K"] <- 1000
data$multiplier_c[data$CROPDMGEXP =="M"] <- 1000000
data$multiplier_c[data$CROPDMGEXP =="B"] <- 1000000000

data$cropdamage <- data$multiplier_c*data$CROPDMG
data$propdamage <- data$multiplier_p*data$PROPDMG
data$totdamage <- data$cropdamage+data$propdamage

sum_data_econ <- plyr::ddply(data, c("EVTYPE"), summarise,
                        n_cropdmg      = sum(!is.na(cropdamage)),
                        mean_cropdmg   = mean(cropdamage, na.rm=TRUE)/1000000,
                        sd_cropdmg     = sd(cropdamage, na.rm=TRUE)/1000000,
                        se_cropdmg     = sd_cropdmg / sqrt(n_cropdmg),
                        tot_cropdmg    = (mean_cropdmg * n_cropdmg),
                        n_propdmg    = sum(!is.na(propdamage)),
                        mean_propdmg = mean(propdamage, na.rm=TRUE)/1000000,
                        sd_propdmg   = sd(propdamage, na.rm=TRUE)/1000000,
                        se_propdmg   = sd_propdmg/sqrt(n_propdmg),
                        tot_propdmg  = (mean_propdmg * n_propdmg),
                        n_totdmg    = sum(!is.na(totdamage)),
                        mean_totdmg = mean(totdamage, na.rm=TRUE),
                        sd_totdmg   = sd(totdamage, na.rm=TRUE),
                        se_totdmg   = sd_totdmg/sqrt(n_totdmg),
                        tot_totdmg  = (mean_totdmg * n_totdmg)
)

cropdmg_data <- head(sum_data_econ[order(desc(sum_data_econ$tot_cropdmg)),],10)
cropdmg_data <-   cropdmg_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
propdmg_data <- head(sum_data_econ[order(desc(sum_data_econ$tot_propdmg)),],10)
propdmg_data <-   propdmg_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
sum_data_econ<- sum_data_econ[order(desc(sum_data_econ$tot_cropdmg)),]
sum_data_econ <-   sum_data_econ %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
cropdmg_mean_data <- head(sum_data_econ[order(desc(sum_data_econ$mean_cropdmg)),],10)
cropdmg_mean_data <-   cropdmg_mean_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
propdmg_mean_data <- head(sum_data_econ[order(desc(sum_data_econ$mean_propdmg)),],10)
propdmg_mean_data <-   propdmg_mean_data %>% dplyr::mutate(EVTYPE = factor(EVTYPE,EVTYPE))
g5<- ggplot2::ggplot(data = cropdmg_data, aes(x = EVTYPE, y = tot_cropdmg, fill = EVTYPE)) 
cropdmg_graph <- g5 + geom_bar(stat="identity") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none", plot.margin = unit(c(0, 0, -1, 0), "cm"))+
  scale_fill_manual(name = "EVTYPE",values = colors)+
  labs(y = "Damage",  x="", title = "Total Crop damage\nIn Millions of $")

g6 <- ggplot2::ggplot(data = propdmg_data, aes(x = EVTYPE, y = tot_propdmg,fill = EVTYPE)) 
propdmg_graph <- g6 +  geom_bar(stat="identity") + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  theme(legend.position="none", plot.margin = unit(c(0, 0, -1, 0), "cm"))+
  scale_fill_manual(name = "EVTYPE",values = colors)+
  labs(y = "Damage", x="",  title = "Total Property Damage\nIn Millions of $")

#cowplot::plot_grid(injury_graph, fatal_graph, align = "h", ncol = 2, rel_heights = c(1/2, 1/2))

g7 <- ggplot2::ggplot(data=cropdmg_mean_data, aes(x=EVTYPE, y=mean_cropdmg, fill=EVTYPE))
crop_mean_graph <- g7+ geom_bar(stat="identity", position="dodge", colour="black")+
  scale_fill_manual(name = "EVTYPE", values = colors)+
  labs(y="Damage", x="", title="Average Crop Damage\nIn Millions of $")+
  theme(legend.position="none", plot.margin = unit(c(0, 0, -.5, 0), "cm"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_errorbar(aes(ymin=mean_cropdmg-se_cropdmg, ymax=mean_cropdmg+se_cropdmg),
                width=.3,                    # Width of the error bars
                position=position_dodge(.9))

g8 <- ggplot2::ggplot(data=propdmg_mean_data, aes(x=EVTYPE, y=mean_propdmg, fill=EVTYPE))
prop_mean_graph <- g8 + geom_bar(stat="identity", position="dodge", colour="black")+
  scale_fill_manual(name = "EVTYPE", values = colors)+
  labs(y="Damage", x="",  title="Average Property Damage\nIn Millions of $")+
  theme(legend.position="none", plot.margin = unit(c(0, 0, -.5, 0), "cm"))+
  theme(axis.text.x = element_text(angle = 45, hjust = 1))+
  geom_errorbar(aes(ymin=mean_propdmg-se_propdmg, ymax=mean_propdmg+se_propdmg),
                width=.3,                    # Width of the error bars
                position=position_dodge(.9))

cowplot::plot_grid(cropdmg_graph, propdmg_graph, crop_mean_graph, prop_mean_graph, align = "h", ncol = 2, nrow=2, labels = c("E", "F", "G","H"))

Results Figure 2

Overall, we see that hurricanes cause the most average damage, and are even relatively highly ranked in terms of total damages. Floods and events from seas also cause significant damages to crops and property, however. We can clearly see that water is a dangerous resource for economic interests.