Synopsis

Extreme weather events caused substantial harm to public health and the economy between 1950 and 2011. For public health, this dataset records the fatalities and injuries attributed to each extreme weather event; for the economy, it records the crop and property damage caused by each event. Based on the available data, tornados have by far the biggest impact on public health, with excessive heat a distant second. Floods have caused the most economic damage overall due to high levels of property damage, while droughts have caused the most crop damage.

Data Processing

library(tidyverse)
library(lubridate)
library(ggplot2)

# Load the storm data. The compressed CSV is downloaded only when it is not
# already present in the working directory, then passed to read.csv() as-is.
data <- "repdata-data-StormData.csv.bz2"
if (!file.exists(data)) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url = url, destfile = data, mode = "wb")
}

dataframe <- read.csv(file = data, header = TRUE, sep = ",")

# Clean the data: parse the BGN_DATE and BGN_TIME columns with lubridate.
# Both columns are converted in one mutate() call. hm() emits a parse warning
# because some BGN_TIME strings do not match the hour-minute format.
dataframe <- mutate(
  dataframe,
  BGN_DATE = mdy_hms(BGN_DATE),
  BGN_TIME = hm(BGN_TIME)
)
## Warning in .parse_hms(..., order = "HM", quiet = quiet): Some strings failed to
## parse, or all strings are NAs
# Convert a damage-exponent code into its numeric multiplier.
#
# expoChar: a single exponent code. "H", "K", "M" and "B" (upper or lower
#   case) map to 100, 1000, 1000000 and 1000000000 respectively.
# Returns the multiplier as a number, or NULL (invisibly) when the code is NA
#   or not one of the recognised letters, so callers that test the result with
#   is.null() keep working.
# Stops with an error when expoChar is not exactly one value.
EXP_Conversion <- function(expoChar) {
  if (length(expoChar) != 1) {
    stop("expoChar must be a single exponent code", call. = FALSE)
  }
  # A missing code would make a plain `if (expoChar == ...)` fail with
  # "missing value where TRUE/FALSE needed"; treat it as unrecognised instead.
  if (is.na(expoChar)) {
    return(invisible(NULL))
  }
  # as.character() keeps factor input working and stops a numeric input from
  # being used as a positional index by switch(); toupper() folds the
  # lower-case variants onto one branch each. With no matching branch and no
  # default, switch() returns NULL invisibly.
  switch(
    toupper(as.character(expoChar)),
    H = 100,
    K = 1000,
    M = 1000000,
    B = 1000000000
  )
}


# Property damage data.
# Keep only rows with a positive damage figure whose exponent code is one that
# EXP_Conversion() understands. Matching on the upper-cased code accepts both
# cases of every supported letter (including "k" and "b"), so the filter
# cannot drift out of step with the converter.
propertyDam <- dataframe %>%
  filter(PROPDMG > 0, toupper(PROPDMGEXP) %in% c("H", "K", "M", "B"))

# Every remaining code is recognised, so each conversion yields exactly one
# number; vapply() enforces that and returns a plain numeric vector.
exponentsPropertyVect <- vapply(
  as.character(propertyDam$PROPDMGEXP),
  EXP_Conversion,
  numeric(1),
  USE.NAMES = FALSE
)

# Scale each damage figure by its multiplier and total it per event type.
propertyDamPlotData <- propertyDam %>%
  mutate(property.damage = exponentsPropertyVect * PROPDMG) %>%
  group_by(EVTYPE) %>%
  summarise(total.property.damage = sum(property.damage))

# Crop damage data.
# Keep only rows with a positive damage figure whose exponent code is one that
# EXP_Conversion() understands. Matching on the upper-cased code accepts both
# cases of every supported letter (including "k" and "b"), so the filter
# cannot drift out of step with the converter.
CROPDam <- dataframe %>%
  filter(CROPDMG > 0, toupper(CROPDMGEXP) %in% c("H", "K", "M", "B"))

# Every remaining code is recognised, so each conversion yields exactly one
# number; vapply() enforces that and returns a plain numeric vector.
exponentsCROPVect <- vapply(
  as.character(CROPDam$CROPDMGEXP),
  EXP_Conversion,
  numeric(1),
  USE.NAMES = FALSE
)

# Scale each damage figure by its multiplier and total it per event type.
CROPDamPlotData <- CROPDam %>%
  mutate(CROP.damage = exponentsCROPVect * CROPDMG) %>%
  group_by(EVTYPE) %>%
  summarise(total.crop.damage = sum(CROP.damage))

# Merge the crop and property totals and keep the ten event types with the
# highest combined damage, in long format (one row per event type and damage
# type) for plotting.
# all = TRUE keeps event types that appear in only one of the two tables; the
# default inner join would silently drop them from the ranking. Their missing
# total is counted as 0.
econData <- merge(
  CROPDamPlotData,
  propertyDamPlotData,
  by = "EVTYPE",
  all = TRUE
) %>%
  mutate(
    total.crop.damage = coalesce(total.crop.damage, 0),
    total.property.damage = coalesce(total.property.damage, 0),
    total.damage = total.crop.damage + total.property.damage
  ) %>%
  # Rank explicitly on total.damage instead of relying on it being the last
  # column of the table.
  top_n(10, wt = total.damage) %>%
  # Ascending order: rows are later turned into factor levels in row order,
  # so the largest total ends up last.
  arrange(total.damage) %>%
  select(-total.damage) %>%
  pivot_longer(!EVTYPE, names_to = "Damage.Type", values_to = "Total.Damage")

# Health data cleaning.
# Total the fatalities and injuries per event type, keep the ten event types
# with the most casualties (fatalities + injuries), and reshape to long format
# (one row per event type and casualty type) for plotting.
healthData <- dataframe %>%
  group_by(EVTYPE) %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES)) %>%
  mutate(CASUALTIES = FATALITIES + INJURIES) %>%
  # Rank explicitly on CASUALTIES instead of relying on it being the last
  # column of the table.
  top_n(10, wt = CASUALTIES) %>%
  # Ascending order: rows are later turned into factor levels in row order,
  # so the largest total ends up last.
  arrange(CASUALTIES) %>%
  select(-CASUALTIES) %>%
  pivot_longer(
    !EVTYPE,
    names_to = "Casualty.Type",
    values_to = "Total.Casualties"
  )

Results

Public Health Consequences

Tornados have caused the greatest number of fatalities & injuries by far. Excessive heat is a distant second. Injuries are much more common than fatalities for all event types.

# Horizontal bar chart of total casualties per event type, with bars filled by
# casualty type. fct_inorder() keeps the bars in the row order of healthData.
healthPlot <- ggplot(
  data = healthData,
  mapping = aes(
    x = fct_inorder(EVTYPE),
    y = Total.Casualties,
    fill = Casualty.Type
  )
) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(
    title = "Top Ten Event Types By Injuries & Fatalities",
    subtitle = "1950-2011, USA Only",
    x = "Event Type",
    y = "Total Casualties"
  )

healthPlot

Economic Consequences

Property damage is more severe than crop damage. Floods are by far the most costly event type due to high levels of property and crop damage. Droughts are the seventh most costly event type due to high levels of crop damage. It is worth noting that economic consequences could go beyond property and crop damage; for instance, a storm could lower output if it led to people missing work.

# Horizontal bar chart of total damage per event type, with bars filled by
# damage type. fct_inorder() keeps the bars in the row order of econData.
econPlot <- ggplot(
  data = econData,
  mapping = aes(
    x = fct_inorder(EVTYPE),
    y = Total.Damage,
    fill = Damage.Type
  )
) +
  geom_col() +
  scale_y_continuous(labels = scales::comma) +
  coord_flip() +
  labs(
    title = "Top Ten Event Types By Property & Crop Damage",
    subtitle = "1950-2011, USA Only",
    x = "Event Type",
    y = "Total Crop & Property Damage ($USD)"
  )

econPlot