Synopsis:

Severe weather is responsible for property and crop damage and for many injuries and fatalities every year. Since 1950, the National Oceanic and Atmospheric Administration has collected data documenting severe weather, storms, and other significant weather phenomena that caused loss of life, injuries, significant property damage, and/or disruption to commerce. Knowing which types of severe weather event cause the greatest damage could inform decisions made by municipalities on relocating property or preparing for severe weather events, which could save lives. For population health, the most harmful severe weather events are tornadoes. The greatest economic consequences due to property and crop damage are from flooding, hurricanes, tornadoes and drought.

Data Processing:

Setting the Global options:

# Show each chunk's code in the report and cache chunk results between knits.
knitr::opts_chunk$set(cache = TRUE, echo = TRUE)

Load R packages

library(ggplot2)
library(knitr)
library(dplyr)
library(tidyr)
library(scales)

Read in the data

# Load the raw storm database straight from the bzip2-compressed CSV.
storm_data <- read.csv(file = "repdata-data-StormData.csv.bz2")
# Inspect the structure and contents of the file
# str(storm_data)
nrow(storm_data)
## [1] 902297
#names(storm_data)

Cleaning and Preparing the Data

The event types contain many duplicates and similar types. These need to be simplified to the main categories of the data set.

# Change the Event Type names to be consistent
# unique(storm_data$EVTYPE)

# Ordered recoding rules: names are regular expressions tested against the
# lower-cased event type, values are the simplified category. Rules run top
# to bottom. Every category label contains an upper-case letter while every
# pattern is lower-case, so a name that has been recoded can never be
# matched again by a later rule: the first matching rule wins.
evtype_rules <- c(
  "avalan"                 = "Avalanche",
  "snow|winter|blizzard"   = "WinterStorm",
  "wild|brush"             = "Wildfire",
  "torn|funnel|spout"      = "Tornado",
  "hurricane|typhoon"      = "Hurricane",
  "floo?d"                 = "Flood",
  "hail"                   = "Hail",
  "wi?nd"                  = "Wind",
  "thunderstorm"           = "Thunderstorm",
  "cold"                   = "Cold",
  "surge|surf"             = "StormSurge",
  "ic[ey]|sleet|wintry"    = "Ice",
  # Word boundaries keep "unseasonably warm/dry/wet" out of the Seas
  # category so the Heat, Drought and HeavyRain rules below can claim them.
  "\\bseas\\b|swell"       = "Seas",
  "tropical"               = "TropicalStorm",
  "rain|wet|shower|precip" = "HeavyRain",
  "heat|warm|hot"          = "Heat",
  # A bare "dr" would also capture "drizzle" and "drowning".
  "drought|dry|driest"     = "Drought",
  "frost|freeze"           = "Frost",
  "lightning"              = "Lightning",
  # The leading "f?" is optional, so this matches any name containing "og".
  "f?og"                   = "Fog",
  "tsunami"                = "Tsunami",
  "slide"                  = "Landslides",
  # Volcanic events get their own category instead of being filed under
  # HeavyRain; "\\bash" avoids matching the "ash" inside "flash".
  "volc|\\bash"            = "VolcanicAsh",
  "coastal"                = "CoastalStorm"
)

storm_data$evtype_simplified <- tolower(storm_data$EVTYPE)
for (rule_index in seq_along(evtype_rules)) {
  rule_hits <- grepl(
    names(evtype_rules)[rule_index],
    storm_data$evtype_simplified
  )
  storm_data$evtype_simplified[rule_hits] <- evtype_rules[[rule_index]]
}

#unique(storm_data$evtype_simplified)

Analysis of Crop and Property Damage in the US from Severe Weather Events

Across the United States, which types of events have the greatest economic consequences?

The relevant data for analysis is the subset which includes the property and crop damage values.

# Convert a recorded damage amount and its magnitude code into US dollars.
#
# amount:   numeric vector of damage figures (CROPDMG / PROPDMG).
# exp_code: magnitude codes, one per amount (CROPDMGEXP / PROPDMGEXP).
# Returns `amount` multiplied by 1e3 for "K", 1e6 for "M" and 1e9 for "B".
# Codes are matched case-insensitively so that lower-case entries such as
# "k" or "m" are scaled too; any other code leaves the amount unscaled.
damage_in_usd <- function(amount, exp_code) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  codes <- toupper(as.character(exp_code))
  scale_by <- unname(multipliers[codes])
  # Unrecognised (non-missing) codes index to NA; treat them as "no scaling".
  scale_by[is.na(scale_by) & !is.na(codes)] <- 1
  amount * scale_by
}

# Combine the storm crop damage amount and exp into US dollars
Cropdamage_in_usd <- damage_in_usd(storm_data$CROPDMG, storm_data$CROPDMGEXP)
storm_data$Cropdamage_in_usd <- Cropdamage_in_usd

# Combine the storm property damage amount and exp into US dollars
Propdamage_in_usd <- damage_in_usd(storm_data$PROPDMG, storm_data$PROPDMGEXP)
storm_data$Propdamage_in_usd <- Propdamage_in_usd

Group the property and crop damage by simplified event type and sum the damage within each group.

# Total property and crop damage (in US dollars) per simplified event type.
storm_data_DMG <- summarise(
  group_by(
    select(storm_data, evtype_simplified, Propdamage_in_usd, Cropdamage_in_usd),
    evtype_simplified
  ),
  TotalPropdamage_in_usd = sum(Propdamage_in_usd),
  TotalCropdamage_in_usd = sum(Cropdamage_in_usd)
)

Arrange the data in decreasing order by the sum of the total economic damage and keep the top 10.

# Add the combined damage column, rank by it, and keep the ten largest rows.
stormdata_EconDam_10 <- storm_data_DMG %>%
  mutate(
    Totaldamage_in_usd = TotalPropdamage_in_usd + TotalCropdamage_in_usd
  ) %>%
  arrange(desc(Totaldamage_in_usd)) %>%
  slice(1:10)

The crop and property damage are now grouped by event type, with the total damage summed for each type.

# Render the top-10 economic damage summary as a table.
knitr::kable(stormdata_EconDam_10)
evtype_simplified TotalPropdamage_in_usd TotalCropdamage_in_usd Totaldamage_in_usd
Flood 167523190932 12380079110 179903270042
Hurricane 85336410030 5506117810 90842527840
Tornado 58591369840 417461520 59008831360
StormSurge 48080199000 2355000 48082554000
Hail 16015091077 3111296290 19126387367
Wind 15925508030 1964120147 17889628177
Drought 1052983600 13972581000 15025564600
Ice 3958908060 5022114300 8981022360
Wildfire 8491618500 402781630 8894400130
WinterStorm 8464008953 294187100 8758196053

Analysis of Fatalities and Injuries in the US from Severe Weather Events

Across the United States, which types of events are most harmful with respect to population health?

# Keep only events that recorded at least one fatality or injury, total both
# counts per simplified event type, then add the combined casualty count.
storm_data_FatInj <- storm_data %>%
  select(evtype_simplified, FATALITIES, INJURIES) %>%
  filter(FATALITIES > 0 | INJURIES > 0) %>%
  group_by(evtype_simplified) %>%
  summarise(
    total_fatalities = sum(FATALITIES),
    total_injuries = sum(INJURIES)
  ) %>%
  mutate(total_fatalities_injuries = total_fatalities + total_injuries)

Arrange the data in decreasing order by the sum of the total fatalities and injuries, keeping only the top 10.

# Rank event types by combined fatalities and injuries; keep the first ten.
storm_data_FatInj_10 <- slice(
  arrange(storm_data_FatInj, desc(total_fatalities_injuries)),
  1:10
)

The simplified total of Fatalities and Injuries by event type is:

# Render the top-10 fatalities and injuries summary as a table.
knitr::kable(storm_data_FatInj_10)
evtype_simplified total_fatalities total_injuries total_fatalities_injuries
Tornado 5664 91439 97103
Wind 1413 11344 12757
Heat 3138 9226 12364
Flood 1525 8604 10129
Lightning 817 5231 6048
WinterStorm 548 3861 4409
Ice 105 2260 2365
Wildfire 90 1608 1698
Hail 20 1467 1487
Hurricane 135 1333 1468

Results:

Economic Consequences Due to Crop and Property Damage:

# Horizontal bar chart of total damage per event type, bars ordered by total.
ggplot(
  stormdata_EconDam_10,
  aes(
    x = reorder(evtype_simplified, Totaldamage_in_usd),
    y = Totaldamage_in_usd
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Economic Damage Due to Severe Weather Events",
    x = "",
    y = "Total Damage in US Dollars"
  ) +
  scale_y_continuous(labels = dollar) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The greatest overall economic consequence due to severe weather events in the US is caused by damage from flooding events.

# Facet strip labels, keyed by the column names gathered into `damage_type`.
damage_names <- list(
  "TotalPropdamage_in_usd" = "Property Damage",
  "TotalCropdamage_in_usd" = "Crop Damage"
)

# Build the labeller from the lookup table with as_labeller(). The older
# function(variable, value) labeller signature is deprecated in ggplot2 and
# raises a warning every time the plot is drawn.
damage_labeller <- as_labeller(unlist(damage_names))

# Property and crop damage in separate facets, sharing the event-type order
# of the total-damage ranking.
stormdata_EconDam_10 %>%
    gather(
        key = damage_type, value = measurement,
        TotalPropdamage_in_usd, TotalCropdamage_in_usd
    ) %>%
    ggplot(
        aes(x = reorder(evtype_simplified, Totaldamage_in_usd), y = measurement)
    ) +
    facet_grid(damage_type ~ ., labeller = damage_labeller) +
    geom_bar(stat = "identity") +
    coord_flip() +
    ggtitle("Economic Damage by Type") +
    xlab("") +
    ylab("Total Damage in US Dollars") +
    scale_y_continuous(labels = dollar) +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

When split apart, we see that while flooding, hurricanes and tornadoes cause the greatest damage to property, crop damage is mainly due to drought and flooding events.

Harm to Population Health from Severe Weather Events:

# Horizontal bar chart of combined fatalities and injuries per event type,
# bars ordered by that combined count.
ggplot(
  storm_data_FatInj_10,
  aes(
    x = reorder(evtype_simplified, total_fatalities_injuries),
    y = total_fatalities_injuries
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Damage to Population Health Due to Severe Weather Events",
    x = "",
    y = "Total Injuries and Fatalities"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The greatest damage to population health from severe weather events is caused by tornadoes.

# Facet strip labels, keyed by the column names gathered into `harm_type`.
harm_names <- list(
  "total_fatalities" = "Fatalities",
  "total_injuries" = "Injuries"
)

# Build the labeller from the lookup table with as_labeller(). The older
# function(variable, value) labeller signature is deprecated in ggplot2 and
# raises a warning every time the plot is drawn.
harm_labeller <- as_labeller(unlist(harm_names))

# Fatalities and injuries in separate facets, sharing the event-type order
# of the combined casualty ranking.
storm_data_FatInj_10 %>%
    gather(
        key = harm_type, value = measurement,
        total_fatalities, total_injuries
    ) %>%
    ggplot(
        aes(
            x = reorder(evtype_simplified, total_fatalities_injuries),
            y = measurement
        )
    ) +
    facet_grid(harm_type ~ ., labeller = harm_labeller) +
    geom_bar(stat = "identity") +
    coord_flip() +
    ggtitle("Fatalities and Injuries Due to Severe Weather Events") +
    xlab("") +
    ylab("Number of People Harmed") +
    scale_y_continuous() +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))