Synopsis

In this report, I aim to describe the impact of severe weather on the economy and on population health in the United States between the years 1950 and 2011. The goal of this report is to inform governmental or municipal authorities about the types of severe weather events that demand the most resources. For this investigation, I obtained the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms. The database contains weather events in the United States, when and where they occurred, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

I first download the data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) website. The data is a delimited file where fields are separated by the “,” character. I then read the raw data.

# Fetch the compressed NOAA storm database (once) and load it into `stormData`.
# Get file url
td <- tempdir()
tf <- tempfile(tmpdir = td, fileext = ".bz2")
file_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
data_file <- "./data/repdata-data-StormData.csv.bz2"

if (!dir.exists("./data")) {
    dir.create("./data")
}

# Download the bz2 file only when no local copy exists. The download goes to a
# temporary file first so that a failed transfer never leaves a truncated
# archive in ./data.
if (!file.exists(data_file)) {
    status <- download.file(file_url, tf, method = "curl")
    if (status != 0 || !file.copy(tf, data_file)) {
        stop("Could not download the storm data from ", file_url, call. = FALSE)
    }
}

# Record when the file that is actually analysed was written, so the reported
# date stays truthful when a previously downloaded copy is reused.
dateDownloaded <- format(file.mtime(data_file), "%a %b %d %H:%M:%S %Y")

# read.csv decompresses .bz2 files transparently
stormData <- read.csv(data_file)

The column containing the event start date is converted to the appropriate date format. One of the fields of the dataset contains the names of the weather events; however, they are not standardized. To facilitate the analysis, only the events whose names match the standard names are considered. This simplification does not affect the overall analysis, since most of the events are recorded in accordance with the standard names.

Pre-processing the raw data

# Parse the event start date; the time-of-day part of the field is discarded
# by as.Date().
stormData$BGN_DATE <- as.Date(as.character(stormData$BGN_DATE),
                              format = "%m/%d/%Y %H:%M:%S")
# Make types uppercase so the comparison with the standard names is
# case-insensitive
stormData$EVTYPE <- toupper(stormData$EVTYPE)

# Create a list with the 48 standard events from NATIONAL WEATHER SERVICE INSTRUCTION
# (NWS Directive 10-1605). The directive names the surge event
# "Storm Surge/Tide"; spelling it "Storm Tide" would silently drop those records.
events <- toupper(c("Astronomical Low Tide", "Avalanche", "Blizzard", "Coastal Flood",
"Cold/Wind Chill", "Debris Flow", "Dense Fog", "Dense Smoke", "Drought", "Dust Devil",
"Dust Storm", "Excessive Heat", "Extreme Cold/Wind Chill", "Flash Flood", "Flood",
"Freezing Fog", "Frost/Freeze", "Funnel Cloud", "Hail", "Heat", "Heavy Rain", "Heavy Snow",
"High Surf", "High Wind", "Hurricane/Typhoon", "Ice Storm", "Lakeshore Flood",
"Lake-Effect Snow", "Lightning", "Marine Hail", "Marine High Wind", "Marine Strong Wind",
"Marine Thunderstorm Wind", "Rip Current", "Seiche", "Sleet", "Storm Surge/Tide", "Strong Wind",
"Thunderstorm Wind", "Tornado", "Tropical Depression", "Tropical Storm", "Tsunami",
"Volcanic Ash", "Waterspout", "Wildfire", "Winter Storm", "Winter Weather"))

# Keep only the records whose event type is one of the standard names
stormData_standardized <- stormData[stormData$EVTYPE %in% events, ]

Results

In order to show the most harmful weather events, I combined the total number of fatalities with the total number of injuries.

The table and plot below show, across the United States, the types of events that are most harmful with respect to population health.

library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lattice) 
# COMBINED_HARM = fatalities plus injuries recorded for each event
stormData_standardized$COMBINED_HARM <-
    stormData_standardized$FATALITIES + stormData_standardized$INJURIES

# Total number of victims for every event type
victims_per_event <- summarize(group_by(stormData_standardized, EVTYPE),
                               total_victims = sum(COMBINED_HARM, na.rm = TRUE))

# Rank the event types from most to fewest victims
victims_per_event <- arrange(victims_per_event, desc(total_victims))
victims_per_event
## Source: local data frame [46 x 2]
## 
##               EVTYPE total_victims
## 1            TORNADO         96979
## 2     EXCESSIVE HEAT          8428
## 3              FLOOD          7259
## 4          LIGHTNING          6046
## 5               HEAT          3037
## 6        FLASH FLOOD          2755
## 7          ICE STORM          2064
## 8  THUNDERSTORM WIND          1621
## 9       WINTER STORM          1527
## 10         HIGH WIND          1385
## ..               ...           ...
# EVTYPE is a character column, so lattice would order the bars alphabetically;
# reorder() ranks the event types by their total so the most harmful is on top.
barchart(reorder(EVTYPE, total_victims) ~ total_victims, data = victims_per_event,
         xlab = "total fatalities and injuries")

In order to show the events that have the greatest economic consequences, I combined the total dollar value of the damage to both property and crops.

The table and graph below show, across the United States, which types of events have the greatest economic consequences.

# Convert the recorded damage figures to dollars and total them per event type.
# PROPDMG / CROPDMG hold the figure, PROPDMGEXP / CROPDMGEXP its magnitude code.

# Make types uppercase so lower- and upper-case codes are treated alike
stormData_standardized$CROPDMGEXP <- toupper(stormData_standardized$CROPDMGEXP)
stormData_standardized$PROPDMGEXP <- toupper(stormData_standardized$PROPDMGEXP)

# A named vector cannot be looked up by the empty string, so blank codes are
# rewritten as a single space before the lookup below.
stormData_standardized$CROPDMGEXP <- sub("^$", " ", stormData_standardized$CROPDMGEXP)
stormData_standardized$PROPDMGEXP <- sub("^$", " ", stormData_standardized$PROPDMGEXP)

# Dollar multiplier for every magnitude code. "H" (hundreds) is listed so that
# such records are converted instead of becoming NA.
multiplier <- c("H" = 100, "K" = 1000, "M" = 1000000, "B" = 1000000000,
                "0" = 10, "1" = 10, "2" = 10, "3" = 10, "4" = 10, "5" = 10,
                "6" = 10, "7" = 10, "8" = 10, "+" = 1, "-" = 0, "?" = 0, " " = 0)

# unname() keeps the lookup names from leaking into the new columns
prop_multiplier <- unname(multiplier[stormData_standardized$PROPDMGEXP])
crop_multiplier <- unname(multiplier[stormData_standardized$CROPDMGEXP])

# A code missing from the table yields NA. Left alone, the NA would also wipe
# out the other damage component when the two are added, so unknown codes are
# counted as zero damage and reported.
unknown_code <- is.na(prop_multiplier) | is.na(crop_multiplier)
if (any(unknown_code)) {
    warning(sum(unknown_code),
            " record(s) have an unrecognised damage exponent; treated as 0",
            call. = FALSE)
}
prop_multiplier[is.na(prop_multiplier)] <- 0
crop_multiplier[is.na(crop_multiplier)] <- 0

stormData_standardized$PROP_DAMAGE <- prop_multiplier * stormData_standardized$PROPDMG
stormData_standardized$CROP_DAMAGE <- crop_multiplier * stormData_standardized$CROPDMG

# COMBINED_DAMAGE is the combination of property damage and crop damage
stormData_standardized <- mutate(stormData_standardized, COMBINED_DAMAGE = PROP_DAMAGE + CROP_DAMAGE)

damage_per_event <- stormData_standardized %>% group_by(EVTYPE) %>%
                           summarize(total_damage = sum(COMBINED_DAMAGE,
                                                        na.rm = TRUE))
# sort by total damage, largest first
damage_per_event <- damage_per_event[order(-damage_per_event$total_damage),]
damage_per_event
## Source: local data frame [46 x 2]
## 
##               EVTYPE total_damage
## 1              FLOOD 150319678250
## 2  HURRICANE/TYPHOON  71913712800
## 3            TORNADO  57352117607
## 4               HAIL  18758224027
## 5        FLASH FLOOD  17562132111
## 6            DROUGHT  15018672000
## 7          ICE STORM   8967041810
## 8     TROPICAL STORM   8382236550
## 9       WINTER STORM   6715441260
## 10         HIGH WIND   5908617580
## ..               ...          ...
# EVTYPE is a character column, so lattice would order the bars alphabetically;
# reorder() ranks the event types by their total so the costliest is on top.
barchart(reorder(EVTYPE, total_damage) ~ total_damage, data = damage_per_event,
         xlab = "total damage in dollars")