This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The project analyzes the effects of storms and other severe weather events, which cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

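The analysis aims to answer these two questions:

1. Which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
2. Which types of events have the greatest economic consequences?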

Data Processing

# Flag meant to keep code visible in the report.
# NOTE(review): this is a plain variable; nothing here hands it to the
# report renderer, so confirm it has the intended effect.
echo <- TRUE
# Add a penalty of 1 to scientific notation so that fixed notation is
# preferred when numbers are printed.
options(scipen = 1)
library(utils)
library(ggplot2)
library(plyr)
require(gridExtra)
## Loading required package: gridExtra
## Warning in library(package, lib.loc = lib.loc, character.only = TRUE,
## logical.return = TRUE, : there is no package called 'gridExtra'
library(reshape2)

Download the storm data archive (.bz2) and extract it into the data folder, then read the extracted CSV file using a path relative to the working directory.

# Read the extracted storm CSV; text columns stay character vectors rather
# than being converted to factors.
rawdata <- read.csv(
  file = "data/repdata-data-StormData.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
# Report how many rows and columns were read.
dim(rawdata)
## [1] 902297     37

There are 902297 rows and 37 columns in total.

# Derive the calendar year of each event from its begin-date string.
# The guard checks for the `year` column itself rather than for a total
# column count of 37: the chunk is still safe to re-run, and the column is
# always created on first run even if the input has a different number of
# columns (otherwise hist() below would fail on a missing column).
if (!"year" %in% colnames(rawdata)) {
  rawdata$year <- as.numeric(
    format(as.Date(rawdata$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"), "%Y")
  )
}

# Histogram of the number of recorded events per year.
hist(rawdata$year,
     breaks = 30,
     col = "purple")

Normalizing the data.

Convert the column names, the event types, and the damage exponent codes to lower case.

# Lower-case every column name first, so the columns can be addressed by
# their lower-case names below.
names(rawdata) <- tolower(names(rawdata))
# Lower-case the event type and both damage exponent codes, so that values
# differing only in letter case compare equal.
rawdata <- transform(
  rawdata,
  evtype = tolower(evtype),
  cropdmgexp = tolower(cropdmgexp),
  propdmgexp = tolower(propdmgexp)
)
Set the multiplier for each damage exponent code: h is 100, k is 1,000, m is 1,000,000, and b is 1,000,000,000; any other code is treated as 1.
# Multiplier for each recognised (lower-cased) exponent code.
exp_multiplier <- c(h = 1e2, k = 1e3, m = 1e6, b = 1e9)

# Look up the crop-damage multiplier per row; codes that are not in the
# table (including the empty string) come back as NA and fall back to 1.
rawdata$cropdmgmag <- unname(exp_multiplier[rawdata$cropdmgexp])
rawdata$cropdmgmag[is.na(rawdata$cropdmgmag)] <- 1

# Same lookup for the property-damage multiplier.
rawdata$propdmgmag <- unname(exp_multiplier[rawdata$propdmgexp])
rawdata$propdmgmag[is.na(rawdata$propdmgmag)] <- 1

# Damage value = reported amount scaled by its multiplier.
rawdata$cropdmgval <- rawdata$cropdmgmag * rawdata$cropdmg
rawdata$propdmgval <- rawdata$propdmgmag * rawdata$propdmg

The first 25 unique event types in the dataset.

# List the first 25 distinct event types, in order of first appearance.
unique(rawdata[["evtype"]])[seq_len(25)]
##  [1] "tornado"                      "tstm wind"                   
##  [3] "hail"                         "freezing rain"               
##  [5] "snow"                         "ice storm/flash flood"       
##  [7] "snow/ice"                     "winter storm"                
##  [9] "hurricane opal/high winds"    "thunderstorm winds"          
## [11] "record cold"                  "hurricane erin"              
## [13] "hurricane opal"               "heavy rain"                  
## [15] "lightning"                    "thunderstorm wind"           
## [17] "dense fog"                    "rip current"                 
## [19] "thunderstorm wins"            "flash flood"                 
## [21] "flash flooding"               "high winds"                  
## [23] "funnel cloud"                 "tornado f0"                  
## [25] "thunderstorm winds lightning"
  • What does the data look like?
# summary(rawdata) # Uncomment to print a per-column summary
# Preview the first three rows, including the derived columns.
head(x = rawdata, n = 3)
##   state__          bgn_date bgn_time time_zone county countyname state
## 1       1 4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1 4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1 2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
##    evtype bgn_range bgn_azi bgn_locati end_date end_time county_end
## 1 tornado         0                                               0
## 2 tornado         0                                               0
## 3 tornado         0                                               0
##   countyendn end_range end_azi end_locati length width f mag fatalities
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
##   injuries propdmg propdmgexp cropdmg cropdmgexp wfo stateoffic zonenames
## 1       15    25.0          k       0                                    
## 2        0     2.5          k       0                                    
## 3        2    25.0          k       0                                    
##   latitude longitude latitude_e longitude_ remarks refnum year cropdmgmag
## 1     3040      8812       3051       8806              1 1950          1
## 2     3042      8755          0          0              2 1950          1
## 3     3340      8742          0          0              3 1951          1
##   propdmgmag cropdmgval propdmgval
## 1       1000          0      25000
## 2       1000          0       2500
## 3       1000          0      25000

Results

Aggregate the data by event type to form a complete dataset of total injuries, fatalities, crop damage, and property damage.

# Total injuries, fatalities, crop damage and property damage per event type.
newStormData <- aggregate(
  cbind(injuries, fatalities, cropdmgval, propdmgval) ~ evtype,
  data = rawdata,
  FUN = sum
)

Public Health Impact

# Health impact per event type = injuries plus fatalities.
newStormData$hlthdmg <- with(newStormData, injuries + fatalities)

# Rank event types from highest to lowest health impact, keeping only the
# health-related columns.
public_health_impact_data <- newStormData[
  order(newStormData$hlthdmg, decreasing = TRUE),
  c("evtype", "injuries", "fatalities", "hlthdmg")
]

# The ten event types with the highest health impact.
major_health_impact <- public_health_impact_data[seq_len(10), ]

Economic Impact

# Economic impact per event type = crop damage plus property damage.
newStormData$econdmg <- with(newStormData, cropdmgval + propdmgval)

# Rank event types from highest to lowest economic impact, keeping only the
# damage-related columns.
economic_impact_data <- newStormData[
  order(newStormData$econdmg, decreasing = TRUE),
  c("evtype", "cropdmgval", "propdmgval", "econdmg")
]

# The ten event types with the highest economic impact.
major_economic_impact <- economic_impact_data[seq_len(10), ]
# Reshape both top-10 tables to long form: one row per event type and
# damage category, with the amount in `value`.

# Health: drop the 4th column (the combined total), then melt the two
# component columns.
major_health_impact <- major_health_impact[-4]
major_health_impact <- melt(
  major_health_impact,
  id.vars = "evtype",
  variable.name = "dmgctgry"
)

# Economic: same treatment for the crop and property damage columns.
major_economic_impact <- major_economic_impact[-4]
major_economic_impact <- melt(
  major_economic_impact,
  id.vars = "evtype",
  variable.name = "dmgctgry"
)

View the Health Impact Data

# Show the structure of the long-form health impact table.
utils::str(major_health_impact)
## 'data.frame':    20 obs. of  3 variables:
##  $ evtype  : chr  "tornado" "excessive heat" "tstm wind" "flood" ...
##  $ dmgctgry: Factor w/ 2 levels "injuries","fatalities": 1 1 1 1 1 1 1 1 1 1 ...
##  $ value   : num  91346 6525 6957 6789 5230 ...

View the Economic Impact Data

# Show the structure of the long-form economic impact table.
utils::str(major_economic_impact)
## 'data.frame':    20 obs. of  3 variables:
##  $ evtype  : chr  "flood" "hurricane/typhoon" "tornado" "storm surge" ...
##  $ dmgctgry: Factor w/ 2 levels "cropdmgval","propdmgval": 1 1 1 1 1 1 1 1 1 1 ...
##  $ value   : num  5.66e+09 2.61e+09 4.15e+08 5.00e+03 3.03e+09 ...

Interpreting the results

Events (as indicated in the EVTYPE variable) that are most harmful with respect to population health.

# Horizontal stacked bar chart of the top health-impact event types, with
# injuries and fatalities stacked per event type and bars ordered by value.
# The values plotted are unscaled counts, so the axis label states people
# rather than "thousands". The former `color = "black"` entry in labs() was
# dropped: no colour aesthetic is mapped, so it only labelled an unused scale.
ggplot(major_health_impact,
       aes(x = reorder(evtype, value), y = value, fill = dmgctgry)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "Health Impacting Weather Events",
       x = "Weather Events",
       y = "Injuries and Fatalities (number of people)") +
  coord_flip()

Events that have the greatest economic consequences.

# Horizontal stacked bar chart of the top economic-impact event types, with
# crop and property damage stacked per event type and bars ordered by value.
# Fixes relative to the earlier version:
#   * the title now describes economic (not health) impact;
#   * damage is divided by 1e9 and labelled in billions of USD, instead of
#     plotting raw dollars under an "(in thousands)" label;
#   * the `color = "black"` entry in labs() was dropped because no colour
#     aesthetic is mapped.
ggplot(major_economic_impact,
       aes(x = reorder(evtype, value), y = value / 1e9, fill = dmgctgry)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "Economic Impact of Weather Events",
       x = "Weather Events",
       y = "Property and Crop Damage (billions of USD)") +
  coord_flip()

# Print the five event types with the largest total economic damage.
economic_impact_data[seq_len(5), ]
##                evtype cropdmgval   propdmgval      econdmg
## 148             flood 5661968450 144657709807 150319678257
## 367 hurricane/typhoon 2607872800  69305840000  71913712800
## 754           tornado  414953270  56937160779  57352114049
## 595       storm surge       5000  43323536000  43323541000
## 206              hail 3025954473  15732267543  18758222016

Floods cause the greatest economic damage of all weather events, accounting for a total of 150,319,678,257 USD in combined crop and property damage.

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.