Weathers Events with Most Severe Population Health and Economy Impact

Synopsis

Based on storm data gathered by the National oceanic and Atmosphereic Administration (NOAA), the study compares severe weathers impact on population health, as measured by injuries and fatalities, and on economy, as measured by estimated damage on properties and crops. Based on data beginning January of 2011, the study finds, with regard to their effect on population health, TORNADO has the highest impact, with Heat and Thuderstorm Wind being distant 2nd and 3rd. Additionally, FLOOD and TORNADO have the highest impact on economy.

Data Processing

library(RCurl)
library(ggplot2)
library(lattice) 
library(reshape2)

#download file
if(!file.exists("StormData.csv.bz2")) {
  Data_URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url=Data_URL, destfile="StormData.csv.bz2", method='libcurl')
}

#read data, this may take awhile
StormData <- read.csv("StormData.csv.bz2", stringsAsFactors=F)

#examine data, 
# via head, to get a quick view of column names and snippet of data
head(StormData)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6
# via Summary, note Fatalities (mean 0.0168 / max 583) & Injuries (mean 0.1557 / max 17000)
summary(StormData)
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI       
##  Min.   :   0.000   Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character  
##  Mean   :   1.484                                        
##  3rd Qu.:   1.000                                        
##  Max.   :3749.000                                        
##                                                          
##    END_DATE           END_TIME           COUNTY_END COUNTYENDN    
##  Length:902297      Length:902297      Min.   :0    Mode:logical  
##  Class :character   Class :character   1st Qu.:0    NA's:902297   
##  Mode  :character   Mode  :character   Median :0                  
##                                        Mean   :0                  
##                                        3rd Qu.:0                  
##                                        Max.   :0                  
##                                                                   
##    END_RANGE          END_AZI           END_LOCATI       
##  Min.   :  0.0000   Length:902297      Length:902297     
##  1st Qu.:  0.0000   Class :character   Class :character  
##  Median :  0.0000   Mode  :character   Mode  :character  
##  Mean   :  0.9862                                        
##  3rd Qu.:  0.0000                                        
##  Max.   :925.0000                                        
##                                                          
##      LENGTH              WIDTH                F               MAG         
##  Min.   :   0.0000   Min.   :   0.000   Min.   :0.0      Min.   :    0.0  
##  1st Qu.:   0.0000   1st Qu.:   0.000   1st Qu.:0.0      1st Qu.:    0.0  
##  Median :   0.0000   Median :   0.000   Median :1.0      Median :   50.0  
##  Mean   :   0.2301   Mean   :   7.503   Mean   :0.9      Mean   :   46.9  
##  3rd Qu.:   0.0000   3rd Qu.:   0.000   3rd Qu.:1.0      3rd Qu.:   75.0  
##  Max.   :2315.0000   Max.   :4400.000   Max.   :5.0      Max.   :22000.0  
##                                         NA's   :843563                    
##    FATALITIES          INJURIES            PROPDMG       
##  Min.   :  0.0000   Min.   :   0.0000   Min.   :   0.00  
##  1st Qu.:  0.0000   1st Qu.:   0.0000   1st Qu.:   0.00  
##  Median :  0.0000   Median :   0.0000   Median :   0.00  
##  Mean   :  0.0168   Mean   :   0.1557   Mean   :  12.06  
##  3rd Qu.:  0.0000   3rd Qu.:   0.0000   3rd Qu.:   0.50  
##  Max.   :583.0000   Max.   :1700.0000   Max.   :5000.00  
##                                                          
##   PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Length:902297      Min.   :  0.000   Length:902297     
##  Class :character   1st Qu.:  0.000   Class :character  
##  Mode  :character   Median :  0.000   Mode  :character  
##                     Mean   :  1.527                     
##                     3rd Qu.:  0.000                     
##                     Max.   :990.000                     
##                                                         
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 

Studying weather impact on population health

# via unique, take a look at type of events in table for the purpose of this analysis what's "harmful with respect to population health" will be defined by mortality (fatality) & injuries an event has caused

# via aggregate, do all storms have equal amout of historical data? (no)
StormData.EarliestTracking <- aggregate(BGN_DATE ~ EVTYPE, data=StormData, FUN=min)
head(StormData.EarliestTracking)
##                  EVTYPE           BGN_DATE
## 1    HIGH SURF ADVISORY  12/3/2001 0:00:00
## 2         COASTAL FLOOD 12/13/1996 0:00:00
## 3           FLASH FLOOD   4/2/2000 0:00:00
## 4             LIGHTNING  8/24/1998 0:00:00
## 5             TSTM WIND   6/4/1999 0:00:00
## 6       TSTM WIND (G45)   1/8/1998 0:00:00
tail(StormData.EarliestTracking)
##                 EVTYPE           BGN_DATE
## 980 WINTER WEATHER/MIX   1/1/2003 0:00:00
## 981        WINTERY MIX  3/21/1998 0:00:00
## 982         Wintry mix   1/8/1997 0:00:00
## 983         Wintry Mix 11/22/1996 0:00:00
## 984         WINTRY MIX  1/13/1999 0:00:00
## 985                WND 11/22/2001 0:00:00
# since not all weather events are not track w/ the same start time, naturally making weathers with longer tracking history more impactful overall. as a result, this study will  use max of BGN_DATE to find a more recent cutoff date for more even analysis
StormData.EarliestTracking$BGN_DATE <- as.Date(StormData.EarliestTracking$BGN_DATE , "%m/%d/%Y")
max(StormData.EarliestTracking$BGN_DATE)
## [1] "2011-01-01"
# create smaller dataset for analysis: BGN_DATE (>= 1/1/2011), EVTYPE, INJURIES, FATALITIES
StormRecent <- subset(StormData,as.Date(BGN_DATE,"%m/%d/%Y")>= as.Date(max(StormData.EarliestTracking$BGN_DATE)))

# further filter out events causing no injuries and no fatalities (i.e. keeping inj > 0 OR fat > 0)
StormPopEffect <- subset(StormRecent,(INJURIES>0 | FATALITIES>0))

# aggregat data into two sets, 
# one w/ sum of injuries by event
StormPopEffect.INJ <- aggregate(INJURIES ~ EVTYPE, data=StormPopEffect, FUN=sum)
# one w/ sum of fatalities by event
StormPopEffect.FAT <- aggregate(FATALITIES ~ EVTYPE, data=StormPopEffect, FUN=sum)

# merge injuries and fatalities by event
StormPopEffect.Overall <- merge(StormPopEffect.FAT,StormPopEffect.INJ,by=c("EVTYPE"))
nrow(StormPopEffect.Overall)
## [1] 28
#28 events are too many to plot, take the top six events from each set
StormPopEffect.Overall$SUM <- StormPopEffect.Overall$FATALITIES+StormPopEffect.Overall$INJURIES
StormPopEffect.Overall <- head(StormPopEffect.Overall[order(-StormPopEffect.Overall$SUM),])

# melt it for charting
StormPopEffect.Overall <- melt(StormPopEffect.Overall, id.vars = "EVTYPE", measure.vars = c("FATALITIES", "INJURIES"))

Studying weather impact on economy

# create subset for storm effect by property & crop damage, still looking at post 9/9/1998 data
StormEconEffect <- subset(StormRecent,(PROPDMG>0 | CROPDMG>0))

# need to reformat data by PROPDMGEXP & CROPDMGEXP by following description - 
#
# "Estimates should be rounded to three significant digits, followed by an alphabetical 
#  character signifying the magnitude of the number, i.e., 1.55B for $1,550,000,000. 
#  Alphabetical characters used to signify magnitude include “K” for thousands, “M” for 
#  millions, and “B” for billions." 
#  - https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf, Pg. 12
#

# via unique, take a look at types of estimates used in table 
# K & M used for PROPDMG
unique(unlist(StormEconEffect$PROPDMGEXP, use.names = FALSE))
## [1] "K" "M" "B"
# K used for CROPDMG
unique(unlist(StormEconEffect$CROPDMGEXP, use.names = FALSE))
## [1] "K" "M"
# determine if the data set make damage estimate below K (thousands)?  (no)
subset(StormEconEffect,(PROPDMGEXP==''))$PROPDMG
## numeric(0)
subset(StormEconEffect,(CROPDMGEXP==''))$CROPDMG
## numeric(0)
# Since damage estimates are made by thousands (K) or millions (M), data will be normalized by 
# the lowest denominator, thousands (K). 
# Further, normalization is only needed for PROPDMG as CROPDMG are only in the thousands range

StormEconEffect.Norm <- within(StormEconEffect, PROPDMG[PROPDMGEXP=='M']<-PROPDMG[PROPDMGEXP=='M']*1000)

StormEconEffect.Norm <- aggregate(PROPDMG ~ EVTYPE, data=StormEconEffect.Norm, FUN=sum)

StormEconEffect.Norm <- cbind(StormEconEffect.Norm, setNames(aggregate(CROPDMG ~ EVTYPE, data=StormEconEffect, FUN=sum), list("EVT2","CROPDMG")))
nrow(StormEconEffect.Norm)
## [1] 38
#38 rows are too much to plot, take top 6 instead
StormEconEffect.Norm$TTLDMG <- StormEconEffect.Norm$PROPDMG + StormEconEffect.Norm$CROPDMG
StormEconEffect.Norm <- head(StormEconEffect.Norm[order(-StormEconEffect.Norm$TTLDMG),])
StormEconEffect.Norm <- melt(StormEconEffect.Norm, id.vars = "EVTYPE", measure.vars = c("CROPDMG", "PROPDMG"))

Results

Weathers with Most Severe Impact on Population Health

## Chart Populaton Health Impact Result
ggplot(data=StormPopEffect.Overall, aes(x=EVTYPE, y=value, fill=variable)) + 
  geom_bar(stat="identity", position=position_dodge()) +
  ylab("Frequency") +
  xlab("Event Type") +
  scale_fill_discrete(name="Effect to\nPopulation Health") +
  ggtitle("Severe Weather Events Most Harmful to Population Health\n(normalized, since 1/1/2011)") +
  theme(plot.title = element_text(lineheight=.8, face="bold"), axis.text.x = element_text(angle = 30, hjust = 1) )

Weathers with Most Severe Impact on Economy

## Chart Economy Impact Result
ggplot(data=StormEconEffect.Norm, aes(x=EVTYPE, y=value, fill=variable)) + 
  geom_bar(stat="identity") +
  ylab("Damage Est. (K)") +
  xlab("Event Type") +
  scale_fill_discrete(name="Effect to\nEconomy") +
  ggtitle("Severe Weather Events Most Harmful to Economy\n(normalized, since 1/1/2011)") +
  theme(plot.title = element_text(lineheight=.8, face="bold"), axis.text.x = element_text(angle = 30, hjust = 1) )