Natural Disasters in EEUU, Health And Economy Impact

Danilo Verdugo C
24-6-2016

Synopsis

Severe natural disasters have important and dangerous effects on people, property and public health. As an attempt to preparation and study of future disasters NOAA agency has prepared a historical database with severe weather events.

In the present study we will study such a database and answer two key questions:

  1. At the national level, type of event that has the greatest impact on public health.
  2. At the national level, type of event that has the greatest economic impact.

Detailed description of the NOAA database can be found in code blook

Data Processing

Set Libraries

Load libraries, set some parameter.

library(dplyr)
library(stringr)
library(maps)
library(choroplethr)
library(choroplethrMaps)
library(ggplot2)
library(gridExtra)

data("county.fips")

Data Loading

We load data from website, then unzip in local directory.

setwd("C:/Users/danilo.verdugo/Dropbox/personal/COURSERA DATA SCIENTIST/curso 5/semana 4/trabajo")
if(!file.exists("StormData.csv.bz2")){
      download.file("http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", destfile = "StormData.csv.bz2")
}

stormdata <- read.csv(bzfile("StormData.csv.bz2"), sep = ",", header = TRUE, stringsAsFactors = FALSE)

str(stormdata)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Data Preproccesing

Select some columns that are relevant and discard the rest.

storm <- select(stormdata, COUNTYNAME, STATE, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
#NOTE: I use counti.fips data to build column "state,county" for mapping.!!
storm$STATE <- state.name[match(storm$STATE,state.abb)]
storm <- mutate(storm,FIPSNAME=str_c(tolower(STATE),",",tolower(COUNTYNAME)))
#remove not used col.
storm[,"STATE"] <- NULL
storm[,"COUNTYNAME"] <- NULL

#remove from memory original data for preserve some memory.
rm(stormdata)

In the original database, some characters in PROPDMGEXP and CROPDMGEXP must be removed. Then convert alphanumeric exponents to numeric for the convenience of data aggregation.

storm$PROPDMGEXP[storm$PROPDMGEXP==""|storm$PROPDMGEXP=="-"|storm$PROPDMGEXP=="+"|storm$PROPDMGEXP=="?"] = 0

storm[(storm$PROPDMGEXP == "H" | storm$PROPDMGEXP == "h"), ]$PROPDMGEXP <- 2
storm[(storm$PROPDMGEXP == "K" | storm$PROPDMGEXP == "k"), ]$PROPDMGEXP <- 3
storm[(storm$PROPDMGEXP == "M" | storm$PROPDMGEXP == "m"), ]$PROPDMGEXP <- 6
storm[(storm$PROPDMGEXP == "B" | storm$PROPDMGEXP == "b"), ]$PROPDMGEXP <- 9
storm$CROPDMGEXP[storm$CROPDMGEXP==""|storm$CROPDMGEXP=="?"] = 0

storm[(storm$CROPDMGEXP == "K" | storm$CROPDMGEXP == "k"), ]$CROPDMGEXP <- 3
storm[(storm$CROPDMGEXP == "M" | storm$CROPDMGEXP == "m"), ]$CROPDMGEXP <- 6
storm[(storm$CROPDMGEXP == "B" | storm$CROPDMGEXP == "b"), ]$CROPDMGEXP <- 9
#set to numeric for next calculation 
storm <- transform(storm,EVTYPE = as.character(EVTYPE), FATALITIES = as.numeric(FATALITIES), INJURIES = as.numeric(INJURIES), PROPDMG = as.numeric(PROPDMG), PROPDMGEXP = as.numeric(PROPDMGEXP), CROPDMG = as.numeric(CROPDMG), CROPDMGEXP = as.numeric(CROPDMGEXP)) 
#make a new column with total damage
storm <- mutate(storm, DMGTOT = PROPDMG * (10 ^ PROPDMGEXP) + CROPDMG * (10 ^ CROPDMGEXP))

#check exponent
table(storm$PROPDMGEXP)
## 
##      0      1      2      3      4      5      6      7      8      9 
## 466164     25     20 424669      4     28  11341      5      1     40
table(storm$CROPDMGEXP)
## 
##      0      2      3      6      9 
## 618439      1 281853   1995      9
#remove col.
storm[,"PROPDMG"] <- NULL
storm[,"CROPDMG"] <- NULL
storm[,"PROPDMGEXP"] <- NULL
storm[,"CROPDMGEXP"] <- NULL

#building index to mapping by county (we need FIPS county code)
storm <- merge(storm, county.fips, by.x = "FIPSNAME", by.y = "polyname")
storm[,"FIPSNAME"] <- NULL

Data Proccesing

Now we calculate the total of deaths and injuries by event type and exploratory graphs to answer question number one.

fatalities <- aggregate(FATALITIES ~ EVTYPE  , data = storm, FUN = sum)
fatalities <- fatalities[order(fatalities$FATALITIES, decreasing = TRUE), ]
MaxFatalities <- fatalities[1:10, ]
MaxFatalities
##                EVTYPE FATALITIES
## 347           TORNADO       5439
## 56        FLASH FLOOD        908
## 187         LIGHTNING        774
## 361         TSTM WIND        482
## 70              FLOOD        298
## 231      RIP CURRENTS        165
## 277 THUNDERSTORM WIND        128
## 230       RIP CURRENT        105
## 135              HEAT         80
## 137         HEAT WAVE         65
injuries <- aggregate(INJURIES ~ EVTYPE, data = storm, FUN = sum)
injuries <- injuries[order(injuries$INJURIES, decreasing = TRUE), ]
MaxInjuries <- injuries[1:10, ]
MaxInjuries
##                 EVTYPE INJURIES
## 347            TORNADO    87904
## 361          TSTM WIND     6746
## 187          LIGHTNING     5032
## 70               FLOOD     4716
## 56         FLASH FLOOD     1754
## 277  THUNDERSTORM WIND     1436
## 108               HAIL     1356
## 303 THUNDERSTORM WINDS      861
## 413   WILD/FOREST FIRE      530
## 231       RIP CURRENTS      249
par(mfrow = c(1, 2), mar = c(15, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(MaxFatalities$FATALITIES, las = 3, names.arg = MaxFatalities$EVTYPE, main = "Weather Events With\n The Top 10 Highest Fatalities", ylab = "Number of Fatalities", col = "grey")
barplot(MaxInjuries$INJURIES, las = 3, names.arg = MaxInjuries$EVTYPE, main = "Weather Events With\n The Top 10 Highest Injuries", ylab = "Number of Injuries", col = "grey")

Now we calculate the total of damage by event type and exploratory graphs to answer question number dos.

par(mfrow = c(1, 1))
propdmg <- aggregate(DMGTOT ~ EVTYPE, data = storm, FUN = sum)
propdmg <- propdmg[order(propdmg$DMGTOT, decreasing = TRUE), ]
propdmgMax <- propdmg[1:10, ]
propdmgMax <- mutate(propdmgMax,DMGTOT=DMGTOT/1e9)
propdmgMax
##                        EVTYPE     DMGTOT
## 70                      FLOOD 135.460561
## 347                   TORNADO  56.028802
## 56                FLASH FLOOD  17.677727
## 108                      HAIL  17.519106
## 361                 TSTM WIND   4.975960
## 277         THUNDERSTORM WIND   3.807605
## 413          WILD/FOREST FIRE   3.084086
## 146 HEAVY RAIN/SEVERE WEATHER   2.500000
## 414                  WILDFIRE   2.357301
## 303        THUNDERSTORM WINDS   1.849197
barplot(propdmgMax$DMGTOT, las = 3, names.arg = propdmgMax$EVTYPE, main = "Weather Events With\n The Top 10 Highest Economical Damage", ylab = "Damage (U$ Billion)", col = "grey")

Results

Questions 1.
From the above tables and figure, we obtain Top 10 event types that led to most severe damages of population health, counts of fatalities and injuries. Among them, Tornado is the major one.

Questions 2.
From the above tables and figure, we obtain Top 10 event types are most expensive. Among them, Flood is the major one.

Appendix

Spatial distribution of phenomenon

datamap <- select(storm,fips,EVTYPE,FATALITIES,DMGTOT)
tornadofatal <- filter(datamap,EVTYPE=="TORNADO")
trnfatal <- aggregate(FATALITIES ~ fips, data = tornadofatal, FUN = sum)
names(trnfatal)[names(trnfatal)=="fips"] <- "region"
names(trnfatal)[names(trnfatal)=="FATALITIES"] <- "value"


choro = CountyChoropleth$new(trnfatal)
choro$title = "Fatalities by Tornado"
choro$ggplot_scale = scale_fill_brewer(name="N° Fatalities", palette=3, drop=FALSE)
suppressWarnings(g1 <- choro$render())


flooddmg <- filter(datamap,EVTYPE=="FLOOD")
flooddmg <- mutate(datamap,DMGTOT= DMGTOT/1e9)
floodfatal <- aggregate(DMGTOT ~ fips, data = flooddmg, FUN = sum)
names(floodfatal)[names(floodfatal)=="fips"] <- "region"
names(floodfatal)[names(floodfatal)=="DMGTOT"] <- "value"

choro = CountyChoropleth$new(floodfatal)
choro$title = "National Damage by Flood"
choro$ggplot_scale = scale_fill_brewer(name="U$ Billions", palette=2, drop=FALSE)
suppressWarnings(g2 <- choro$render())

grid.arrange(g1, g2, ncol=1, nrow=2)