Reproducible Research Peer Assessment 2

Title

Impact of Severe Weather Events on Population Health and Economy in the United States

Synopsis

This report highlights the major weather events in the U.S. that caused health hazards and economy loss between year 1950 and 2008. The data comes from U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This report shows that tornadoes caused majority of deaths and injuries to the population. Flood and drought caused most of the property damage and crop losses respectively. Focusing on these key weather events may help address some public healh and economic problems.

Data processing

The following code shows how the storm data was read into and processed in R. Weather events were re-categorized to include similar events under a broader term (e.g. FLOOD includes flash flood and other flood). All expanded event types were checked to include as many specific yet related events as possible.

Impact of severe weather was measured in terms of health hazards and damage to economy. Health hazards were summarized as total number of fatalities and injuries, while damage to economy was summarized as property and crop losses in million dollars.

if (!file.exists("./data")) dir.create("./data")

#Download zip file and unzip it if it is not in data folder
if (!file.exists("./dataStormData.csv.bz2")) {
  
  fileURL <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(url = fileURL, destfile = "./data/StormData.csv.bz2")
  
  # unzip("./data/repdata-data-StormData.csv.bz2", exdir = "./data")
}
storm <- read.csv('./data/StormData.csv.bz2', stringsAsFactors = F)  #902297 obs. 37 cols
str(storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
EVT_freq <- as.data.frame(table(storm$EVTYPE), stringsAsFactors = F, row.names = NULL)
names(EVT_freq)[1] <- 'EVTYPE'

## Check top 20 event types
x <-EVT_freq[order(EVT_freq$Freq, decreasing = T), ]
str(x)
## 'data.frame':    985 obs. of  2 variables:
##  $ EVTYPE: chr  "HAIL" "TSTM WIND" "THUNDERSTORM WIND" "TORNADO" ...
##  $ Freq  : int  288661 219940 82563 60652 54277 25326 20843 20212 15754 15708 ...
head(x, 20)
##                       EVTYPE   Freq
## 244                     HAIL 288661
## 856                TSTM WIND 219940
## 760        THUNDERSTORM WIND  82563
## 834                  TORNADO  60652
## 153              FLASH FLOOD  54277
## 170                    FLOOD  25326
## 786       THUNDERSTORM WINDS  20843
## 359                HIGH WIND  20212
## 464                LIGHTNING  15754
## 310               HEAVY SNOW  15708
## 290               HEAVY RAIN  11723
## 972             WINTER STORM  11433
## 978           WINTER WEATHER   7026
## 216             FUNNEL CLOUD   6839
## 490         MARINE TSTM WIND   6175
## 489 MARINE THUNDERSTORM WIND   5812
## 936               WATERSPOUT   3796
## 676              STRONG WIND   3566
## 919     URBAN/SML STREAM FLD   3392
## 957                 WILDFIRE   2761
##Group event types
storm1<-storm
storm1$Event <- storm1$EVTYPE
storm1$Event <- NA

storm1[grep('^(?!NON).*(TSTM|THUNDERSTORM) WIND', storm1$EVTYPE, perl = T, ignore.case = T), 'Event'] <- 'THUNDERSTORM WIND'
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('FLD|FLOOD', storm1$EVTYPE, ignore.case = T) >0, 'FLOOD', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HAIL', storm1$EVTYPE, ignore.case = T) >0, 'HAIL', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('TORNADO', storm1$EVTYPE, ignore.case = T) >0, 'TORNADO', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HIGH WIND', storm1$EVTYPE, ignore.case = T) >0, 'HIGH WIND', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('LIGHTNING', storm1$EVTYPE, ignore.case = T) >0, 'LIGHTNING', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HEAVY SNOW', storm1$EVTYPE, ignore.case = T) >0, 'HEAVY SNOW', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HEAVY RAIN', storm1$EVTYPE, ignore.case = T) >0, 'HEAVY RAIN', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('WINTER', storm1$EVTYPE, ignore.case = T) >0, 'WINTER WEATHER', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('CLOUD', storm1$EVTYPE, ignore.case = T) >0, 'TORNADO CLOUD', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event), storm1$EVTYPE, storm1$Event)

##Check grouping
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
sqldf("select distinct EVTYPE from storm1 where Event = 'WINTER WEATHER'")
## Loading required package: tcltk
##                   EVTYPE
## 1           WINTER STORM
## 2          WINTER STORMS
## 3         WINTER WEATHER
## 4  BLIZZARD/WINTER STORM
## 5             WINTER MIX
## 6         Winter Weather
## 7     Record Winter Snow
## 8            WINTERY MIX
## 9     WINTER WEATHER MIX
## 10    WINTER WEATHER/MIX
EVT_freq <- as.data.frame(table(storm1$Event), stringsAsFactors = F, row.names = NULL)
names(EVT_freq)[1] <- 'EVTYPE'
x <-EVT_freq[order(EVT_freq$Freq, decreasing = T), ]
x$pct <- x$Freq/sum(x$Freq)*100
head(x, 10)
##                EVTYPE   Freq        pct
## 519 THUNDERSTORM WIND 336673 37.3128803
## 177              HAIL 289283 32.0607294
## 128             FLOOD  86122  9.5447508
## 534           TORNADO  60699  6.7271641
## 217         HIGH WIND  21950  2.4326801
## 615    WINTER WEATHER  19600  2.1722338
## 197        HEAVY SNOW  15792  1.7501998
## 276         LIGHTNING  15765  1.7472074
## 193        HEAVY RAIN  11789  1.3065543
## 535     TORNADO CLOUD   6945  0.7697022
sum(x[1:10, ]$Freq) #864618
## [1] 864618
sum(x[1:10, ]$pct)  #95.8241
## [1] 95.8241
######### Top 10 events already account for >95% of the data    ##########

Results

1. Health hazards

The follwing code shows the 10 most common weather events that caused health hazards. Although the top 10 events were slightly different, at least in terms of ranking, for fatalities and injuries, tornadoes always topped the list for both types of hazards, accounting for >97,000 (62.3%) hazard cases.

#Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

## Number of fatalities and # injuries per event type
fat <- aggregate(FATALITIES ~ Event, data = storm1, FUN = sum )
fat <- fat[order(fat$FATALITIES, decreasing = T), ]
fat$pct <- round(fat$FATALITIES/sum(fat$FATALITIES)*100, 2)
inj <- aggregate(INJURIES ~ Event, data = storm1, FUN = sum )
inj <- inj[order(inj$INJURIES, decreasing = T), ]
inj$pct <- round(inj$INJURIES/sum(inj$INJURIES)*100, 2)

head(fat, 10)
##                 Event FATALITIES   pct
## 534           TORNADO       5636 37.21
## 104    EXCESSIVE HEAT       1903 12.57
## 128             FLOOD       1553 10.25
## 180              HEAT        937  6.19
## 276         LIGHTNING        817  5.39
## 519 THUNDERSTORM WIND        753  4.97
## 371       RIP CURRENT        368  2.43
## 217         HIGH WIND        299  1.97
## 615    WINTER WEATHER        277  1.83
## 14          AVALANCHE        224  1.48
head(inj, 10)
##                 Event INJURIES   pct
## 534           TORNADO    91407 65.05
## 519 THUNDERSTORM WIND     9492  6.75
## 128             FLOOD     8683  6.18
## 104    EXCESSIVE HEAT     6525  4.64
## 276         LIGHTNING     5232  3.72
## 180              HEAT     2100  1.49
## 245         ICE STORM     1975  1.41
## 615    WINTER WEATHER     1876  1.33
## 217         HIGH WIND     1523  1.08
## 177              HAIL     1371  0.98
harm <- merge(fat[1:2], inj[1:2], by = 'Event', all = T)
harm$TOTAL <- harm$FATALITIES + harm$INJURIES
harm$pct <- round(harm$TOTAL/sum(harm$TOTAL)*100, 2)
harm <- harm[order(harm$TOTAL, decreasing = T), ]

### Total # fatalities/injuries due to hazardous events ordered by total # cases
head(harm, 10)
##                 Event FATALITIES INJURIES TOTAL   pct
## 534           TORNADO       5636    91407 97043 62.34
## 519 THUNDERSTORM WIND        753     9492 10245  6.58
## 128             FLOOD       1553     8683 10236  6.58
## 104    EXCESSIVE HEAT       1903     6525  8428  5.41
## 276         LIGHTNING        817     5232  6049  3.89
## 180              HEAT        937     2100  3037  1.95
## 615    WINTER WEATHER        277     1876  2153  1.38
## 245         ICE STORM         89     1975  2064  1.33
## 217         HIGH WIND        299     1523  1822  1.17
## 177              HAIL         15     1371  1386  0.89
## Barchart to show figures
options(scipen=99)
par(mar = c(4, 4, 4, 2))
end_pt = 0.5 + nrow(harm[1:10, ]) + nrow(harm[1:10, ])-1
barplot(harm[1:10, ]$TOTAL, names.arg = harm[1:10, ]$Event, space = 1, col = 'light green',
        xlab = 'Event type', ylab = 'Number of cases',
        main = 'Top 10 weather events from 1950 to 2008',
        xaxt='n',
        ylim = c(0, 100000))
text(x=seq(1.5, end_pt, by = 2), par("usr")[3]-0.25,
     srt = -20, adj = 0, labels = harm[1:10, ]$Event,
     xpd = T, cex = 0.8, offset = 0.5)

2. Monetary losses in properties and crops

The following code shows how the property and crop damage values were calculated. The major cause for property damage was flood ($168 billion, 39.2%). The major cause for crop losses was drought ($14.0 billion, 28.5%). Flood accounted for most of the total damage combined ($180 billion, 37.8%).

#Q2. Across the United States, which types of events have the greatest economic consequences?

##Check units of damage amount
table(storm1$PROPDMGEXP)
## 
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
table(storm1$CROPDMGEXP)
## 
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994
econ <- storm1[c('Event', 'PROPDMG', 'PROPDMGEXP', 'CROPDMG', 'CROPDMGEXP')]

econ$prop <- econ$PROPDMG
econ$prop <- ifelse(toupper(econ$PROPDMGEXP) == 'K', econ$PROPDMG*1000, econ$prop)
econ$prop <- ifelse(toupper(econ$PROPDMGEXP) == 'M', econ$PROPDMG*1000000, econ$prop)
econ$prop <- ifelse(toupper(econ$PROPDMGEXP) == 'B', econ$PROPDMG*1000000000, econ$prop)

econ$crop <- econ$CROPDMG
econ$crop <- ifelse(toupper(econ$CROPDMGEXP) == 'K', econ$CROPDMG*1000, econ$crop)
econ$crop <- ifelse(toupper(econ$CROPDMGEXP) == 'M', econ$CROPDMG*1000000, econ$crop)
econ$crop <- ifelse(toupper(econ$CROPDMGEXP) == 'B', econ$CROPDMG*1000000000, econ$crop)

## Check calculation
library(sqldf)
unique(econ[toupper(econ$CROPDMGEXP) == 'B', c('CROPDMG', 'crop')])
##        CROPDMG       crop
## 188633    0.40  400000000
## 198389    5.00 5000000000
## 199733    0.50  500000000
## 201256    0.20  200000000
## 581537    1.51 1510000000
## 639347    1.00 1000000000
## 899222    0.00          0
## Total amount of damage per event type
prop <- aggregate(prop ~ Event, data = econ, FUN = sum )
prop <- prop[order(prop$prop, decreasing = T), ]
prop$prop <- round(prop$prop/1000000, 2)
prop$pct <- round(prop$prop/sum(prop$prop)*100, 2)
crop <- aggregate(crop ~ Event, data = econ, FUN = sum )
crop <- crop[order(crop$crop, decreasing = T), ]
crop$pct <- round(crop$crop/sum(crop$crop)*100, 2)
crop$crop <- round(crop$crop/1000000, 2)

head(prop, 10)
##                 Event      prop   pct
## 128             FLOOD 167588.03 39.22
## 231 HURRICANE/TYPHOON  69305.84 16.22
## 534           TORNADO  56993.10 13.34
## 434       STORM SURGE  43323.54 10.14
## 177              HAIL  15974.57  3.74
## 223         HURRICANE  11868.32  2.78
## 519 THUNDERSTORM WIND  11366.56  2.66
## 540    TROPICAL STORM   7703.89  1.80
## 615    WINTER WEATHER   6716.80  1.57
## 217         HIGH WIND   6159.80  1.44
head(crop, 10)
##                 Event     crop   pct
## 71            DROUGHT 13972.57 28.45
## 128             FLOOD 12388.57 25.23
## 245         ICE STORM  5022.11 10.23
## 177              HAIL  3046.94  6.21
## 223         HURRICANE  2741.91  5.58
## 231 HURRICANE/TYPHOON  2607.87  5.31
## 114      EXTREME COLD  1292.97  2.63
## 519 THUNDERSTORM WIND  1255.95  2.56
## 153      FROST/FREEZE  1094.09  2.23
## 193        HEAVY RAIN   795.40  1.62
harm_econ <- merge(prop[1:2], crop[1:2], by = 'Event', all= T)
harm_econ$TOTAL <- (harm_econ$prop + harm_econ$crop)
harm_econ$pct <- round(harm_econ$TOTAL/sum(harm_econ$TOTAL)*100, 2)
harm_econ <- harm_econ[order(harm_econ$TOTAL, decreasing = T), ]

## Total damage (in millions) per events ordered by total amount
head(harm_econ, 10)
##                 Event      prop     crop     TOTAL   pct
## 128             FLOOD 167588.03 12388.57 179976.60 37.78
## 231 HURRICANE/TYPHOON  69305.84  2607.87  71913.71 15.09
## 534           TORNADO  56993.10   414.96  57408.06 12.05
## 434       STORM SURGE  43323.54     0.00  43323.54  9.09
## 177              HAIL  15974.57  3046.94  19021.51  3.99
## 71            DROUGHT   1046.11 13972.57  15018.68  3.15
## 223         HURRICANE  11868.32  2741.91  14610.23  3.07
## 519 THUNDERSTORM WIND  11366.56  1255.95  12622.51  2.65
## 245         ICE STORM   3944.93  5022.11   8967.04  1.88
## 540    TROPICAL STORM   7703.89   678.35   8382.24  1.76
## Barchart to show figures
options(scipen=99)
par(mar = c(4, 4, 4, 2))
end_pt = 0.5 + nrow(harm_econ[1:10, ]) + nrow(harm_econ[1:10, ])-1
barplot(harm_econ[1:10, ]$TOTAL, names.arg = harm_econ[1:10, ]$Event, space = 1, col = 'light green',
        xlab = 'Event type', ylab = 'Damage amount ($ million)',
        main = 'Top 10 events related to property and crop damage from 1950 to 2008',
        xaxt='n',
        ylim = c(0, 200000))
text(x=seq(1.5, end_pt, by = 2), par("usr")[3]-0.25,
     srt = -20, adj = 0, labels = harm_econ[1:10, ]$Event,
     xpd = T, cex = 0.8, offset = 0.5)