Impact of Severe Weather Events on Population Health and Economy in the United States
This report highlights the major weather events in the U.S. that caused health hazards and economy loss between year 1950 and 2008. The data comes from U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This report shows that tornadoes caused majority of deaths and injuries to the population. Flood and drought caused most of the property damage and crop losses respectively. Focusing on these key weather events may help address some public healh and economic problems.
The following code shows how the storm data was read into and processed in R. Weather events were re-categorized to include similar events under a broader term (e.g. FLOOD includes flash flood and other flood). All expanded event types were checked to include as many specific yet related events as possible.
Impact of severe weather was measured in terms of health hazards and damage to economy. Health hazards were summarized as total number of fatalities and injuries, while damage to economy was summarized as property and crop losses in million dollars.
if (!file.exists("./data")) dir.create("./data")
#Download zip file and unzip it if it is not in data folder
if (!file.exists("./dataStormData.csv.bz2")) {
fileURL <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(url = fileURL, destfile = "./data/StormData.csv.bz2")
# unzip("./data/repdata-data-StormData.csv.bz2", exdir = "./data")
}
storm <- read.csv('./data/StormData.csv.bz2', stringsAsFactors = F) #902297 obs. 37 cols
str(storm)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
EVT_freq <- as.data.frame(table(storm$EVTYPE), stringsAsFactors = F, row.names = NULL)
names(EVT_freq)[1] <- 'EVTYPE'
## Check top 20 event types
x <-EVT_freq[order(EVT_freq$Freq, decreasing = T), ]
str(x)
## 'data.frame': 985 obs. of 2 variables:
## $ EVTYPE: chr "HAIL" "TSTM WIND" "THUNDERSTORM WIND" "TORNADO" ...
## $ Freq : int 288661 219940 82563 60652 54277 25326 20843 20212 15754 15708 ...
head(x, 20)
## EVTYPE Freq
## 244 HAIL 288661
## 856 TSTM WIND 219940
## 760 THUNDERSTORM WIND 82563
## 834 TORNADO 60652
## 153 FLASH FLOOD 54277
## 170 FLOOD 25326
## 786 THUNDERSTORM WINDS 20843
## 359 HIGH WIND 20212
## 464 LIGHTNING 15754
## 310 HEAVY SNOW 15708
## 290 HEAVY RAIN 11723
## 972 WINTER STORM 11433
## 978 WINTER WEATHER 7026
## 216 FUNNEL CLOUD 6839
## 490 MARINE TSTM WIND 6175
## 489 MARINE THUNDERSTORM WIND 5812
## 936 WATERSPOUT 3796
## 676 STRONG WIND 3566
## 919 URBAN/SML STREAM FLD 3392
## 957 WILDFIRE 2761
##Group event types
storm1<-storm
storm1$Event <- storm1$EVTYPE
storm1$Event <- NA
storm1[grep('^(?!NON).*(TSTM|THUNDERSTORM) WIND', storm1$EVTYPE, perl = T, ignore.case = T), 'Event'] <- 'THUNDERSTORM WIND'
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('FLD|FLOOD', storm1$EVTYPE, ignore.case = T) >0, 'FLOOD', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HAIL', storm1$EVTYPE, ignore.case = T) >0, 'HAIL', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('TORNADO', storm1$EVTYPE, ignore.case = T) >0, 'TORNADO', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HIGH WIND', storm1$EVTYPE, ignore.case = T) >0, 'HIGH WIND', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('LIGHTNING', storm1$EVTYPE, ignore.case = T) >0, 'LIGHTNING', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HEAVY SNOW', storm1$EVTYPE, ignore.case = T) >0, 'HEAVY SNOW', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('HEAVY RAIN', storm1$EVTYPE, ignore.case = T) >0, 'HEAVY RAIN', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('WINTER', storm1$EVTYPE, ignore.case = T) >0, 'WINTER WEATHER', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event) & regexpr('CLOUD', storm1$EVTYPE, ignore.case = T) >0, 'TORNADO CLOUD', storm1$Event)
storm1$Event <- ifelse(is.na(storm1$Event), storm1$EVTYPE, storm1$Event)
##Check grouping
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
sqldf("select distinct EVTYPE from storm1 where Event = 'WINTER WEATHER'")
## Loading required package: tcltk
## EVTYPE
## 1 WINTER STORM
## 2 WINTER STORMS
## 3 WINTER WEATHER
## 4 BLIZZARD/WINTER STORM
## 5 WINTER MIX
## 6 Winter Weather
## 7 Record Winter Snow
## 8 WINTERY MIX
## 9 WINTER WEATHER MIX
## 10 WINTER WEATHER/MIX
EVT_freq <- as.data.frame(table(storm1$Event), stringsAsFactors = F, row.names = NULL)
names(EVT_freq)[1] <- 'EVTYPE'
x <-EVT_freq[order(EVT_freq$Freq, decreasing = T), ]
x$pct <- x$Freq/sum(x$Freq)*100
head(x, 10)
## EVTYPE Freq pct
## 519 THUNDERSTORM WIND 336673 37.3128803
## 177 HAIL 289283 32.0607294
## 128 FLOOD 86122 9.5447508
## 534 TORNADO 60699 6.7271641
## 217 HIGH WIND 21950 2.4326801
## 615 WINTER WEATHER 19600 2.1722338
## 197 HEAVY SNOW 15792 1.7501998
## 276 LIGHTNING 15765 1.7472074
## 193 HEAVY RAIN 11789 1.3065543
## 535 TORNADO CLOUD 6945 0.7697022
sum(x[1:10, ]$Freq) #864618
## [1] 864618
sum(x[1:10, ]$pct) #95.8241
## [1] 95.8241
######### Top 10 events already account for >95% of the data ##########
The follwing code shows the 10 most common weather events that caused health hazards. Although the top 10 events were slightly different, at least in terms of ranking, for fatalities and injuries, tornadoes always topped the list for both types of hazards, accounting for >97,000 (62.3%) hazard cases.
#Q1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
## Number of fatalities and # injuries per event type
fat <- aggregate(FATALITIES ~ Event, data = storm1, FUN = sum )
fat <- fat[order(fat$FATALITIES, decreasing = T), ]
fat$pct <- round(fat$FATALITIES/sum(fat$FATALITIES)*100, 2)
inj <- aggregate(INJURIES ~ Event, data = storm1, FUN = sum )
inj <- inj[order(inj$INJURIES, decreasing = T), ]
inj$pct <- round(inj$INJURIES/sum(inj$INJURIES)*100, 2)
head(fat, 10)
## Event FATALITIES pct
## 534 TORNADO 5636 37.21
## 104 EXCESSIVE HEAT 1903 12.57
## 128 FLOOD 1553 10.25
## 180 HEAT 937 6.19
## 276 LIGHTNING 817 5.39
## 519 THUNDERSTORM WIND 753 4.97
## 371 RIP CURRENT 368 2.43
## 217 HIGH WIND 299 1.97
## 615 WINTER WEATHER 277 1.83
## 14 AVALANCHE 224 1.48
head(inj, 10)
## Event INJURIES pct
## 534 TORNADO 91407 65.05
## 519 THUNDERSTORM WIND 9492 6.75
## 128 FLOOD 8683 6.18
## 104 EXCESSIVE HEAT 6525 4.64
## 276 LIGHTNING 5232 3.72
## 180 HEAT 2100 1.49
## 245 ICE STORM 1975 1.41
## 615 WINTER WEATHER 1876 1.33
## 217 HIGH WIND 1523 1.08
## 177 HAIL 1371 0.98
harm <- merge(fat[1:2], inj[1:2], by = 'Event', all = T)
harm$TOTAL <- harm$FATALITIES + harm$INJURIES
harm$pct <- round(harm$TOTAL/sum(harm$TOTAL)*100, 2)
harm <- harm[order(harm$TOTAL, decreasing = T), ]
### Total # fatalities/injuries due to hazardous events ordered by total # cases
head(harm, 10)
## Event FATALITIES INJURIES TOTAL pct
## 534 TORNADO 5636 91407 97043 62.34
## 519 THUNDERSTORM WIND 753 9492 10245 6.58
## 128 FLOOD 1553 8683 10236 6.58
## 104 EXCESSIVE HEAT 1903 6525 8428 5.41
## 276 LIGHTNING 817 5232 6049 3.89
## 180 HEAT 937 2100 3037 1.95
## 615 WINTER WEATHER 277 1876 2153 1.38
## 245 ICE STORM 89 1975 2064 1.33
## 217 HIGH WIND 299 1523 1822 1.17
## 177 HAIL 15 1371 1386 0.89
## Barchart to show figures
options(scipen=99)
par(mar = c(4, 4, 4, 2))
end_pt = 0.5 + nrow(harm[1:10, ]) + nrow(harm[1:10, ])-1
barplot(harm[1:10, ]$TOTAL, names.arg = harm[1:10, ]$Event, space = 1, col = 'light green',
xlab = 'Event type', ylab = 'Number of cases',
main = 'Top 10 weather events from 1950 to 2008',
xaxt='n',
ylim = c(0, 100000))
text(x=seq(1.5, end_pt, by = 2), par("usr")[3]-0.25,
srt = -20, adj = 0, labels = harm[1:10, ]$Event,
xpd = T, cex = 0.8, offset = 0.5)
The following code shows how the property and crop damage values were calculated. The major cause for property damage was flood ($168 billion, 39.2%). The major cause for crop losses was drought ($14.0 billion, 28.5%). Flood accounted for most of the total damage combined ($180 billion, 37.8%).
#Q2. Across the United States, which types of events have the greatest economic consequences?
##Check units of damage amount
table(storm1$PROPDMGEXP)
##
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
table(storm1$CROPDMGEXP)
##
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
econ <- storm1[c('Event', 'PROPDMG', 'PROPDMGEXP', 'CROPDMG', 'CROPDMGEXP')]
econ$prop <- econ$PROPDMG
econ$prop <- ifelse(toupper(econ$PROPDMGEXP) == 'K', econ$PROPDMG*1000, econ$prop)
econ$prop <- ifelse(toupper(econ$PROPDMGEXP) == 'M', econ$PROPDMG*1000000, econ$prop)
econ$prop <- ifelse(toupper(econ$PROPDMGEXP) == 'B', econ$PROPDMG*1000000000, econ$prop)
econ$crop <- econ$CROPDMG
econ$crop <- ifelse(toupper(econ$CROPDMGEXP) == 'K', econ$CROPDMG*1000, econ$crop)
econ$crop <- ifelse(toupper(econ$CROPDMGEXP) == 'M', econ$CROPDMG*1000000, econ$crop)
econ$crop <- ifelse(toupper(econ$CROPDMGEXP) == 'B', econ$CROPDMG*1000000000, econ$crop)
## Check calculation
library(sqldf)
unique(econ[toupper(econ$CROPDMGEXP) == 'B', c('CROPDMG', 'crop')])
## CROPDMG crop
## 188633 0.40 400000000
## 198389 5.00 5000000000
## 199733 0.50 500000000
## 201256 0.20 200000000
## 581537 1.51 1510000000
## 639347 1.00 1000000000
## 899222 0.00 0
## Total amount of damage per event type
prop <- aggregate(prop ~ Event, data = econ, FUN = sum )
prop <- prop[order(prop$prop, decreasing = T), ]
prop$prop <- round(prop$prop/1000000, 2)
prop$pct <- round(prop$prop/sum(prop$prop)*100, 2)
crop <- aggregate(crop ~ Event, data = econ, FUN = sum )
crop <- crop[order(crop$crop, decreasing = T), ]
crop$pct <- round(crop$crop/sum(crop$crop)*100, 2)
crop$crop <- round(crop$crop/1000000, 2)
head(prop, 10)
## Event prop pct
## 128 FLOOD 167588.03 39.22
## 231 HURRICANE/TYPHOON 69305.84 16.22
## 534 TORNADO 56993.10 13.34
## 434 STORM SURGE 43323.54 10.14
## 177 HAIL 15974.57 3.74
## 223 HURRICANE 11868.32 2.78
## 519 THUNDERSTORM WIND 11366.56 2.66
## 540 TROPICAL STORM 7703.89 1.80
## 615 WINTER WEATHER 6716.80 1.57
## 217 HIGH WIND 6159.80 1.44
head(crop, 10)
## Event crop pct
## 71 DROUGHT 13972.57 28.45
## 128 FLOOD 12388.57 25.23
## 245 ICE STORM 5022.11 10.23
## 177 HAIL 3046.94 6.21
## 223 HURRICANE 2741.91 5.58
## 231 HURRICANE/TYPHOON 2607.87 5.31
## 114 EXTREME COLD 1292.97 2.63
## 519 THUNDERSTORM WIND 1255.95 2.56
## 153 FROST/FREEZE 1094.09 2.23
## 193 HEAVY RAIN 795.40 1.62
harm_econ <- merge(prop[1:2], crop[1:2], by = 'Event', all= T)
harm_econ$TOTAL <- (harm_econ$prop + harm_econ$crop)
harm_econ$pct <- round(harm_econ$TOTAL/sum(harm_econ$TOTAL)*100, 2)
harm_econ <- harm_econ[order(harm_econ$TOTAL, decreasing = T), ]
## Total damage (in millions) per events ordered by total amount
head(harm_econ, 10)
## Event prop crop TOTAL pct
## 128 FLOOD 167588.03 12388.57 179976.60 37.78
## 231 HURRICANE/TYPHOON 69305.84 2607.87 71913.71 15.09
## 534 TORNADO 56993.10 414.96 57408.06 12.05
## 434 STORM SURGE 43323.54 0.00 43323.54 9.09
## 177 HAIL 15974.57 3046.94 19021.51 3.99
## 71 DROUGHT 1046.11 13972.57 15018.68 3.15
## 223 HURRICANE 11868.32 2741.91 14610.23 3.07
## 519 THUNDERSTORM WIND 11366.56 1255.95 12622.51 2.65
## 245 ICE STORM 3944.93 5022.11 8967.04 1.88
## 540 TROPICAL STORM 7703.89 678.35 8382.24 1.76
## Barchart to show figures
options(scipen=99)
par(mar = c(4, 4, 4, 2))
end_pt = 0.5 + nrow(harm_econ[1:10, ]) + nrow(harm_econ[1:10, ])-1
barplot(harm_econ[1:10, ]$TOTAL, names.arg = harm_econ[1:10, ]$Event, space = 1, col = 'light green',
xlab = 'Event type', ylab = 'Damage amount ($ million)',
main = 'Top 10 events related to property and crop damage from 1950 to 2008',
xaxt='n',
ylim = c(0, 200000))
text(x=seq(1.5, end_pt, by = 2), par("usr")[3]-0.25,
srt = -20, adj = 0, labels = harm_econ[1:10, ]$Event,
xpd = T, cex = 0.8, offset = 0.5)