The impact of severe weather on health and economic well-being in the United States is analyzed for the period 1950-2011, using the NOAA Storm Database. The severity of the impact on health is measured by fatalities and injuries, whereas the severity of the economic impact is measured by damage to crops and property. Overall, the top 5 events for fatalities (Tornado, Excessive Heat, Flash Flood, Heat, Lightning) accounted for 10,267 fatalities. The top 5 events for injuries (Tornado, Tstm Wind, Flood, Excessive Heat, Lightning) accounted for 116,847 injuries. The economic impact on property of the top 5 events (Flood, Hurricane/Typhoon, Storm Surge, Hurricane, Tornado) was $241.56 billion. The economic impact on crops of the top 5 events (Ice Storm, River Flood, Hurricane/Typhoon, Drought, Heat) was $13.41 billion.
# Load dplyr, which provides select(), filter(), arrange() and desc() used
# by the analysis below.
#
# The former workspace/console resets were removed on purpose:
# knit2html() evaluates the document in the calling environment by default,
# so rm(list = ls()) would wipe the caller's objects, and cat("\014") only
# writes a form-feed character. Nothing below depends on either.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.1.2
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw storm data. The relative path in `fileurl` is resolved against
# the working directory set on the next line.
# NOTE(review): the absolute setwd() path is machine-specific -- confirm or
# replace with a project-relative location before sharing.
setwd("C:/Users/werner/Dropbox/Programming")
fileurl <- "data/repdata-data-StormData.csv.bz2"

# Read the bzip2-compressed CSV through a bzfile() connection; empty fields
# become NA.
weather <- read.csv(file = bzfile(fileurl), na.strings = "")

# Print the structure (column names and types) of the loaded data frame.
str(weather)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29600 levels "5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13512 1872 4597 10591 4371 10093 1972 23872 24417 4597 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 34 levels " N"," NW","E",..: NA NA NA NA NA NA NA NA NA NA ...
## $ BGN_LOCATI: Factor w/ 54428 levels "- 1 N Albion",..: NA NA NA NA NA NA NA NA NA NA ...
## $ END_DATE : Factor w/ 6662 levels "1/1/1993 0:00:00",..: NA NA NA NA NA NA NA NA NA NA ...
## $ END_TIME : Factor w/ 3646 levels " 0900CST"," 200CST",..: NA NA NA NA NA NA NA NA NA NA ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 23 levels "E","ENE","ESE",..: NA NA NA NA NA NA NA NA NA NA ...
## $ END_LOCATI: Factor w/ 34505 levels "- .5 NNW","- 11 ESE Jay",..: NA NA NA NA NA NA NA NA NA NA ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 18 levels "-","?","+","0",..: 16 16 16 16 16 16 16 16 16 16 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 8 levels "?","0","2","B",..: NA NA NA NA NA NA NA NA NA NA ...
## $ WFO : Factor w/ 541 levels " CI","$AC","$AG",..: NA NA NA NA NA NA NA NA NA NA ...
## $ STATEOFFIC: Factor w/ 249 levels "ALABAMA, Central",..: NA NA NA NA NA NA NA NA NA NA ...
## $ ZONENAMES : Factor w/ 25111 levels " "| __truncated__,..: NA NA NA NA NA NA NA NA NA NA ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436780 levels "-2 at Deer Park\n",..: NA NA NA NA NA NA NA NA NA NA ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
head(weather)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 <NA> <NA> <NA> <NA> 0
## 2 TORNADO 0 <NA> <NA> <NA> <NA> 0
## 3 TORNADO 0 <NA> <NA> <NA> <NA> 0
## 4 TORNADO 0 <NA> <NA> <NA> <NA> 0
## 5 TORNADO 0 <NA> <NA> <NA> <NA> 0
## 6 TORNADO 0 <NA> <NA> <NA> <NA> 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 <NA> <NA> 14.0 100 3 0 0
## 2 NA 0 <NA> <NA> 2.0 150 2 0 0
## 3 NA 0 <NA> <NA> 0.1 123 2 0 0
## 4 NA 0 <NA> <NA> 0.0 100 2 0 0
## 5 NA 0 <NA> <NA> 0.0 150 2 0 0
## 6 NA 0 <NA> <NA> 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0 <NA> <NA> <NA> <NA>
## 2 0 2.5 K 0 <NA> <NA> <NA> <NA>
## 3 2 25.0 K 0 <NA> <NA> <NA> <NA>
## 4 2 2.5 K 0 <NA> <NA> <NA> <NA>
## 5 2 2.5 K 0 <NA> <NA> <NA> <NA>
## 6 6 2.5 K 0 <NA> <NA> <NA> <NA>
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 <NA> 1
## 2 3042 8755 0 0 <NA> 2
## 3 3340 8742 0 0 <NA> 3
## 4 3458 8626 0 0 <NA> 4
## 5 3412 8642 0 0 <NA> 5
## 6 3450 8748 0 0 <NA> 6
names(weather)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
As noted, the impact on health is determined by the number of fatalities and injuries caused by the different weather event types. As such, we only need a subset of the weather data.
# Keep only the columns needed for the health analysis: the event type and
# the fatality / injury counts.
weather_health <- weather %>%
  select(EVTYPE, FATALITIES, INJURIES)

# Summarise the three retained columns.
summary(weather_health)
## EVTYPE FATALITIES INJURIES
## HAIL :288661 Min. : 0.0000 Min. : 0.0000
## TSTM WIND :219940 1st Qu.: 0.0000 1st Qu.: 0.0000
## THUNDERSTORM WIND: 82563 Median : 0.0000 Median : 0.0000
## TORNADO : 60652 Mean : 0.0168 Mean : 0.1557
## FLASH FLOOD : 54277 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## FLOOD : 25326 Max. :583.0000 Max. :1700.0000
## (Other) :170878
# Fatalities: total per event type, ranked from highest to lowest.
fatalities_total <- aggregate(FATALITIES ~ EVTYPE, data = weather_health, FUN = sum)
fatalities_data <- fatalities_total %>%
  arrange(desc(FATALITIES))

# The fifth-ranked total is the cutoff; every event type at or above it is
# kept, so a tie at the cutoff yields more than five rows.
cutoff_fatalities <- fatalities_data$FATALITIES[5]
top5_fatalities <- fatalities_data %>%
  filter(FATALITIES >= cutoff_fatalities)
top5_fatalities_sum <- sum(top5_fatalities$FATALITIES)

# Injuries: same procedure as for fatalities.
injuries_total <- aggregate(INJURIES ~ EVTYPE, data = weather_health, FUN = sum)
injuries_data <- injuries_total %>%
  arrange(desc(INJURIES))

cutoff_injuries <- injuries_data$INJURIES[5]
top5_injuries <- injuries_data %>%
  filter(INJURIES >= cutoff_injuries)
top5_injuries_sum <- sum(top5_injuries$INJURIES)
As noted, the economic impact is based on damage to crops and property.
# Keep the event type together with the property-damage amount and its
# exponent code.
weather_property <- weather %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP)

# Summarise the three retained columns.
summary(weather_property)
## EVTYPE PROPDMG PROPDMGEXP
## HAIL :288661 Min. : 0.00 K :424665
## TSTM WIND :219940 1st Qu.: 0.00 M : 11330
## THUNDERSTORM WIND: 82563 Median : 0.00 0 : 216
## TORNADO : 60652 Mean : 12.06 B : 40
## FLASH FLOOD : 54277 3rd Qu.: 0.50 5 : 28
## FLOOD : 25326 Max. :5000.00 (Other): 84
## (Other) :170878 NA's :465934
# Keep the event type together with the crop-damage amount and its exponent
# code.
weather_crop <- weather %>%
  select(EVTYPE, CROPDMG, CROPDMGEXP)

# Summarise the three retained columns.
summary(weather_crop)
## EVTYPE CROPDMG CROPDMGEXP
## HAIL :288661 Min. : 0.000 K :281832
## TSTM WIND :219940 1st Qu.: 0.000 M : 1994
## THUNDERSTORM WIND: 82563 Median : 0.000 k : 21
## TORNADO : 60652 Mean : 1.527 0 : 19
## FLASH FLOOD : 54277 3rd Qu.: 0.000 B : 9
## FLOOD : 25326 Max. :990.000 (Other): 9
## (Other) :170878 NA's :618413
# Property damage: keep only the rows whose exponent code is "B", so the
# PROPDMG values being summed are all on the same (billions) scale.
# NOTE(review): rows coded "K" or "M" (and rows with a missing exponent) are
# dropped here, so the totals below exclude damage recorded in thousands or
# millions -- confirm this is intended.
property_damage <- weather_property %>%
  filter(PROPDMGEXP == "B")
property_total <- aggregate(PROPDMG ~ EVTYPE, data = property_damage, FUN = sum)
property_damage_sorted <- property_total %>%
  arrange(desc(PROPDMG))

# The fifth-ranked total is the cutoff; ties at the cutoff are all kept.
cutoff_property <- property_damage_sorted$PROPDMG[5]
top5_property <- property_damage_sorted %>%
  filter(PROPDMG >= cutoff_property)
top5_property_sum <- sum(top5_property$PROPDMG)

# Crop damage: same procedure, again restricted to rows coded "B".
crop_damage <- weather_crop %>%
  filter(CROPDMGEXP == "B")
crop_total <- aggregate(CROPDMG ~ EVTYPE, data = crop_damage, FUN = sum)
crop_damage_sorted <- crop_total %>%
  arrange(desc(CROPDMG))

cutoff_crop <- crop_damage_sorted$CROPDMG[5]
top5_crop <- crop_damage_sorted %>%
  filter(CROPDMG >= cutoff_crop)
top5_crop_sum <- sum(top5_crop$CROPDMG)
# Figure 1: top event types by fatalities (left) and injuries (right).
# Two panels side by side; the wide bottom margin leaves room for the rotated
# event-type labels drawn below each bar.
par(mfcol = c(1, 2), mar = c(10, 4, 10, 2))

# Left panel: total fatalities per event type.
with(top5_fatalities, {
  bar_mid <- barplot(
    height = FATALITIES,
    ylim = c(0, 6000),
    main = "Top 5 Fatalities by Event\n (Jan 1950 - Nov 2011)",
    ylab = "Fatalities"
  )
  # barplot() returns the bar midpoints; use them to position the labels.
  text(
    x = bar_mid - .01, y = -.01, labels = EVTYPE,
    cex = .8, xpd = TRUE, srt = 90, adj = 1
  )
})

# Right panel: total injuries per event type, drawn on a log10 scale.
with(top5_injuries, {
  bar_mid <- barplot(
    height = log10(INJURIES),
    ylim = c(0, 5),
    main = "Top 5 Injuries by Event\n (Jan 1950 - Nov 2011)",
    ylab = "Injuries (log)"
  )
  text(
    x = bar_mid - .01, y = -.05, labels = EVTYPE,
    cex = .8, xpd = TRUE, srt = 90, adj = 1
  )
})

# Figure caption, written into the top margin.
mtext("Figure 1", side = 3, outer = TRUE, line = -3)
head(top5_fatalities, 5)
## EVTYPE FATALITIES
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
top5_fatalities_sum
## [1] 10267
head(top5_injuries, 5)
## EVTYPE INJURIES
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
top5_injuries_sum
## [1] 116847
# Figure 2: top event types by property damage (left) and crop damage (right).
# Two panels side by side; the wide bottom margin leaves room for the rotated
# event-type labels drawn below each bar.
par(mfcol = c(1, 2), mar = c(10, 4, 10, 2))

# Left panel: total property damage per event type.
with(top5_property, {
  bar_mid <- barplot(
    height = PROPDMG,
    ylim = c(0, 140),
    main = "Top 5 Property Damage by Event\n (Jan 1950 to Nov 2011)",
    ylab = "Property Damage (Billions)"
  )
  # barplot() returns the bar midpoints; use them to position the labels.
  text(
    x = bar_mid - .01, y = -.05, labels = EVTYPE,
    cex = .8, xpd = TRUE, srt = 90, adj = 1
  )
})

# Right panel: total crop damage per event type.
with(top5_crop, {
  bar_mid <- barplot(
    height = CROPDMG,
    ylim = c(0, 6),
    main = "Top 5 Crop Damage by Event\n (Jan 1950 to Nov 2011)",
    ylab = "Crop Damage (Billions)"
  )
  text(
    x = bar_mid - .01, y = -.05, labels = EVTYPE,
    cex = .8, xpd = TRUE, srt = 90, adj = 1
  )
})

# Figure caption, written into the top margin.
mtext("Figure 2", side = 3, outer = TRUE, line = -3)
head(top5_property, 5)
## EVTYPE PROPDMG
## 1 FLOOD 122.50
## 2 HURRICANE/TYPHOON 65.50
## 3 STORM SURGE 42.56
## 4 HURRICANE 5.70
## 5 TORNADO 5.30
top5_property_sum
## [1] 241.56
head(top5_crop, 5)
## EVTYPE CROPDMG
## 1 ICE STORM 5.00
## 2 RIVER FLOOD 5.00
## 3 HURRICANE/TYPHOON 1.51
## 4 DROUGHT 1.50
## 5 HEAT 0.40
top5_crop_sum
## [1] 13.41
Code to create the final HTML document:
# Build the report: knit the R Markdown source to HTML and open the result.
library(knitr)
# The file names below are relative, so they resolve against this directory.
# NOTE(review): absolute, machine-specific path -- confirm before sharing.
setwd("C:/Users/werner/Dropbox/Programming")
# Render the .Rmd source to HTML.
knit2html("repdata-010__PA2_WGC.Rmd")
# Open the HTML file in the default browser.
browseURL("repdata-010__PA2_WGC.html")