Title

Identification of storm events in the United States that are the most harmful to the population’s health and have the greatest economic consequences.

Synopsis

This analysis considers data from the National Weather Service - NOAA Storm Data Archive for the years 1950 - 2011 for the purpose of answering 2 questions:
1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? and
2. Across the United States, which types of events have the greatest economic consequences?

Based on the analysis documented below:

TORNADOS AND EXCESSIVE HEAT are the most harmful to the US populations.

FLOODING results in the most property damage, and FLOODING and DROUGHT result in the most crop damage.

These conclusions are based in the information in the database as downloaded in March 2017.

Data Processing

Load necessary libraries

## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: knitr

Download and read the storm data into a data.frame

if(!file.exists("StormData.csv.bz2")){
        download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "StormData.csv.bz2")
        }
dfNOAA <- read.csv("StormData.csv.bz2", header=TRUE)

Summarize Fatality and Injury totals by Year and Event Type

dfEvent <- dfNOAA %>% select(BGN_DATE, EVTYPE, FATALITIES, INJURIES) %>% 
        mutate(YEAR = year(as.Date(BGN_DATE, format="%m/%d/%Y %H:%M:%S"))) %>% 
        group_by(YEAR, EVTYPE) %>% summarize(sum(FATALITIES), sum(INJURIES))
names(dfEvent)<- c("YEAR", "EVTYPE", "Total_FATALITIES", 'Total_INJURIES')

Summarize the number of Event Type classifications by Year

dfEventTypeCount <- data.frame(YEAR=unique(dfEvent$YEAR), NUM_EVTYPE = group_size(dfEvent))

Select the Event Type with the most fatalities for a specific year

dfMaxFatalities <- dfEvent %>% group_by(YEAR) %>% filter(Total_FATALITIES == max(Total_FATALITIES)) %>% arrange(desc(YEAR),EVTYPE,Total_FATALITIES)

Select the Event Type with the most injuries for a specific year

dfMaxInjuries <- dfEvent %>% group_by(YEAR) %>% filter(Total_INJURIES == max(Total_INJURIES)) %>% arrange(desc(YEAR),EVTYPE,Total_INJURIES)

Calculating Total Property Damage for Each Event

The value of property damage are represented in the data is maintained in two fields PROPDMG and PROPDMGEXP. PROPDMGEXP is the exponent to be applied to the PROPDMG value. The expected values for the exponent are:
1. b, B = Billions
2. m, M = Millions
3. k, K = Thousands
4. h, H = Hundreds
However, there are additional values in the data:

unique(dfNOAA$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
  1. ?, -, +, {space} = 0 was used as the exponent. In most cases where these special characters appear, the PROPDMG value is 0, only in the 5 cases where the exponent is + are there no zero data in PROPDMG.

  2. 0 - 8: the numeric value was used as the exponent, e.g. 10^PROPDMGEXP

dfEventDamage <- dfNOAA %>% select(BGN_DATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP) %>% 
        mutate(YEAR = year(as.Date(BGN_DATE, format="%m/%d/%Y %H:%M:%S")), Total_PropertyDamage = 0, Total_CropDamage = 0)

dfEventDamage[dfEventDamage$PROPDMGEXP %in% c('+', '?', '-', '', '0'), 8] <-    dfEventDamage$PROPDMG[dfEventDamage$PROPDMGEXP %in% c('+', '?', '-', '', '0')]


dfEventDamage[dfEventDamage$PROPDMGEXP %in% as.character(1:9), 8] <-    dfEventDamage$PROPDMG[dfEventDamage$PROPDMGEXP %in% as.character(1:9)] * 10 ^ as.integer(as.character(dfEventDamage$PROPDMGEXP[dfEventDamage$PROPDMGEXP %in% as.character(1:9)]))

dfEventDamage[dfEventDamage$PROPDMGEXP %in% c("B", "b"), 8] <-    dfEventDamage$PROPDMG[dfEventDamage$PROPDMGEXP %in% c('B', 'b')] * 10 ^ 9

dfEventDamage[dfEventDamage$PROPDMGEXP %in% c("M", "m"), 8] <-    dfEventDamage$PROPDMG[dfEventDamage$PROPDMGEXP %in% c('M', 'm')] * 10 ^ 6

dfEventDamage[dfEventDamage$PROPDMGEXP %in% c("K", "k"), 8] <-    dfEventDamage$PROPDMG[dfEventDamage$PROPDMGEXP %in% c('K', 'k')] * 10 ^ 3

dfEventDamage[dfEventDamage$PROPDMGEXP %in% c("H", "h"), 8] <-    dfEventDamage$PROPDMG[dfEventDamage$PROPDMGEXP %in% c('H', 'h')] * 10 ^ 2

Calcuating Total Crop Damage For Each Event

The value of crop damage are represented in the data is maintained in two fields CROPDMG and CROPDMGEXP. CROPDMGEXP is the exponent to be applied to the CROPDMG value. The expected values for the exponent are:
1. b, B = Billions
2. m, M = Millions
3. k, K = Thousands
4. h, H = Hundreds
However, there are additional values in the data:

unique(dfNOAA$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
  1. ?, -, +, {space} = 0 was used as the exponent.

  2. 0 - 8: the numeric value was used as the exponent, e.g. 10^PROPDMGEXP

dfEventDamage[dfEventDamage$CROPDMGEXP %in% c('+', '?', '-', '', '0'), 9] <-    dfEventDamage$CROPDMG[dfEventDamage$CROPDMGEXP %in% c('+', '?', '-', '', '0')]


dfEventDamage[dfEventDamage$CROPDMGEXP %in% as.character(1:9), 9] <-    dfEventDamage$CROPDMG[dfEventDamage$CROPDMGEXP %in% as.character(1:9)] * 10 ^ as.integer(as.character(dfEventDamage$CROPDMGEXP[dfEventDamage$CROPDMGEXP %in% as.character(1:9)]))

dfEventDamage[dfEventDamage$CROPDMGEXP %in% c("B", "b"), 9] <-    dfEventDamage$CROPDMG[dfEventDamage$CROPDMGEXP %in% c('B', 'b')] * 10 ^ 9

dfEventDamage[dfEventDamage$CROPDMGEXP %in% c("M", "m"), 9] <-    dfEventDamage$CROPDMG[dfEventDamage$CROPDMGEXP %in% c('M', 'm')] * 10 ^ 6

dfEventDamage[dfEventDamage$CROPDMGEXP %in% c("K", "k"), 9] <-    dfEventDamage$CROPDMG[dfEventDamage$CROPDMGEXP %in% c('K', 'k')] * 10 ^ 3

dfEventDamage[dfEventDamage$CROPDMGEXP %in% c("H", "h"), 9] <-    dfEventDamage$CROPDMG[dfEventDamage$CROPDMGEXP %in% c('H', 'h')] * 10 ^ 2

Summarize Damage totals by Year and Event Type

dfEventDamage <- dfEventDamage %>% select(YEAR, EVTYPE, Total_PropertyDamage, Total_CropDamage) %>% group_by(YEAR, EVTYPE) %>% summarize(sum(Total_PropertyDamage), sum(Total_CropDamage))

names(dfEventDamage)<- c("YEAR", "EVTYPE", "Total_PropertyDamage", 'Total_CropDamage')

Select the event type with the most Property damage for a specific year

dfMaxPropDamage <- dfEventDamage %>% group_by(YEAR) %>% filter( Total_PropertyDamage== max(Total_PropertyDamage) & Total_PropertyDamage > 0) %>% arrange(desc(YEAR),EVTYPE,Total_PropertyDamage)

Select the Event Type with the most Crop damage for a specific Year

dfMaxCropDamage <- dfEventDamage %>% group_by(YEAR) %>% filter( Total_CropDamage== max(Total_CropDamage) & Total_CropDamage > 0) %>% arrange(desc(YEAR),EVTYPE,Total_CropDamage)

Data Exploration

The Number of Event type classifications have changed significantly over the years

Summary of event type count by year:

  • For the years 1950 - 1954, there is only 1 event type - Tornados
  • For the years 1955 - 1992, there are 3 event types - Tornados, Hail, Tsunami Wind
  • For the years 1993 - 2002, there are between 99 and 387 event types
  • For the years 2003 - 2011, there are between 38 and 51 event types

Given the change in event classificaitons, it will be difficult to compare data across these 4 periods. For example, to more accurately compare across the last two periods of year listed above, some additional mapping of event types is necessary because the level of granularity in the classification for years 1993 - 2002 is much higher than for 2003 - 2011. For the purpose of this initial analysis, this mapping is NOT present.

plot(dfEventTypeCount$YEAR, dfEventTypeCount$NUM_EVTYPE, main="Event Classification Has Changed Significantly From 1950 to 2011", xlab="Year", ylab = "Number of Event Types")

# Create Year Ranges representing these periods
YearPeriod1 <- seq.int(1950, 1992, 1)
YearPeriod2 <- seq.int(1993, 2002, 1)
YearPeriod3 <- seq.int(2003, 2011, 1)

Fatalities:

For the most recent period 2003 - 2011

TORNADOS are the most fatal, followed by EXCESSIVE HEAT

kable(dfMaxFatalities[dfMaxFatalities$YEAR %in% YearPeriod3, c(1,2,3)])
YEAR EVTYPE Total_FATALITIES
2011 TORNADO 587
2010 FLASH FLOOD 67
2009 RIP CURRENT 39
2008 TORNADO 129
2007 TORNADO 81
2006 EXCESSIVE HEAT 205
2005 EXCESSIVE HEAT 158
2004 FLASH FLOOD 58
2003 FLASH FLOOD 69
dfSumFatalities3 <- dfMaxFatalities[dfMaxFatalities$YEAR %in% YearPeriod3, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_FATALITIES)) 
names(dfSumFatalities3) <- c("EVTYPE", "SumTotal_FATALITIES")
dfSumFatalities3 <- arrange(dfSumFatalities3, desc(SumTotal_FATALITIES))

kable(dfSumFatalities3)
EVTYPE SumTotal_FATALITIES
TORNADO 797
EXCESSIVE HEAT 363
FLASH FLOOD 194
RIP CURRENT 39

For the period 1993 - 2002

EXCESSIVE HEAT / HEAT are the most fatal, followed by TORNADOS It is necessary to aggregate Event Types in order to see all the Tornado events

kable(dfMaxFatalities[dfMaxFatalities$YEAR %in% YearPeriod2, c(1,2,3)])
YEAR EVTYPE Total_FATALITIES
2002 EXCESSIVE HEAT 167
2001 EXCESSIVE HEAT 165
2000 EXCESSIVE HEAT 157
1999 EXCESSIVE HEAT 500
1998 EXCESSIVE HEAT 168
1997 EXCESSIVE HEAT 80
1996 FLASH FLOOD 92
1995 HEAT 687
1994 LIGHTNING 65
1993 BLIZZARD 30
dfSumFatalities2 <- dfMaxFatalities[dfMaxFatalities$YEAR %in% YearPeriod2, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_FATALITIES)) 
names(dfSumFatalities2) <- c("EVTYPE", "SumTotal_FATALITIES")
dfSumFatalities2 <- arrange(dfSumFatalities2, desc(SumTotal_FATALITIES))

kable(dfSumFatalities2)
EVTYPE SumTotal_FATALITIES
EXCESSIVE HEAT 1237
HEAT 687
FLASH FLOOD 92
LIGHTNING 65
BLIZZARD 30
Tornados did not appear at the top, as a result of using a classificaiton system that has multi-event types for Tornado.
paste0("Total Tornado events for this period: ", as.character(sum(dfEvent$Total_FATALITIES[dfEvent$YEAR %in% YearPeriod2 & grepl("tornado", dfEvent$EVTYPE, ignore.case = TRUE)])))
## [1] "Total Tornado events for this period: 592"

For the period 1950 - 1992

TORNADOS are the most fatal, followed by TSUNAMI WIND in 1986; however, there are only 3 event types for this period, Tornados, Hail, and Tsunami Wind

kable(dfMaxFatalities[dfMaxFatalities$YEAR %in% YearPeriod1, c(1,2,3)])
YEAR EVTYPE Total_FATALITIES
1992 TORNADO 39
1991 TORNADO 39
1990 TORNADO 53
1989 TORNADO 50
1988 TORNADO 32
1987 TORNADO 59
1986 TSTM WIND 36
1985 TORNADO 94
1984 TORNADO 122
1983 TORNADO 34
1982 TORNADO 64
1981 TORNADO 24
1980 TORNADO 28
1979 TORNADO 84
1978 TORNADO 53
1977 TORNADO 43
1976 TORNADO 44
1975 TORNADO 60
1974 TORNADO 366
1973 TORNADO 89
1972 TORNADO 27
1971 TORNADO 159
1970 TORNADO 73
1969 TORNADO 66
1968 TORNADO 131
1967 TORNADO 114
1966 TORNADO 98
1965 TORNADO 301
1964 TORNADO 73
1963 TORNADO 31
1962 TORNADO 30
1961 TORNADO 52
1960 TORNADO 46
1959 TORNADO 58
1958 TORNADO 67
1957 TORNADO 193
1956 TORNADO 83
1955 TORNADO 129
1954 TORNADO 36
1953 TORNADO 519
1952 TORNADO 230
1951 TORNADO 34
1950 TORNADO 70
dfSumFatalities1 <- dfMaxFatalities[dfMaxFatalities$YEAR %in% YearPeriod1, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_FATALITIES)) 
names(dfSumFatalities1) <- c("EVTYPE", "SumTotal_FATALITIES")
dfSumFatalities1 <- arrange(dfSumFatalities1, desc(SumTotal_FATALITIES))

kable(dfSumFatalities1)
EVTYPE SumTotal_FATALITIES
TORNADO 3997
TSTM WIND 36

Injuries:

For the most recent period 2003 - 2011

TORNADOS are the most injurous, followed by EXCESSIVE HEAT, then by HURRICANE/TYPHOON

kable(dfMaxInjuries[dfMaxInjuries$YEAR %in% YearPeriod3, c(1,2,4)])
YEAR EVTYPE Total_INJURIES
2011 TORNADO 6163
2010 TORNADO 699
2009 TORNADO 397
2008 TORNADO 1690
2007 TORNADO 659
2006 EXCESSIVE HEAT 993
2005 TORNADO 537
2004 HURRICANE/TYPHOON 839
2003 TORNADO 1087
dfSumInjuries3 <- dfMaxInjuries[dfMaxInjuries$YEAR %in% YearPeriod3, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_INJURIES)) 
names(dfSumInjuries3) <- c("EVTYPE", "SumTotal_INJURIES")
dfSumInjuries3 <- arrange(dfSumInjuries3, desc(SumTotal_INJURIES))

kable(dfSumInjuries3)
EVTYPE SumTotal_INJURIES
TORNADO 11232
EXCESSIVE HEAT 993
HURRICANE/TYPHOON 839

For the period 1993 - 2002

TORNADOS are the most injurious, followed by FLOODS and by ICE STORMS

kable(dfMaxInjuries[dfMaxInjuries$YEAR %in% YearPeriod2, c(1,2,4)])
YEAR EVTYPE Total_INJURIES
2002 TORNADO 968
2001 TORNADO 743
2000 TORNADO 882
1999 TORNADO 1842
1998 FLOOD 6130
1997 TORNADO 1033
1996 TORNADO 705
1995 TORNADO 1098
1994 ICE STORM 1614
1993 TORNADO 739
dfSumInjuries2 <- dfMaxInjuries[dfMaxInjuries$YEAR %in% YearPeriod2, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_INJURIES)) 
names(dfSumInjuries2) <- c("EVTYPE", "SumTotal_INJURIES")
dfSumInjuries2 <- arrange(dfSumInjuries2, desc(SumTotal_INJURIES))

kable(dfSumInjuries2)
EVTYPE SumTotal_INJURIES
TORNADO 8010
FLOOD 6130
ICE STORM 1614
Tornados did appear at the top, but as a result of using a classificaiton system that has multi-event types for Tornado, not all injuries are represented above.

Total Fatalities for all Event types that contain “tornado” case insensitive

paste0("Total Tornado events for this period: ", as.character(sum(dfEvent$Total_INJURIES[dfEvent$YEAR %in% YearPeriod2 & grepl("tornado", dfEvent$EVTYPE, ignore.case = TRUE)])))
## [1] "Total Tornado events for this period: 10751"

For the period 1950 - 1992

TORNADOS are the most injurious; however, there are only 3 event types for this period, Tornados, Hail, and Tsunami Wind

kable(dfMaxInjuries[dfMaxInjuries$YEAR %in% YearPeriod1, c(1,2,4)])
YEAR EVTYPE Total_INJURIES
1992 TORNADO 1323
1991 TORNADO 864
1990 TORNADO 1177
1989 TORNADO 1270
1988 TORNADO 688
1987 TORNADO 1018
1986 TORNADO 536
1985 TORNADO 1299
1984 TORNADO 2499
1983 TORNADO 756
1982 TORNADO 1276
1981 TORNADO 798
1980 TORNADO 1157
1979 TORNADO 3014
1978 TORNADO 919
1977 TORNADO 771
1976 TORNADO 1195
1975 TORNADO 1457
1974 TORNADO 6824
1973 TORNADO 2406
1972 TORNADO 976
1971 TORNADO 2723
1970 TORNADO 1355
1969 TORNADO 1311
1968 TORNADO 2522
1967 TORNADO 2144
1966 TORNADO 2030
1965 TORNADO 5197
1964 TORNADO 1148
1963 TORNADO 538
1962 TORNADO 551
1961 TORNADO 1087
1960 TORNADO 737
1959 TORNADO 734
1958 TORNADO 535
1957 TORNADO 1976
1956 TORNADO 1355
1955 TORNADO 926
1954 TORNADO 715
1953 TORNADO 5131
1952 TORNADO 1915
1951 TORNADO 524
1950 TORNADO 659
dfSumInjuries1 <- dfMaxInjuries[dfMaxInjuries$YEAR %in% YearPeriod1, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_INJURIES)) 
names(dfSumInjuries1) <- c("EVTYPE", "SumTotal_INJURIES")
dfSumInjuries1 <- arrange(dfSumInjuries1, desc(SumTotal_INJURIES))

kable(dfSumInjuries1)
EVTYPE SumTotal_INJURIES
TORNADO 68036

Property Damage:

For the most recent period 2003 - 2011

FLOODING is the most costly, followed by HURRICANE/TYPHOON

kable(dfMaxPropDamage[dfMaxPropDamage$YEAR %in% YearPeriod3, c(1,2,3)], format.args = list(big.mark = ','))
YEAR EVTYPE Total_PropertyDamage
2,011 TORNADO 9,819,600,700
2,010 HAIL 3,404,109,950
2,009 HAIL 1,440,000,550
2,008 STORM SURGE/TIDE 4,595,035,000
2,007 TORNADO 1,400,756,650
2,006 FLOOD 116,516,105,440
2,005 HURRICANE/TYPHOON 49,786,635,000
2,004 HURRICANE/TYPHOON 18,356,435,000
2,003 WILDFIRE 2,331,537,330
dfSumPropDamage3 <- dfMaxPropDamage[dfMaxPropDamage$YEAR %in% YearPeriod3, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_PropertyDamage)) 
names(dfSumPropDamage3) <- c("EVTYPE", "SumTotal_PROPERTYDAMAGE")
dfSumPropDamage3 <- arrange(dfSumPropDamage3, desc(SumTotal_PROPERTYDAMAGE))

kable(dfSumPropDamage3, format.args = list(big.mark = ','))
EVTYPE SumTotal_PROPERTYDAMAGE
FLOOD 116,516,105,440
HURRICANE/TYPHOON 68,143,070,000
TORNADO 11,220,357,350
HAIL 4,844,110,500
STORM SURGE/TIDE 4,595,035,000
WILDFIRE 2,331,537,330

For the period 1993 - 2002

Flooding is the most costly, followed by Storms then by Hurricanes It is necesary to aggregate similar EVTYPE descriptions in order to reach this conclusion…

kable(dfMaxPropDamage[dfMaxPropDamage$YEAR %in% YearPeriod2, c(1,2,3)], format.args = list(big.mark = ','))
YEAR EVTYPE Total_PropertyDamage
2,002 TORNADO 801,254,200
2,001 TROPICAL STORM 5,185,910,000
2,000 WILD/FOREST FIRE 2,109,242,200
1,999 HURRICANE 4,162,191,000
1,998 HURRICANE 3,159,132,000
1,997 FLOOD 6,124,621,200
1,996 HURRICANE 1,382,190,000
1,995 HURRICANE OPAL 3,172,846,000
1,994 TORNADO 694,648,856
1,993 WINTER STORM 5,128,400,000
dfSumPropDamage2 <- dfMaxPropDamage[dfMaxPropDamage$YEAR %in% YearPeriod2, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_PropertyDamage)) 
names(dfSumPropDamage2) <- c("EVTYPE", "SumTotal_PROPERTYDAMAGE")
dfSumPropDamage2 <- arrange(dfSumPropDamage2, desc(SumTotal_PROPERTYDAMAGE))

kable(dfSumPropDamage2, format.args = list(big.mark = ','))
EVTYPE SumTotal_PROPERTYDAMAGE
HURRICANE 8,703,513,000
FLOOD 6,124,621,200
TROPICAL STORM 5,185,910,000
WINTER STORM 5,128,400,000
HURRICANE OPAL 3,172,846,000
WILD/FOREST FIRE 2,109,242,200
TORNADO 1,495,903,056
Hurricanes and Storms appear on the list twice as a result of using a classification system that has multi-event types for Hurricane. In addition, damage from Flooding and Tornados are also under represented in the above totals.

Total Property Damage for all Event types that contain “hurricane” case insensitive

paste0("Total property damage for Flooding events for this period: ", as.character(format(sum(dfEventDamage$Total_PropertyDamage[dfEventDamage$YEAR %in% YearPeriod2 & grepl("flood", dfEvent$EVTYPE, ignore.case = TRUE)]), big.mark = ',' )))
## [1] "Total property damage for Flooding events for this period: 24,543,692,335"
paste0("Total property damage for Storm events for this period: ", as.character(format(sum(dfEventDamage$Total_PropertyDamage[dfEventDamage$YEAR %in% YearPeriod2 & grepl("storm", dfEvent$EVTYPE, ignore.case = TRUE)]), big.mark = ',' )))
## [1] "Total property damage for Storm events for this period: 17,683,357,176"
paste0("Total property damage for Hurricane events for this period: ", as.character(format(sum(dfEventDamage$Total_PropertyDamage[dfEventDamage$YEAR %in% YearPeriod2 & grepl("hurricane", dfEvent$EVTYPE, ignore.case = TRUE)]), big.mark = ',')))
## [1] "Total property damage for Hurricane events for this period: 13,157,740,010"
paste0("Total property damage for Tornado events for this period: ", as.character(format(sum(dfEventDamage$Total_PropertyDamage[dfEventDamage$YEAR %in% YearPeriod2 & grepl("tornado", dfEvent$EVTYPE, ignore.case = TRUE)]), big.mark = ',' )))
## [1] "Total property damage for Tornado events for this period: 10,399,450,897"

For the period 1950 - 1992

Tornados are the most costly; however, there are only 3 event types classified for these years.

kable(dfMaxPropDamage[dfMaxPropDamage$YEAR %in% YearPeriod1, c(1,2,3)], format.args = list(big.mark = ","))
YEAR EVTYPE Total_PropertyDamage
1,992 TORNADO 1,406,688,900
1,991 TORNADO 1,152,296,430
1,990 TORNADO 1,560,568,070
1,989 TORNADO 1,760,051,050
1,988 TORNADO 1,124,214,650
1,987 TORNADO 407,837,950
1,986 TORNADO 1,034,912,340
1,985 TORNADO 1,608,097,210
1,984 TORNADO 2,131,171,390
1,983 TORNADO 765,855,590
1,982 TORNADO 1,429,218,000
1,981 TORNADO 765,888,310
1,980 TORNADO 2,152,814,350
1,979 TORNADO 946,241,240
1,978 TORNADO 616,068,160
1,977 TORNADO 342,024,230
1,976 TORNADO 308,497,190
1,975 TORNADO 826,723,790
1,974 TORNADO 1,943,517,780
1,973 TORNADO 2,063,066,560
1,972 TORNADO 151,172,570
1,971 TORNADO 202,120,550
1,970 TORNADO 625,205,980
1,969 TORNADO 113,274,230
1,968 TORNADO 210,079,910
1,967 TORNADO 577,019,220
1,966 TORNADO 476,629,360
1,965 TORNADO 1,762,499,040
1,964 TORNADO 165,990,820
1,963 TORNADO 87,230,580
1,962 TORNADO 67,200,730
1,961 TORNADO 179,388,730
1,960 TORNADO 118,224,240
1,959 TORNADO 87,453,040
1,958 TORNADO 128,994,610
1,957 TORNADO 224,388,890
1,956 TORNADO 116,912,350
1,955 TORNADO 82,660,630
1,954 TORNADO 85,805,320
1,953 TORNADO 596,104,700
1,952 TORNADO 94,102,240
1,951 TORNADO 65,505,990
1,950 TORNADO 34,481,650
dfSumPropDamage1 <- dfMaxPropDamage[dfMaxPropDamage$YEAR %in% YearPeriod1, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_PropertyDamage)) 
names(dfSumPropDamage1) <- c("EVTYPE", "SumTotal_PROPERTYDAMAGE")
dfSumPropDamage1 <- arrange(dfSumPropDamage1, desc(SumTotal_PROPERTYDAMAGE))

kable(dfSumPropDamage1, format.args = list(big.mark = ","))
EVTYPE SumTotal_PROPERTYDAMAGE
TORNADO 30,598,198,570

Crop Damage:

For the most recent period 2003 - 2011

DROUGHT, FLOOD, HURRICANE/TYPHOON are the most costly.

kable(dfMaxCropDamage[dfMaxCropDamage$YEAR %in% YearPeriod3, c(1,2,4)], format.args = list(big.mark = ','))
YEAR EVTYPE Total_CropDamage
2,011 FLOOD 154,872,000
2,010 FLOOD 1,104,709,000
2,009 HAIL 350,994,000
2,008 FLOOD 1,078,292,000
2,007 FLOOD 518,507,000
2,006 DROUGHT 2,375,825,000
2,005 HURRICANE/TYPHOON 2,012,682,000
2,004 HURRICANE/TYPHOON 565,820,800
2,003 DROUGHT 572,480,000
dfSumCropDamage3 <- dfMaxCropDamage[dfMaxCropDamage$YEAR %in% YearPeriod3, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_CropDamage)) 
names(dfSumCropDamage3) <- c("EVTYPE", "SumTotal_CROPDAMAGE")
dfSumCropDamage3 <- arrange(dfSumCropDamage3, desc(SumTotal_CROPDAMAGE))

kable(dfSumCropDamage3, format.args = list(big.mark = ','))
EVTYPE SumTotal_CROPDAMAGE
DROUGHT 2,948,305,000
FLOOD 2,856,380,000
HURRICANE/TYPHOON 2,578,502,800
HAIL 350,994,000

For the period 1993 - 2002

FLOODING and DROUGHT are the most costly, followed by ICE STORMS It is necesary to aggregate similar EVTYPE descriptions in order to reach this conclusion…

kable(dfMaxCropDamage[dfMaxCropDamage$YEAR %in% YearPeriod2, c(1,2,4)], format.args = list(big.mark = ','))
YEAR EVTYPE Total_CropDamage
2,002 DROUGHT 737,634,000
2,001 DROUGHT 1,273,860,000
2,000 DROUGHT 2,438,100,000
1,999 HURRICANE 1,378,875,000
1,998 DROUGHT 2,141,971,000
1,997 DROUGHT 252,990,000
1,996 DROUGHT 504,080,000
1,995 DROUGHT 554,500,000
1,994 ICE STORM 5,006,453,500
1,993 RIVER FLOOD 5,005,050,000
dfSumCropDamage2 <- dfMaxCropDamage[dfMaxCropDamage$YEAR %in% YearPeriod2, ] %>% group_by(EVTYPE) %>%  summarize(sum(Total_CropDamage)) 
names(dfSumCropDamage2) <- c("EVTYPE", "SumTotal_CROPDAMAGE")
dfSumCropDamage2 <- arrange(dfSumCropDamage2, desc(SumTotal_CROPDAMAGE))

kable(dfSumCropDamage2, format.args = list(big.mark = ','))
EVTYPE SumTotal_CROPDAMAGE
DROUGHT 7,903,135,000
ICE STORM 5,006,453,500
RIVER FLOOD 5,005,050,000
HURRICANE 1,378,875,000
Damage from flooding and hurricanes are unders represented in the above totals, as a result of using a classification system that has multi-event types for Hurricane and Flood
paste0("Total crop damage for Flooding events for this period: ", as.character(format(sum(dfEventDamage$Total_CropDamage[dfEventDamage$YEAR %in% YearPeriod2 & grepl("flood", dfEvent$EVTYPE, ignore.case = TRUE)]), big.mark = ',' )))
## [1] "Total crop damage for Flooding events for this period: 8,058,187,200"
paste0("Total crop damage for Hurricane events for this period: ", as.character(format(sum(dfEventDamage$Total_CropDamage[dfEventDamage$YEAR %in% YearPeriod2 & grepl("hurricane", dfEvent$EVTYPE, ignore.case = TRUE)]), big.mark = ',')))
## [1] "Total crop damage for Hurricane events for this period: 2,726,910,000"

For the period 1950 - 1992 - No Crop Damage data is present in the dataset

Results

Which Events are the Most Harmful with Respect to Population Health?

With respect to Fatalities per year, TORNADOS and EXCESSIVE HEAT are the most harmful. With Tornados being the most harmful in recent years.

With respect to Injuries per year, TORNADOS are consistently the most harmful across all years.

lstYearBeginRange <- c(range(YearPeriod3)[1], range(YearPeriod3)[1], range(YearPeriod2)[1], range(YearPeriod2)[1], range(YearPeriod1)[1])
lstYearEndRange <- c(range(YearPeriod3)[2], range(YearPeriod3)[2], range(YearPeriod2)[2], range(YearPeriod2)[2], range(YearPeriod1)[2])

lstFatalEvents <- c(as.character(dfSumFatalities3$EVTYPE[1]), as.character(dfSumFatalities3$EVTYPE[2]), as.character(dfSumFatalities2$EVTYPE[1]), "TORNADO", as.character(dfSumFatalities1$EVTYPE[1]))

lstFatalities <-  c(dfSumFatalities3$SumTotal_FATALITIES[1]/length(YearPeriod3), dfSumFatalities3$SumTotal_FATALITIES[2]/length(YearPeriod3), dfSumFatalities2$SumTotal_FATALITIES[1]/length(YearPeriod2), sum(dfEvent$Total_FATALITIES[dfEvent$YEAR %in% YearPeriod2 & grepl("tornado", dfEvent$EVTYPE, ignore.case = TRUE)]) / length(YearPeriod2), dfSumFatalities1$SumTotal_FATALITIES[1]/length(YearPeriod1))

dfFatalities <- data.frame(lstYearBeginRange, lstYearEndRange, lstFatalEvents, lstFatalities)
names(dfFatalities) <- c('Period Start', 'Period End', 'Event Type', 'Fatalities Per Year')
kable(dfFatalities, format.args = list(digits=0), caption = "Tornados and Excessive Heat Result in the Most Fatalities Per Year")
Tornados and Excessive Heat Result in the Most Fatalities Per Year
Period Start Period End Event Type Fatalities Per Year
2003 2011 TORNADO 89
2003 2011 EXCESSIVE HEAT 40
1993 2002 EXCESSIVE HEAT 124
1993 2002 TORNADO 59
1950 1992 TORNADO 93
lstInjuryEvents <- c(as.character(dfSumInjuries3$EVTYPE[1]), as.character(dfSumInjuries3$EVTYPE[2]), as.character(dfSumInjuries2$EVTYPE[1]), as.character(dfSumInjuries2$EVTYPE[2]), as.character(dfSumInjuries1$EVTYPE[1]))

lstInjuries <-  c(dfSumInjuries3$SumTotal_INJURIES[1]/length(YearPeriod3), dfSumInjuries3$SumTotal_INJURIES[2]/length(YearPeriod3), sum(dfEvent$Total_INJURIES[dfEvent$YEAR %in% YearPeriod2 & grepl("tornado", dfEvent$EVTYPE, ignore.case = TRUE)]) / length(YearPeriod2), dfSumInjuries2$SumTotal_INJURIES[2]/length(YearPeriod2),  dfSumInjuries1$SumTotal_INJURIES[1]/length(YearPeriod1))

dfInjuries <- data.frame(lstYearBeginRange, lstYearEndRange, lstInjuryEvents, lstInjuries)
names(dfInjuries) <- c('Period Start', 'Period End', 'Event Type', 'Injuries Per Year')
kable(dfInjuries, format.args = list(digits=0), caption = "Tornados Result in the Most Injuries Per Year")
Tornados Result in the Most Injuries Per Year
Period Start Period End Event Type Injuries Per Year
2003 2011 TORNADO 1248
2003 2011 EXCESSIVE HEAT 110
1993 2002 TORNADO 1075
1993 2002 FLOOD 613
1950 1992 TORNADO 1582

Which Events have the greatest Economic consequences?

With respect to Property Damage per year, FLOODING is the most costly, followed by HURRICANES/TYPHOONS and Storms - across multiple varieties.

With respect to Crop Damage per year, FLOODING and DROUGHT are the most costly.

lstYearBeginRange <- c(range(YearPeriod3)[1], range(YearPeriod3)[1], range(YearPeriod2)[1], range(YearPeriod2)[1], range(YearPeriod2)[1], range(YearPeriod1)[1])

lstYearEndRange <- c(range(YearPeriod3)[2], range(YearPeriod3)[2], range(YearPeriod2)[2], range(YearPeriod2)[2], range(YearPeriod2)[2], range(YearPeriod1)[2])

lstPropDamageEvents <- c(as.character(dfSumPropDamage3$EVTYPE[1]), as.character(dfSumPropDamage3$EVTYPE[2]), "FLOOD", "STORM", "HURRICANE", as.character(dfSumPropDamage1$EVTYPE[1]))

lstPropDamage <-  c(dfSumPropDamage3$SumTotal_PROPERTYDAMAGE[1]/length(YearPeriod3), dfSumPropDamage3$SumTotal_PROPERTYDAMAGE[2]/length(YearPeriod3), sum(dfEventDamage$Total_PropertyDamage[dfEvent$YEAR %in% YearPeriod2 & grepl("flood", dfEvent$EVTYPE, ignore.case = TRUE)])/length(YearPeriod2), sum(dfEventDamage$Total_PropertyDamage[dfEvent$YEAR %in% YearPeriod2 & grepl("storm", dfEvent$EVTYPE, ignore.case = TRUE)]) / length(YearPeriod2), sum(dfEventDamage$Total_PropertyDamage[dfEvent$YEAR %in% YearPeriod2 & grepl("hurricane", dfEvent$EVTYPE, ignore.case = TRUE)]) / length(YearPeriod2), dfSumPropDamage1$SumTotal_PROPERTYDAMAGE[1]/length(YearPeriod1))

dfPropDamage <- data.frame(lstYearBeginRange, lstYearEndRange, lstPropDamageEvents, lstPropDamage)
names(dfPropDamage) <- c('Period Start', 'Period End', 'Event Type', 'Property Damage Per Year')
kable(dfPropDamage, format.args = list(digits=0), caption = "Flooding Results in the Most Property Damanage Per Year")
Flooding Results in the Most Property Damanage Per Year
Period Start Period End Event Type Property Damage Per Year
2003 2011 FLOOD 1.294623e+10
2003 2011 HURRICANE/TYPHOON 7.571452e+09
1993 2002 FLOOD 2.454369e+09
1993 2002 STORM 1.768336e+09
1993 2002 HURRICANE 1.315774e+09
1950 1992 TORNADO 7.115860e+08
# Crop Damage Table of Results
lstYearBeginRange <- c(range(YearPeriod3)[1], range(YearPeriod3)[1], range(YearPeriod2)[1], range(YearPeriod2)[1])

lstYearEndRange <- c(range(YearPeriod3)[2], range(YearPeriod3)[2], range(YearPeriod2)[2], range(YearPeriod2)[2])

lstCropDamageEvents <- c(as.character(dfSumCropDamage3$EVTYPE[1]), as.character(dfSumCropDamage3$EVTYPE[2]), "FLOOD", "DROUGHT")

lstCropDamage <-  c(dfSumCropDamage3$SumTotal_CROPDAMAGE[1]/length(YearPeriod3), dfSumCropDamage3$SumTotal_CROPDAMAGE[2]/length(YearPeriod3), sum(dfEventDamage$Total_CropDamage[dfEvent$YEAR %in% YearPeriod2 & grepl("flood", dfEvent$EVTYPE, ignore.case = TRUE)])/length(YearPeriod2), sum(dfEventDamage$Total_CropDamage[dfEvent$YEAR %in% YearPeriod2 & grepl("drought", dfEvent$EVTYPE, ignore.case = TRUE)]) / length(YearPeriod2))

dfCropDamage <- data.frame(lstYearBeginRange, lstYearEndRange, lstCropDamageEvents, lstCropDamage)
names(dfCropDamage) <- c('Period Start', 'Period End', 'Event Type', 'Crop Damage Per Year')
kable(dfCropDamage, format.args = list(digits=0), caption = "Flooding and Drought Result in the Most Crop Damage Per Year")
Flooding and Drought Result in the Most Crop Damage Per Year
Period Start Period End Event Type Crop Damage Per Year
2003 2011 DROUGHT 3.275894e+08
2003 2011 FLOOD 3.173756e+08
1993 2002 FLOOD 8.058187e+08
1993 2002 DROUGHT 9.286631e+08