title: “Study of Casualties and Economic Consequenses Caused by Different Weather Event Types” output: html_document —

Of Different Weather Event Types, Hurricane has the Highest Economic Consequences, Amount of Casulties is Highest in Tsunami.

Synopsis

The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database tracks characteristics of major storms and weather events in the United States. Database includes estimates of any fatalities, injuries, and property damage caused by the weather events. In this study we try to establish which type of weather event is most harmful to public health and which type of weather event has greatest economic consequences for communities and municipalities. Analysis of the storm database showed that tsunami is a weather event that is the most harmful to public health following with excessive heat and hurricane. On average eight persons are either killed or injured in a tsunami. Hurricanes (followed with tropical storm and tsunami) have the greates economic consequenses. On average, compined economic losses in property damages and crop damages due to a hurricane, is over 300.000.000 $.

Loading and processing the raw data

library(R.utils)
## Warning: package 'R.utils' was built under R version 3.3.3
library(dplyr)
library(ggplot2)
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
download.file(url, "Stormdata.csv.bz2")
bunzip2("Stormdata.csv.bz2", "Stormdata.csv", remove = FALSE, skip = TRUE)
## [1] "Stormdata.csv"
## attr(,"temporary")
## [1] FALSE
data <- read.csv("Stormdata.csv", header = TRUE, stringsAsFactors = FALSE, na.strings = "")

First the variables needed in the analysis is selected from the full data. The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database reports 48 different weather event types. Evtype column in data have over 900 different weather event types. Weather event types in the data are narrowed to 48 or less.

# select variables needed in the analysis
data_sel <- data %>% select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

names(data_sel) <- tolower(names(data_sel))
data_sel$evtype <- tolower(data_sel$evtype)
data_sel$evtype <- str_trim(data_sel$evtype)

# remove words like unseasonable, unusually and record from evtypes.
data_sel$evtype <- gsub("^un.*? ", "", data_sel$evtype)
data_sel$evtype <- gsub("^record.*? ", "", data_sel$evtype)

# HIGH WIND
data_sel[grep("^high.*wind", data_sel$evtype), 1] <- c("high wind")
data_sel[grep("^gusty.* wind.*", data_sel$evtype), 1] <- c("high wind")
data_sel[grep("^wind.*st.*", data_sel$evtype), 1] <- c("high wind")

data_evtypes <- filter(data_sel, evtype == "high wind")
data_sel <- filter(data_sel, evtype != "high wind")

# HURRICANE
data_sel[grep("^hurricane", data_sel$evtype), 1] <- c("hurricane")
data_sel[grep("^typh", data_sel$evtype), 1] <- c("hurricane")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "hurricane"))
data_sel <- filter(data_sel, evtype != "hurricane")

# THUNDERSTORM
data_sel[grep("^thund", data_sel$evtype), 1] <- c("thunderstorm")
data_sel[grep("^thuderstorm", data_sel$evtype), 1] <- c("thunderstorm")
data_sel[grep("^tstm", data_sel$evtype), 1] <- c("thunderstorm")
data_sel[grep("^thunerstorm", data_sel$evtype), 1] <- c("thunderstorm")
data_sel[grep("^tunderstorm", data_sel$evtype), 1] <- c("thunderstorm")
data_sel[grep("^severe thun", data_sel$evtype), 1] <- c("thunderstorm")
data_sel[grep(".*microburst.*", data_sel$evtype), 1] <- c("thunderstorm")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "thunderstorm"))
data_sel <- filter(data_sel, evtype != "thunderstorm")

# LIGHTNING
data_sel[grep("^lightning", data_sel$evtype), 1] <- c("lightning")
data_sel[grep("^lighting", data_sel$evtype), 1] <- c("lightning")
data_sel[grep("^ligntning", data_sel$evtype), 1] <- c("lightning")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "lightning"))
data_sel <- filter(data_sel, evtype != "lightning")

# TORNADO
data_sel[grep("^tornado", data_sel$evtype), 1] <- c("tornado")
data_sel[grep("torndao", data_sel$evtype), 1] <- c("tornado")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "tornado"))
data_sel <- filter(data_sel, evtype != "tornado")

# HAIL
data_sel[grep(".*hail", data_sel$evtype), 1] <- c("hail")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "hail"))
data_sel <- filter(data_sel, evtype != "hail")

# HEAVY SNOW
data_sel[grep(".*heavy snow.*", data_sel$evtype), 1] <- c("heavy snow")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "heavy snow"))
data_sel <- filter(data_sel, evtype != "heavy snow")

# HEAVY RAIN
data_sel[grep(".*heavy rain.*", data_sel$evtype), 1] <- c("heavy rain")
data_sel[grep("^heavy show", data_sel$evtype), 1] <- c("heavy rain")
data_sel[grep(".*precip.*", data_sel$evtype), 1] <- c("heavy rain")
data_sel[grep("^ex.*rain.*", data_sel$evtype), 1] <- c("heavy rain")
data_sel[grep("^hvy rain", data_sel$evtype), 1] <- c("heavy rain")
data_sel[grep("rainfall", data_sel$evtype), 1] <- c("heavy rain")
data_sel[grep("rainstorm", data_sel$evtype), 1] <- c("heavy rain")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "heavy rain"))
data_sel <- filter(data_sel, evtype != "heavy rain")

#BLIZZARD
data_sel[grep(".*blizzard", data_sel$evtype), 1] <- c("blizzard")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "blizzard"))
data_sel <- filter(data_sel, evtype != "blizzard")

# FLAHS FLOOD
data_sel[grep(".*flash", data_sel$evtype), 1] <- c("flash flood")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "flash flood"))
data_sel <- filter(data_sel, evtype != "flash flood")

# FLOOD
data_sel[grep("^flood", data_sel$evtype), 1] <- c("flood")
data_sel[grep("^urban", data_sel$evtype), 1] <- c("flood")
data_sel[grep(".*stre.*flood.*", data_sel$evtype), 1] <- c("flood")
data_sel[grep("^r.*flood.*", data_sel$evtype), 1] <- c("flood")
data_sel[grep("^minor flood.*", data_sel$evtype), 1] <- c("flood")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "flood"))
data_sel <- filter(data_sel, evtype != "flood")

# COASTAL FLOOD
data_sel[grep("^c.*flood", data_sel$evtype), 1] <- c("coastal flood")
data_sel[grep(".*high tide", data_sel$evtype), 1] <- c("coastal flood")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "coastal flood"))
data_sel <- filter(data_sel, evtype != "coastal flood")

# ASTRONOMICAL LOW TIDE
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "astronomical low tide"))
data_sel <- filter(data_sel, evtype != "astronomical low tide")

# HEAT
data_sel[grep("^heat", data_sel$evtype), 1] <- c("heat")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "heat"))
data_sel <- filter(data_sel, evtype != "heat")

# TROPICAL STORM
data_sel[grep("^tropical storm", data_sel$evtype), 1] <- c("tropical storm")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "tropical storm"))
data_sel <- filter(data_sel, evtype != "tropical storm")

# TROPICAL DEPRESSION
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "tropical depression"))
data_sel <- filter(data_sel, evtype != "tropical depression")

# TSUNAMI
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "tsunami"))
data_sel <- filter(data_sel, evtype != "tsunami")

# DUST STORM
data_sel[grep("^dust.*storm", data_sel$evtype), 1] <- c("dust storm")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "dust storm"))
data_sel <- filter(data_sel, evtype != "dust storm")

# WINTER STORM
data_sel[grep("^winter storm", data_sel$evtype), 1] <- c("winter storm")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "winter storm"))
data_sel <- filter(data_sel, evtype != "winter storm")

# WILDFIRE
data_sel[grep("^wild", data_sel$evtype), 1] <- c("wildfire")
data_sel[grep(".*fire.*", data_sel$evtype), 1] <- c("wildfire")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "wildfire"))
data_sel <- filter(data_sel, evtype != "wildfire")

# WATERSPOUT
data_sel[grep("^water.*sp", data_sel$evtype), 1] <- c("waterspout")
data_sel[grep("^waytersp", data_sel$evtype), 1] <- c("waterspout")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "waterspout"))
data_sel <- filter(data_sel, evtype != "waterspout")

# FUNNEL CLOUD
data_sel[grep(".*funnel.*", data_sel$evtype), 1] <- c("funnel cloud")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "funnel cloud"))
data_sel <- filter(data_sel, evtype != "funnel cloud")

# RIP CURRENT
data_sel[grep("^rip", data_sel$evtype), 1] <- c("rip current")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "rip current"))
data_sel <- filter(data_sel, evtype != "rip current")

# FROST/FREEZE
data_sel[grep(".*frost", data_sel$evtype), 1] <- c("frost/freeze")
data_sel[grep(".*freez", data_sel$evtype), 1] <- c("frost/freeze")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "frost/freeze"))
data_sel <- filter(data_sel, evtype != "frost/freeze")

# SLEET
data_sel[grep(".*sleet.*", data_sel$evtype), 1] <- c("sleet")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "sleet"))
data_sel <- filter(data_sel, evtype != "sleet")

# LAKE-EFFECT SNOW
data_sel[grep("^lake.effect", data_sel$evtype), 1] <- c("lake-effect snow")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "lake-effect snow"))
data_sel <- filter(data_sel, evtype != "lake-effect snow")

# DUST DEVIL
data_sel[grep("^dust dev", data_sel$evtype), 1] <- c("dust devil")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "dust devil"))
data_sel <- filter(data_sel, evtype != "dust devil")

# ICE STORM
data_sel[grep("^ice storm", data_sel$evtype), 1] <- c("ice storm")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "ice storm"))
data_sel <- filter(data_sel, evtype != "ice storm")

# HIGH SURF
data_sel[grep(".*surf.*", data_sel$evtype), 1] <- c("high surf")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "high surf"))
data_sel <- filter(data_sel, evtype != "high surf")

# EXCESSIVE HEAT
data_sel[grep("^ex.*heat.*", data_sel$evtype), 1] <- c("excessive heat")
data_sel[grep("^ex.*warm.*", data_sel$evtype), 1] <- c("excessive heat")
data_sel[grep("^high temp.*", data_sel$evtype), 1] <- c("excessive heat")
data_sel[grep("^hot", data_sel$evtype), 1] <- c("excessive heat")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "excessive heat"))
data_sel <- filter(data_sel, evtype != "excessive heat")

# DROUGHT
data_sel[grep(".*dry.*", data_sel$evtype), 1] <- c("drought")
data_sel[grep("^drought", data_sel$evtype), 1] <- c("drought")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "drought"))
data_sel <- filter(data_sel, evtype != "drought")

# HIGH WIND
data_sel[grep("^strong wind", data_sel$evtype), 1] <- c("strong wind")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "strong wind"))
data_sel <- filter(data_sel, evtype != "strong wind")

# EXTREME COLD/WIND CHILL
data_sel[grep(".*cold.*", data_sel$evtype), 1] <- c("extreme cold/wind chill")
data_sel[grep(".*cool.*", data_sel$evtype), 1] <- c("extreme cold/wind chill")
data_sel[grep("^ex.*wind.*", data_sel$evtype), 1] <- c("extreme cold/wind chill")
data_sel[grep(".*wind ch.*", data_sel$evtype), 1] <- c("extreme cold/wind chill")
data_sel[grep("^low temp.*", data_sel$evtype), 1] <- c("extreme cold/wind chill")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "extreme cold/wind chill"))
data_sel <- filter(data_sel, evtype != "extreme cold/wind chill")

# VOLCANIC ASH
data_sel[grep("^volcanic ash", data_sel$evtype), 1] <- c("volcanic ash")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "volcanic ash"))
data_sel <- filter(data_sel, evtype != "volcanic ash")

# DENSE FOG
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "dense fog"))
data_sel <- filter(data_sel, evtype != "dense fog")

# DENSE SMOKE
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "dense smoke"))
data_sel <- filter(data_sel, evtype != "dense smoke")

# WINTER WEATHER
data_sel[grep("^winter weat", data_sel$evtype), 1] <- c("winter weather")
data_sel[grep(".*snowfall", data_sel$evtype), 1] <- c("winter weather")
data_sel[grep("^snow.*s", data_sel$evtype), 1] <- c("winter weather")
data_sel[grep("^ex.*snow", data_sel$evtype), 1] <- c("winter weather")
data_sel[grep("^wint", data_sel$evtype), 1] <- c("winter weather")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "winter weather"))
data_sel <- filter(data_sel, evtype != "winter weather")

# MARINE HIGH WIND
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "marine high wind"))
data_sel <- filter(data_sel, evtype != "marine high wind")

# MARINE STRONG WIND
data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "marine strong wind"))
data_sel <- filter(data_sel, evtype != "marine strong wind")

# MARINE THUNDERSTORM WIND
data_sel[grep("^marine tstm wind", data_sel$evtype), 1] <- c("marine thunderstorm wind")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "marine thunderstorm wind"))
data_sel <- filter(data_sel, evtype != "marine thunderstorm wind")

# AVALANCE
data_sel[grep("^aval", data_sel$evtype), 1] <- c("avalanche")

data_evtypes <- rbind(data_evtypes, filter(data_sel, evtype == "avalanche"))
data_sel <- filter(data_sel, evtype != "avalanche")

Letters and integers in columns propdmgexp and crpdmgexp expresses exponentials of the values in propdmg and cropdmg columns. Values in propdmg and cropdmg columns are changed to non-exponential values in order for better comparison of the values.

# function to change propdmg values to non-exponential values
# +, -, ? are ignored
exponent_prop <- function(x) {
  ifelse (x$propdmgexp %in% c("H", "h"), x$propdmg * 100,
    ifelse (x$propdmgexp %in% c("K", "k"), x$propdmg * 1000,
    ifelse (x$propdmgexp %in% c("M", "m"), x$propdmg * 1000000,
    ifelse (x$propdmgexp %in% c("B", "b"), x$propdmg * 1000000000, 
    ifelse (x$propdmgexp %in% (0:8), x$propdmg * 10, x$propdmg)))))
}


# function to change cropdmg values to non-exponential values
# +, -, ? are ignored
exponent_crop <- function(x) {
  ifelse (x$cropdmgexp %in% c("H", "h"), x$cropdmg * 100,
    ifelse (x$cropdmgexp %in% c("K", "k"), x$cropdmg * 1000,
    ifelse (x$cropdmgexp %in% c("M", "m"), x$cropdmg * 1000000,
    ifelse (x$cropdmgexp %in% c("B", "b"), x$cropdmg * 1000000000, 
    ifelse (x$cropdmgexp %in% (0:8), x$cropdmg * 10, x$cropdmg)))))
}

# change propdmg values to non-exponential values
data_evtypes$propdmg_nonexp <- exponent_prop(data_evtypes) 

# change cropdmg values to non-exponential values
data_evtypes$cropdmg_nonexp <- exponent_crop(data_evtypes) 

Results

table_casualties <- data_evtypes %>% group_by(evtype) %>% summarise(n = n(), casualties = round(mean(fatalities + injuries), 2)) %>% arrange(desc(casualties))
table_casualties
## # A tibble: 41 × 3
##                evtype     n casualties
##                 <chr> <int>      <dbl>
## 1             tsunami    20       8.10
## 2      excessive heat  1725       5.03
## 3           hurricane   299       4.91
## 4                heat   934       3.94
## 5             tornado 60687       1.60
## 6         rip current   777       1.42
## 7          dust storm   429       1.08
## 8           ice storm  2007       1.03
## 9           avalanche   387       1.02
## 10 marine strong wind    48       0.75
## # ... with 31 more rows
plot1 <- table_casualties %>% ggplot(aes(x = casualties, y = evtype)) + geom_point()
plot1 + labs(title = "Average Amount of Casualties per Weather Event Type",  x = "Casualties on avg", y = "Event type", caption = "Figure 1. Average amount of casualties caused by different weather events. \nCasualties include fatalities and injuries caused by the weather event.") + theme (plot.caption=element_text(hjust=0, vjust=0.5))

The table and the plot above show tsunami is the most harmful weather event to puclic healt. The number of casualties (killed and injured) on average in tsunami is eight persons. The second most harmful weather event to puclic health is excessive heat with five casualties on average and the third most harmful is hurricane with less than five casualties on average.

table_economics <- data_evtypes %>% group_by(evtype) %>% summarise(n = n(), economics = mean(propdmg_nonexp + cropdmg_nonexp)) %>% arrange(desc(economics))
table_economics
## # A tibble: 41 × 3
##            evtype     n   economics
##             <chr> <int>       <dbl>
## 1       hurricane   299 303921497.7
## 2  tropical storm   697  12064973.5
## 3         tsunami    20   7204100.0
## 4         drought  2608   5758695.5
## 5           flood 29513   5453261.9
## 6       ice storm  2007   4467883.3
## 7        wildfire  4239   2099530.6
## 8    frost/freeze  1915   1060008.1
## 9         tornado 60687    971532.6
## 10   winter storm 11439    592835.1
## # ... with 31 more rows
plot2 <- table_economics %>% ggplot(aes(x = economics, y = evtype)) + geom_point()
plot2 + labs(title = "Log10 of Average Economic Consequences per \nWeather Event Type", x ="log10 of Economic Consequences ($)", y ="Event type", caption = "Figure 2. Average amount of economic consequences caused by different weather events. \nEconomic consequnces include consequences on property and crops.") + theme(plot.caption=element_text(hjust=0, vjust=0.5)) + 
  scale_x_log10()

The table and the plot above show that hurricane is the weather event that has the greates economical consequences. Economical losses due to damages on property and crops is on average over 300.000.000 $ in a hurricane. Economical consequences are second highest in tropical storms. Economical losses in the event of tropical strom is on average 120.000.000 $. On average, economic consequences in the case of tsunami are third highest with 72.000.000 $.