Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. The assignment is therefore to explore the storm data in order to answer the following two questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

Packages used

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)

Downloading the compressed data (no separate unzip step is needed: the .bz2 file is read directly below)

# Download the compressed storm data once; later runs reuse the local copy.
#
# The transfer goes to a temporary ".part" file that is renamed only after
# download.file() reports success. Writing straight to the final name would
# let an interrupted transfer leave a truncated file behind, which the
# file.exists() guard would then accept on every later run.
if (!file.exists("StormData.csv.bz2")) {
  Original_Data_URL <- "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  partial_download <- "StormData.csv.bz2.part"

  # R's default 60 second timeout is documented as often insufficient for
  # large files, so raise it for this transfer only and restore it afterwards.
  previous_options <- options(timeout = max(600, getOption("timeout")))
  download_status <- tryCatch(
    # mode = "wb": the archive is binary and must not be rewritten as text.
    download.file(Original_Data_URL, destfile = partial_download, mode = "wb"),
    finally = options(previous_options)
  )

  # download.file() returns 0 on success; anything else means the file is
  # unusable, so discard it and stop instead of analysing partial data.
  if (download_status != 0 ||
      !file.rename(partial_download, "StormData.csv.bz2")) {
    unlink(partial_download)
    stop("Download of StormData.csv.bz2 failed", call. = FALSE)
  }
}

Loading the data

# Read the bzip2-compressed CSV directly, keeping text columns as character
# vectors (the EVTYPE regrouping further down relies on string replacement).
# `FALSE` is spelled out because `F` is an ordinary variable that can be
# reassigned.
# NOTE(review): the recorded run warned "EOF within quoted string", which
# suggests the file on disk was incomplete or has an unbalanced quote --
# confirm the row count against the published data set.
data <- read.csv("StormData.csv.bz2", stringsAsFactors = FALSE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string

Preparing the required data: converting the event begin dates

# Convert the begin-date strings (month/day/year followed by a time of day)
# into Date values; the time-of-day part is dropped by the Date class.
data[["BGN_DATE"]] <- as.Date(
  data[["BGN_DATE"]],
  format = "%m/%d/%Y %H:%M:%S"
)

Summary of the event dates and selection of the recent records

# Show the range and quartiles of the parsed begin dates.
summary(data[["BGN_DATE"]])
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "1950-01-03" "1989-06-14" "1997-05-30" "1993-08-11" "2001-07-30" "2005-12-31"
# Keep only events that began strictly after 2002-01-01. Rows whose date
# could not be parsed (NA) are dropped explicitly: a bare logical comparison
# would yield NA for them, and NA row indices produce all-NA rows.
is_recent <- !is.na(data$BGN_DATE) &
  data$BGN_DATE > as.Date("2002-01-01", "%Y-%m-%d")
recentdata <- data[is_recent, ]

# Columns needed for the health and economic analyses.
selectNames <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)

# all_of() marks selectNames as an external character vector; passing it
# bare to select() is deprecated since tidyselect 1.1.0.
dataset <- select(recentdata, all_of(selectNames))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(selectNames)
## 
##   # Now:
##   data %>% select(all_of(selectNames))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.

Regrouping EVTYPE

# Collapse the raw EVTYPE strings into broad categories.
#
# Each rule is "category = regular expression". Rules are applied in the
# order listed and matching is case-insensitive. Order matters: once a value
# has been replaced by a category name, later rules are tested against that
# new name rather than the original text (e.g. anything matched by the
# TSTM rule becomes "TSTM" and so is no longer seen by the WIND rule).
dataset$EVTYPE <- local({
  regroup_rules <- c(
    FLOOD      = "FLOOD",
    TORNADO    = "TORNADO",
    TSTM       = "TSTM|THUNDERSTORM",
    STORM      = "TROPICAL|STORM",
    HURRICANE  = "HURRICANE",
    SNOW       = "ICE|SNOW|FROST|SLEET",
    FOG        = "FOG",
    COLD       = "COLD|WINDCHILL|FREEZE|WINTER",
    HEAT       = "HEAT|WARM|HOT",
    CLOUD      = "CLOUD|FUNNEL",
    HAIL       = "HAIL",
    DROUGHT    = "DROUGHT|DRY",
    LIGHTNING  = "LIGHTNING",
    FIRE       = "FIRE",
    RAIN       = "RAIN|SHOWER",
    WATERSPOUT = "WATERSPOUT",
    SURF       = "SURF",
    CURRENT    = "CURRENT",
    WIND       = "WIND|MICROBURST",
    BLIZZARD   = "BLIZZARD",
    LANDSLIDE  = "SLIDE",
    DUST       = "DUST"
  )

  event_type <- dataset$EVTYPE
  for (category in names(regroup_rules)) {
    hits <- grepl(regroup_rules[[category]], event_type, ignore.case = TRUE)
    event_type[hits] <- category
  }

  # Values that matched no rule keep their original text and become their
  # own factor levels.
  factor(event_type)
})

Calculating Property and Crop Damage

# Translate a damage exponent code into a numeric multiplier.
#
# exponent_code: vector of codes; K, M and B (in either case) stand for
#   thousands, millions and billions.
# Returns a numeric vector of the same length. Every other code (blank,
# digits, symbols, missing) maps to 1, i.e. the damage figure is taken at
# face value.
# Matching is case-insensitive so that lowercase "k"/"m"/"b" entries are not
# silently treated as a multiplier of 1.
damage_multiplier <- function(exponent_code) {
  known_codes <- c(K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known_codes[toupper(as.character(exponent_code))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# The *EXP columns are overwritten with their numeric multipliers, then the
# absolute damage values are derived from mantissa * multiplier.
dataset$PROPDMGEXP <- damage_multiplier(dataset$PROPDMGEXP)
dataset$CROPDMGEXP <- damage_multiplier(dataset$CROPDMGEXP)
dataset$PROPDMGVALUE <- dataset$PROPDMG * dataset$PROPDMGEXP
dataset$CROPDMGVALUE <- dataset$CROPDMG * dataset$CROPDMGEXP

Most harmful event types to population health

# Total fatalities and injuries per event category, ordered by their
# combined count (largest first). na.rm = TRUE keeps a single missing count
# from turning a whole category's total into NA.
healthdata <- dataset %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(FATALITIES + INJURIES))

# head() returns at most ten rows; indexing with [1:10, ] would reach past
# the end of the table whenever fewer than ten categories exist.
mostHarm <- head(healthdata, 10)
print(mostHarm)
## # A tibble: 10 × 3
##    EVTYPE    FATALITIES INJURIES
##    <fct>          <dbl>    <dbl>
##  1 TORNADO          179     2931
##  2 HURRICANE         60     1270
##  3 HEAT             284      919
##  4 LIGHTNING        151      926
##  5 TSTM              66      857
##  6 STORM             79      662
##  7 FLOOD            236      296
##  8 FIRE              25      487
##  9 WIND              81      384
## 10 CURRENT          136      138

Plotting the harmful effects

# Reshape to long form: one row per (event category, measure) so the two
# measures can be stacked in a single bar. pivot_longer() replaces the
# superseded gather().
plot1 <- pivot_longer(
  mostHarm,
  cols = c(FATALITIES, INJURIES),
  names_to = "TYPE",
  values_to = "VALUE"
)

# reorder() sorts the categories by the mean of -VALUE; every category has
# exactly two rows here, so this is the same as sorting by the combined
# total, largest first. geom_col() is the direct form of
# geom_bar(stat = "identity").
ggplot(plot1, aes(x = reorder(EVTYPE, -VALUE), y = VALUE, fill = TYPE)) +
  geom_col() +
  labs(
    title = "Harmful Events to Population Health",
    x = "Event Type",
    y = "Count"
  )

Event types with the greatest economic consequences

# Total property and crop damage per event category, ordered by their
# combined value (largest first). na.rm = TRUE keeps a single missing value
# from turning a whole category's total into NA.
ecsdata <- dataset %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPDMGVALUE = sum(PROPDMGVALUE, na.rm = TRUE),
    CROPDMGVALUE = sum(CROPDMGVALUE, na.rm = TRUE)
  ) %>%
  arrange(desc(PROPDMGVALUE + CROPDMGVALUE))

# head() returns at most ten rows; indexing with [1:10, ] would reach past
# the end of the table whenever fewer than ten categories exist.
mostEcon <- head(ecsdata, 10)

# Reshape to long form: one row per (event category, damage kind).
# pivot_longer() replaces the superseded gather().
plot2 <- pivot_longer(
  mostEcon,
  cols = c(PROPDMGVALUE, CROPDMGVALUE),
  names_to = "TYPE",
  values_to = "VALUE"
)

# The factor levels are listed explicitly so each legend label is tied to
# its column name instead of depending on alphabetical level ordering.
damage_kind <- factor(
  plot2$TYPE,
  levels = c("CROPDMGVALUE", "PROPDMGVALUE"),
  labels = c("crop damage", "property damage")
)

# The y axis shows monetary damage, not a count of events, so it is labelled
# accordingly. Categories are ordered by combined damage, largest first
# (reorder() uses the mean of -VALUE over each category's two rows).
ggplot(plot2, aes(x = reorder(EVTYPE, -VALUE), y = VALUE, fill = damage_kind)) +
  geom_col() +
  labs(
    title = "Economically Harmful Events",
    x = "Event Type",
    y = "Damage (US dollars)",
    fill = "Type of damage"
  )

The answers to the two questions are:

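  1. Across the United States, among events that began after 1 January 2002, tornadoes are the most harmful event type with respect to population health (fatalities and injuries combined);
  2. Across the United States, over the same period, floods have the greatest economic consequences (property and crop damage combined).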