Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. Hence the assignment is to explore the storm data to answer these two questions: (1) which types of events are most harmful to population health, and (2) which types of events have the greatest economic consequences?
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Fetch the compressed storm data once; later runs reuse the local copy.
# The archive is downloaded to a temporary ".part" name and only renamed to
# its final name after the transfer succeeds, so an interrupted download can
# never be mistaken for a complete file by the file.exists() check.
if (!file.exists("StormData.csv.bz2")) {
  Original_Data_URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  partial_download <- "StormData.csv.bz2.part"
  # mode = "wb" requests a binary transfer so the bzip2 archive is written
  # byte-for-byte (text mode rewrites line endings on Windows).
  download_status <- tryCatch(
    download.file(Original_Data_URL, destfile = partial_download, mode = "wb"),
    error = function(e) {
      unlink(partial_download)
      stop("Download of StormData.csv.bz2 failed: ", conditionMessage(e),
           call. = FALSE)
    }
  )
  # download.file() returns 0 on success; promote the file only in that case.
  if (download_status != 0 ||
      !file.rename(partial_download, "StormData.csv.bz2")) {
    unlink(partial_download)
    stop("Download of StormData.csv.bz2 did not complete.", call. = FALSE)
  }
}
# Load the raw storm table. Text columns stay character (not factor) so the
# event labels and damage-exponent codes can be edited as plain strings.
data <- read.csv("StormData.csv.bz2", stringsAsFactors = FALSE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec, :
## EOF within quoted string
# NOTE(review): the warning recorded above means the reader reached end of
# file while still inside a quoted field, so the parsed table is probably
# truncated (the date summary below stops at 2005-12-31). Presumably the local
# archive is incomplete; re-download it and confirm the warning disappears.

# BGN_DATE arrives as "m/d/Y H:M:S" text; as.Date() yields a Date, so the
# time-of-day part is not retained.
data$BGN_DATE <- as.Date(data$BGN_DATE, "%m/%d/%Y %H:%M:%S")
summary(data$BGN_DATE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "1950-01-03" "1989-06-14" "1997-05-30" "1993-08-11" "2001-07-30" "2005-12-31"

# Keep only events that began after 2002-01-01. filter() drops rows whose date
# is NA, whereas base `[` with an NA index would insert all-NA rows.
recentdata <- filter(data, BGN_DATE > as.Date("2002-01-01", "%Y-%m-%d"))

# Columns needed for the health and economic analyses. all_of() is the
# supported way to select with an external character vector and avoids the
# tidyselect deprecation warning that a bare vector triggers.
selectNames <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP",
                 "CROPDMG", "CROPDMGEXP")
dataset <- select(recentdata, all_of(selectNames))
# Collapse the free-text EVTYPE labels into broad categories.
# Each entry maps a category name to a case-insensitive regular expression.
# Rules are applied from top to bottom and every rule overwrites all labels it
# matches, so the order of the entries is part of the behaviour: a label
# claimed by an earlier rule is only changed again if a later pattern matches
# the replacement text.
evtype_patterns <- c(
  FLOOD      = "FLOOD",
  TORNADO    = "TORNADO",
  TSTM       = "TSTM|THUNDERSTORM",
  STORM      = "TROPICAL|STORM",
  HURRICANE  = "HURRICANE",
  SNOW       = "ICE|SNOW|FROST|SLEET",
  FOG        = "FOG",
  COLD       = "COLD|WINDCHILL|FREEZE|WINTER",
  HEAT       = "HEAT|WARM|HOT",
  CLOUD      = "CLOUD|FUNNEL",
  HAIL       = "HAIL",
  DROUGHT    = "DROUGHT|DRY",
  LIGHTNING  = "LIGHTNING",
  FIRE       = "FIRE",
  RAIN       = "RAIN|SHOWER",
  WATERSPOUT = "WATERSPOUT",
  SURF       = "SURF",
  CURRENT    = "CURRENT",
  WIND       = "WIND|MICROBURST",
  BLIZZARD   = "BLIZZARD",
  LANDSLIDE  = "SLIDE",
  DUST       = "DUST"
)

# Fold the rules over the label vector in order, then store the result as a
# factor for grouping.
dataset$EVTYPE <- factor(
  Reduce(
    function(labels, category) {
      matched <- grepl(evtype_patterns[[category]], labels, ignore.case = TRUE)
      labels[matched] <- category
      labels
    },
    names(evtype_patterns),
    dataset$EVTYPE
  )
)
# Translate damage-exponent codes into numeric multipliers.
#
# exponent: character vector of exponent codes.
# Returns a numeric vector of the same length: 1e3 / 1e6 / 1e9 for "K" / "M" /
# "B" in either upper or lower case, 1 for any other non-missing code
# (including the empty string), and NA where the code itself is NA.
damage_multiplier <- function(exponent) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  # Upper-casing first makes "k", "m" and "b" count as well; unknown codes
  # come back from the lookup as NA.
  multiplier <- unname(known[toupper(exponent)])
  # Unknown but present codes carry no scaling.
  multiplier[is.na(multiplier) & !is.na(exponent)] <- 1
  multiplier
}

# Replace each exponent code by its multiplier, then scale the reported
# mantissas into full damage amounts.
dataset$PROPDMGEXP <- damage_multiplier(dataset$PROPDMGEXP)
dataset$CROPDMGEXP <- damage_multiplier(dataset$CROPDMGEXP)
dataset$PROPDMGVALUE <- dataset$PROPDMG * dataset$PROPDMGEXP
dataset$CROPDMGVALUE <- dataset$CROPDMG * dataset$CROPDMGEXP
# Total fatalities and injuries per event category, ranked by their sum.
healthdata <- dataset %>%
  group_by(EVTYPE) %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES)) %>%
  arrange(desc(FATALITIES + INJURIES))

# Ten most harmful categories. head() returns fewer rows when fewer than ten
# categories exist, instead of padding with all-NA rows like `[1:10, ]`.
mostHarm <- head(healthdata, 10)
print(mostHarm)
## # A tibble: 10 × 3
## EVTYPE FATALITIES INJURIES
## <fct> <dbl> <dbl>
## 1 TORNADO 179 2931
## 2 HURRICANE 60 1270
## 3 HEAT 284 919
## 4 LIGHTNING 151 926
## 5 TSTM 66 857
## 6 STORM 79 662
## 7 FLOOD 236 296
## 8 FIRE 25 487
## 9 WIND 81 384
## 10 CURRENT 136 138

# Long format: one row per (event category, harm type) for a stacked bar chart.
plot1 <- pivot_longer(
  mostHarm,
  cols = c(FATALITIES, INJURIES),
  names_to = "TYPE",
  values_to = "VALUE"
)

# Bars are ordered by each category's combined total, largest first.
ggplot(plot1, aes(x = reorder(EVTYPE, -VALUE, FUN = sum), y = VALUE, fill = TYPE)) +
  geom_col() +
  labs(title = "Harmful Events to Population Health", x = "Event Type", y = "Count")
# Total property and crop damage per event category, ranked by their sum.
ecsdata <- dataset %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPDMGVALUE = sum(PROPDMGVALUE),
    CROPDMGVALUE = sum(CROPDMGVALUE)
  ) %>%
  arrange(desc(PROPDMGVALUE + CROPDMGVALUE))

# Ten costliest categories. head() returns fewer rows when fewer than ten
# categories exist, instead of padding with all-NA rows like `[1:10, ]`.
mostEcon <- head(ecsdata, 10)

# Long format: one row per (event category, damage type) for a stacked bar chart.
plot2 <- pivot_longer(
  mostEcon,
  cols = c(PROPDMGVALUE, CROPDMGVALUE),
  names_to = "TYPE",
  values_to = "VALUE"
)

# Pair each column name with its legend label explicitly rather than relying
# on the alphabetical ordering of factor levels.
plot2$TYPE <- factor(
  plot2$TYPE,
  levels = c("CROPDMGVALUE", "PROPDMGVALUE"),
  labels = c("crop damage", "property damage")
)

# Bars are ordered by each category's combined damage, largest first.
# The y axis shows a monetary amount, not a count of events.
# NOTE(review): the unit is assumed to be US dollars - confirm against the
# dataset documentation.
ggplot(plot2, aes(x = reorder(EVTYPE, -VALUE, FUN = sum), y = VALUE, fill = TYPE)) +
  geom_col() +
  labs(title = "Economically Harmful Events", x = "Event Type", y = "Damage (USD)") +
  guides(fill = guide_legend(title = "Type of damage"))