Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
#Read data
library(ggplot2)
## Registered S3 methods overwritten by 'ggplot2':
## method from
## [.quosures rlang
## c.quosures rlang
## print.quosures rlang
library(grid)
library(gridExtra)
library(data.table)
# Load the raw NOAA storm data straight from the compressed CSV, keeping
# text columns as character vectors rather than factors.
dataset <- read.csv("repdata-data-StormData.csv.bz2", stringsAsFactors = FALSE)

# Pick the columns used by the analysis by name. Selecting from `dataset`
# directly (instead of rebuilding a data.frame column by column and renaming
# it) keeps the column types exactly as they were read.
dataset.sub <- as.data.table(
  dataset[, c(
    "STATE",
    "EVTYPE",
    "INJURIES",
    "FATALITIES",
    "PROPDMG",
    "PROPDMGEXP",
    "CROPDMG",
    "CROPDMGEXP"
  )]
)

# Keep only events with a real event type ("?" is discarded) that caused at
# least one injury, fatality, or some property/crop damage. STATE is not
# carried forward past this point.
dataset.sub <- dataset.sub[
  EVTYPE != "?" &
    (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
]
# Upper-case both exponent columns so that e.g. "k" and "K" share one key.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
dataset.sub[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Lookup tables: exponent code -> numeric multiplier.
# NOTE(review): the first key in each table is the literal two-character
# string `""`, not the empty string; a name lookup with an empty code matches
# nothing and is handled by the NA fallback further down.
propDmgKey <- c(
  '""' = 1, "-" = 1, "+" = 1,
  "0" = 1, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8, "9" = 1e9,
  "H" = 1e2, "K" = 1e3, "M" = 1e6, "B" = 1e9
)
cropDmgKey <- c(
  '""' = 1, "?" = 1, "0" = 1,
  "K" = 1e3, "M" = 1e6, "B" = 1e9
)

# Swap each code for its multiplier; any code without a table entry comes
# back as NA and is then treated as a multiplier of 1.
dataset.sub[, PROPDMGEXP := propDmgKey[PROPDMGEXP]]
dataset.sub[is.na(PROPDMGEXP), PROPDMGEXP := 1]

dataset.sub[, CROPDMGEXP := cropDmgKey[CROPDMGEXP]]
dataset.sub[is.na(CROPDMGEXP), CROPDMGEXP := 1]
# Per-record cost: the reported damage figure times its (already numeric)
# exponent column, for property and crops separately.
# Plain `<-` is used: `<<-` assigns into an enclosing/global environment and
# would leak or misplace the result if this script were evaluated anywhere
# other than the global environment.
dataset.sub <- dataset.sub[, .(
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, propCost = PROPDMG * PROPDMGEXP,
  CROPDMG, CROPDMGEXP, cropCost = CROPDMG * CROPDMGEXP
)]

# Sum property, crop, and combined cost per event type.
totalCost <- dataset.sub[, .(
  propCost = sum(propCost),
  cropCost = sum(cropCost),
  Total_Cost = sum(propCost) + sum(cropCost)
), by = .(EVTYPE)]

# Rank by combined cost and keep the five most expensive event types.
# head() is used rather than [1:5, ] so that fewer than five groups does not
# produce padded all-NA rows.
totalCost <- totalCost[order(Total_Cost, decreasing = TRUE)]
totalCost <- head(totalCost, 5)
head(totalCost, 3)
## EVTYPE propCost cropCost Total_Cost
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380676 414953270 57362333946
# Sum fatalities, injuries, and their combined count per event type.
# Plain `<-` is used instead of `<<-` so the result is bound in the
# environment the script is actually evaluated in.
totalInjuries <- dataset.sub[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  totals = sum(FATALITIES) + sum(INJURIES)
), by = .(EVTYPE)]

# Rank by fatalities (not by the combined total) and keep the top five.
# head() avoids the all-NA padding rows that [1:5, ] yields when there are
# fewer than five event types.
totalInjuries <- totalInjuries[order(FATALITIES, decreasing = TRUE)]
totalInjuries <- head(totalInjuries, 5)
head(totalInjuries, 3)
## EVTYPE FATALITIES INJURIES totals
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
# Reshape the casualty summary to long format: one row per
# (event type, measure), with the measure name in `Results` and its count in
# `value`, ready for a grouped bar chart.
EVENTS_stuff <- melt(
  totalInjuries,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "totals"),
  variable.name = "Results",
  value.name = "value"
)
head(EVENTS_stuff, 3)
## EVTYPE Results value
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
# Grouped bar chart of fatalities, injuries, and their total for the top five
# event types. Bars are dodged side by side per measure (`Results`); the
# x axis is ordered via reorder() on the negated values so larger events
# come first.
p1 <- ggplot(EVENTS_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = Results), position = "dodge") +
  ylab("Frequency Count") +
  xlab("Events") +
  ggtitle("Top 5 Harmful to US Population Health") +
  theme(
    # Centre the title and rotate the event labels so long names fit.
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
p1
### Greatest Economic Consequences
# Reshape the cost summary to long format: one row per
# (event type, cost category), with the category name in `Types` and the
# dollar amount in `value`. Assigned with `<-` rather than `<<-` so the
# object lands in the environment the script is evaluated in.
E_consequences <- melt(totalCost, id.vars = "EVTYPE", variable.name = "Types")
head(E_consequences, 3)
## EVTYPE Types value
## 1: FLOOD propCost 144657709807
## 2: HURRICANE/TYPHOON propCost 69305840000
## 3: TORNADO propCost 56947380676
# Grouped bar chart of property, crop, and total cost for the five costliest
# event types, with one dodged bar per cost category (`Types`).
p2 <- ggplot(
  E_consequences,
  aes(x = reorder(EVTYPE, -value), y = value, fill = Types)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Events",
    y = "Cost (dollars)",
    title = "Top 5 US Storm Events causing Economic Consequences"
  ) +
  theme(
    # Rotate event labels to vertical and centre the plot title.
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
p2