Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
#Read data
library(ggplot2)
library(grid)
library(gridExtra)
library(data.table)
# Load the raw NOAA storm data, keeping text columns as plain character
# vectors (no factors).
dataset <- read.csv("repdata-data-StormData.csv.bz2", stringsAsFactors = FALSE)

# Build the working table straight from the columns the analysis needs.
# Going through data.frame() here would turn the text columns back into
# factors on R < 4.0, which breaks the multiplier arithmetic further down.
dataset.sub <- data.table(
  EVTYPE = dataset$EVTYPE,
  FATALITIES = dataset$FATALITIES,
  INJURIES = dataset$INJURIES,
  PROPDMG = dataset$PROPDMG,
  PROPDMGEXP = dataset$PROPDMGEXP,
  CROPDMG = dataset$CROPDMG,
  CROPDMGEXP = dataset$CROPDMGEXP
)

# Keep only identifiable events that caused some harm or damage.
dataset.sub <- dataset.sub[
  EVTYPE != "?" &
    (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)
]

# Normalise the exponent codes to upper-case character so that "k" and "K"
# (and so on) map to the same multiplier.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
dataset.sub[, (cols) := lapply(.SD, function(x) toupper(as.character(x))),
            .SDcols = cols]

# Exponent code -> numeric multiplier. Codes that are not listed (including
# the empty string) fall back to a multiplier of 1 in damage_multiplier().
propDmgKey <- c("-" = 10^0, "+" = 10^0, "0" = 10^0, "1" = 10^1, "2" = 10^2,
                "3" = 10^3, "4" = 10^4, "5" = 10^5, "6" = 10^6, "7" = 10^7,
                "8" = 10^8, "9" = 10^9,
                "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9)
cropDmgKey <- c("?" = 10^0, "0" = 10^0, "K" = 10^3, "M" = 10^6, "B" = 10^9)

# Translate a vector of exponent codes into numeric multipliers.
#   codes: character vector of exponent codes.
#   key:   named numeric vector mapping code -> multiplier.
# Returns an unnamed numeric vector the same length as `codes`; codes with
# no entry in `key` (or NA) get a multiplier of 1.
damage_multiplier <- function(codes, key) {
  mult <- unname(key[match(codes, names(key))])
  mult[is.na(mult)] <- 1
  mult
}

# Replace the text codes with their numeric multipliers.
dataset.sub[, PROPDMGEXP := damage_multiplier(PROPDMGEXP, propDmgKey)]
dataset.sub[, CROPDMGEXP := damage_multiplier(CROPDMGEXP, cropDmgKey)]

# Dollar cost per record = reported amount * multiplier.
dataset.sub1 <- dataset.sub[, .(EVTYPE, FATALITIES, INJURIES,
                                PROPDMG, PROPDMGEXP,
                                propCost = PROPDMG * PROPDMGEXP,
                                CROPDMG, CROPDMGEXP,
                                cropCost = CROPDMG * CROPDMGEXP)]
# NOTE: an earlier run of this step emitted
#   "'*' not meaningful for factors"
# warnings and produced NA costs; building the table without factor columns
# and using explicit numeric multipliers is meant to prevent that.
# Property cost, crop cost and their combined total for each event type.
totalCost <- dataset.sub1[, .(propCost = sum(propCost),
                              cropCost = sum(cropCost),
                              Total_Cost = sum(propCost) + sum(cropCost)),
                          by = .(EVTYPE)]
# Most expensive event types first.
totalCost <- totalCost[order(Total_Cost, decreasing = TRUE), ]
# Keep the top five; head() does not pad with NA rows when fewer than five
# event types are present (row-indexing with 1:5 would).
totalCost <- head(totalCost, 5)
head(totalCost, 3)
# Recorded output from a previous run (the costs were NA in that run;
# re-run to refresh):
## EVTYPE propCost cropCost Total_Cost
## 1: TORNADO NA NA NA
## 2: TSTM WIND NA NA NA
## 3: HAIL NA NA NA
# Fatalities, injuries and their combined count for each event type.
totalInjuries <- dataset.sub1[, .(FATALITIES = sum(FATALITIES),
                                  INJURIES = sum(INJURIES),
                                  totals = sum(FATALITIES) + sum(INJURIES)),
                              by = .(EVTYPE)]
# Rank by number of fatalities, deadliest first.
totalInjuries <- totalInjuries[order(FATALITIES, decreasing = TRUE), ]
# Keep the top five; head() does not pad with NA rows when fewer than five
# event types are present (row-indexing with 1:5 would).
totalInjuries <- head(totalInjuries, 5)
head(totalInjuries, 3)
# Recorded output from a previous run:
## EVTYPE FATALITIES INJURIES totals
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
# Reshape to long form (one row per event type and measure) so the measures
# can be drawn as side-by-side bars.
EVENTS_stuff <- melt(totalInjuries, id.vars = "EVTYPE",
                     variable.name = "Results")
head(EVENTS_stuff, 3)
# Recorded output from a previous run:
## EVTYPE Results value
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978

# Dodged bar chart of the health impact measures, event types ordered by
# descending value. The title typo ("Ppopulation") is corrected.
p1 <- ggplot(EVENTS_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = Results), position = "dodge") +
  ylab("Frequency Count") +
  xlab("Events") +
  ggtitle("Top 5 Harmful to US Population Health") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 90, hjust = 1))
p1
### Greatest Economic Consequences
# Long-form copy of the cost table: one row per event type and cost measure.
E_consequences <- melt(
  totalCost,
  id.vars = "EVTYPE",
  variable.name = "Types"
)
head(E_consequences, 5)
# Recorded output from a previous run:
## EVTYPE Types value
## 1: TORNADO propCost NA
## 2: TSTM WIND propCost NA
## 3: HAIL propCost NA
## 4: ICE STORM/FLASH FLOOD propCost NA
## 5: WINTER STORM propCost NA

# Dodged bar chart of the cost measures per event type, ordered by
# descending value, with vertical axis labels and a centred title.
p2 <- ggplot(
  E_consequences,
  aes(x = reorder(EVTYPE, -value), y = value)
) +
  geom_bar(aes(fill = Types), stat = "identity", position = "dodge") +
  labs(
    x = "Events",
    y = "Cost (dollars)",
    title = "Top 5 US Storm Events causing Economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
p2
# Recorded output from a previous run:
## Warning: Removed 15 rows containing missing values (geom_bar).