Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

Reading and subsetting the dataset

    #Read data
    library(ggplot2)
    library(grid)
    library(gridExtra)
    library(data.table)
    # Load the raw NOAA storm data, keeping text columns as plain character
    # vectors rather than factors.
    dataset <- read.csv('repdata-data-StormData.csv.bz2', stringsAsFactors=FALSE)

    # Only the event type, the casualty counts and the damage figures (with
    # their exponent codes) are analysed. Selecting the columns by name, rather
    # than rebuilding the frame with data.frame(dataset$...), keeps the original
    # column names and keeps EVTYPE / PROPDMGEXP / CROPDMGEXP as character:
    # data.frame() converts strings back into factors on R < 4.0.
    keep.cols <- c("EVTYPE", "FATALITIES", "INJURIES",
                   "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
    dataset.sub <- data.table(dataset[, keep.cols])

    # Drop the placeholder event type "?" and every event that recorded no
    # injuries, no fatalities and no property or crop damage.
    dataset.sub <- dataset.sub[EVTYPE != "?" &
                                 (INJURIES > 0 | FATALITIES > 0 |
                                    PROPDMG > 0 | CROPDMG > 0)]

Converting the exponent columns (PROPDMGEXP, CROPDMGEXP) to numeric multipliers instead of symbols (-, +, H, K, etc.)

# Upper-case both exponent columns so that e.g. "k" and "K" share one key.
cols <- c("PROPDMGEXP", "CROPDMGEXP")
dataset.sub[, (cols) := lapply(.SD, toupper), .SDcols = cols]

# Exponent code -> numeric multiplier lookup tables.
# A blank exponent cannot be used as a lookup name (R never matches the empty
# string when indexing by name), so blanks -- like any other code missing from
# a table -- come back as NA and are set to a multiplier of 1 below.
propDmgKey <- c("-" = 10^0, "+" = 10^0, "0" = 10^0,
                "1" = 10^1, "2" = 10^2, "3" = 10^3,
                "4" = 10^4, "5" = 10^5, "6" = 10^6,
                "7" = 10^7, "8" = 10^8, "9" = 10^9,
                "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9)
cropDmgKey <- c("?" = 10^0, "0" = 10^0, "K" = 10^3, "M" = 10^6, "B" = 10^9)

# Replace each property exponent code with its multiplier; unmatched -> 1.
dataset.sub[, PROPDMGEXP := unname(propDmgKey[as.character(PROPDMGEXP)])]
dataset.sub[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]

# Same for the crop exponent codes.
dataset.sub[, CROPDMGEXP := unname(cropDmgKey[as.character(CROPDMGEXP)])]
dataset.sub[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]

Calculating Cost

# Per-event dollar cost: each damage figure times the value in its *EXP column.
# Assumes PROPDMGEXP / CROPDMGEXP already hold numeric multipliers; a factor or
# character column here makes the products NA.
# Plain `<-` is used: this is a top-level assignment, so the superassignment
# operator `<<-` is unnecessary.
dataset.sub1 <- dataset.sub[, .(EVTYPE, FATALITIES, INJURIES,
                                PROPDMG, PROPDMGEXP,
                                propCost = PROPDMG * PROPDMGEXP,
                                CROPDMG, CROPDMGEXP,
                                cropCost = CROPDMG * CROPDMGEXP)]
## Warning in Ops.factor(PROPDMG, PROPDMGEXP): '*' not meaningful for factors
## Warning in Ops.factor(CROPDMG, CROPDMGEXP): '*' not meaningful for factors
# Total property cost, crop cost and combined cost for each event type.
totalCost <- dataset.sub1[, .(propCost = sum(propCost),
                              cropCost = sum(cropCost),
                              Total_Cost = sum(propCost) + sum(cropCost)),
                          by = .(EVTYPE)]

# Rank event types by combined cost, most expensive first.
totalCost <- totalCost[order(Total_Cost, decreasing = TRUE), ]

# Keep the five costliest event types. head() returns fewer rows when fewer
# than five event types exist, whereas [1:5, ] would pad with all-NA rows.
totalCost <- head(totalCost, 5)

head(totalCost, 3)
##       EVTYPE propCost cropCost Total_Cost
## 1:   TORNADO       NA       NA         NA
## 2: TSTM WIND       NA       NA         NA
## 3:      HAIL       NA       NA         NA

Calculating Injuries and Fatalities

# Total fatalities, injuries and their combined count for each event type.
totalInjuries <- dataset.sub1[, .(FATALITIES = sum(FATALITIES),
                                  INJURIES = sum(INJURIES),
                                  totals = sum(FATALITIES) + sum(INJURIES)),
                              by = .(EVTYPE)]

# Rank event types by number of fatalities, deadliest first.
totalInjuries <- totalInjuries[order(FATALITIES, decreasing = TRUE), ]

# Keep the five deadliest event types. head() returns fewer rows when fewer
# than five event types exist, whereas [1:5, ] would pad with all-NA rows.
totalInjuries <- head(totalInjuries, 5)

head(totalInjuries, 3)
##            EVTYPE FATALITIES INJURIES totals
## 1:        TORNADO       5633    91346  96979
## 2: EXCESSIVE HEAT       1903     6525   8428
## 3:    FLASH FLOOD        978     1777   2755

Results

Most Harmful to Population Health

# Reshape the casualty summary to long form: one row per event type and
# measure, with the measure name stored in the "Results" column.
EVENTS_stuff <- melt(
  data = totalInjuries,
  id.vars = "EVTYPE",
  variable.name = "Results"
)
head(EVENTS_stuff, n = 3)
##            EVTYPE    Results value
## 1:        TORNADO FATALITIES  5633
## 2: EXCESSIVE HEAT FATALITIES  1903
## 3:    FLASH FLOOD FATALITIES   978
# Grouped bar chart of the casualty measures for the top five event types.
# Bars are ordered along the x axis by descending value and coloured by
# measure ("Results"); x-axis labels are rotated to stay readable.
p1 <- ggplot(EVENTS_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = Results), position = "dodge") +
  ylab("Frequency Count") +
  xlab("Events") +
  ggtitle("Top 5 Harmful to US Population Health") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 90, hjust = 1))
p1

### Greatest Economic Consequences

# Reshape the cost summary to long form: one row per event type and cost
# measure, with the measure name stored in the "Types" column.
E_consequences <- melt(
  data = totalCost,
  id.vars = "EVTYPE",
  variable.name = "Types"
)
head(E_consequences, n = 5)
##                   EVTYPE    Types value
## 1:               TORNADO propCost    NA
## 2:             TSTM WIND propCost    NA
## 3:                  HAIL propCost    NA
## 4: ICE STORM/FLASH FLOOD propCost    NA
## 5:          WINTER STORM propCost    NA
# Grouped bar chart of the cost measures for the top five event types.
# Bars are ordered along the x axis by descending value and coloured by
# cost measure ("Types"); x-axis labels are rotated and the title is centred.
p2 <- ggplot(
  data = E_consequences,
  mapping = aes(x = reorder(EVTYPE, -value), y = value)
) +
  geom_bar(mapping = aes(fill = Types), stat = "identity", position = "dodge") +
  labs(
    x = "Events",
    y = "Cost (dollars)",
    title = "Top 5 US Storm Events causing Economic Consequences"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )

p2
## Warning: Removed 15 rows containing missing values (geom_bar).