Synopsis

The data in this report comes from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database documents the occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce. In addition, it is a partial record of other significant meteorological events, such as record maximum or minimum temperatures or precipitation that occurs in connection with another event.

The data for this assignment comes in the form of a comma-separated-value (CSV) file, compressed with the bzip2 algorithm to reduce its size. It is available here:

Documentation of the database is available at the following sources:


Structure of the Analysis

The purpose of this report is to answer two questions from the NOAA database:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
    • Properties in the data that describe population health: FATALITIES, INJURIES
  2. Across the United States, which types of events have the greatest economic consequences?
    • Properties in the data that describe economic consequences: PROPDMG, CROPDMG
    • PROPDMGEXP/CROPDMGEXP: H = Hundreds, K = Thousands, M = Millions, B = Billions

The first two figures each show, side by side, the top ten weather events for the two properties named in one question (fatalities and injuries; property damage and crop damage). The last figure shows, for each question, the top ten weather events ranked by the sum of its two properties.



Data Processing

Load Packages

library(ggplot2)
library(dplyr)
library(grid)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.2.4

Load & View Data

# Read the bzip2-compressed CSV through a bzfile connection; the file is
# expected in the working directory.
storm <- read.csv(
        file = bzfile(description = "repdata-data-StormData.csv.bz2")
)
# Print a compact overview of every column and its type.
str(object = storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
##  $ BGN_TIME  : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
##  $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
##  $ STATE     : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : Factor w/ 35 levels "","  N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_LOCATI: Factor w/ 54429 levels ""," Christiansburg",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_DATE  : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_TIME  : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_LOCATI: Factor w/ 34506 levels ""," CANTON"," TULIA",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WFO       : Factor w/ 542 levels ""," CI","%SD",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ZONENAMES : Factor w/ 25112 levels "","                                                                                                                               "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : Factor w/ 436781 levels "","\t","\t\t",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Question 1: The harmfulness of weather events with respect to population health

# Total fatalities and total injuries for each event type.
health.full <- aggregate(
        cbind(FATALITIES, INJURIES) ~ EVTYPE,
        data = storm,
        FUN = sum
)

health.fatalities: EVTYPE by most fatalities

# Keep only the event type and its fatality total, then take the ten event
# types with the most fatalities (largest first).
health.fatalities <- select(health.full, EVTYPE, FATALITIES)
health.fatalities <- head(arrange(health.fatalities, desc(FATALITIES)), 10)

health.injuries: EVTYPE by most injuries

# Keep only the event type and its injury total, then take the ten event
# types with the most injuries (largest first).
health.injuries <- select(health.full, EVTYPE, INJURIES)
health.injuries <- head(arrange(health.injuries, desc(INJURIES)), 10)

health.harmful: summation of fatalities and injuries

# Combined health impact per event type: injuries plus fatalities.
health.harmful <- mutate(health.full, HARMFUL = INJURIES + FATALITIES)
# Rank by the combined total and keep the ten largest.
health.harmful <- head(arrange(health.harmful, desc(HARMFUL)), 10)
# Only the event type and the combined total are needed downstream.
health.harmful <- select(health.harmful, EVTYPE, HARMFUL)

Question 2: Weather events that have the greatest economic consequences

Subset the properties that determine the economic consequences

# Working copy holding only the event type and the four damage columns.
damage.columns <- c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
storm2 <- storm[damage.columns]

Apply the EXP multipliers to the DMG values. Entries in PROPDMGEXP and CROPDMGEXP other than H, K, M, or B (in either case) are ignored, leaving the corresponding DMG value unscaled.

# Convert a damage amount to dollars using its magnitude code.
#
# amount:    numeric vector of damage figures (PROPDMG or CROPDMG).
# magnitude: vector of magnitude codes (PROPDMGEXP or CROPDMGEXP).
# Returns `amount` with every element whose code contains H/K/M/B (either
# case) multiplied by 1e2/1e3/1e6/1e9; elements with any other code are
# returned unchanged.
dmg.to.dollars <- function(amount, magnitude) {
        multipliers <- c("[Hh]" = 100, "[Kk]" = 1000,
                         "[Mm]" = 1000000, "[Bb]" = 1000000000)
        for (pattern in names(multipliers)) {
                # grepl() coerces factors to character and yields FALSE for
                # NA, so unmatched and missing codes are simply skipped.
                matched <- grepl(pattern, magnitude)
                amount[matched] <- amount[matched] * multipliers[[pattern]]
        }
        amount
}

storm2$PROPDMG <- dmg.to.dollars(storm2$PROPDMG, storm2$PROPDMGEXP)
storm2$CROPDMG <- dmg.to.dollars(storm2$CROPDMG, storm2$CROPDMGEXP)

# Total property and crop damage (in dollars) for each event type.
money.full <- aggregate(cbind(PROPDMG, CROPDMG) ~ EVTYPE, data = storm2, sum)

money.prop: EVTYPE by highest property damage

# Keep only the event type and its property-damage total, then take the ten
# event types with the highest property damage (largest first).
money.prop <- select(money.full, EVTYPE, PROPDMG)
money.prop <- head(arrange(money.prop, desc(PROPDMG)), 10)

money.crop: EVTYPE by highest crop damage

# Keep only the event type and its crop-damage total, then take the ten
# event types with the highest crop damage (largest first).
money.crop <- select(money.full, EVTYPE, CROPDMG)
money.crop <- head(arrange(money.crop, desc(CROPDMG)), 10)

money.harmful: summation of PROPDMG and CROPDMG

# Combined economic impact per event type: property plus crop damage.
money.harmful <- mutate(money.full, HARMFUL = PROPDMG + CROPDMG)
# Rank by the combined total and keep the ten largest.
money.harmful <- head(arrange(money.harmful, desc(HARMFUL)), 10)
# Only the event type and the combined total are needed downstream.
money.harmful <- select(money.harmful, EVTYPE, HARMFUL)



Results

The harmfulness of weather events with respect to population health

Fatalities | Injuries

# ---- Fatalities panel ----
# Rescale counts to thousands so the y-axis labels stay short.
health.fatalities$FATALITIES <- health.fatalities$FATALITIES / 1000
# Pin the factor levels in descending order of fatalities so the bars are
# drawn largest-first rather than alphabetically. order(-x) is a real numeric
# sort; order(!x) would coerce the counts to logicals and only look correct
# when the rows happen to be pre-sorted.
health.fatalities$EVTYPE <- 
        factor(health.fatalities$EVTYPE, 
               levels = health.fatalities$EVTYPE[
                       order(-health.fatalities$FATALITIES)])
fatalities <- ggplot(health.fatalities, aes(EVTYPE, FATALITIES)) + 
        labs(title="Fatalities") +
        xlab("Event Type") + ylab("Number of Fatalities \n (in thousands)")
# Vertical x labels keep the long event names readable; coord_cartesian sets
# the visible y range without dropping any data.
fatalities.plot <- fatalities + geom_bar(stat="identity", color = "black", 
                                         fill = "midnightblue") + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1),
              panel.grid.major = element_line(colour = "white"),
              panel.background = element_rect(fill = "antiquewhite")) +
        coord_cartesian(ylim = c(0, 6)) 

# ---- Injuries panel ----
# Same treatment as the fatalities panel, applied to the injury totals.
health.injuries$INJURIES <- health.injuries$INJURIES / 1000
health.injuries$EVTYPE <- 
        factor(health.injuries$EVTYPE, 
               levels = health.injuries$EVTYPE[
                       order(-health.injuries$INJURIES)])
injuries <- ggplot(health.injuries, aes(EVTYPE, INJURIES)) + 
        labs(title="Injuries") +
        xlab("Event Type") + ylab("Number of Injuries \n (in thousands)")
injuries.plot <- injuries + geom_bar(stat="identity", color = "black", 
                                         fill = "midnightblue") + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1),
              panel.grid.major = element_line(colour = "white"),
              panel.background = element_rect(fill = "antiquewhite")) +
        coord_cartesian(ylim = c(0, 96))

# Draw both panels side by side in a single figure.
grid.arrange(fatalities.plot, injuries.plot, ncol=2, nrow=1)

The harmfulness of weather events with respect to economic consequences

Property Damage | Crop Damage

# ---- Property damage panel ----
# Rescale dollars to billions so the y-axis labels stay short.
money.prop$PROPDMG <- money.prop$PROPDMG / 1000000000
# Pin the factor levels in descending order of damage so the bars are drawn
# largest-first rather than alphabetically. order(-x) is a real numeric sort;
# order(!x) would coerce the amounts to logicals and only look correct when
# the rows happen to be pre-sorted.
money.prop$EVTYPE <- 
        factor(money.prop$EVTYPE, 
               levels = money.prop$EVTYPE[order(-money.prop$PROPDMG)])
prop <- ggplot(money.prop, aes(EVTYPE, PROPDMG)) + 
        labs(title="Property Damage") +
        xlab("Event Type") + ylab("Property Damage \n (in billions)")
# Vertical x labels keep the long event names readable; coord_cartesian sets
# the visible y range without dropping any data.
prop.plot <- prop + geom_bar(stat="identity", color = "black", 
                                         fill = "coral2") + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1),
              panel.grid.major = element_line(colour = "gray80"),
              panel.grid.minor = element_line(colour = "gray80"),
              panel.background = element_rect(fill = "aliceblue")) +
        coord_cartesian(ylim = c(0, 150))

# ---- Crop damage panel ----
# Same treatment as the property panel, applied to the crop-damage totals.
money.crop$CROPDMG <- money.crop$CROPDMG / 1000000000
money.crop$EVTYPE <- 
        factor(money.crop$EVTYPE, 
               levels = money.crop$EVTYPE[order(-money.crop$CROPDMG)])
crop <- ggplot(money.crop, aes(EVTYPE, CROPDMG)) + 
        labs(title="Crop Damage") +
        xlab("Event Type") + ylab("Crop Damage \n (in billions)")
crop.plot <- crop + geom_bar(stat="identity", color = "black", 
                             fill = "coral2") + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1),
              panel.grid.major = element_line(colour = "gray80"),
              panel.grid.minor = element_line(colour = "gray80"),
              panel.background = element_rect(fill = "aliceblue")) +
        coord_cartesian(ylim = c(0, 15))

# Draw both panels side by side in a single figure.
grid.arrange(prop.plot, crop.plot, ncol=2, nrow=1)

Summations of the effect of weather events on health & economy

# ---- Combined health panel ----
# Rescale the fatalities + injuries total to thousands for shorter labels.
health.harmful$HARMFUL <- health.harmful$HARMFUL / 1000
# Pin the factor levels in descending order of the total so the bars are
# drawn largest-first rather than alphabetically. order(-x) is a real numeric
# sort; order(!x) would coerce the totals to logicals and only look correct
# when the rows happen to be pre-sorted.
health.harmful$EVTYPE <- 
        factor(health.harmful$EVTYPE, 
               levels = health.harmful$EVTYPE[order(-health.harmful$HARMFUL)])
health <- ggplot(health.harmful, aes(EVTYPE, HARMFUL)) + 
        labs(title="Health") +
        xlab("Event Type") + ylab("Damage to Population Health \n (in thousands)")
# Vertical x labels keep the long event names readable; coord_cartesian sets
# the visible y range without dropping any data.
health.plot <- health + geom_bar(stat="identity", color = "black", 
                                         fill = "midnightblue") + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1),
              panel.grid.major = element_line(colour = "white"),
              panel.background = element_rect(fill = "antiquewhite")) +
        coord_cartesian(ylim = c(0, 100)) 

# ---- Combined economy panel ----
# Rescale the property + crop total from dollars to billions.
money.harmful$HARMFUL <- money.harmful$HARMFUL / 1000000000
money.harmful$EVTYPE <- 
        factor(money.harmful$EVTYPE, 
               levels = money.harmful$EVTYPE[order(-money.harmful$HARMFUL)])
money <- ggplot(money.harmful, aes(EVTYPE, HARMFUL)) + 
        labs(title="Economy") +
        xlab("Event Type") + ylab("Damage to Economy \n (in billions)")
money.plot <- money + geom_bar(stat="identity", color = "black", 
                             fill = "coral2") + 
        theme(axis.text.x = element_text(angle = 90, hjust = 1),
              panel.grid.major = element_line(colour = "gray80"),
              panel.grid.minor = element_line(colour = "gray80"),
              panel.background = element_rect(fill = "aliceblue")) +
        coord_cartesian(ylim = c(0, 155))

# Draw both panels side by side in a single figure.
grid.arrange(health.plot, money.plot, ncol=2, nrow=1)


Conclusion