Weather Events And Their Impact on Public Health and the Economy

Synopsis

The objective of this brief analysis is to determine how public health and the economy are affected by severe or catastrophic weather events such as floods, hurricanes or tornadoes. The data come from NOAA and were collected from 1950 to 2011. After some fairly rudimentary transformations, the NOAA data were used to make plots that clearly establish which types of events have the greatest impact on public health and the economy.

Configuration

# Flag recording that code should stay visible in the report. This is an
# ordinary variable; nothing in the visible code reads it.
echo <- TRUE
# Apply a penalty of 1 against scientific notation when numbers are printed.
options(scipen = 1)
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.20.0 (2016-02-17) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.3.0 (2016-04-13) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(ggplot2)
library(plyr)
require(gridExtra)
## Loading required package: gridExtra

Data Processing

The data is processed in the following steps:

  1. It is loaded from the original source
  2. It is unzipped
  3. It is stored into an R data structure
  4. It is converted based on factors stored in the data table

First, we download the data file and unzip it.

    # Fetch the compressed NOAA storm data only when no local copy exists, so
    # re-running the analysis does not download the archive again.
    if (!file.exists("weatherData.csv.bz2")) {
        # mode = "wb" transfers the archive as binary on every platform.
        download.file("http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                      destfile = "weatherData.csv.bz2", mode = "wb")
    }
    # Decompress the archive, replacing any earlier extract and keeping the
    # .bz2 file (remove = FALSE). TRUE/FALSE are spelled out because T and F
    # are ordinary variables that can be reassigned.
    bunzip2("weatherData.csv.bz2", overwrite = TRUE, remove = FALSE)

Then, we read the generated csv file. If the data already exists in the working environment, we do not need to load it again. Otherwise, we read the csv file.

# Reuse a weatherData object already present in the current environment;
# otherwise parse the extracted CSV (comma is read.csv's default separator).
if (!exists("weatherData", inherits = FALSE)) {
    weatherData <- read.csv(file = "weatherData.csv")
}
# Report the dimensions and preview the first two records.
dim(weatherData)
## [1] 902297     37
head(weatherData, n = 2)
##   STATE__          BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1 4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1 4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                        14   100 3   0          0
## 2         NA         0                         2   150 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2

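Subset the weather data to the columns relevant to this analysis: event type, fatalities, injuries, and the property and crop damage figures with their exponent codes.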

# Keep only the columns this analysis uses, selected by name rather than by
# position so the subset stays correct if the column order of the input changes.
# In the layout printed by head() above these are columns 8 and 23-28.
subsetWeatherData <- weatherData[, c("EVTYPE", "FATALITIES", "INJURIES",
                                     "PROPDMG", "PROPDMGEXP",
                                     "CROPDMG", "CROPDMGEXP")]

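Convert the damage figures to dollar amounts. The table stores each figure together with a letter code giving its magnitude (K for thousands, M for millions, B for billions), so each figure is multiplied by the factor its code stands for.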

# Translate damage-exponent codes into the factor each damage figure must be
# multiplied by.
#
# expCode: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length: 1 for a blank code, 1e3 / 1e6 /
# 1e9 for K / M / B, and NA for any other code.
#
# Matching ignores letter case, so "k", "m" and "b" are converted like their
# upper-case forms instead of being left as NA.
exponentMultiplier <- function(expCode) {
    code <- toupper(as.character(expCode))
    knownMultipliers <- c(K = 1e3, M = 1e6, B = 1e9)
    # Lookup by name gives NA for every code that is not K, M or B.
    multiplier <- unname(knownMultipliers[code])
    # A blank code means the figure is taken at face value.
    multiplier[code %in% ""] <- 1
    multiplier
}

# Rows whose code is not recognised keep an NA damage value; the formula-based
# aggregate() calls that consume these columns omit NA rows by default.
subsetWeatherData$PROPEXP <- exponentMultiplier(subsetWeatherData$PROPDMGEXP)
subsetWeatherData$PROPDMGVAL <- subsetWeatherData$PROPDMG * subsetWeatherData$PROPEXP


subsetWeatherData$CROPEXP <- exponentMultiplier(subsetWeatherData$CROPDMGEXP)
subsetWeatherData$CROPDMGVAL <- subsetWeatherData$CROPDMG * subsetWeatherData$CROPEXP
# Total fatalities and injuries per event type, then keep the ten event types
# with the highest totals in each category.
aggregateEventType <- aggregate(
    FATALITIES ~ EVTYPE,
    data = subsetWeatherData,
    FUN = sum
)
fatalities <- aggregateEventType[
    order(-aggregateEventType[["FATALITIES"]]),
][seq_len(10), ]

aggregateEventType2 <- aggregate(
    INJURIES ~ EVTYPE,
    data = subsetWeatherData,
    FUN = sum
)
injuries <- aggregateEventType2[
    order(-aggregateEventType2[["INJURIES"]]),
][seq_len(10), ]

# Two panels side by side. las = 3 draws the event-type labels vertically;
# the bottom margin of 12 lines is presumably there to make room for them.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(
    height = fatalities$FATALITIES,
    names.arg = fatalities$EVTYPE,
    las = 3,
    main = "Fatalities due to Weather Events",
    ylab = "Fatalities"
)
barplot(
    height = injuries$INJURIES,
    names.arg = injuries$EVTYPE,
    las = 3,
    main = "Injuries due to Weather Events",
    ylab = "Injuries"
)

# Total property damage per event type; keep the ten costliest event types.
aggregatePropertyDamage <- aggregate(
    PROPDMGVAL ~ EVTYPE,
    data = subsetWeatherData,
    FUN = sum
)
propdmg10 <- aggregatePropertyDamage[
    order(-aggregatePropertyDamage[["PROPDMGVAL"]]),
][seq_len(10), ]

# Same ranking for crop damage. NOTE: aggregatePropertyDamage is reused here,
# so from this point on it holds the crop totals, not the property totals.
aggregatePropertyDamage <- aggregate(
    CROPDMGVAL ~ EVTYPE,
    data = subsetWeatherData,
    FUN = sum
)
cropdmg10 <- aggregatePropertyDamage[
    order(-aggregatePropertyDamage[["CROPDMGVAL"]]),
][seq_len(10), ]

# Two panels side by side, with the damage totals scaled to billions of USD.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(
    height = propdmg10$PROPDMGVAL / 1e9,
    names.arg = propdmg10$EVTYPE,
    las = 3,
    main = "Property Damage due to Weather Events",
    ylab = "Cost in Billions USD"
)
barplot(
    height = cropdmg10$CROPDMGVAL / 1e9,
    names.arg = cropdmg10$EVTYPE,
    las = 3,
    main = "Crop Damage due to Weather Events",
    ylab = "Cost in Billions USD"
)

Results

From the bar graphs above we can clearly conclude the following:

  1. Tornadoes have the most devastating effect on public health in both dimensions:
     - Fatalities
     - Injuries
  2. Floods have the largest negative impact on property
  3. Drought has the largest negative impact on crops