The objective of this brief analysis is to determine how public health and the economy are affected by certain severe or catastrophic weather events such as floods, hurricanes or tornadoes. The data come from NOAA and were collected from 1950 to 2011. After some fairly rudimentary transformations, the NOAA data were used to make plots that clearly establish which types of events have the greatest impact on public health and the economy.
# Flag stating that code should always be shown alongside its results.
echo <- TRUE
# Bias number printing towards fixed rather than scientific notation.
options(scipen = 1)
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.20.0 (2016-02-17) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.3.0 (2016-04-13) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
library(ggplot2)
library(plyr)
require(gridExtra)
## Loading required package: gridExtra
The data is processed in the following steps:
First, we download the data file and unzip it.
# Fetch the bzip2-compressed storm data and unpack it to "weatherData.csv".
# Each step runs only when its output file is missing, so re-running the
# analysis does not repeat the download or the decompression.
if (!file.exists("weatherData.csv.bz2")) {
  download.file(
    "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "weatherData.csv.bz2",
    mode = "wb"  # the archive is binary; request a binary transfer explicitly
  )
}
if (!file.exists("weatherData.csv")) {
  # TRUE/FALSE are spelled out because T and F are ordinary, reassignable
  # variables. remove = FALSE keeps the archive on disk, which is what lets
  # the download guard above skip the transfer on later runs.
  bunzip2("weatherData.csv.bz2", overwrite = TRUE, remove = FALSE)
}
Then, we read the generated csv file. If the data already exists in the working environment, we do not need to load it again. Otherwise, we read the csv file.
# Parse the decompressed CSV only when no object named "weatherData" is
# already present in the current environment.
if (!is.element("weatherData", ls())) {
  weatherData <- read.csv(file = "weatherData.csv")
}
# Report the number of rows and columns that were read.
dim(weatherData)
## [1] 902297 37
# Preview the first two records to check the column layout.
head(weatherData, n = 2)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14 100 3 0 0
## 2 NA 0 2 150 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
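Subset the weather data to the columns relevant to this analysis: the event type, fatalities, injuries, and the property and crop damage amounts with their exponent codes.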
# Keep only the columns the analysis uses. They are selected by name rather
# than by position (8 and 23-28 in the raw table) so that a change in the
# column layout raises an error instead of silently picking other columns.
subsetWeatherData <- weatherData[, c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
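Convert the damage exponent codes into numeric multipliers (the table stores each magnitude as a letter code such as K, M or B rather than as a number), then compute the damage values in dollars.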
# Translate damage-exponent codes into numeric multipliers.
#
# code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length: "" -> 1, "K" -> 1e3,
# "M" -> 1e6, "B" -> 1e9. Matching ignores case, so lower-case codes such as
# "k" or "m" are converted instead of being left as NA (an NA multiplier
# makes the damage value NA, and the formula interface of aggregate() then
# drops that row from the totals). Codes outside this table still yield NA.
exponent_multiplier <- function(code) {
  known_codes <- c("", "K", "M", "B")
  multipliers <- c(1, 1e3, 1e6, 1e9)
  # match() is used instead of a named-vector lookup because an empty string
  # cannot be used as a name when indexing.
  multipliers[match(toupper(as.character(code)), known_codes)]
}

# Property damage in dollars: reported amount times its multiplier.
subsetWeatherData$PROPEXP <- exponent_multiplier(subsetWeatherData$PROPDMGEXP)
subsetWeatherData$PROPDMGVAL <-
  subsetWeatherData$PROPDMG * subsetWeatherData$PROPEXP

# Crop damage in dollars, computed the same way.
subsetWeatherData$CROPEXP <- exponent_multiplier(subsetWeatherData$CROPDMGEXP)
subsetWeatherData$CROPDMGVAL <-
  subsetWeatherData$CROPDMG * subsetWeatherData$CROPEXP
# Sum fatalities per event type and keep the ten largest totals.
aggregateEventType <- aggregate(
  FATALITIES ~ EVTYPE,
  data = subsetWeatherData,
  FUN = sum
)
fatalities <- aggregateEventType[order(-aggregateEventType[["FATALITIES"]]), ]
fatalities <- fatalities[seq_len(10), ]

# Sum injuries per event type and keep the ten largest totals.
aggregateEventType2 <- aggregate(
  INJURIES ~ EVTYPE,
  data = subsetWeatherData,
  FUN = sum
)
injuries <- aggregateEventType2[order(-aggregateEventType2[["INJURIES"]]), ]
injuries <- injuries[seq_len(10), ]

# Two panels side by side; the large bottom margin leaves room for the
# vertical (las = 3) event-type labels.
par(
  mfrow = c(1, 2),
  mar = c(12, 4, 3, 2),
  mgp = c(3, 1, 0),
  cex = 0.8
)
barplot(
  height = fatalities[["FATALITIES"]],
  names.arg = fatalities[["EVTYPE"]],
  las = 3,
  main = "Fatalities due to Weather Events",
  ylab = "Fatalities"
)
barplot(
  height = injuries[["INJURIES"]],
  names.arg = injuries[["EVTYPE"]],
  las = 3,
  main = "Injuries due to Weather Events",
  ylab = "Injuries"
)
# Sum property damage per event type and keep the ten largest totals.
aggregatePropertyDamage <- aggregate(
  PROPDMGVAL ~ EVTYPE,
  data = subsetWeatherData,
  FUN = sum
)
propdmg10 <- aggregatePropertyDamage[
  order(-aggregatePropertyDamage[["PROPDMGVAL"]]),
]
propdmg10 <- propdmg10[seq_len(10), ]

# Same for crop damage. NOTE: the object name aggregatePropertyDamage is
# reused here, so from this point on it holds the crop-damage totals.
aggregatePropertyDamage <- aggregate(
  CROPDMGVAL ~ EVTYPE,
  data = subsetWeatherData,
  FUN = sum
)
cropdmg10 <- aggregatePropertyDamage[
  order(-aggregatePropertyDamage[["CROPDMGVAL"]]),
]
cropdmg10 <- cropdmg10[seq_len(10), ]

# Two panels side by side; the large bottom margin leaves room for the
# vertical (las = 3) event-type labels. Values are rescaled to billions.
par(
  mfrow = c(1, 2),
  mar = c(12, 4, 3, 2),
  mgp = c(3, 1, 0),
  cex = 0.8
)
barplot(
  height = propdmg10[["PROPDMGVAL"]] / 1e9,
  names.arg = propdmg10[["EVTYPE"]],
  las = 3,
  main = "Property Damage due to Weather Events",
  ylab = "Cost in Billions USD"
)
barplot(
  height = cropdmg10[["CROPDMGVAL"]] / 1e9,
  names.arg = cropdmg10[["EVTYPE"]],
  las = 3,
  main = "Crop Damage due to Weather Events",
  ylab = "Cost in Billions USD"
)
## Results From the bar graphs above we can clearly conclude the following: 1. Tornadoes have the most devastating effect on public health in both dimensions: - Fatalities - Injuries