In this report, we aim to analyze the impact of different weather events on public health and economy based on the storm database collected from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) from 1950 - 2011. We will use the estimates of fatalities, injuries, property and crop damage to decide which types of event are most harmful to the population health and economy. From these data, we found that excessive heat and tornado are most harmful with respect to population health, while flood, drought, and hurricane/typhoon have the greatest economic consequences.
echo = TRUE # Always make code visible
options(scipen = 1) # Turn off scientific notations for numbers
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.0 (2020-02-14 07:10:20 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.23.0 successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
##
## throw
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, load, save
## R.utils v2.9.2 successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, nullfile, parse,
## warnings
library(ggplot2)
library(plyr)
require(gridExtra)
## Loading required package: gridExtra
First, we download the data file and unzip it.Then, we read the generated csv file.
setwd("/Users/akhilaprasad/Downloads/")
if(!exists("storm.data")) {
storm.data <- read.csv(bzfile("repdata_data_StormData.csv.bz2"),header = TRUE)
}
dim(storm.data)
## [1] 902297 37
head(storm.data, n = 2)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14 100 3 0 0 15 25.0
## 2 0 2 150 2 0 0 0 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
There are 902297 rows and 37 columns in total.
vars <- c( "EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
mydata <- storm.data[, vars]
tail(mydata)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 902292 WINTER WEATHER 0 0 0 K 0 K
## 902293 HIGH WIND 0 0 0 K 0 K
## 902294 HIGH WIND 0 0 0 K 0 K
## 902295 HIGH WIND 0 0 0 K 0 K
## 902296 BLIZZARD 0 0 0 K 0 K
## 902297 HEAVY SNOW 0 0 0 K 0 K
Listing the first 10 event types that most appear in the data:
sort(table(mydata$EVTYPE), decreasing = TRUE)[1:10]
##
## HAIL TSTM WIND THUNDERSTORM WIND TORNADO
## 288661 219940 82563 60652
## FLASH FLOOD FLOOD THUNDERSTORM WINDS HIGH WIND
## 54277 25326 20843 20212
## LIGHTNING HEAVY SNOW
## 15754 15708
We will group events like TUNDERSTORM WIND, TUNDERSTORM WINDS, HIGH WIND, etc. by containing the keyword ‘WIND’ as one event WIND. And we will transform other types of events in a similar way. New variable EVENTS is the transform variable of EVTYPE that have 10 different types of events: HEAT, FLOOD, etc., and type OTHER for events in which name the keyword is not found.
# create a new variable EVENT to transform variable EVTYPE in groups
mydata$EVENT <- "OTHER"
# group by keyword in EVTYPE
mydata$EVENT[grep("HAIL", mydata$EVTYPE, ignore.case = TRUE)] <- "HAIL"
mydata$EVENT[grep("HEAT", mydata$EVTYPE, ignore.case = TRUE)] <- "HEAT"
mydata$EVENT[grep("FLOOD", mydata$EVTYPE, ignore.case = TRUE)] <- "FLOOD"
mydata$EVENT[grep("WIND", mydata$EVTYPE, ignore.case = TRUE)] <- "WIND"
mydata$EVENT[grep("STORM", mydata$EVTYPE, ignore.case = TRUE)] <- "STORM"
mydata$EVENT[grep("SNOW", mydata$EVTYPE, ignore.case = TRUE)] <- "SNOW"
mydata$EVENT[grep("TORNADO", mydata$EVTYPE, ignore.case = TRUE)] <- "TORNADO"
mydata$EVENT[grep("WINTER", mydata$EVTYPE, ignore.case = TRUE)] <- "WINTER"
mydata$EVENT[grep("RAIN", mydata$EVTYPE, ignore.case = TRUE)] <- "RAIN"
# listing the transformed event types
sort(table(mydata$EVENT), decreasing = TRUE)
##
## HAIL WIND STORM FLOOD TORNADO OTHER WINTER SNOW RAIN HEAT
## 289270 255362 113156 82686 60700 48970 19604 17660 12241 2648
New variable(s) is product of value of damage and dollar unit.
mydata$PROPDMGEXP <- as.character(mydata$PROPDMGEXP)
mydata$PROPDMGEXP[is.na(mydata$PROPDMGEXP)] <- 0 # NA's considered as dollars
mydata$PROPDMGEXP[!grepl("K|M|B", mydata$PROPDMGEXP, ignore.case = TRUE)] <- 0 # everything exept K,M,B is dollar
mydata$PROPDMGEXP[grep("K", mydata$PROPDMGEXP, ignore.case = TRUE)] <- "3"
mydata$PROPDMGEXP[grep("M", mydata$PROPDMGEXP, ignore.case = TRUE)] <- "6"
mydata$PROPDMGEXP[grep("B", mydata$PROPDMGEXP, ignore.case = TRUE)] <- "9"
mydata$PROPDMGEXP <- as.numeric(as.character(mydata$PROPDMGEXP))
mydata$property.damage <- mydata$PROPDMG * 10^mydata$PROPDMGEXP
mydata$CROPDMGEXP <- as.character(mydata$CROPDMGEXP)
mydata$CROPDMGEXP[is.na(mydata$CROPDMGEXP)] <- 0 # NA's considered as dollars
mydata$CROPDMGEXP[!grepl("K|M|B", mydata$CROPDMGEXP, ignore.case = TRUE)] <- 0 # everything exept K,M,B is dollar
mydata$CROPDMGEXP[grep("K", mydata$CROPDMGEXP, ignore.case = TRUE)] <- "3"
mydata$CROPDMGEXP[grep("M", mydata$CROPDMGEXP, ignore.case = TRUE)] <- "6"
mydata$CROPDMGEXP[grep("B", mydata$CROPDMGEXP, ignore.case = TRUE)] <- "9"
mydata$CROPDMGEXP <- as.numeric(as.character(mydata$CROPDMGEXP))
mydata$crop.damage <- mydata$CROPDMG * 10^mydata$CROPDMGEXP
Print of first 10 values for property damage (in dollars) that most appear in the data:
sort(table(mydata$property.damage), decreasing = TRUE)[1:10]
##
## 0 5000 10000 1000 2000 25000 50000 3000 20000 15000
## 663123 31731 21787 17544 17186 17104 13596 10364 9179 8617
Print of first 10 values for crop damage (in dollars) that most appear in the data:
sort(table(mydata$crop.damage), decreasing = TRUE)[1:10]
##
## 0 5000 10000 50000 100000 1000 2000 25000 20000 500000
## 880198 4097 2349 1984 1233 956 951 830 758 721
Table of public health problems by event type
# aggregate FATALITIES and INJURIES by type of EVENT
agg.fatalites.and.injuries <- ddply(mydata, .(EVENT), summarize, Total = sum(FATALITIES + INJURIES, na.rm = TRUE))
agg.fatalites.and.injuries$type <- "fatalities and injuries"
# aggregate FATALITIES by type of EVENT
agg.fatalities <- ddply(mydata, .(EVENT), summarize, Total = sum(FATALITIES, na.rm = TRUE))
agg.fatalities$type <- "fatalities"
# aggregate INJURIES by type of EVENT
agg.injuries <- ddply(mydata, .(EVENT), summarize, Total = sum(INJURIES, na.rm = TRUE))
agg.injuries$type <- "injuries"
# combine all
agg.health <- rbind(agg.fatalities, agg.injuries)
health.by.event <- join (agg.fatalities, agg.injuries, by="EVENT", type="inner")
health.by.event
## EVENT Total type Total type
## 1 FLOOD 1524 fatalities 8602 injuries
## 2 HAIL 15 fatalities 1371 injuries
## 3 HEAT 3138 fatalities 9224 injuries
## 4 OTHER 2626 fatalities 12224 injuries
## 5 RAIN 114 fatalities 305 injuries
## 6 SNOW 164 fatalities 1164 injuries
## 7 STORM 416 fatalities 5339 injuries
## 8 TORNADO 5661 fatalities 91407 injuries
## 9 WIND 1209 fatalities 9001 injuries
## 10 WINTER 278 fatalities 1891 injuries
# aggregate PropDamage and CropDamage by type of EVENT
agg.propdmg.and.cropdmg <- ddply(mydata, .(EVENT), summarize, Total = sum(property.damage + crop.damage, na.rm = TRUE))
agg.propdmg.and.cropdmg$type <- "property and crop damage"
# aggregate PropDamage by type of EVENT
agg.prop <- ddply(mydata, .(EVENT), summarize, Total = sum(property.damage, na.rm = TRUE))
agg.prop$type <- "property"
# aggregate INJURIES by type of EVENT
agg.crop <- ddply(mydata, .(EVENT), summarize, Total = sum(crop.damage, na.rm = TRUE))
agg.crop$type <- "crop"
# combine all
agg.economic <- rbind(agg.prop, agg.crop)
economic.by.event <- join (agg.prop, agg.crop, by="EVENT", type="inner")
economic.by.event
## EVENT Total type Total type
## 1 FLOOD 167502193929 property 12266906100 crop
## 2 HAIL 15733043048 property 3046837473 crop
## 3 HEAT 20325750 property 904469280 crop
## 4 OTHER 97246712337 property 23588880870 crop
## 5 RAIN 3270230192 property 919315800 crop
## 6 SNOW 1024169752 property 134683100 crop
## 7 STORM 66304415393 property 6374474888 crop
## 8 TORNADO 58593098029 property 417461520 crop
## 9 WIND 10847166618 property 1403719150 crop
## 10 WINTER 6777295251 property 47444000 crop
# transform EVENT to factor variable for health variables
agg.health$EVENT <- as.factor(agg.health$EVENT)
# plot FATALITIES and INJURIES by EVENT
health.plot <- ggplot(agg.health, aes(x = EVENT, y = Total, fill = type)) + geom_bar(stat = "identity") +
coord_flip() +
xlab("Event Type") +
ylab("Total number of health impact") +
ggtitle("Weather event types impact on public health") +
theme(plot.title = element_text(hjust = 0.5))
print(health.plot)
The most harmful weather event for health (in number of total fatalites and injuries) is, by far, a tornado.
# transform EVENT to factor variable for health variables
agg.health$EVENT <- as.factor(agg.health$EVENT)
# plot FATALITIES and INJURIES by EVENT
health.plot <- ggplot(agg.health, aes(x = EVENT, y = Total, fill = type)) + geom_bar(stat = "identity") +
coord_flip() +
xlab("Event Type") +
ylab("Total number of health impact") +
ggtitle("Weather event types impact on public health") +
theme(plot.title = element_text(hjust = 0.5))
print(health.plot)
The most devastating weather event with the greatest economic cosequences (to property and crops) is a flood.
From these data, we found that excessive heat and tornado are most harmful with respect to population health, while flood, drought, and hurricane/typhoon have the greatest economic consequences.