Synopsis
Severe weather events can cause public health problems and property damage. The National Oceanic and Atmospheric Administration’s (NOAA) storm database is a good resource for studying and planning for these events. This report analyzes which event types are most harmful to human health (injuries and fatalities) and which have the greatest economic impact (property damage and crop damage). Other analyses could be done, but they were not the purpose of this class.
Data processing
1. Downloading the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database from the URL used in the code below. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
library(readr)
# Fetch the bzip2-compressed NOAA storm database and load it into a data frame.
URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Download only when there is no local copy yet, so re-running the report does
# not fetch the archive again. mode = "wb" requests a binary transfer: the
# archive is a binary file and a text-mode transfer can corrupt it on Windows.
if (!file.exists("StormData")) {
  download.file(URL, destfile = "StormData", mode = "wb")
}
# The file is read straight from the compressed archive (R's file connections
# detect the compression when opening for reading), so no explicit
# decompression step is needed.
StormData <- read.csv("StormData", sep = ",", header = TRUE)
head(StormData)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
StormDataReduced keeps only the columns that are used in this analysis and drops the rows whose event type is "?".
# Keep the seven columns the analysis needs and discard rows whose event type
# is the "?" placeholder.
StormDataReduced <- subset(
  StormData,
  subset = EVTYPE != "?",
  select = c(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
)
Excluding NAs
# Drop every row that has an NA in any of the kept columns, then report the
# dimensions (rows, columns) of what remains.
StormDataReducedNoNA <- na.omit(StormDataReduced)
dim(StormDataReducedNoNA)
## [1] 902296 7
# Sanity check: number of NA cells left in the cleaned data.
sum(is.na(StormDataReducedNoNA))
## [1] 0
How many different event types are in the database?
# Number of distinct raw event-type labels.
length(unique(StormDataReducedNoNA[["EVTYPE"]]))
## [1] 984
Reducing Event Types in the DataBase
There are 984 event types (EVTYPE) in the database. Converting them all to upper case merges the labels that differ only in capitalization and reduces the number of types:
# Upper-case every label so that spellings differing only by case collapse
# into one, then recount the distinct labels.
StormDataReducedNoNA[["EVTYPE"]] <- toupper(StormDataReducedNoNA[["EVTYPE"]])
length(unique(StormDataReducedNoNA[["EVTYPE"]]))
## [1] 897
That is still a lot of event types (897), so let's try to consolidate them:
# Consolidate the EVTYPE labels into broader categories.
#
# Each rule is c(regex, category): a label matching the regex is replaced
# wholesale by the category name. Rules are applied top to bottom and the
# ORDER MATTERS: once a label has been rewritten, later rules only see its new
# name. For example, a label containing both HAIL and WIND becomes HAIL
# because the HAIL rule runs first, and a label such as "WINTER STORM" becomes
# STORM rather than WINTER.
#
# The rules are run on the unique labels only and the result is mapped back to
# the rows with match(). gsub() works element by element, so this gives the
# same result as rewriting the full column once per rule, with far less work.
# local() keeps the helper variables out of the global environment.
StormDataReducedNoNA$EVTYPE <- local({
  rules <- list(
    # AVALANCHE (folds the spelling "AVALANCE" into the correct one)
    c(".*AVALANCE.*", "AVALANCHE"),
    # BLIZZARD
    c(".*BLIZZARD.*", "BLIZZARD"),
    # CLOUD
    c(".*CLOUD.*", "CLOUD"),
    # COLD
    c(".*COLD.*", "COLD"),
    c(".*FREEZ.*", "COLD"),
    c(".*FROST.*", "COLD"),
    c(".*ICE.*", "COLD"),
    # Also covers "LOW TEMPERATURE RECORD", so no separate rule is needed.
    c(".*LO.*TEMP.*", "COLD"),
    # DRY
    c(".*DRY.*", "DRY"),
    # DUST
    c(".*DUST.*", "DUST"),
    # FIRE
    c(".*FIRE.*", "FIRE"),
    # FLOOD
    c(".*FLOOD.*", "FLOOD"),
    # FOG
    c(".*FOG.*", "FOG"),
    # HAIL
    c(".*HAIL.*", "HAIL"),
    # HEAT
    c(".*HEAT.*", "HEAT"),
    c(".*WARM.*", "HEAT"),
    # Also covers "RECORD HIGH TEMPERATURES", so no separate rule is needed.
    c(".*HIGH.*TEMP.*", "HEAT"),
    # HYPOTHERMIA/EXPOSURE
    c(".*HYPOTHERMIA.*", "HYPOTHERMIA/EXPOSURE"),
    # LANDSLIDE
    c(".*LANDSLIDE.*", "LANDSLIDE"),
    # LIGHTNING (anchored at the start of the label, including two misspellings)
    c("^LIGHTNING.*", "LIGHTNING"),
    c("^LIGNTNING.*", "LIGHTNING"),
    c("^LIGHTING.*", "LIGHTNING"),
    # MICROBURST
    c(".*MICROBURST.*", "MICROBURST"),
    # MUDSLIDE
    c(".*MUDSLIDE.*", "MUDSLIDE"),
    c(".*MUD SLIDE.*", "MUDSLIDE"),
    # RAIN
    c(".*RAIN.*", "RAIN"),
    # RIP CURRENT
    c(".*RIP CURRENT.*", "RIP CURRENT"),
    # STORM
    c(".*STORM.*", "STORM"),
    # SUMMARY
    c(".*SUMMARY.*", "SUMMARY"),
    # TORNADO (also landspouts, waterspouts and the spelling "TORNDAO")
    c(".*TORNADO.*", "TORNADO"),
    c(".*TORNDAO.*", "TORNADO"),
    c(".*LANDSPOUT.*", "TORNADO"),
    c(".*WATERSPOUT.*", "TORNADO"),
    # SURF
    c(".*SURF.*", "SURF"),
    # VOLCANIC
    c(".*VOLCANIC.*", "VOLCANIC"),
    # WET
    c(".*WET.*", "WET"),
    # WIND
    c(".*WIND.*", "WIND"),
    # WINTER
    c(".*WINTER.*", "WINTER"),
    c(".*WINTRY.*", "WINTER"),
    c(".*SNOW.*", "WINTER")
  )

  raw_labels <- StormDataReducedNoNA$EVTYPE
  distinct_labels <- unique(raw_labels)

  consolidated <- distinct_labels
  for (rule in rules) {
    consolidated <- gsub(rule[1], rule[2], consolidated)
  }

  # Map each row's original label to its consolidated category.
  consolidated[match(raw_labels, distinct_labels)]
})
How many unique events do we have now:
# Number of distinct event-type labels left after consolidation.
length(unique(StormDataReducedNoNA[["EVTYPE"]]))
## [1] 136
Calculating monetary property and crop damage from PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP
Information about the property damage consists of two parts: PROPDMG is the significant number and PROPDMGEXP is the multiplier. The same is true of CROPDMG and CROPDMGEXP. The multiplier codes include ‘H’ for hundred, ‘K’ for thousand, ‘M’ for million and ‘B’ for billion, as well as digits and a few symbols, all of which are handled by the function below. We’ll calculate the multiplier for both property damage and crop damage and create two new columns holding the total damage in billions: pCost and cCost.
# Convert damage-exponent codes (as found in PROPDMGEXP / CROPDMGEXP) into
# numeric multipliers.
#
# Args:
#   exp: exponent code(s). A character vector of any length, or anything
#        as.character() can coerce (e.g. a factor). Matching ignores case.
#
# Returns:
#   A numeric vector with one multiplier per element of `exp`:
#     "", "-", "?", "+", "0"   -> 1
#     "1" ... "9"              -> 10^digit
#     "H" -> 10^2, "K" -> 10^3, "M" -> 10^6, "B" -> 10^9
#   Any unrecognised code, and NA, yields NA.
#
# The function is vectorised, so a whole column can be passed in one call. It
# still works element by element through sapply(), and zero-length input
# returns numeric(0) instead of raising an error.
Multiplier <- function(exp) {
  codes <- c("", "-", "?", "+", as.character(0:9), "H", "K", "M", "B")
  powers <- c(0, 0, 0, 0, 0:9, 2, 3, 6, 9)

  # match() is used rather than indexing a named vector because the empty
  # string cannot be looked up by name.
  position <- match(toupper(as.character(exp)), codes)

  # Unknown codes give position NA, which propagates to an NA multiplier.
  10^(powers[position])
}
Final step of calculating costs for property and crop damage
# Damage cost = significant figure times the multiplier for its exponent code,
# divided by 10^9 so the stored values are in billions.
StormDataReducedNoNA$pCost <- as.numeric(StormDataReducedNoNA$PROPDMG) *
  sapply(StormDataReducedNoNA$PROPDMGEXP, Multiplier) / 10^9
StormDataReducedNoNA$cCost <- as.numeric(StormDataReducedNoNA$CROPDMG) *
  sapply(StormDataReducedNoNA$CROPDMGEXP, Multiplier) / 10^9
Creating a summarized dataset of damage impact costs
# Total economic damage (property + crop) per event type, sorted with the
# costliest event types first; the ten largest are printed.
CostImpact <- with(
  StormDataReducedNoNA,
  aggregate(
    x = list(DAMAGE_IMPACT = pCost + cCost),
    by = list(EVENT_TYPE = EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)
CostImpact <- CostImpact[order(CostImpact[["DAMAGE_IMPACT"]], decreasing = TRUE), ]
head(CostImpact, n = 10)
## EVENT_TYPE DAMAGE_IMPACT
## 30 FLOOD 180.58156
## 64 HURRICANE/TYPHOON 71.91371
## 108 STORM 70.44994
## 112 TORNADO 57.42785
## 37 HAIL 20.73720
## 22 DROUGHT 15.01867
## 56 HURRICANE 14.61023
## 15 COLD 12.69944
## 134 WIND 12.00554
## 28 FIRE 8.90491
Results
1. Across the United States, which types of events (as indicated in the EVTYPE) are most harmful with respect to population health?
# Total health impact (fatalities + injuries) per event type, sorted with the
# most harmful event types first; the ten largest are printed.
HealthImpact <- with(
  StormDataReducedNoNA,
  aggregate(
    x = list(HEALTH_IMPACT = FATALITIES + INJURIES),
    by = list(EVENT_TYPE = EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)
HealthImpact <- HealthImpact[order(HealthImpact[["HEALTH_IMPACT"]], decreasing = TRUE), ]
head(HealthImpact, n = 10)
## EVENT_TYPE HEALTH_IMPACT
## 112 TORNADO 97075
## 38 HEAT 12392
## 30 FLOOD 10127
## 134 WIND 9893
## 70 LIGHTNING 6049
## 108 STORM 4780
## 15 COLD 3100
## 135 WINTER 1924
## 28 FIRE 1698
## 37 HAIL 1512
Presenting results
library(ggplot2)
# Bar chart of the ten event types with the largest combined fatalities and
# injuries, with bars ordered from most to least harmful.
HealthImpactChart <- ggplot(head(HealthImpact, 10), aes(x = reorder(EVENT_TYPE, -HEALTH_IMPACT), y = HEALTH_IMPACT, fill = EVENT_TYPE)) +
  geom_bar(stat = "identity",
           alpha = 0.5) +
  # The x tick labels are hidden; the fill legend identifies each bar.
  theme(axis.text.x = element_blank()) +
  # HEALTH_IMPACT is the sum of FATALITIES and INJURIES, so the axis says so.
  labs(title = "Top 10 Most Harmful Weather Events", x = "Event Type", y = "Health Impact (fatalities + injuries)")
HealthImpactChart
Tornadoes are the most harmful events with respect to human health.
2. Across the United States, which types of events have the greatest economic consequences?
Creating a graph for the economic impact of natural disasters
# Bar chart of the ten event types with the largest combined property and crop
# damage, with bars ordered from most to least costly.
CostImpactChart <- ggplot(head(CostImpact, 10), aes(x = reorder(EVENT_TYPE, -DAMAGE_IMPACT),
                                                    y = DAMAGE_IMPACT, fill = EVENT_TYPE)) +
  geom_bar(stat = "identity") +
  # The x tick labels are hidden; the fill legend identifies each bar.
  theme(axis.text.x = element_blank()) +
  # DAMAGE_IMPACT was divided by 10^9 when the costs were computed, so the
  # axis is in billions. NOTE(review): the currency is assumed to be US
  # dollars; confirm against the NOAA storm data documentation.
  labs(title = "Top 10 Weather Events With the Greatest Economic Consequences", x = "Event Type", y = "Economic Impact (billions of USD)")
CostImpactChart
Floods are the most economically significant events