Synopsis: The following report is about the impact of weather events on two relevant aspects: population health and property/crop damage. The study is based on the data given by National Oceanic & Atmospheric Administration (NOAA) and National Weather Service (NWS).The weather events described in the dataset were classified (grouped) in seven representative categories: Convection, Extreme Temperature,Flood, Marine,Winter, Tropical Cyclone, and Others. The report includes all scripts needed to reproduce the study. The classification process was carried out according to a previous study called: 2009 Annual Summaries by NOAA.
#require(R.utils) #download.file(“https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2”,“repdata_StormData.csv.bz2”, method = “curl”) #bunzip2(“repdata_StormData.csv.bz2”)
# Read all columns df <- read.table('repdata_data_StormData.csv',
# sep=',',header=TRUE,stringsAsFactors=FALSE)
# Read some columns
classes = c(rep("NULL", 7), "character", rep("NULL", 14), rep("numeric", 3),
"character", "numeric", "character", rep("NULL", 9))
df <- read.table("repdata_data_StormData.csv", sep = ",", header = TRUE, colClasses = classes)
##Data Processing
library(stringr)
# New Column for Categories
df$WEvent <- df$EVTYPE
# Create 7 categories: Convection, Marine, Extreme Temperature, Flood,
# Tropical Cyclone, and Others.
# Events for Convection
a = c("LIGHTNING", "THUNDER", "TORNADO", "HAIL", "TSTM")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "Convection", df$WEvent,
ignore.case = TRUE)
}
# Events for Marine
a = c("COASTAL", "MARINE", "TSUNAMI", "RIP", "BEACH", "TIDE", "SURF", "SEA",
"SWELL", "WAVE")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "Marine", df$WEvent,
ignore.case = TRUE)
}
# Events Extreme Temperature
a = c("HEAT", "COLD", "COOL", "WARM", "HOT", "HIGH TEMP", "LOW TEMP", "HYPOTHERMIA",
"HYPERTHERMIA")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "Extreme Temperature",
df$WEvent, ignore.case = TRUE)
}
# Events for Flood
df$WEvent <- gsub("^.*FLOOD.*$", "Flood", df$WEvent, ignore.case = TRUE)
# Events for Winter
a = c("WINTER", "ICE", "AVALANCHE", "SNOW", "BLIZZARD", "FREEZE", "FREEZING",
"FROST", "WINTRY", "STORM", "ICY", "SLEET")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "Winter", df$WEvent,
ignore.case = TRUE)
}
# Events for Tropical Cyclone
a = c("TROPICAL", "TYPHOON", "HURRICANE")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "Tropical Cyclone",
df$WEvent, ignore.case = TRUE)
}
# Events for Others
df$WEvent <- gsub("Other", "NA", df$WEvent)
a = c("VOLCANIC", "DUST", "RAIN", "WATERSPOUT", "FIRE", "FOG", "WIND", "PRECIPITATION",
"SHOWER", "WET", "BURST", "SMOKE", "DRY", "DRIEST", "DROWNING", "DROUGHT",
"DAM", "FUNNEL", "CLOUD", "MUD", "LANDSLIDE", "LANSLUMP", "LANDSPOUT", "TURBULENCE",
"ROCK SLIDE", "SEICHE", "NORTHERN LIGHTS")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "Others", df$WEvent,
ignore.case = TRUE)
}
# Errors
df$WEvent <- gsub("AVALANCE", "Winter", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("LIGHTING", "Convection", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("WATER SPOUT", "Others", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("WAYTERSPOUT", "Others", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("PRECIP", "Others", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("WND", "Others", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("LIGNTNING", "Convection", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("FLASH FLOOODING", "Flood", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("TORNDAO", "Convection", df$WEvent, ignore.case = TRUE)
df$WEvent <- gsub("Sml Stream Fld", "Flood", df$WEvent, ignore.case = TRUE)
# No Classified
a = c("SUMMARY", "APACHE", "EXCESSIVE", "HEAVY MIX", "GUSTNADO", "HIGH WATER",
"GLAZE", "VOG", "URBAN", "SMALL STREAM", "RED", "RECORD", "RAPIDLY", "MONTHLY",
"MILD", "AND", "HYPNAMIA", "HypNAmia/Exposure", "NA PATTERN", "NA/NA", "NA/SMALL",
"NA/SML", "SOUTHEAST", "NA SMALL", "NA NA SMALL", "NA TEMPERATURE", "REMNANTS OF FLOYD",
"NA LOW", "NA HIGH", "No Severe Weather", "NA RISING WATER", "NA FLAG CRITERIA",
"NA/EXPOSURE", "MIXED Others", "HIGH", "HEAVY OthersATATION", "Temperature NA",
"NONE", "NA NA NA", "NA NA", "NA")
for (i in 1:length(a)) {
df$WEvent <- gsub(str_replace("^.*X.*$", "X", a[i]), "NA", df$WEvent, ignore.case = TRUE)
}
df$WEvent[df$WEvent == "?"] <- "NA"
df$WEvent <- sub("OTHER", "NA", df$WEvent)
# Discard all 'NA' values
dfp <- subset(df, df$WEvent != "NA")
##Results
#1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
# Aggregate for Weather Event
dfp1 <- setNames(aggregate(cbind(FATALITIES, INJURIES, FATALITIES + INJURIES) ~
WEvent, data = dfp, sum), c("WEvent", "FATALITIES", "INJURIES", "TOTAL"))
# Plot
par(bg = "white", mfrow = c(2, 2))
WE = c("Convection", "XTemp", "Flood", "Marine", "TCyclone", "Winter", "Others")
with(dfp1, barplot(FATALITIES, main = "Fatalities vs Weather Event", col = 2:8,
ylab = "Fatalities", xlab = "Type of Weather Event", names = WE, cex.axis = 1))
grid()
with(dfp1, barplot(INJURIES, main = "Injuries vs Weather Event", col = 2:8,
ylab = "Injuries", xlab = "Type of Weather Event", names = WE, cex.axis = 1))
grid()
with(dfp1, barplot(TOTAL, main = "Fatalities + Injuries vs Weather Event", col = 2:8,
ylab = "Fatalities + Injuries", xlab = "Type of Weather Event", names = WE,
cex.axis = 1))
grid()
# Table
head(dfp1)
## WEvent FATALITIES INJURIES TOTAL
## 1 Convection 7224 107554 114778
## 2 Extreme Temperature 3419 9152 12571
## 3 Flood 1519 8597 10116
## 4 Marine 1066 1396 2462
## 5 Others 765 4979 5744
## 6 Tropical Cyclone 135 1331 1466
As we can see in the figure, the events related to convection processes are the most harmful to population health, both in terms of fatalities as injuries.
#2) Across the United States, which types of events have the greatest economic consequences?
# Select nonzero values for PROPDMG\v\v. For PROPDMGEXP values \v\vbetween 0-9 and
# B, K, m, M.
# Subset for Property Damages
dfprop = subset(dfp, PROPDMG != 0 & (PROPDMGEXP %in% seq("0":"9") | PROPDMGEXP %in%
c("B", "K", "m", "M")), select = c(WEvent, PROPDMG, PROPDMGEXP))
# Replace B=1e9, K=1e3, m=M=1e6
dfprop$PROPDMGEXP[dfprop$PROPDMGEXP == "B"] <- 9
dfprop$PROPDMGEXP[dfprop$PROPDMGEXP == "K"] <- 3
dfprop$PROPDMGEXP[dfprop$PROPDMGEXP == "m"] <- 6
dfprop$PROPDMGEXP[dfprop$PROPDMGEXP == "M"] <- 6
# Subset for Crop Damages
dfcrop = subset(dfp, CROPDMG != 0 & (CROPDMGEXP %in% seq("0":"9") | CROPDMGEXP %in%
c("B", "K", "m", "M")), select = c(WEvent, CROPDMG, CROPDMGEXP))
# Replace B=1e9, K=1e3, m=M=1e6
dfcrop$CROPDMGEXP[dfcrop$CROPDMGEXP == "B"] <- 9
dfcrop$CROPDMGEXP[dfcrop$CROPDMGEXP == "K"] <- 3
dfcrop$CROPDMGEXP[dfcrop$CROPDMGEXP == "m"] <- 6
dfcrop$CROPDMGEXP[dfcrop$CROPDMGEXP == "M"] <- 6
# New column for values
dfprop$PDMG <- (dfprop$PROPDMG) * 10^(as.numeric(dfprop$PROPDMGEXP))
dfcrop$CDMG <- (dfcrop$CROPDMG) * 10^(as.numeric(dfcrop$CROPDMGEXP))
# Property and Crop Damages aggregated
dfpropt <- aggregate(PDMG ~ WEvent, data = dfprop, sum)
dfcropt <- aggregate(CDMG ~ WEvent, data = dfcrop, sum)
# General Table with Property and Crop Damages aggregated.
M <- merge(dfpropt, dfcropt, by = "WEvent")
M$TOTAL <- dfpropt$PDMG + dfcropt$CDMG
M$PDMG <- M$PDMG/1e+06
M$CDMG <- M$CDMG/1e+06
M$PDMG <- M$TOTAL/1e+06
# Plot
par(bg = "white", mfrow = c(2, 2))
WE = c("Convection", "XTemp", "Flood", "Marine", "TCyclone", "Winter", "Others")
with(M, barplot(PDMG, main = "Property Damages vs Weather Event", col = 2:8,
ylab = "Property Damages (millions $)", xlab = "Type of Weather Event",
names = WE))
grid()
with(M, barplot(CDMG, main = "Crop Damages vs Weather Event", col = 2:8, ylab = "Crop Damages (millions $)",
xlab = "Type of Weather Event", names = WE))
grid()
with(M, barplot(TOTAL, main = "Economic Consequences vs Weather Event", col = 2:8,
ylab = "Total (millions $)", xlab = "Type of Weather Event", names = WE))
grid()
# Table
colnames(M) <- c("WEvent", "PDMG (millions $)", "CDMG (millions $)", "TOTAL (millions $)")
head(M)
## WEvent PDMG (millions $) CDMG (millions $)
## 1 Convection 91454 4747.73
## 2 Extreme Temperature 2547 2290.49
## 3 Flood 180159 12380.01
## 4 Marine 5407 48.18
## 5 Others 35302 16099.85
## 6 Tropical Cyclone 90874 5516.12
## TOTAL (millions $)
## 1 9.145e+10
## 2 2.547e+09
## 3 1.802e+11
## 4 5.407e+09
## 5 3.530e+10
## 6 9.087e+10
As we can see in the figure, the events related to flood are the most harmful in economic terms.