Analysis of the Weather Events most harmful for population health and their economic consequences.

Synopsis: This report examines the impact of weather events on two relevant aspects: population health and property/crop damage. The study is based on data provided by the National Oceanic & Atmospheric Administration (NOAA) and the National Weather Service (NWS). The weather events described in the dataset were classified (grouped) into seven representative categories: Convection, Extreme Temperature, Flood, Marine, Winter, Tropical Cyclone, and Others. The report includes all scripts needed to reproduce the study. The classification follows a previous NOAA study, the 2009 Annual Summaries.

Loading dataset from bz2 file

# To rebuild the input file from scratch, run the following once
# (requires the R.utils package):
# library(R.utils)
# download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
#               "repdata_data_StormData.csv.bz2", method = "curl")
# bunzip2("repdata_data_StormData.csv.bz2")

# Alternative: read every column.
# df <- read.table("repdata_data_StormData.csv", sep = ",", header = TRUE,
#                  stringsAsFactors = FALSE)

# Read only a subset of the 37 columns. A colClasses entry of "NULL" makes
# read.table skip that column; the seven columns kept here are presumably
# EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP,
# which are the names the rest of the report uses -- verify against the CSV
# header if the file layout changes.
classes <- rep("NULL", 37)
classes[c(8, 26, 28)] <- "character"
classes[c(23:25, 27)] <- "numeric"
df <- read.table(
  file = "repdata_data_StormData.csv",
  header = TRUE,
  sep = ",",
  colClasses = classes
)

##Data Processing

library(stringr)
# Build the new column WEvent, which maps every raw EVTYPE label onto one of
# seven categories: Convection, Marine, Extreme Temperature, Flood, Winter,
# Tropical Cyclone and Others. Labels that cannot be classified become the
# string "NA" and are discarded at the end, giving the data frame `dfp`.
#
# The rules run in the order written and every keyword rule replaces the WHOLE
# label, so when a label contains keywords from several lists the category
# that is applied first wins.

# Replace each element of `labels` that contains any of `keywords`
# (case-insensitive regular-expression match) with `category`.
# Elements matching no keyword are returned unchanged.
assign_category <- function(labels, keywords, category) {
  for (keyword in keywords) {
    labels <- gsub(paste0("^.*", keyword, ".*$"), category, labels,
                   ignore.case = TRUE)
  }
  labels
}

# Every rule is a pure function of the label text, so classify each distinct
# label once and map the result back to the rows afterwards, instead of
# running every regular expression over the full column.
event_types <- unique(df$EVTYPE)
categories <- event_types

# Events for Convection
categories <- assign_category(
  categories,
  c("LIGHTNING", "THUNDER", "TORNADO", "HAIL", "TSTM"),
  "Convection"
)

# Events for Marine
categories <- assign_category(
  categories,
  c("COASTAL", "MARINE", "TSUNAMI", "RIP", "BEACH", "TIDE", "SURF", "SEA",
    "SWELL", "WAVE"),
  "Marine"
)

# Events for Extreme Temperature
categories <- assign_category(
  categories,
  c("HEAT", "COLD", "COOL", "WARM", "HOT", "HIGH TEMP", "LOW TEMP",
    "HYPOTHERMIA", "HYPERTHERMIA"),
  "Extreme Temperature"
)

# Events for Flood
categories <- assign_category(categories, "FLOOD", "Flood")

# Events for Winter
categories <- assign_category(
  categories,
  c("WINTER", "ICE", "AVALANCHE", "SNOW", "BLIZZARD", "FREEZE", "FREEZING",
    "FROST", "WINTRY", "STORM", "ICY", "SLEET"),
  "Winter"
)

# Events for Tropical Cyclone
categories <- assign_category(
  categories,
  c("TROPICAL", "TYPHOON", "HURRICANE"),
  "Tropical Cyclone"
)

# Events for Others. A raw label "Other" is first turned into "NA"
# (case-sensitive) so that it is not confused with the "Others" category.
categories <- gsub("Other", "NA", categories)
categories <- assign_category(
  categories,
  c("VOLCANIC", "DUST", "RAIN", "WATERSPOUT", "FIRE", "FOG", "WIND",
    "PRECIPITATION", "SHOWER", "WET", "BURST", "SMOKE", "DRY", "DRIEST",
    "DROWNING", "DROUGHT", "DAM", "FUNNEL", "CLOUD", "MUD", "LANDSLIDE",
    "LANSLUMP", "LANDSPOUT", "TURBULENCE", "ROCK SLIDE", "SEICHE",
    "NORTHERN LIGHTS"),
  "Others"
)

# Misspelled labels. Unlike the keyword rules above, these replace only the
# matched substring; leftovers such as "MIXED Others" or "HEAVY OthersATATION"
# are caught by the unclassified list below.
misspellings <- c(
  "AVALANCE" = "Winter",
  "LIGHTING" = "Convection",
  "WATER SPOUT" = "Others",
  "WAYTERSPOUT" = "Others",
  "PRECIP" = "Others",
  "WND" = "Others",
  "LIGNTNING" = "Convection",
  "FLASH FLOOODING" = "Flood",
  "TORNDAO" = "Convection",
  "Sml Stream Fld" = "Flood"
)
for (typo in names(misspellings)) {
  categories <- gsub(typo, misspellings[[typo]], categories,
                     ignore.case = TRUE)
}

# Not classified: everything matching these keywords becomes "NA".
categories <- assign_category(
  categories,
  c("SUMMARY", "APACHE", "EXCESSIVE", "HEAVY MIX", "GUSTNADO", "HIGH WATER",
    "GLAZE", "VOG", "URBAN", "SMALL STREAM", "RED", "RECORD", "RAPIDLY",
    "MONTHLY", "MILD", "AND", "HYPNAMIA", "HypNAmia/Exposure", "NA PATTERN",
    "NA/NA", "NA/SMALL", "NA/SML", "SOUTHEAST", "NA SMALL", "NA NA SMALL",
    "NA TEMPERATURE", "REMNANTS OF FLOYD", "NA LOW", "NA HIGH",
    "No Severe Weather", "NA RISING WATER", "NA FLAG CRITERIA", "NA/EXPOSURE",
    "MIXED Others", "HIGH", "HEAVY OthersATATION", "Temperature NA", "NONE",
    "NA NA NA", "NA NA", "NA"),
  "NA"
)
categories[categories == "?"] <- "NA"
categories <- sub("OTHER", "NA", categories)

# Map the per-label result back onto the rows.
df$WEvent <- categories[match(df$EVTYPE, event_types)]

# Discard all 'NA' values
dfp <- subset(df, WEvent != "NA")

##Results

#1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

# Total fatalities, injuries and their sum for each weather category.
dfp1 <- setNames(
  aggregate(cbind(FATALITIES, INJURIES, FATALITIES + INJURIES) ~ WEvent,
            data = dfp, FUN = sum),
  c("WEvent", "FATALITIES", "INJURIES", "TOTAL")
)

# Short axis labels, looked up by category name. aggregate() returns the
# categories in sorted order (Others and Tropical Cyclone come before Winter),
# so a hard-coded label vector in a different order would put the wrong name
# under the last three bars. Categories without a short form keep their name.
short_labels <- c(
  "Convection" = "Convection",
  "Extreme Temperature" = "XTemp",
  "Flood" = "Flood",
  "Marine" = "Marine",
  "Others" = "Others",
  "Tropical Cyclone" = "TCyclone",
  "Winter" = "Winter"
)
category_names <- as.character(dfp1$WEvent)
WE <- unname(short_labels[category_names])
WE[is.na(WE)] <- category_names[is.na(WE)]

# Plot: one bar chart per measure on a 2 x 2 grid.
par(bg = "white", mfrow = c(2, 2))
measure_titles <- c(
  FATALITIES = "Fatalities",
  INJURIES = "Injuries",
  TOTAL = "Fatalities + Injuries"
)
for (measure in names(measure_titles)) {
  barplot(dfp1[[measure]],
          main = paste(measure_titles[[measure]], "vs Weather Event"),
          col = 2:8, ylab = measure_titles[[measure]],
          xlab = "Type of Weather Event", names.arg = WE, cex.axis = 1)
  grid()
}

# Table (head() prints the first six rows only)
head(dfp1)
##                WEvent FATALITIES INJURIES  TOTAL
## 1          Convection       7224   107554 114778
## 2 Extreme Temperature       3419     9152  12571
## 3               Flood       1519     8597  10116
## 4              Marine       1066     1396   2462
## 5              Others        765     4979   5744
## 6    Tropical Cyclone        135     1331   1466

plot of chunk unnamed-chunk-3

As the figure shows, the events related to convection processes are the most harmful to population health, in terms of both fatalities and injuries.

#2) Across the United States, which types of events have the greatest economic consequences?

# Keep only records with nonzero damage whose exponent code is a digit 0-9 or
# one of B, K, m, M. The digits are spelled out as strings because
# seq("0":"9") evaluates to 1:10, which would silently drop exponent "0".
valid_exp <- c(as.character(0:9), "B", "K", "m", "M")

# Replace the letter exponent codes by the matching power of ten
# (B = 1e9, K = 1e3, m = M = 1e6). Digit codes are returned unchanged.
decode_exponent <- function(code) {
  letter_powers <- c(B = "9", K = "3", m = "6", M = "6")
  is_letter <- code %in% names(letter_powers)
  code[is_letter] <- unname(letter_powers[code[is_letter]])
  code
}

# Subset for Property Damages
dfprop <- subset(dfp, PROPDMG != 0 & PROPDMGEXP %in% valid_exp,
                 select = c(WEvent, PROPDMG, PROPDMGEXP))
dfprop$PROPDMGEXP <- decode_exponent(dfprop$PROPDMGEXP)

# Subset for Crop Damages
dfcrop <- subset(dfp, CROPDMG != 0 & CROPDMGEXP %in% valid_exp,
                 select = c(WEvent, CROPDMG, CROPDMGEXP))
dfcrop$CROPDMGEXP <- decode_exponent(dfcrop$CROPDMGEXP)

# New column for values: damage in dollars = mantissa * 10^exponent
dfprop$PDMG <- dfprop$PROPDMG * 10^as.numeric(dfprop$PROPDMGEXP)
dfcrop$CDMG <- dfcrop$CROPDMG * 10^as.numeric(dfcrop$CROPDMGEXP)

# Property and Crop Damages aggregated
dfpropt <- aggregate(PDMG ~ WEvent, data = dfprop, FUN = sum)
dfcropt <- aggregate(CDMG ~ WEvent, data = dfcrop, FUN = sum)

# General Table with Property and Crop Damages aggregated. all = TRUE keeps a
# category that has only one kind of damage; the missing kind counts as zero.
M <- merge(dfpropt, dfcropt, by = "WEvent", all = TRUE)
M$PDMG[is.na(M$PDMG)] <- 0
M$CDMG[is.na(M$CDMG)] <- 0
# The total is computed from the merged columns so that rows stay aligned,
# and all three columns are converted to millions of dollars.
M$TOTAL <- M$PDMG + M$CDMG
damage_cols <- c("PDMG", "CDMG", "TOTAL")
M[damage_cols] <- M[damage_cols] / 1e+06

# Short axis labels, looked up by category name so that each bar gets the
# label of the category it shows whatever the row order of M is. Categories
# without a short form keep their name.
short_labels <- c(
  "Convection" = "Convection",
  "Extreme Temperature" = "XTemp",
  "Flood" = "Flood",
  "Marine" = "Marine",
  "Others" = "Others",
  "Tropical Cyclone" = "TCyclone",
  "Winter" = "Winter"
)
category_names <- as.character(M$WEvent)
WE <- unname(short_labels[category_names])
WE[is.na(WE)] <- category_names[is.na(WE)]

# Plot: one bar chart per measure on a 2 x 2 grid.
par(bg = "white", mfrow = c(2, 2))
panels <- list(
  list(column = "PDMG", main = "Property Damages vs Weather Event",
       ylab = "Property Damages (millions $)"),
  list(column = "CDMG", main = "Crop Damages vs Weather Event",
       ylab = "Crop Damages (millions $)"),
  list(column = "TOTAL", main = "Economic Consequences vs Weather Event",
       ylab = "Total (millions $)")
)
for (panel in panels) {
  barplot(M[[panel$column]], main = panel$main, col = 2:8,
          ylab = panel$ylab, xlab = "Type of Weather Event", names.arg = WE)
  grid()
}

# Table (head() prints the first six rows only)
colnames(M) <- c("WEvent", "PDMG (millions $)", "CDMG (millions $)",
                 "TOTAL (millions $)")
head(M)
##                WEvent PDMG (millions $) CDMG (millions $)
## 1          Convection             91454           4747.73
## 2 Extreme Temperature              2547           2290.49
## 3               Flood            180159          12380.01
## 4              Marine              5407             48.18
## 5              Others             35302          16099.85
## 6    Tropical Cyclone             90874           5516.12
##   TOTAL (millions $)
## 1          9.145e+10
## 2          2.547e+09
## 3          1.802e+11
## 4          5.407e+09
## 5          3.530e+10
## 6          9.087e+10

plot of chunk unnamed-chunk-4

As the figure shows, flood-related events are the most harmful in economic terms.