Exploring the NOAA Storm Database: Weather events that cause the greatest economic and health impact

The main goal of this research is to answer the following questions:

1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?

### Synopsis

Data processing

Required libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Data Acquisition

# Load the raw storm records from the CSV file
storm <- read.csv(file = "repdata_data_StormData.csv")
# Report how many observations (rows) and variables (columns) were read
sprintf(
    "Dimensions of storm: Observations %s, Variables %s",
    dim(storm)[1],
    dim(storm)[2]
)
## [1] "Dimensions of storm: Observations 902297, Variables 37"
colnames(storm)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Data cleaning
# Inspect how the event type column is stored
typeof(storm[["EVTYPE"]])
## [1] "integer"
# Collect the distinct event type labels present in the raw data and count them
diffEVTYPE <- unique(storm[["EVTYPE"]])
length(diffEVTYPE)
## [1] 985

The number of distinct event types is greater than the number given in the documentation, so we apply the following transformation:

# Normalise the labels: lower-case them, then replace every blank or
# punctuation character with a space
event_types <- gsub(
    pattern = "[[:blank:][:punct:]+]",
    replacement = " ",
    x = tolower(storm[["EVTYPE"]])
)
# Count the distinct labels that remain after normalisation
length(unique(event_types))
## [1] 874

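The transformation merges labels that differed only in letter case or punctuation, reducing the number of distinct event types from 985 to 874 (still more than the documentation lists). So we update the data.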

# Overwrite the event type column with the normalised labels
storm[["EVTYPE"]] <- event_types

Results

Because the number of distinct event types is so large, we consider only the top 10 for each measure.

#### Question 1: Across the United States, which types of events are most harmful with respect to population health?

# Sum fatalities and injuries separately for every event type
Fatalities <- aggregate(FATALITIES ~ EVTYPE, data = storm, FUN = sum)
Injuries <- aggregate(INJURIES ~ EVTYPE, data = storm, FUN = sum)

# Keep the ten event types with the highest totals, largest first
FatalitiesTop10 <- head(
    Fatalities[order(Fatalities[["FATALITIES"]], decreasing = TRUE), ],
    n = 10
)
InjuriesTop10 <- head(
    Injuries[order(Injuries[["INJURIES"]], decreasing = TRUE), ],
    n = 10
)

# Horizontal bar charts, bars ordered by their totals
p1 <- ggplot(
    data = FatalitiesTop10,
    mapping = aes(
        x = reorder(EVTYPE, FATALITIES),
        y = FATALITIES,
        fill = FATALITIES
    )
) +
    geom_bar(stat = "identity") +
    labs(x = "Event type", y = "Total number of FATALITIES") +
    theme(legend.position = "none") +
    coord_flip()

p2 <- ggplot(
    data = InjuriesTop10,
    mapping = aes(
        x = reorder(EVTYPE, INJURIES),
        y = INJURIES,
        fill = INJURIES
    )
) +
    geom_bar(stat = "identity") +
    labs(x = "Event type", y = "Total number of injuries") +
    theme(legend.position = "none") +
    coord_flip()

# Render both charts
print(p1)

print(p2)

Tornadoes cause by far the most deaths and injuries of all event types.

#### Question 2: Across the United States, which types of events have the greatest economic consequences?

The exponents in letter format must be converted to numeric and applied.

# Translate damage exponent codes into powers of ten.
#
# Args:
#   e: Exponent code(s): a character vector, a factor, or numbers. May hold a
#      single code (as when called through sapply/vapply) or a whole column.
#
# Returns:
#   A numeric vector with one entry per element of `e`, holding the power p
#   such that the damage amount is `value * 10^p`:
#     h/H -> 2, k/K -> 3, m/M -> 6, b/B -> 9, a number -> that number,
#     "", "-", "?", "+" -> 0 (no usable exponent, value left unscaled).
#
# Raises:
#   An error ("Invalid exponent value.") if any code is none of the above,
#   including NA.
exp_transform <- function(e) {
    # Work on the labels, not on the storage codes: as.numeric() applied to a
    # factor returns the internal level index instead of the recorded digit.
    code <- as.character(e)
    exponent <- rep(NA_real_, length(code))

    # h -> hundred, k -> thousand, m -> million, b -> billion
    letter_exp <- c(h = 2, k = 3, m = 6, b = 9)
    is_letter <- code %in% c("h", "H", "k", "K", "m", "M", "b", "B")
    exponent[is_letter] <- letter_exp[tolower(code[is_letter])]

    # Codes that parse as numbers are already exponents. The coercion warning
    # for non-numeric codes is expected here and handled by the checks below.
    parsed <- suppressWarnings(as.numeric(code))
    is_number <- !is_letter & !is.na(parsed)
    exponent[is_number] <- parsed[is_number]

    # Missing or placeholder codes carry no exponent: 10^0 keeps the value as
    # recorded instead of inflating it tenfold.
    is_placeholder <- code %in% c("", "-", "?", "+")
    exponent[is_placeholder] <- 0

    # Anything still unresolved is not a recognised code.
    if (anyNA(exponent)) {
        stop("Invalid exponent value.")
    }
    exponent
}

# Translate each row's exponent code and scale the recorded damage amount by
# 10^exponent. vapply() is used instead of sapply() so the result is always a
# plain numeric vector with exactly one exponent per row; the element names
# sapply() would attach are not needed.
prop_dmg_exp <- vapply(
    storm$PROPDMGEXP,
    FUN = exp_transform,
    FUN.VALUE = numeric(1),
    USE.NAMES = FALSE
)
storm$PROPDMG_incl_exp <- storm$PROPDMG * 10^prop_dmg_exp
crop_dmg_exp <- vapply(
    storm$CROPDMGEXP,
    FUN = exp_transform,
    FUN.VALUE = numeric(1),
    USE.NAMES = FALSE
)
storm$CROPDMG_incl_exp <- storm$CROPDMG * 10^crop_dmg_exp

Aggregate data, get Top 10 and plot

# Total the exponent-adjusted damage for each event type
PROPDMG_incl_exp_per_evt <- aggregate(
    PROPDMG_incl_exp ~ EVTYPE,
    data = storm,
    FUN = sum
)
CROPDMG_incl_exp_per_evt <- aggregate(
    CROPDMG_incl_exp ~ EVTYPE,
    data = storm,
    FUN = sum
)

# Ten event types with the largest total property damage, largest first
PROPDMG_Top10 <- head(
    PROPDMG_incl_exp_per_evt[
        order(PROPDMG_incl_exp_per_evt[["PROPDMG_incl_exp"]], decreasing = TRUE),
    ],
    n = 10
)
print(PROPDMG_Top10)
##                 EVTYPE PROPDMG_incl_exp
## 138        flash flood     6.820237e+13
## 697 thunderstorm winds     2.086532e+13
## 741            tornado     1.078951e+12
## 209               hail     3.157558e+11
## 410          lightning     1.729433e+11
## 154              flood     1.446577e+11
## 366  hurricane typhoon     6.930584e+10
## 166           flooding     5.920826e+10
## 585        storm surge     4.332354e+10
## 270         heavy snow     1.793259e+10
# Ten event types with the largest total crop damage, largest first
CROPDMG_Top10 <- head(
    CROPDMG_incl_exp_per_evt[
        order(CROPDMG_incl_exp_per_evt[["CROPDMG_incl_exp"]], decreasing = TRUE),
    ],
    n = 10
)
print(CROPDMG_Top10)
##                EVTYPE CROPDMG_incl_exp
## 84            drought      13972566000
## 154             flood       5661968450
## 519       river flood       5029459000
## 382         ice storm       5022113500
## 209              hail       3025974480
## 357         hurricane       2741910000
## 366 hurricane typhoon       2607872800
## 138       flash flood       1421317100
## 125      extreme cold       1312973000
## 185      frost freeze       1094186000
# Horizontal bar charts of the top 10 totals, bars ordered by damage
p3 <- ggplot(
    data = PROPDMG_Top10,
    mapping = aes(
        x = reorder(EVTYPE, PROPDMG_incl_exp),
        y = PROPDMG_incl_exp,
        fill = PROPDMG_incl_exp
    )
) +
    geom_bar(stat = "identity") +
    labs(x = "Event type", y = "Total number of property damage") +
    theme(legend.position = "none") +
    coord_flip()

p4 <- ggplot(
    data = CROPDMG_Top10,
    mapping = aes(
        x = reorder(EVTYPE, CROPDMG_incl_exp),
        y = CROPDMG_incl_exp,
        fill = CROPDMG_incl_exp
    )
) +
    geom_bar(stat = "identity") +
    labs(x = "Event type", y = "Total number of crop damage") +
    theme(legend.position = "none") +
    coord_flip()

# Render both charts
print(p3)

print(p4)

Flash floods and thunderstorm winds cause the largest property damage. The most severe weather event in terms of crop damage is drought.