Synopsis

The goal of this report is to explain the analysis performed on the NOAA Storm Database and to describe which Event Types were most harmful with respect to population health and which ones had the greatest economic consequences. The database contains data starting in the year 1950 and ending in November 2011. The findings show that Tornado is the most harmful event type, with the highest number of fatalities and injuries, and that Flood has the highest amount of damage (property plus crop) in dollars.

Data Processing

Loading and Processing the Raw Data

# Read the complete NOAA storm data set; read.csv() reads the
# bzip2-compressed file directly, so no manual extraction is needed
stormdataFull <- read.csv("repdata-data-StormData.csv.bz2",
                          header = TRUE, sep = ",", na.strings = "NA")

There are 902297 rows and 37 columns

# Dimensions of the raw data set: number of rows, then number of columns
dim(stormdataFull)
## [1] 902297     37
# Names of all columns available in the raw data set
names(stormdataFull)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Population Health

To analyze which Event Types (“EVTYPE”) were most harmful with respect to population health, create a subset with fatalities (“FATALITIES”) and injuries (“INJURIES”), making sure that the data is free of missing values.

# Keep only the event type and the two health-impact columns
stormdata <- stormdataFull[c("EVTYPE", "FATALITIES", "INJURIES")]

# Drop every row that has a missing value in any of the kept columns
stormdata <- stormdata[complete.cases(stormdata), ]

Aggregate the sum of Fatalities and Injuries by Event Type.

# Total fatalities and injuries recorded for each event type
totalF <- with(
        stormdata,
        aggregate(
                list(Fatalities = FATALITIES, Injuries = INJURIES),
                by = list(EventType = EVTYPE),
                FUN = sum
        )
)

Since the goal is a list of the most harmful event types, find the top 10 event types by number of fatalities and the top 10 by number of injuries, and merge them into a single list.

# Event types ranked by total fatalities, keeping the ten highest
topF10 <- totalF$EventType[head(order(totalF$Fatalities, decreasing = TRUE), 10)]

# Event types ranked by total injuries, keeping the ten highest
topI10 <- totalF$EventType[head(order(totalF$Injuries, decreasing = TRUE), 10)]

# An event type counts as most harmful with respect to population health
# when it appears in either of the two rankings
topHarmful <- subset(totalF, EventType %in% topF10 | EventType %in% topI10)

Economic Consequences

To analyze which Event Types (“EVTYPE”) caused the greatest economic consequences, create a subset with property damage (“PROPDMG”) and crop damage (“CROPDMG”), together with the alphabetical characters that signify their magnitude (“PROPDMGEXP”, “CROPDMGEXP”), making sure that the data is free of missing values.

# Keep the event type plus the damage amounts and their magnitude codes
stormdata <- stormdataFull[c("EVTYPE",
                             "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Drop every row that has a missing value in any of the kept columns
stormdata <- stormdata[complete.cases(stormdata), ]

Calculate the value of property and crop damage by applying the alphabetical characters that signify magnitude (“H” for hundreds, “K” for thousands, “M” for millions and “B” for billions). Values with any other magnitude code are left unscaled.

# Distinct alphabetical characters used to signify magnitude
# (property damage first, crop damage second). The printed values mix
# upper- and lower-case letters with blanks, digits and other symbols.
unique(stormdata$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
unique(stormdata$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
# Normalise the magnitude codes to upper case so that "k"/"K", "m"/"M"
# and "h"/"H" are treated alike
stormdata$PROPDMGEXP <- toupper(stormdata$PROPDMGEXP)
stormdata$CROPDMGEXP <- toupper(stormdata$CROPDMGEXP)

# Translate a vector of magnitude codes into numeric multipliers.
# Only H, K, M and B scale the value; every other code (blank, digits,
# "+", "-", "?") maps to 1, i.e. the recorded amount is used as is.
# One vectorised lookup replaces a separate data-frame replacement per
# code and keeps property and crop damage on exactly the same rule.
magnitudeMultiplier <- function(code) {
        multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)[as.character(code)]
        # Codes without an entry in the table come back as NA
        multiplier[is.na(multiplier)] <- 1
        unname(multiplier)
}

# Create a new column with adjusted value of property damage with magnitude character
stormdata$PROPDMG_M <- stormdata$PROPDMG * magnitudeMultiplier(stormdata$PROPDMGEXP)

# Create a new column with adjusted value of crop damage with magnitude character
stormdata$CROPDMG_M <- stormdata$CROPDMG * magnitudeMultiplier(stormdata$CROPDMGEXP)

Sum the property and crop damage of each record to obtain its total damage.

# Total damage per record: scaled property damage plus scaled crop damage
stormdata[["TOTALDMG"]] <- with(stormdata, PROPDMG_M + CROPDMG_M)

Aggregate the sum of total damage (property and crop) by event type and find the top 10 event types that have the greatest economic consequences.

# Selecting data to be used in the analysis: only the event type and the
# total damage are needed for the ranking
stormDamage <- stormdata[, c("EVTYPE", "TOTALDMG")]

# Aggregate total (property plus crop) damage by event type, reading from
# the subset selected above rather than from the wider data frame
totalDamage <- aggregate(list(SumDMG = stormDamage$TOTALDMG),
                         by = list(EventType = stormDamage$EVTYPE), FUN = sum)

# Top 10 Event types that have the greatest economic consequences
topDamage <- head(totalDamage[order(totalDamage$SumDMG, decreasing = TRUE), ], 10)

Results

The most harmful event types with respect to population health are ranked by their total number of fatalities and injuries. Tornado is the most harmful event type, as shown in the ordered table below.

# Show the most harmful event types, highest combined casualties first
topHarmful[with(topHarmful, order(Fatalities + Injuries, decreasing = TRUE)), ]
##             EventType Fatalities Injuries
## 834           TORNADO       5633    91346
## 130    EXCESSIVE HEAT       1903     6525
## 856         TSTM WIND        504     6957
## 170             FLOOD        470     6789
## 464         LIGHTNING        816     5230
## 275              HEAT        937     2100
## 153       FLASH FLOOD        978     1777
## 427         ICE STORM         89     1975
## 760 THUNDERSTORM WIND        133     1488
## 359         HIGH WIND        248     1137
## 244              HAIL         15     1361
## 585       RIP CURRENT        368      232
## 19          AVALANCHE        224      170

The bar chart shows the most harmful event types and their numbers of fatalities and injuries.

# lattice provides barchart()
library(lattice)

# Horizontal bars per event type, one bar each for fatalities (black)
# and injuries (red)
barchart(EventType ~ Fatalities + Injuries,
         data = topHarmful,
         main = "Most Harmful Event types with respect to population health",
         xlab = "Number of Health Problems",
         auto.key = list(corner = c(1, 0.1), text = c("Fatalities", "Injuries")),
         par.settings = list(superpose.polygon = list(col = c("black", "red"))))

The event types that have the greatest economic consequences can be found by adding the cost of property and crop damage. Flood has the highest amount of damage (property plus crop) in dollars, as shown in the ordered table below.

# Show the ten costliest event types, largest total damage first
topDamage[with(topDamage, order(SumDMG, decreasing = TRUE)), ]
##             EventType       SumDMG
## 170             FLOOD 150319678257
## 411 HURRICANE/TYPHOON  71913712800
## 834           TORNADO  57352114049
## 670       STORM SURGE  43323541000
## 244              HAIL  18758222016
## 153       FLASH FLOOD  17562129167
## 95            DROUGHT  15018672000
## 402         HURRICANE  14610229010
## 590       RIVER FLOOD  10148404500
## 427         ICE STORM   8967041360

The bar chart shows the event types that have the greatest economic consequences, adding the cost of property and crop damage, in billions of dollars.

# lattice provides barchart()
library(lattice)

# Horizontal bars per event type; total damage is rescaled from dollars
# to billions of dollars for the x axis
barchart(EventType ~ (SumDMG / 1e9),
         data = topDamage,
         main = "Top 10 Event types that have the greatest economic consequences",
         xlab = "Property and Crop Damage (in Billion US$)")