Synopsis

This report takes the storm data from the NOAA storm database and looks at the event types (EVTYPE), FATALITIES, INJURIES, property damage (PROPDMG), and crop damage (CROPDMG) to determine which weather events have the most impact on population health and the economy. Crop damage and property damage are combined into one DAMAGES total for the purpose of this research. Fatalities and injuries are likewise combined into a “public health” total (HEALTH) for the purpose of assessing the total harm to population health.

The results were as follows:
* Tornadoes, excessive heat, and flash floods were the top 3 causes of fatalities.
* Tornadoes, thunderstorm wind, and floods were the top 3 causes of injuries.
* Tornadoes/thunderstorm wind/hail, high winds/cold, and Hurricane Opal/high winds were the top 3 causes of total economic damages.

Purpose

The purpose of this report is to examine two (2) main questions and to show how each can be analyzed through key comparisons.

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

Basic setup: load the libraries, then download and read the data.

library(dplyr)
library(knitr)
library(lubridate)
library(ggplot2)

# Fetch the compressed storm data set only when no local copy is present,
# so repeated runs do not download the archive again.
if (!file.exists("stormdata.csv.bz2")) {
  download.file(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2Fstormdata.csv.bz2",
    destfile = "stormdata.csv.bz2"
  )
}

Now extract, read, and clean up the data.

# Read the bzip2-compressed CSV through a bzfile() connection.
rawdata <- read.csv(bzfile("stormdata.csv.bz2"))

# Keep only the columns the analysis uses, selected by exact name.
# The earlier regex misspelled PROPDMGEXP as "PROPRDMGEXP" and only picked the
# column up because the "PROPDMG" alternative matched it as a substring.
# Exact names also fail loudly if an expected column is missing.
keep_cols <- c(
  "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
stormdata <- rawdata[, keep_cols]  # rawdata itself is left unmodified

# Upper-case the event labels so that spellings differing only by case are
# grouped together. The result is a character column, so no intermediate
# factor conversion is needed.
stormdata$EVTYPE <- toupper(as.character(stormdata$EVTYPE))

Fill in the blanks & format damages

# Convert NOAA damage-exponent codes into powers of ten.
#
# `code` is a raw *DMGEXP column (character or factor). Letter codes are
# matched case-insensitively: H = 10^2, K = 10^3, M = 10^6, B = 10^9. A code
# made only of digits is used as the exponent itself. Everything else (NA, the
# empty string, and symbols such as "+", "-" or "?") maps to 0, i.e. a
# multiplier of 1, so the damage figure is kept as recorded.
#
# Returns a numeric vector the same length as `code` that never contains NA.
exponent_power <- function(code) {
  code <- toupper(as.character(code))
  power <- numeric(length(code))  # exponent 0 unless a known code is found

  letter_power <- c(H = 2, K = 3, M = 6, B = 9)
  is_letter <- code %in% names(letter_power)
  power[is_letter] <- unname(letter_power[code[is_letter]])

  is_digit <- grepl("^[0-9]+$", code)
  power[is_digit] <- as.numeric(code[is_digit])

  power
}

# Previously the is.na() fill-ins ran before as.numeric(), so blank and symbol
# codes were coerced to NA afterwards and made the damage totals NA for those
# rows. The explicit lookup above avoids the coercion (and its warning).
# NOTE(review): damage tables printed later in this report were produced
# before this fix and should be regenerated.

# Property damage in dollars: recorded figure times 10^exponent.
stormdata$PROPDMGEXP <- exponent_power(stormdata$PROPDMGEXP)
stormdata$TOTALPROPDMG <- stormdata$PROPDMG * 10^stormdata$PROPDMGEXP

# Crop damage in dollars, computed the same way.
stormdata$CROPDMGEXP <- exponent_power(stormdata$CROPDMGEXP)
stormdata$TOTALCROPDMG <- stormdata$CROPDMG * 10^stormdata$CROPDMGEXP

Add new columns and summarize data

# Combined economic cost per record: crop damage plus property damage.
stormdata[["DAMAGES"]] <- with(stormdata, TOTALCROPDMG + TOTALPROPDMG)
# Combined public-health toll per record: injuries plus fatalities.
stormdata[["HEALTH"]] <- with(stormdata, INJURIES + FATALITIES)

Results

Display the top 10 causes of Fatalities.

# Total fatalities per event type, largest total first.
# aggregate() names the summed column `x`.
sum_fat <- aggregate(
  stormdata$FATALITIES,
  by = list(EVENTS = stormdata$EVTYPE),
  FUN = sum
) %>%
  arrange(desc(x))
# Keep the ten event types with the most fatalities and show them.
p_fat <- sum_fat %>% head(n = 10)
p_fat
##            EVENTS    x
## 1         TORNADO 5633
## 2  EXCESSIVE HEAT 1903
## 3     FLASH FLOOD  978
## 4            HEAT  937
## 5       LIGHTNING  816
## 6       TSTM WIND  504
## 7           FLOOD  470
## 8     RIP CURRENT  368
## 9       HIGH WIND  248
## 10      AVALANCHE  224

Display the top 10 causes of Injuries.

# Total injuries per event type, largest total first.
# aggregate() names the summed column `x`.
sum_inj <- aggregate(
  stormdata$INJURIES,
  by = list(EVENTS = stormdata$EVTYPE),
  FUN = sum
) %>%
  arrange(desc(x))
# Keep the ten event types with the most injuries and show them.
p_inj <- sum_inj %>% head(n = 10)
p_inj
##               EVENTS     x
## 1            TORNADO 91346
## 2          TSTM WIND  6957
## 3              FLOOD  6789
## 4     EXCESSIVE HEAT  6525
## 5          LIGHTNING  5230
## 6               HEAT  2100
## 7          ICE STORM  1975
## 8        FLASH FLOOD  1777
## 9  THUNDERSTORM WIND  1488
## 10              HAIL  1361

Display the top 10 causes of all Fatalities and Injuries.

# Total fatalities plus injuries (HEALTH) per event type, largest total first.
# aggregate() names the summed column `x`.
sum_health <- aggregate(
  stormdata$HEALTH,
  by = list(EVENTS = stormdata$EVTYPE),
  FUN = sum
) %>%
  arrange(desc(x))
# Keep the ten event types with the highest combined toll and show them.
p_health <- sum_health %>% head(n = 10)
p_health
##               EVENTS     x
## 1            TORNADO 96979
## 2     EXCESSIVE HEAT  8428
## 3          TSTM WIND  7461
## 4              FLOOD  7259
## 5          LIGHTNING  6046
## 6               HEAT  3037
## 7        FLASH FLOOD  2755
## 8          ICE STORM  2064
## 9  THUNDERSTORM WIND  1621
## 10      WINTER STORM  1527

Display the top 10 causes of total economic damage (property damage plus crop damage)

# Total DAMAGES (crop plus property) per event type, largest total first.
# aggregate() names the summed column `x`.
# NOTE(review): sum() is called without na.rm, so an event type with any NA in
# DAMAGES gets an NA total, and arrange() sorts NA totals to the end.
sum_dmg <- aggregate(
  stormdata$DAMAGES,
  by = list(EVENTS = stormdata$EVTYPE),
  FUN = sum
) %>%
  arrange(desc(x))
# Keep the ten event types with the largest totals and show them.
p_dmg <- sum_dmg %>% head(n = 10)
p_dmg
##                           EVENTS          x
## 1     TORNADOES, TSTM WIND, HAIL 1602500000
## 2                HIGH WINDS/COLD  117500000
## 3      HURRICANE OPAL/HIGH WINDS  110000000
## 4        WINTER STORM HIGH WINDS   65000000
## 5           HEAVY RAIN/HIGH SURF   15000000
## 6                LAKESHORE FLOOD    7540000
## 7         HIGH WINDS HEAVY RAINS    7510000
## 8                   FOREST FIRES    5500000
## 9           FLASH FLOODING/FLOOD    1925000
## 10 HEAVY SNOW/HIGH WINDS & FLOOD    1520000