Synopsis

The following report analyzes historic data for severe weather event and demonstrates which events caused the most damage to human health and which events caused the most economic damage.

Data Processing

#read in the data set specifying the column class types, to save time on loading
classes <- c(NULL, "character", NULL, NULL, NULL, NULL, "factor", "factor", rep(NULL, 14), rep("factor", 3), "factor", "factor", "factor", rep(NULL, 9))
#get a sample of data (for memory purposes)
wdata <- read.csv("repdata_data_StormData.csv.bz2", colClasses = classes, header=TRUE, nrow = 400000)

#get rid of unnecessary columns
wdata <- wdata[,-c(1,3:6,9:22, 29:37)]
#Clean the EVENTTYPE column for similar terms
levels(wdata$EVTYPE) <- toupper(levels(wdata$EVTYPE))
levels(wdata$EVTYPE)[grepl("BLIZZARD", levels(wdata$EVTYPE))] <- "BLIZZARDS"

levels(wdata$EVTYPE)[grepl("FLOOD", levels(wdata$EVTYPE))] <- "ANY FLOOD"
levels(wdata$EVTYPE)[grepl("THUNDERSTORM", levels(wdata$EVTYPE))] <- "THUNDERSTORMS"

levels(wdata$EVTYPE)[grepl("HEAVY RAIN", levels(wdata$EVTYPE))] <- "HEAVY RAINS/LIGHTNING"
levels(wdata$EVTYPE)[grepl("LIGHTNING", levels(wdata$EVTYPE))] <- "HEAVY RAINS/LIGHTNING"
levels(wdata$EVTYPE)[grepl("TORNADO", levels(wdata$EVTYPE))] <- "TORNADOS"
levels(wdata$EVTYPE)[grepl("WIND", levels(wdata$EVTYPE))] <- "WINDS"
levels(wdata$EVTYPE)[grepl("TROPICAL STORM", levels(wdata$EVTYPE))] <- "TROPICAL STORMS"

levels(wdata$EVTYPE)[grepl("HURRICANE", levels(wdata$EVTYPE))] <- "HURRICANES"

levels(wdata$EVTYPE)[grepl("SPOUT", levels(wdata$EVTYPE))] <- "WATER SPOUTS"

levels(wdata$EVTYPE)[grepl("WINTER STORM", levels(wdata$EVTYPE))] <- "WINTER STORMS"
levels(wdata$EVTYPE)[grepl("URBAN", levels(wdata$EVTYPE))] <- "ANY FLOOD"
levels(wdata$EVTYPE)[grepl("COLD", levels(wdata$EVTYPE))] <- "COLD WEATHER"
levels(wdata$EVTYPE)[grepl("HEAT", levels(wdata$EVTYPE))] <- "HEAT"
levels(wdata$EVTYPE)[grepl("SLIDE", levels(wdata$EVTYPE))] <- "MUD SLIDES"
levels(wdata$EVTYPE)[grepl("ICE", levels(wdata$EVTYPE))] <- "ICE"
levels(wdata$EVTYPE)[grepl("HEAVY SNOW", levels(wdata$EVTYPE))] <- "HEAVY SNOW"
levels(wdata$EVTYPE)[grepl("HAIL", levels(wdata$EVTYPE))] <- "HAIL"

levels(wdata$EVTYPE)[grepl("FUNNEL", levels(wdata$EVTYPE))] <- "FUNNEL CLOUD"
levels(wdata$EVTYPE)[grepl("FREEZING RAIN", levels(wdata$EVTYPE))] <- "FREEZING RAIN"
levels(wdata$EVTYPE)[grepl("MICROBURST", levels(wdata$EVTYPE))] <- "MICROBURST"
levels(wdata$EVTYPE)[grepl("FIRE", levels(wdata$EVTYPE))] <- "WILD FIRES"

levels(wdata$EVTYPE)[grepl("RECORD HIGH", levels(wdata$EVTYPE))] <- "RECORD HIGH TEMPERATURE"
levels(wdata$EVTYPE)[grepl("RAIN", levels(wdata$EVTYPE))] <- "ANY RAIN"

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

library(dplyr)
wdata$FATALITIES <- as.numeric(wdata$FATALITIES)
wdata$INJURIES <- as.numeric(wdata$INJURIES)
healthDamage <- wdata[,3:5] %>% 
  group_by(EVTYPE) %>% summarise( 
                       fatalities = mean(FATALITIES, na.rm = TRUE),
                       injuries = mean(INJURIES, na.rm = TRUE)
  )
healthDamage<-healthDamage[order(-healthDamage$injuries,-healthDamage$fatalities),] 
topTenHealth <- healthDamage[1:10,]
topTenHealth
## Source: local data frame [10 x 3]
## 
##              EVTYPE fatalities injuries
## 169      ROUGH SEAS     15.000   129.00
## 127 MARINE ACCIDENT      2.000    67.00
## 288    WARM WEATHER      1.000    67.00
## 128   MARINE MISHAP     21.000    65.00
## 72            GLAZE      1.925    37.52
## 133    MIXED PRECIP      1.200    29.10
## 48       DUST STORM      1.075    28.48
## 106       ICY ROADS      1.630    25.30
## 90        HIGH SEAS      1.500    17.75
## 269         TYPHOON      1.000    15.73

Across the United States, which types of events have the greatest economic consequences?

First let’s convert the damage values into correct ones, using the units columns.

wdata$PROPDMG <- as.numeric(wdata$PROPDMG)

#recode the units columns

wdata$PROPDMGEXP <- tolower(as.character(wdata$PROPDMGEXP))
wdata$PROPDMGEXP[grepl("b", wdata$PROPDMGEXP)] <- "1000000000"
wdata$PROPDMGEXP[grepl("k", wdata$PROPDMGEXP)] <- "1000"
wdata$PROPDMGEXP[grepl("m", wdata$PROPDMGEXP)] <- "1000000"
wdata$PROPDMGEXP[grepl("h", wdata$PROPDMGEXP)] <- "100"
wdata$propertydamage <- rep(0, nrow(wdata))
wdata$PROPDMGEXP <- as.numeric(wdata$PROPDMGEXP)
## Warning: NAs introduced by coercion
for (i in nrow(wdata)) {
  if (!is.na(wdata$PROPDMGEXP) & wdata$PROPDMGEXP > 99) {
    wdata$propertydamage <- wdata$PROPDMG * wdata$PROPDMGEXP
  }
  else {
    wdata$propertydamage <- wdata$PROPDMG
  }
}
## Warning: the condition has length > 1 and only the first element will be
## used

Now let’s estimate mean property damage.

propertyDamageSummary <- wdata[,c("EVTYPE", "propertydamage")] %>% 
  group_by(EVTYPE) %>% summarise( 
                       meanPropertyDamage = mean(propertydamage, na.rm = TRUE)
 )
propertyDamageSummary<-propertyDamageSummary[order(-propertyDamageSummary$meanPropertyDamage),] 
topTenProperty <- propertyDamageSummary[1:10,]
topTenProperty
## Source: local data frame [10 x 2]
## 
##              EVTYPE meanPropertyDamage
## 102      HURRICANES          6.692e+09
## 84    WINTER STORMS          1.130e+09
## 30  DAMAGING FREEZE          6.110e+08
## 19       WILD FIRES          3.634e+08
## 269         TYPHOON          1.836e+08
## 36          DROUGHT          1.506e+08
## 14        BLIZZARDS          1.152e+08
## 265 TROPICAL STORMS          9.922e+07
## 1         ANY FLOOD          9.880e+07
## 91        HIGH SURF          5.746e+07
library(rCharts)
propertyDamagePlot <- hPlot(meanPropertyDamage ~ EVTYPE, data = topTenProperty, type = "column", title = "Average Property Damage by Event Type (Top Ten)")
propertyDamagePlot$xAxis(type = "category", title = list(text = "Type of Severe Weather"))
propertyDamagePlot$yAxis(title = list(text = "Property Damage"))
#settings for displaying the plot
library(knitr)
opts_chunk$set(comment = NA, results = 'asis', comment = NA, tidy = F)
#display the plot
library(rCharts)
propertyDamagePlot$show('inline', include_assets = TRUE, cdn = TRUE)

Results

Judging from the sample data we processed, the most damaging events to human health on average are ROUGH SEAS, TORRENTIAL RAINFALL, MARINE ACCIDENT (and MARINE MISHAP), WARM WEATHER, GLAZE , MIXED PRECIP, DUST STORM , ICY ROADS , HIGH SEAS, TYPHOON. The most damaging events economically are HURRICANES, WINTER STORMS, DAMAGING FREEZE, WILD FIRES, TYPHOON, DROUGHT, BLIZZARDS, TROPICAL STORMS , ANY FLOOD, and HIGH SURF.