The following report analyzes historical data on severe weather events and shows which types of event caused the most harm to human health and which caused the most economic damage.
# Read the storm data set with an explicit class for every column, so that
# read.csv() can skip the columns the analysis never uses.
#
# colClasses is positional, one entry per column of the file (37 in total):
#   "NULL" (the string) -> the column is skipped while reading
#   NA                  -> read.csv() detects the type from the data
# A bare NULL cannot be used as a placeholder here: c() and rep() silently drop
# NULLs, which shortens the vector and makes read.csv() recycle the remaining
# classes over the wrong columns.
n_columns <- 37L
skipped_columns <- c(1, 3:6, 9:22, 29:37)

classes <- rep(NA_character_, n_columns)
classes[skipped_columns] <- "NULL"
classes[2] <- "character"
# Kept as factors: the EVTYPE clean-up further down works on levels().
classes[7:8] <- "factor"
# Columns 23-28 (the casualty and damage columns) stay NA so that numeric
# values are loaded as numbers rather than as factors.

# Read only a sample of the rows (for memory purposes). Because the unused
# columns are skipped at read time, no separate column-dropping step is needed:
# the result holds columns 2, 7, 8 and 23-28 of the file, in that order.
wdata <- read.csv(
  "repdata_data_StormData.csv.bz2",
  colClasses = classes,
  header = TRUE,
  nrows = 400000
)
# Clean the EVTYPE column: upper-case every level, then fold levels that
# contain a given search term into one shared label.
levels(wdata$EVTYPE) <- toupper(levels(wdata$EVTYPE))

# Search term -> replacement label. The rules are applied strictly in this
# order, and each rule sees the labels produced by the rules before it. In
# particular the final "RAIN" rule also matches the earlier labels
# "HEAVY RAINS/LIGHTNING" and "FREEZING RAIN", so those groups end up
# as "ANY RAIN".
evtype_relabel <- c(
  "BLIZZARD"       = "BLIZZARDS",
  "FLOOD"          = "ANY FLOOD",
  "THUNDERSTORM"   = "THUNDERSTORMS",
  "HEAVY RAIN"     = "HEAVY RAINS/LIGHTNING",
  "LIGHTNING"      = "HEAVY RAINS/LIGHTNING",
  "TORNADO"        = "TORNADOS",
  "WIND"           = "WINDS",
  "TROPICAL STORM" = "TROPICAL STORMS",
  "HURRICANE"      = "HURRICANES",
  "SPOUT"          = "WATER SPOUTS",
  "WINTER STORM"   = "WINTER STORMS",
  "URBAN"          = "ANY FLOOD",
  "COLD"           = "COLD WEATHER",
  "HEAT"           = "HEAT",
  "SLIDE"          = "MUD SLIDES",
  "ICE"            = "ICE",
  "HEAVY SNOW"     = "HEAVY SNOW",
  "HAIL"           = "HAIL",
  "FUNNEL"         = "FUNNEL CLOUD",
  "FREEZING RAIN"  = "FREEZING RAIN",
  "MICROBURST"     = "MICROBURST",
  "FIRE"           = "WILD FIRES",
  "RECORD HIGH"    = "RECORD HIGH TEMPERATURE",
  "RAIN"           = "ANY RAIN"
)

for (pattern in names(evtype_relabel)) {
  # Re-read the levels on every pass: assigning a shared label merges levels.
  matched <- grepl(pattern, levels(wdata$EVTYPE))
  levels(wdata$EVTYPE)[matched] <- evtype_relabel[[pattern]]
}
library(dplyr)

# Average fatalities and injuries per event type, ranked by mean injuries and
# then by mean fatalities (both descending); the ten highest are kept.

# Convert a column to numeric values. A factor has to go through
# as.character() first: as.numeric() applied directly to a factor returns the
# internal level codes, not the recorded numbers.
factor_to_numeric <- function(x) {
  if (is.factor(x)) as.numeric(as.character(x)) else as.numeric(x)
}

wdata$FATALITIES <- factor_to_numeric(wdata$FATALITIES)
wdata$INJURIES <- factor_to_numeric(wdata$INJURIES)

# Select by name rather than by position so the summary does not depend on the
# column order of wdata.
healthDamage <- wdata[, c("EVTYPE", "FATALITIES", "INJURIES")] %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = mean(FATALITIES, na.rm = TRUE),
    injuries = mean(INJURIES, na.rm = TRUE)
  )

healthDamage <- healthDamage[
  order(-healthDamage$injuries, -healthDamage$fatalities),
]
topTenHealth <- healthDamage[1:10, ]
# NOTE(review): the table echoed after this chunk was produced before the
# factor conversion was corrected; re-knit the report to refresh it.
topTenHealth
## Source: local data frame [10 x 3]
##
## EVTYPE fatalities injuries
## 169 ROUGH SEAS 15.000 129.00
## 127 MARINE ACCIDENT 2.000 67.00
## 288 WARM WEATHER 1.000 67.00
## 128 MARINE MISHAP 21.000 65.00
## 72 GLAZE 1.925 37.52
## 133 MIXED PRECIP 1.200 29.10
## 48 DUST STORM 1.075 28.48
## 106 ICY ROADS 1.630 25.30
## 90 HIGH SEAS 1.500 17.75
## 269 TYPHOON 1.000 15.73
First, let’s convert the recorded damage values into actual amounts by applying the multipliers encoded in the units column.
# Compute the actual property damage per record:
#   propertydamage = PROPDMG * multiplier encoded by PROPDMGEXP
# Records whose unit code is not one of h/k/m/b keep PROPDMG unchanged.

# Convert a column to numeric values. A factor has to go through
# as.character() first: as.numeric() applied directly to a factor returns the
# internal level codes, not the recorded numbers.
factor_to_numeric <- function(x) {
  if (is.factor(x)) as.numeric(as.character(x)) else as.numeric(x)
}

wdata$PROPDMG <- factor_to_numeric(wdata$PROPDMG)

# Recode the units column (case-insensitive) into numeric multipliers.
# Unrecognised codes become NA, as they did with the previous string-to-number
# coercion, but without triggering a coercion warning.
unit_multiplier <- c(h = 100, k = 1000, m = 1000000, b = 1000000000)
unit_code <- tolower(trimws(as.character(wdata$PROPDMGEXP)))
wdata$PROPDMGEXP <- unname(
  unit_multiplier[match(unit_code, names(unit_multiplier))]
)

# Apply the multiplier row by row (vectorised). The earlier loop ran a single
# iteration and tested the whole column in one `if`, so the unit of the first
# record decided how every record was treated.
damage_scale <- wdata$PROPDMGEXP
damage_scale[is.na(damage_scale)] <- 1
wdata$propertydamage <- wdata$PROPDMG * damage_scale
Now let’s estimate the mean property damage for each event type.
# Mean property damage per event type, highest first; keep the top ten rows.
damage_by_event <- wdata[, c("EVTYPE", "propertydamage")]
propertyDamageSummary <- summarise(
  group_by(damage_by_event, EVTYPE),
  meanPropertyDamage = mean(propertydamage, na.rm = TRUE)
)

# Rank event types from the largest mean damage to the smallest.
damage_rank <- order(
  propertyDamageSummary$meanPropertyDamage,
  decreasing = TRUE
)
propertyDamageSummary <- propertyDamageSummary[damage_rank, ]

topTenProperty <- propertyDamageSummary[seq_len(10), ]
topTenProperty
## Source: local data frame [10 x 2]
##
## EVTYPE meanPropertyDamage
## 102 HURRICANES 6.692e+09
## 84 WINTER STORMS 1.130e+09
## 30 DAMAGING FREEZE 6.110e+08
## 19 WILD FIRES 3.634e+08
## 269 TYPHOON 1.836e+08
## 36 DROUGHT 1.506e+08
## 14 BLIZZARDS 1.152e+08
## 265 TROPICAL STORMS 9.922e+07
## 1 ANY FLOOD 9.880e+07
## 91 HIGH SURF 5.746e+07
library(rCharts)
library(knitr)

# Column chart of the ten event types with the highest mean property damage.
propertyDamagePlot <- hPlot(
  meanPropertyDamage ~ EVTYPE,
  data = topTenProperty,
  type = "column",
  title = "Average Property Damage by Event Type (Top Ten)"
)
propertyDamagePlot$xAxis(
  type = "category",
  title = list(text = "Type of Severe Weather")
)
propertyDamagePlot$yAxis(title = list(text = "Property Damage"))

# Settings for displaying the plot. `comment` is given once (it was passed
# twice before) and FALSE is spelled out, because `F` is an ordinary variable
# that can be reassigned.
opts_chunk$set(comment = NA, results = "asis", tidy = FALSE)

# Display the plot inline in the knitted document.
propertyDamagePlot$show("inline", include_assets = TRUE, cdn = TRUE)
Judging from the sample data we processed, the events most damaging to human health on average are ROUGH SEAS, MARINE ACCIDENT (and MARINE MISHAP), WARM WEATHER, GLAZE, MIXED PRECIP, DUST STORM, ICY ROADS, HIGH SEAS, and TYPHOON. The most damaging events economically are HURRICANES, WINTER STORMS, DAMAGING FREEZE, WILD FIRES, TYPHOON, DROUGHT, BLIZZARDS, TROPICAL STORMS, ANY FLOOD, and HIGH SURF.