Setup

knitr::opts_chunk$set(cache=TRUE)
require(sqldf)
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
require(lattice)
## Loading required package: lattice
# Read in the data. Note: This assumes you've downloaded the data from the repo and unzipped it in your working directory 
setwd("C:\\Users\\aadlandma\\Desktop\\ReproducbleResearch\\StormAssignment")
storm <- read.csv("repdata-data-StormData.csv",stringsAsFactors=F)

Summary

To complete this analysis, I can going to create a table of the deadliest events and display the values in a barplot. I am also going to show the aggregated damage for each event as a barchart using the lattice package.

Preprocessing

There are many different EVTYPEs as a result of typos and non-standardized entries.

length(unique(storm$EVTYPE))
## [1] 985

To deal with the different EVTYPEs, I am going to using SQL UPDATE statements to change the values of the rows using the sqldf package. This will consolidate the EVTYPEs down to only the most prevelant events.

knitr::opts_chunk$set(cache=TRUE)
storm <- sqldf(c('update storm set EVTYPE = "HURRICANE" where EVTYPE like "%CANE%" or EVTYPE like "%TYPHOON%"
                  or EVTYPE like "%cane%"', "select * from main.storm"))
## Loading required package: tcltk
# Thunder Storms (include wind)
storm <- sqldf(c('update storm set EVTYPE = "THUNDERSTORM" where EVTYPE like "%TSTM%"
                  or EVTYPE like "%THUNDER%" or EVTYPE like "%UNDER%" or EVTYPE like "%under%"', "select * from main.storm"))
# Lightining 
storm <- sqldf(c('update storm set EVTYPE = "LIGNTNING" where EVTYPE like "%LIGNTNING%"
                  or EVTYPE like "%LIGHTING%" or EVTYPE like "%LIGHTNING%"
                 ', "select * from main.storm"))
# Excessive Heat 
storm <- sqldf(c('update storm set EVTYPE = "HEAT" where EVTYPE like "%HEAT%"
                  or EVTYPE like "%Heat%" or EVTYPE like "%heat%" or EVTYPE like "%WARM%" or EVTYPE like "%HOT%" or EVTYPE like "%Record temperature%"
                 ', "select * from main.storm"))
# Tornado
storm <- sqldf(c('update storm set EVTYPE = "TORNADO" where EVTYPE like "%TORNADO%"
                  or EVTYPE like "%Tornado%" or EVTYPE like "%tornado%" or EVTYPE like "%TORN%"
                 ', "select * from main.storm"))
# Flash floods
storm <- sqldf(c('update storm set EVTYPE = "FLASH FLOOD" where EVTYPE like "%FLASH%"
                  or EVTYPE like "%Flash%" or EVTYPE like "%flash%"
                 ', "select * from main.storm"))
# Coastal Floods 
storm <- sqldf(c('update storm set EVTYPE = "COASTAL FLOOD" where EVTYPE like "%coastal%"
                  or EVTYPE like "%COASTAL%" or 
            EVTYPE like "%CSTL%" or EVTYPE like "%Coastal%" or EVTYPE like "%TIDAL F%" 
                    or EVTYPE like "%Tidal F%"
                 ', "select * from main.storm"))
# All other Floods 
storm <- sqldf(c('update storm set EVTYPE = "FLOOD" 
                  where (EVTYPE not like "%FLASH%" and EVTYPE not like "%COASTAL%")
            and (EVTYPE like "%FLOOD%" or EVTYPE like "%Flood%" or EVTYPE like "%flood%" or EVTYPE like "%FLD%")', "select * from main.storm"))

# Blizzard
storm <- sqldf(c('update storm set EVTYPE = "BLIZZARD" where EVTYPE like "%LIZZ%"
                  or EVTYPE like "%lizz%" 
                 ', "select * from main.storm"))
# Cold/Windchill
storm <- sqldf(c('update storm set EVTYPE = "COLD" where EVTYPE like "%COLD%"
                  or EVTYPE like "%Cold%" or EVTYPE like "%cold%" or EVTYPE like "%CHILL%" 
                    or EVTYPE like "%chill%" 
                 ', "select * from main.storm"))
# Heavy Rain 
storm <- sqldf(c('update storm set EVTYPE = "RAIN" where EVTYPE like "%RAIN%"
                  or EVTYPE like "%Rain%" or EVTYPE like "%rain%" 
                 ', "select * from main.storm"))
# High Wind 
storm <- sqldf(c('update storm set EVTYPE = "WIND" where EVTYPE like "%WIND%"
                  or EVTYPE like "%Wind%" or EVTYPE like "%wind%" or EVTYPE like "%WND%" 
               ', "select * from main.storm"))
# Heavy Snow 
storm <- sqldf(c('update storm set EVTYPE = "SNOW" where EVTYPE like "%SNOW%"
                  or EVTYPE like "%snow%" or EVTYPE like "%Snow%"  
               ', "select * from main.storm"))

# Debris Flow 
storm <- sqldf(c('update storm set EVTYPE = "DEBRIS" where EVTYPE like "%LANDSLIDE%"
                  or EVTYPE like "%SLIDE%" or EVTYPE like "%slide%"  or EVTYPE like "%Slide%"  
               ', "select * from main.storm"))


# Smoke
storm <- sqldf(c('update storm set EVTYPE = "SMOKE" where EVTYPE like "%SMOKE%"
                  or EVTYPE like "%smoke%" or EVTYPE like "%Smoke%"  
               ', "select * from main.storm"))

# Drought
storm <- sqldf(c('update storm set EVTYPE = "DROUGHT" where EVTYPE like "%DROUGHT%"
                  or EVTYPE like "%drought%" or EVTYPE like "%Drought%"  
               ', "select * from main.storm"))
# Dust Devil 
storm <- sqldf(c('update storm set EVTYPE = "DDEVIL" where EVTYPE like "%DEVIL%"
                  or EVTYPE like "%devil%" or EVTYPE like "%Devil%"  
               ', "select * from main.storm"))
# Dust Storm 
storm <- sqldf(c('update storm set EVTYPE = "DUSTSTORM" where EVTYPE like "%DUST%"
                  or EVTYPE like "%dust%" or EVTYPE like "%DUST%" or EVTYPE like "%DURST%"  
               ', "select * from main.storm"))
# Funnel Cloud 
storm <- sqldf(c('update storm set EVTYPE = "FUNNEL" where EVTYPE like "%FUNNEL%"
                  or EVTYPE like "%funnel%" or EVTYPE like "%Funnel%"  
               ', "select * from main.storm"))
# High Surf
storm <- sqldf(c('update storm set EVTYPE = "HSURF" where EVTYPE like "%SURF%"
                  or EVTYPE like "%surf%" or EVTYPE like "%Surf%"  
               ', "select * from main.storm"))
# Rip Current
storm <- sqldf(c('update storm set EVTYPE = "RIP" where EVTYPE like "%rip%"
                  or EVTYPE like "%RIP%" or EVTYPE like "%Rip%"  
               ', "select * from main.storm"))

# Tropical Depression 
storm <- sqldf(c('update storm set EVTYPE = "TDEPRESSION" where EVTYPE like "%Depression%"
                  or EVTYPE like "%depression%" or EVTYPE like "%DEPRESSION%"  
               ', "select * from main.storm"))
# Tropical Storm 
storm <- sqldf(c('update storm set EVTYPE = "TROPICAL STORM" where EVTYPE like "%TROPICAL%"
                  or EVTYPE like "%tropical%" or EVTYPE like "%Tropical%"  
               ', "select * from main.storm"))

# Volcanic Ash 
storm <- sqldf(c('update storm set EVTYPE = "VOLCANIC" where EVTYPE like "%Volcanic%"
                  or EVTYPE like "%VOLCANIC%" or EVTYPE like "%volcanic%"  
               ', "select * from main.storm"))
# Water Spouts 
storm <- sqldf(c('update storm set EVTYPE = "WATERSPOUT" where EVTYPE like "%SPOUT%"
                  or EVTYPE like "%Spout%" or EVTYPE like "%spout%"  
               ', "select * from main.storm"))
# Wild Fire  
storm <- sqldf(c('update storm set EVTYPE = "WILDFIRE" where EVTYPE like "%FIRE%"
                  or EVTYPE like "%Fire%" or EVTYPE like "%fire%"  
               ', "select * from main.storm"))

# Avalanche  
storm <- sqldf(c('update storm set EVTYPE = "AVALANCHE" where EVTYPE like "%AVAL%"
                  or EVTYPE like "%Aval%" or EVTYPE like "%aval%"  
               ', "select * from main.storm"))


# Sleet  
storm <- sqldf(c('update storm set EVTYPE = "SLEET" where EVTYPE like "%Sleet%"
                  or EVTYPE like "%sleet%" or EVTYPE like "%SLEET%"  
               ', "select * from main.storm"))
# Frost  
storm <- sqldf(c('update storm set EVTYPE = "FROST" where EVTYPE like "%Frost%"
                  or EVTYPE like "%frost%" or EVTYPE like "%FROST%"  or EVTYPE like "%FREEZE%" or EVTYPE like "%Freeze%"
               ', "select * from main.storm"))

# Winter Weather
storm <- sqldf(c('update storm set EVTYPE = "WINWEATHER" where EVTYPE like "%WINTER%"
                  or EVTYPE like "%winter%" or EVTYPE like "%Winter%"  or EVTYPE like "%Wintry%"
                or EVTYPE like "%wintry%" or EVTYPE like "%WINTRY%"
               ', "select * from main.storm"))

# Ice Storm 
storm <- sqldf(c('update storm set EVTYPE = "ICESTORM" where EVTYPE like "%ICE%"
                  or EVTYPE like "%ice%" or EVTYPE like "%Ice%"  
               ', "select * from main.storm"))
# Storm Surge 
storm <- sqldf(c('update storm set EVTYPE = "STORM SURGE" where EVTYPE like "%SURGE%"
                  or EVTYPE like "%surge%" or EVTYPE like "%Surge%"  
               ', "select * from main.storm"))

# Create a new data frame with the damage, fatality, and injury fields aggregated by EVTYPE Limit by the top 50 most prevelant events 
stormAgg  <- sqldf("Select EVTYPE, sum(FATALITIES) as DeathCount,avg(FATALITIES) as AvgDeath,
                    sum(INJURIES) as InjuryCount, avg(INJURIES) as AvgInjury, sum(PROPDMG) as PropDamage,
                    avg(PROPDMG) as AvgPropDamage, sum(CROPDMG) as CropDamage, avg(CROPDMG) as AvgCropDamage,
                    count(*) as EVCount from storm group by EVTYPE order by EVCount desc limit 50;") 

stormState  <- sqldf("Select EVTYPE, sum(FATALITIES) as DeathCount,avg(FATALITIES) as AvgDeath,
                    sum(INJURIES) as InjuryCount, avg(INJURIES) as AvgInjury, sum(PROPDMG) as PropDamage,
                    avg(PROPDMG) as AvgPropDamage, sum(CROPDMG) as CropDamage, avg(CROPDMG) as AvgCropDamage,
                    count(*) as EVCount,STATE__,STATE from storm group by EVTYPE, STATE__ order by STATE__,InjuryCount;") 

stormDamage <- sqldf("SELECT EVTYPE,PropDamage,CropDamage from stormAgg order by PropDamage desc limit 5")

Analysis

Now that we have the cleaned the data to the most prevelant events, we can plot the most deadly and damaging event types.

# Display the rows for the five most fatal events 
head(stormAgg[order(-stormAgg$DeathCount,-stormAgg$InjuryCount),][1:5],5)
##          EVTYPE DeathCount    AvgDeath InjuryCount  AvgInjury
## 3       TORNADO       5636 0.092850082       91407 1.50588138
## 14         HEAT       3178 1.054412741        9243 3.06668879
## 4   FLASH FLOOD       1035 0.018590031        1802 0.03236641
## 9     LIGNTNING        817 0.051813800        5231 0.33174784
## 1  THUNDERSTORM        756 0.002244529        9545 0.02833866

Create a Barplot with the top ten most fatal events in the Dataset

barplot(sort(stormAgg$DeathCount,decreasing=T)[1:5],names.arg=c("Tornado","Heat","Flash Flood","Lightning","Thunderstorm"),main="Top 5 Most Fatal Weather Events",xlab="Event Type",ylab="Death Count")

Create a barchart with the most damaging events

# Display the rows for the five most fatal events 
barchart(PropDamage+CropDamage~EVTYPE,data=stormDamage)

## Conclusion It appears that across the United States, Tornados are the most deadly and damaging weather events. If you were a response planner in an area affected by Tornados, you may consider improving your early warning capabilities to warn citizens of incoming Tornados.

This initial analysis shows only the aggregated values across the United States. Future work will show the most fatal and damaging weather events by state and country. This would allow planners to focus on the most relevant weather event in their area.