knitr::opts_chunk$set(cache=TRUE)
require(sqldf)
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
require(lattice)
## Loading required package: lattice
# Read in the data. Note: This assumes you've downloaded the data from the repo and unzipped it in your working directory
setwd("C:\\Users\\aadlandma\\Desktop\\ReproducbleResearch\\StormAssignment")
storm <- read.csv("repdata-data-StormData.csv",stringsAsFactors=F)
To complete this analysis, I can going to create a table of the deadliest events and display the values in a barplot. I am also going to show the aggregated damage for each event as a barchart using the lattice package.
There are many different EVTYPEs as a result of typos and non-standardized entries.
length(unique(storm$EVTYPE))
## [1] 985
To deal with the different EVTYPEs, I am going to using SQL UPDATE statements to change the values of the rows using the sqldf package. This will consolidate the EVTYPEs down to only the most prevelant events.
knitr::opts_chunk$set(cache=TRUE)
storm <- sqldf(c('update storm set EVTYPE = "HURRICANE" where EVTYPE like "%CANE%" or EVTYPE like "%TYPHOON%"
or EVTYPE like "%cane%"', "select * from main.storm"))
## Loading required package: tcltk
# Thunder Storms (include wind)
storm <- sqldf(c('update storm set EVTYPE = "THUNDERSTORM" where EVTYPE like "%TSTM%"
or EVTYPE like "%THUNDER%" or EVTYPE like "%UNDER%" or EVTYPE like "%under%"', "select * from main.storm"))
# Lightining
storm <- sqldf(c('update storm set EVTYPE = "LIGNTNING" where EVTYPE like "%LIGNTNING%"
or EVTYPE like "%LIGHTING%" or EVTYPE like "%LIGHTNING%"
', "select * from main.storm"))
# Excessive Heat
storm <- sqldf(c('update storm set EVTYPE = "HEAT" where EVTYPE like "%HEAT%"
or EVTYPE like "%Heat%" or EVTYPE like "%heat%" or EVTYPE like "%WARM%" or EVTYPE like "%HOT%" or EVTYPE like "%Record temperature%"
', "select * from main.storm"))
# Tornado
storm <- sqldf(c('update storm set EVTYPE = "TORNADO" where EVTYPE like "%TORNADO%"
or EVTYPE like "%Tornado%" or EVTYPE like "%tornado%" or EVTYPE like "%TORN%"
', "select * from main.storm"))
# Flash floods
storm <- sqldf(c('update storm set EVTYPE = "FLASH FLOOD" where EVTYPE like "%FLASH%"
or EVTYPE like "%Flash%" or EVTYPE like "%flash%"
', "select * from main.storm"))
# Coastal Floods
storm <- sqldf(c('update storm set EVTYPE = "COASTAL FLOOD" where EVTYPE like "%coastal%"
or EVTYPE like "%COASTAL%" or
EVTYPE like "%CSTL%" or EVTYPE like "%Coastal%" or EVTYPE like "%TIDAL F%"
or EVTYPE like "%Tidal F%"
', "select * from main.storm"))
# All other Floods
storm <- sqldf(c('update storm set EVTYPE = "FLOOD"
where (EVTYPE not like "%FLASH%" and EVTYPE not like "%COASTAL%")
and (EVTYPE like "%FLOOD%" or EVTYPE like "%Flood%" or EVTYPE like "%flood%" or EVTYPE like "%FLD%")', "select * from main.storm"))
# Blizzard
storm <- sqldf(c('update storm set EVTYPE = "BLIZZARD" where EVTYPE like "%LIZZ%"
or EVTYPE like "%lizz%"
', "select * from main.storm"))
# Cold/Windchill
storm <- sqldf(c('update storm set EVTYPE = "COLD" where EVTYPE like "%COLD%"
or EVTYPE like "%Cold%" or EVTYPE like "%cold%" or EVTYPE like "%CHILL%"
or EVTYPE like "%chill%"
', "select * from main.storm"))
# Heavy Rain
storm <- sqldf(c('update storm set EVTYPE = "RAIN" where EVTYPE like "%RAIN%"
or EVTYPE like "%Rain%" or EVTYPE like "%rain%"
', "select * from main.storm"))
# High Wind
storm <- sqldf(c('update storm set EVTYPE = "WIND" where EVTYPE like "%WIND%"
or EVTYPE like "%Wind%" or EVTYPE like "%wind%" or EVTYPE like "%WND%"
', "select * from main.storm"))
# Heavy Snow
storm <- sqldf(c('update storm set EVTYPE = "SNOW" where EVTYPE like "%SNOW%"
or EVTYPE like "%snow%" or EVTYPE like "%Snow%"
', "select * from main.storm"))
# Debris Flow
storm <- sqldf(c('update storm set EVTYPE = "DEBRIS" where EVTYPE like "%LANDSLIDE%"
or EVTYPE like "%SLIDE%" or EVTYPE like "%slide%" or EVTYPE like "%Slide%"
', "select * from main.storm"))
# Smoke
storm <- sqldf(c('update storm set EVTYPE = "SMOKE" where EVTYPE like "%SMOKE%"
or EVTYPE like "%smoke%" or EVTYPE like "%Smoke%"
', "select * from main.storm"))
# Drought
storm <- sqldf(c('update storm set EVTYPE = "DROUGHT" where EVTYPE like "%DROUGHT%"
or EVTYPE like "%drought%" or EVTYPE like "%Drought%"
', "select * from main.storm"))
# Dust Devil
storm <- sqldf(c('update storm set EVTYPE = "DDEVIL" where EVTYPE like "%DEVIL%"
or EVTYPE like "%devil%" or EVTYPE like "%Devil%"
', "select * from main.storm"))
# Dust Storm
storm <- sqldf(c('update storm set EVTYPE = "DUSTSTORM" where EVTYPE like "%DUST%"
or EVTYPE like "%dust%" or EVTYPE like "%DUST%" or EVTYPE like "%DURST%"
', "select * from main.storm"))
# Funnel Cloud
storm <- sqldf(c('update storm set EVTYPE = "FUNNEL" where EVTYPE like "%FUNNEL%"
or EVTYPE like "%funnel%" or EVTYPE like "%Funnel%"
', "select * from main.storm"))
# High Surf
storm <- sqldf(c('update storm set EVTYPE = "HSURF" where EVTYPE like "%SURF%"
or EVTYPE like "%surf%" or EVTYPE like "%Surf%"
', "select * from main.storm"))
# Rip Current
storm <- sqldf(c('update storm set EVTYPE = "RIP" where EVTYPE like "%rip%"
or EVTYPE like "%RIP%" or EVTYPE like "%Rip%"
', "select * from main.storm"))
# Tropical Depression
storm <- sqldf(c('update storm set EVTYPE = "TDEPRESSION" where EVTYPE like "%Depression%"
or EVTYPE like "%depression%" or EVTYPE like "%DEPRESSION%"
', "select * from main.storm"))
# Tropical Storm
storm <- sqldf(c('update storm set EVTYPE = "TROPICAL STORM" where EVTYPE like "%TROPICAL%"
or EVTYPE like "%tropical%" or EVTYPE like "%Tropical%"
', "select * from main.storm"))
# Volcanic Ash
storm <- sqldf(c('update storm set EVTYPE = "VOLCANIC" where EVTYPE like "%Volcanic%"
or EVTYPE like "%VOLCANIC%" or EVTYPE like "%volcanic%"
', "select * from main.storm"))
# Water Spouts
storm <- sqldf(c('update storm set EVTYPE = "WATERSPOUT" where EVTYPE like "%SPOUT%"
or EVTYPE like "%Spout%" or EVTYPE like "%spout%"
', "select * from main.storm"))
# Wild Fire
storm <- sqldf(c('update storm set EVTYPE = "WILDFIRE" where EVTYPE like "%FIRE%"
or EVTYPE like "%Fire%" or EVTYPE like "%fire%"
', "select * from main.storm"))
# Avalanche
storm <- sqldf(c('update storm set EVTYPE = "AVALANCHE" where EVTYPE like "%AVAL%"
or EVTYPE like "%Aval%" or EVTYPE like "%aval%"
', "select * from main.storm"))
# Sleet
storm <- sqldf(c('update storm set EVTYPE = "SLEET" where EVTYPE like "%Sleet%"
or EVTYPE like "%sleet%" or EVTYPE like "%SLEET%"
', "select * from main.storm"))
# Frost
storm <- sqldf(c('update storm set EVTYPE = "FROST" where EVTYPE like "%Frost%"
or EVTYPE like "%frost%" or EVTYPE like "%FROST%" or EVTYPE like "%FREEZE%" or EVTYPE like "%Freeze%"
', "select * from main.storm"))
# Winter Weather
storm <- sqldf(c('update storm set EVTYPE = "WINWEATHER" where EVTYPE like "%WINTER%"
or EVTYPE like "%winter%" or EVTYPE like "%Winter%" or EVTYPE like "%Wintry%"
or EVTYPE like "%wintry%" or EVTYPE like "%WINTRY%"
', "select * from main.storm"))
# Ice Storm
storm <- sqldf(c('update storm set EVTYPE = "ICESTORM" where EVTYPE like "%ICE%"
or EVTYPE like "%ice%" or EVTYPE like "%Ice%"
', "select * from main.storm"))
# Storm Surge
storm <- sqldf(c('update storm set EVTYPE = "STORM SURGE" where EVTYPE like "%SURGE%"
or EVTYPE like "%surge%" or EVTYPE like "%Surge%"
', "select * from main.storm"))
# Create a new data frame with the damage, fatality, and injury fields aggregated by EVTYPE Limit by the top 50 most prevelant events
stormAgg <- sqldf("Select EVTYPE, sum(FATALITIES) as DeathCount,avg(FATALITIES) as AvgDeath,
sum(INJURIES) as InjuryCount, avg(INJURIES) as AvgInjury, sum(PROPDMG) as PropDamage,
avg(PROPDMG) as AvgPropDamage, sum(CROPDMG) as CropDamage, avg(CROPDMG) as AvgCropDamage,
count(*) as EVCount from storm group by EVTYPE order by EVCount desc limit 50;")
stormState <- sqldf("Select EVTYPE, sum(FATALITIES) as DeathCount,avg(FATALITIES) as AvgDeath,
sum(INJURIES) as InjuryCount, avg(INJURIES) as AvgInjury, sum(PROPDMG) as PropDamage,
avg(PROPDMG) as AvgPropDamage, sum(CROPDMG) as CropDamage, avg(CROPDMG) as AvgCropDamage,
count(*) as EVCount,STATE__,STATE from storm group by EVTYPE, STATE__ order by STATE__,InjuryCount;")
stormDamage <- sqldf("SELECT EVTYPE,PropDamage,CropDamage from stormAgg order by PropDamage desc limit 5")
Now that we have the cleaned the data to the most prevelant events, we can plot the most deadly and damaging event types.
# Display the rows for the five most fatal events
head(stormAgg[order(-stormAgg$DeathCount,-stormAgg$InjuryCount),][1:5],5)
## EVTYPE DeathCount AvgDeath InjuryCount AvgInjury
## 3 TORNADO 5636 0.092850082 91407 1.50588138
## 14 HEAT 3178 1.054412741 9243 3.06668879
## 4 FLASH FLOOD 1035 0.018590031 1802 0.03236641
## 9 LIGNTNING 817 0.051813800 5231 0.33174784
## 1 THUNDERSTORM 756 0.002244529 9545 0.02833866
Create a Barplot with the top ten most fatal events in the Dataset
barplot(sort(stormAgg$DeathCount,decreasing=T)[1:5],names.arg=c("Tornado","Heat","Flash Flood","Lightning","Thunderstorm"),main="Top 5 Most Fatal Weather Events",xlab="Event Type",ylab="Death Count")
Create a barchart with the most damaging events
# Display the rows for the five most fatal events
barchart(PropDamage+CropDamage~EVTYPE,data=stormDamage)
## Conclusion It appears that across the United States, Tornados are the most deadly and damaging weather events. If you were a response planner in an area affected by Tornados, you may consider improving your early warning capabilities to warn citizens of incoming Tornados.
This initial analysis shows only the aggregated values across the United States. Future work will show the most fatal and damaging weather events by state and country. This would allow planners to focus on the most relevant weather event in their area.