Synopsis

This document analyzes the impact caused by storms and other severe weather conditions. It includes all the code and charts created for the analysis. The analysis document will be published on RPubs.com. The scope of the analysis is limited to:
  • Evaluation of the most harmful event type with respect to population health in the United States
  • Determination of the event type that has the greatest economic impact

Data Processing Section

Download the file repdata-data-StormData.csv.bz2 from the URL https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. Unzip it into repdata-data-StormData.csv and save that file in the R working directory.

Use the code below to read this file into R. Remember to cache this piece of code.

# Load the raw storm data from the unzipped CSV in the working directory.
# Argument names are spelled out in full (`na.strings`, not the partially
# matched `na.string`) and the logical is written as TRUE rather than the
# reassignable shorthand T.
StormData <- read.csv(
  "repdata-data-StormData.csv",
  header = TRUE,
  sep = ",",
  na.strings = "NA"
)

Subset the required columns EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP. Also, convert the exponent values K, M and B to their numeric equivalents: thousand, million and billion respectively. Ignore any other values and replace them with zeroes.

require(reshape2)
require(ggplot2)

# Keep only the event type, the health-impact counts and the damage
# amount/exponent columns used by the rest of the analysis.
DamageData <- StormData[
  ,
  c("EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"),
  drop = FALSE
]

# Hold the text-like columns as plain character vectors so they can be
# pattern-matched and overwritten later.
DamageData[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  DamageData[c("EVTYPE", "PROPDMGEXP", "CROPDMGEXP")],
  as.character
)

# Translate damage-exponent codes into numeric multipliers.
#
# codes: vector of exponent codes (character or factor).
# Returns a numeric vector the same length as `codes`: 1e3 where the code
# contains "K", 1e6 for "M", 1e9 for "B" (case-insensitive), and 0 for every
# other value, including NA.
exp_to_multiplier <- function(codes) {
  codes <- as.character(codes)
  multiplier <- numeric(length(codes))  # every entry starts at 0
  # Assign from lowest to highest precedence: if a code matched more than one
  # letter, K overrides M, which overrides B.
  multiplier[grepl("B", codes, ignore.case = TRUE)] <- 1e9
  multiplier[grepl("M", codes, ignore.case = TRUE)] <- 1e6
  multiplier[grepl("K", codes, ignore.case = TRUE)] <- 1e3
  multiplier
}

# Replace the exponent codes with their multipliers. The columns become
# numeric, so no string-vs-number comparison is needed to zero out the
# unrecognised codes.
DamageData$PROPDMGEXP <- exp_to_multiplier(DamageData$PROPDMGEXP)
DamageData$CROPDMGEXP <- exp_to_multiplier(DamageData$CROPDMGEXP)

After K, M and B are replaced by their respective numerical equivalents, add new columns holding the total damage, calculated by multiplying each damage amount by its exponent value. Then summarize the data using the melt and dcast functions to consolidate the damage amounts by event type.

# Total damage = reported amount scaled by its exponent multiplier.
DamageData[["PROPDMGTOTAL"]] <- with(DamageData, PROPDMG * as.numeric(PROPDMGEXP))
DamageData[["CROPDMGTOTAL"]] <- with(DamageData, CROPDMG * as.numeric(CROPDMGEXP))

# Reshape to long form (one row per event type / measure), then sum each
# measure per event type.
MeltData <- melt(
  DamageData,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "PROPDMGTOTAL", "CROPDMGTOTAL")
)
DataSum <- dcast(MeltData, EVTYPE ~ variable, fun.aggregate = sum)

Remove records with no damages (economic or otherwise)

# Keep only event types with some recorded impact: the combined fatalities,
# injuries, property damage and crop damage must be above zero.
has_impact <- with(
  DataSum,
  FATALITIES + INJURIES + PROPDMGTOTAL + CROPDMGTOTAL > 0
)
DataSum <- DataSum[has_impact, ]

The EVTYPE column does not follow a standard naming convention. Analyze its values and use the grep function to consolidate them into a small set of standard categories.

# Map free-form EVTYPE names onto a small set of standard categories.
#
# evtype: character vector of raw event type names.
# Returns a character vector of the same length with matched entries replaced
# by their category label; unmatched entries are returned unchanged.
#
# Each rule is c(pattern, label). Rules are applied in order using
# case-insensitive substring matching, and a replaced entry is then tested
# against the remaining rules by its new label, so ORDER MATTERS: a narrow
# rule must come before any broader rule that would otherwise claim the entry.
standardize_evtype <- function(evtype) {
  rules <- list(
    c("beach ero|Erosion|cstl", "Coastal Erosion"),
    c("BURST", "Storm"),
    c("Mix|Sleet|Freezing R", "Wintry Mix"),
    # Must precede the generic "Ice" rule, which would label it as cold weather.
    c("Black Ice", "Icy Conditions"),
    c("Ice|Cold|Chil|Freez|thermia", "Severe Cold Weather"),
    # Must precede the "temp" rule, which would label high temperatures as cold.
    c("High Te", "Extreme Hot and Dry"),
    c("cool|temp|Light Snow", "Severe Cold Weather"),
    c("Winter|Blizz|Heavy snow|Snow", "Blizzard"),
    c("Icy|Black Ice|Glaze", "Icy Conditions"),
    # NOTE(review): this rule also claims names containing "Storm" together
    # with "Surge", "Hail" or "Dust" before the later rules for those words
    # run - confirm that this is the intended classification.
    c("Storm", "Storm"),
    c("Whirl|spout|Funnel|nado", "Tornado"),
    c("Tropical Dep|ndao", "Tornado"),
    c("Wind|Hurri|Typh|TSTM|wnd|Severe Turb", "Storm"),
    c("Hail", "Hail/Hailstorm"),
    c("Rain|Flood|Wet|Precip|High Wat", "Heavy Rain/Flash Flood"),
    c("Urban|Flash|Rising Wa|Heavy Sh", "Heavy Rain/Flash Flood"),
    # Must precede the "Wave" rule, which would file heat waves under surf.
    c("Heat Wave", "Extreme Hot and Dry"),
    c("Tide|Surf|Wave|Swell|High Sea|Seiche", "Tide/Surf/Wave"),
    c("Surge|Current|Rough Sea|Heavy Seas", "Tide/Surf/Wave"),
    c("Light|Lignt", "Lightning"),
    c("Fire", "Fire/Wild Fire"),
    c("Drought", "Drought"),
    c("Heat|Hot|DRY|Warm|Driest|High Te", "Extreme Hot and Dry"),
    c("Slide|Slump", "Landslide"),
    c("Volcan", "Volcanic Ash"),
    c("Smoke|Dust|Fog", "Smoke/Dust/Fog"),
    c("Avalanc", "Avalanche"),
    c("Frost", "Frost"),
    c("Marine", "Marine Accidents"),
    c("other|\\?|Apache|Drowning|Dam break|High", "Other"),
    c("Tsunami", "Tsunami")
  )
  for (rule in rules) {
    evtype[grepl(rule[1], evtype, ignore.case = TRUE)] <- rule[2]
  }
  evtype
}

DataSum$EVTYPE <- standardize_evtype(DataSum$EVTYPE)

Summarize data again after cleaning up Event type. Data thus obtained will be clean and ready for plotting and analysis

# Several raw names now share one category label, so aggregate once more to
# get a single row per cleaned event type.
MeltData <- melt(
  DataSum,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "PROPDMGTOTAL", "CROPDMGTOTAL")
)
SummarizedData <- dcast(MeltData, EVTYPE ~ variable, fun.aggregate = sum)

# Total economic damage is property damage plus crop damage.
SummarizedData[["TotalEcoDmg"]] <- with(SummarizedData, PROPDMGTOTAL + CROPDMGTOTAL)

Result Section

Generate plots for Weather Event vs Population Health Impact

# Bar chart of total fatalities per event category, each bar labelled with
# its count. Built from explicit ggplot() layers because qplot() ignores its
# deprecated `stat` argument, so the bar layer itself must request
# stat = "identity" to plot the supplied y values. The legend is removed with
# the valid value "none".
ggplot(
  SummarizedData,
  aes(x = EVTYPE, y = FATALITIES, fill = EVTYPE, label = FATALITIES)
) +
  geom_bar(stat = "identity") +
  geom_text() +
  xlab("Weather Event") +
  ylab("# of Fatalities") +
  ggtitle("US - Weather Event v/s Fatalities (1950-2011)") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.title = element_blank(),
    legend.position = "none"
  )

# Bar chart of total injuries per event category, each bar labelled with its
# count. Built from explicit ggplot() layers because qplot() ignores its
# deprecated `stat` argument, so the bar layer itself must request
# stat = "identity" to plot the supplied y values. The legend is removed with
# the valid value "none".
ggplot(
  SummarizedData,
  aes(x = EVTYPE, y = INJURIES, fill = EVTYPE, label = INJURIES)
) +
  geom_bar(stat = "identity") +
  geom_text() +
  xlab("Weather Event") +
  ylab("# of Injuries") +
  ggtitle("US - Weather Event v/s Injuries (1950-2011)") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.title = element_blank(),
    legend.position = "none"
  )

Inference - It can be clearly seen from the above two graphs that tornadoes are responsible for the most injuries and fatalities.

Generate plots for Weather Event vs Economic (Property + Crop damage) Impact

# Bar chart of total economic damage (property + crop) per event category in
# millions of USD, each bar labelled with its value rounded to one decimal.
# Built from explicit ggplot() layers because qplot() ignores its deprecated
# `stat` argument, so the bar layer itself must request stat = "identity" to
# plot the supplied y values. The legend is removed with the valid value
# "none".
ggplot(
  SummarizedData,
  aes(
    x = EVTYPE,
    y = TotalEcoDmg / 1000000,
    fill = EVTYPE,
    label = round(TotalEcoDmg / 1000000, digits = 1)
  )
) +
  geom_bar(stat = "identity") +
  geom_text() +
  xlab("Weather Event") +
  ylab("Damage in Million USD") +
  ggtitle("US - Weather Event v/s Economic Damage (1950-2011)") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.title = element_blank(),
    legend.position = "none"
  )

Inference - It can be seen that out of all extreme weather conditions, Heavy Rains/Flash Floods along with Storms cause the most damage, with storms causing marginally more damage than the former.