Download the file repdata-data-StormData.csv.bz2 from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2. Unzip it to repdata-data-StormData.csv and save it in the R working directory.
Use the piece of code below to read this file into R. Remember to cache this piece of code.
# Read the unzipped storm data CSV from the working directory.
# Only the literal string "NA" is treated as missing; the argument is spelled
# out as `na.strings` rather than relying on partial argument matching.
StormData <- read.csv(
  "repdata-data-StormData.csv",
  header = TRUE,
  sep = ",",
  na.strings = "NA"
)
Subset the required columns EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP. Also, convert the exp values K, M and B to their numeric equivalents: thousand, million and billion. Ignore any other values and replace them with zeroes.
require(reshape2)
require(ggplot2)
# Keep only the columns needed for the health and economic impact analysis.
DamageData <- subset(
  StormData,
  select = c(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
)
DamageData$EVTYPE <- as.character(DamageData$EVTYPE)

# Translate damage exponent codes into numeric multipliers.
#   code: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length: 1e3 for K/k, 1e6 for M/m,
# 1e9 for B/b, and 0 for every other code (including blank and NA), so that
# damage amounts with an unrecognised exponent are ignored.
ExponentMultiplier <- function(code) {
  Multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  Position <- match(toupper(as.character(code)), names(Multipliers))
  Result <- unname(Multipliers[Position])
  Result[is.na(Result)] <- 0
  Result
}

# The exponent columns become numeric multipliers; doing the lookup in one
# place avoids comparing character values against numbers, which only works
# through implicit string coercion (e.g. 1000000 becoming "1e+06").
DamageData$PROPDMGEXP <- ExponentMultiplier(DamageData$PROPDMGEXP)
DamageData$CROPDMGEXP <- ExponentMultiplier(DamageData$CROPDMGEXP)
After K, M and B are replaced by their respective numerical equivalents, add new columns holding the full damage amounts (damage multiplied by the exp value), and then summarize the data using the melt and dcast functions to consolidate the damage amounts by event type.
# Full damage amount = reported amount multiplied by its exponent multiplier.
# as.numeric() is kept so this works whether the exponent columns are stored
# as character or as numeric.
DamageData$PROPDMGTOTAL <- DamageData$PROPDMG * as.numeric(DamageData$PROPDMGEXP)
DamageData$CROPDMGTOTAL <- DamageData$CROPDMG * as.numeric(DamageData$CROPDMGEXP)

# Sum the four impact measures per event type. Columns are referenced by name
# rather than by position so the result does not depend on column order.
ImpactColumns <- c("FATALITIES", "INJURIES", "PROPDMGTOTAL", "CROPDMGTOTAL")
MeltData <- melt(DamageData, id.vars = "EVTYPE", measure.vars = ImpactColumns)
DataSum <- dcast(MeltData, EVTYPE ~ variable, fun.aggregate = sum, value.var = "value")
Remove records with no damages (economic or otherwise)
# Keep only the event types that caused at least some harm: a fatality, an
# injury, or any property or crop damage.
TotalImpact <- with(DataSum, FATALITIES + INJURIES + PROPDMGTOTAL + CROPDMGTOTAL)
HasImpact <- TotalImpact > 0
DataSum <- DataSum[HasImpact, ]
The EVTYPE column does not follow a standard naming convention. Analyze it and use the grep function to clean it up.
# Collapse the free-text EVTYPE values into a small set of categories.
#
# Each rule is a case-insensitive regular expression plus the category it maps
# to. Rules are applied top to bottom and overwrite EVTYPE where they match,
# so for an event name that matches several rules the earliest rule decides.
# The order therefore matters; rows that must stay ahead of another rule are
# commented.
#
# NOTE(review): "HAILSTORM" and "STORM SURGE" are captured by the generic
# "Storm" rule before the Hail and Surge rules are reached - confirm that this
# is the intended grouping.
EventRules <- matrix(
  c(
    "beach ero|Erosion|cstl",               "Coastal Erosion",
    "BURST",                                "Storm",
    "Mix|Sleet|Freezing R",                 "Wintry Mix",
    # Must precede the generic "Ice" rule, which would otherwise file black
    # ice under cold weather.
    "Black Ice",                            "Icy Conditions",
    "Ice|Cold|Chil|Freez|thermia",          "Severe Cold Weather",
    # Must precede the "temp" rule, which would otherwise file high
    # temperature events under cold weather.
    "High Te",                              "Extreme Hot and Dry",
    "cool|temp|Light Snow",                 "Severe Cold Weather",
    "Winter|Blizz|Heavy snow|Snow",         "Blizzard",
    "Icy|Black Ice|Glaze",                  "Icy Conditions",
    "Storm",                                "Storm",
    "Whirl|spout|Funnel|nado",              "Tornado",
    "Tropical Dep|ndao",                    "Tornado",
    "Wind|Hurri|Typh|TSTM|wnd|Severe Turb", "Storm",
    "Hail",                                 "Hail/Hailstorm",
    "Rain|Flood|Wet|Precip|High Wat",       "Heavy Rain/Flash Flood",
    "Urban|Flash|Rising Wa|Heavy Sh",       "Heavy Rain/Flash Flood",
    # Drought and heat must precede the Tide/Surf/Wave rule: its "Wave"
    # alternative would otherwise capture "HEAT WAVE".
    "Drought",                              "Drought",
    "Heat|Hot|DRY|Warm|Driest|High Te",     "Extreme Hot and Dry",
    "Tide|Surf|Wave|Swell|High Sea|Seiche", "Tide/Surf/Wave",
    "Surge|Current|Rough Sea|Heavy Seas",   "Tide/Surf/Wave",
    "Light|Lignt",                          "Lightning",
    "Fire",                                 "Fire/Wild Fire",
    "Slide|Slump",                          "Landslide",
    "Volcan",                               "Volcanic Ash",
    "Smoke|Dust|Fog",                       "Smoke/Dust/Fog",
    "Avalanc",                              "Avalanche",
    "Frost",                                "Frost",
    "Marine",                               "Marine Accidents",
    "other|\\?|Apache|Drowning|Dam break|High", "Other",
    "Tsunami",                              "Tsunami"
  ),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(NULL, c("pattern", "label"))
)

# Work on character values so that assigning a new category name can never
# fail on a missing factor level.
DataSum$EVTYPE <- as.character(DataSum$EVTYPE)
for (RuleIndex in seq_len(nrow(EventRules))) {
  Matches <- grepl(EventRules[RuleIndex, "pattern"], DataSum$EVTYPE, ignore.case = TRUE)
  DataSum$EVTYPE[Matches] <- EventRules[RuleIndex, "label"]
}
Summarize the data again after cleaning up the event types. The data thus obtained will be clean and ready for plotting and analysis.
# Several raw event names now share one cleaned-up category, so sum the four
# impact measures again per category. Columns are referenced by name rather
# than by position so the result does not depend on column order.
MeltData <- melt(
  DataSum,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "PROPDMGTOTAL", "CROPDMGTOTAL")
)
SummarizedData <- dcast(MeltData, EVTYPE ~ variable, fun.aggregate = sum, value.var = "value")

# Total economic damage is property damage plus crop damage.
SummarizedData$TotalEcoDmg <- SummarizedData$PROPDMGTOTAL + SummarizedData$CROPDMGTOTAL
Generate plots for Weather Event vs Population Health Impact
# Bar chart of total fatalities per event category, with the count printed on
# each bar. Built from ggplot() layers because newer ggplot2 releases
# deprecate qplot() and its `stat` argument; the legend is hidden since the
# fill colour just repeats the x axis.
ggplot(SummarizedData, aes(x = EVTYPE, y = FATALITIES, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = FATALITIES)) +
  labs(
    x = "Weather Event",
    y = "# of Fatalities",
    title = "US - Weather Event v/s Fatalities (1950-2011)"
  ) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")

# Same chart for total injuries per event category.
ggplot(SummarizedData, aes(x = EVTYPE, y = INJURIES, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = INJURIES)) +
  labs(
    x = "Weather Event",
    y = "# of Injuries",
    title = "US - Weather Event v/s Injuries (1950-2011)"
  ) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")
Generate plots for Weather Event vs Economic (Property + Crop damage) Impact
# Bar chart of total economic damage (property + crop) per event category, in
# millions of USD, with the rounded amount printed on each bar. Built from
# ggplot() layers because newer ggplot2 releases deprecate qplot() and its
# `stat` argument; the legend is hidden since the fill colour just repeats the
# x axis.
ggplot(SummarizedData, aes(x = EVTYPE, y = TotalEcoDmg / 1000000, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = round(TotalEcoDmg / 1000000, digits = 1))) +
  labs(
    x = "Weather Event",
    y = "Damage in Million USD",
    title = "US - Weather Event v/s Economic Damage (1950-2011)"
  ) +
  theme(axis.text.x = element_text(angle = 90), legend.position = "none")