This markdown explains how the writer performed a data analysis to identify the most harmful events in terms of public health and economic damage. Harm to public health was measured by the mean number of casualties (fatalities plus injuries) per record for each event type. Economic harm was measured by the mean combined property and crop damage per record for each event type. The analysis indicates that the top 5 most harmful events for public health were 1. Heat Wave, 2. Tropical Storm Gordon, 3. Wild Fires, 4. ThunderStorm, and 5. Tornadoes, Tsunami, Wind, Hail. The top 5 most economically impactful events were 1. Tornadoes, Tsunami, Wind, Hail, 2. Hurricane/Typhoon, 3. Hurricane/Opal, 4. River Flood, and 5. Hurricane.
# Read the raw storm data; read.csv() decompresses the .bz2 file transparently.
# Strings are kept as character vectors (not factors) so that event types and
# damage-unit codes can be compared and transformed as plain text later on.
raw <- read.csv("./repdata%2Fdata%2FStormData.csv.bz2", stringsAsFactors = FALSE)
This code chunk processes the health-related data.
# Get the top 5 event types with the highest mean casualties per record
# Keep only the event type and the fatality/injury counts
health <- raw[, c("EVTYPE", "FATALITIES", "INJURIES")]
# Casualties per record = fatalities + injuries
health$combined <- health$FATALITIES + health$INJURIES
# Mean casualties per record for each event type (NA values are ignored)
meancombined <- with(
  health,
  aggregate(combined, list(Event = EVTYPE), mean, na.rm = TRUE)
)
names(meancombined) <- c("Event", "Combined")
# Rank event types by mean casualties and keep the top 5. head() returns fewer
# rows when there are fewer than 5 event types, whereas indexing with [1:5]
# would pad the result with all-NA rows.
topmeancombined <- head(
  meancombined[order(meancombined$Combined, decreasing = TRUE), ],
  5
)
This code chunk processes the data related to economic impact.
# Get the top 5 most economically impactful events
# unitconvert() maps damage-unit codes to the numeric multiplier used for the
# property/crop damage amounts.
#
# Args:
#   x: A unit code, or a vector of unit codes. Matching is case-insensitive:
#      "H" = hundreds, "K" = thousands, "M" = millions, "B" = billions.
#
# Returns:
#   An unnamed numeric vector with one multiplier per element of x. Any code
#   that is not recognised (including NA) falls back to 100, which is the
#   catch-all value this function has always returned.
unitconvert <- function(x) {
  multipliers <- c(H = 100, K = 1000, M = 1000000, B = 1000000000)
  # Upper-case first so that "k"/"m"/"b"/"h" are treated like their capitals
  position <- match(toupper(as.character(x)), names(multipliers))
  result <- unname(multipliers[position])
  # Unmatched codes yield NA from match(); keep the historical fallback of 100
  result[is.na(result)] <- 100
  result
}
# Assign the event type and the property/crop damage columns to econ
econ <- raw[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
# Keep only rows whose property AND crop unit codes are both recognised.
# NOTE(review): a row with a valid property unit but a blank/unrecognised crop
# unit (or vice versa) is dropped entirely - confirm this is intended.
validunits <- c("K", "M", "B", "m", "h", "H")
econ <- econ[
  econ$PROPDMGEXP %in% validunits & econ$CROPDMGEXP %in% validunits,
]
# Get the multiplier for each damage column using unitconvert(). vapply()
# always returns a numeric vector, even for zero rows, where sapply() would
# return an empty list and break the multiplication below.
econ$propmultiplier <- vapply(
  econ$PROPDMGEXP, unitconvert, numeric(1), USE.NAMES = FALSE
)
econ$cropmultiplier <- vapply(
  econ$CROPDMGEXP, unitconvert, numeric(1), USE.NAMES = FALSE
)
# Scale the recorded damage amounts by their unit multipliers
econ$PROPDMG_mult <- econ$PROPDMG * econ$propmultiplier
econ$CROPDMG_mult <- econ$CROPDMG * econ$cropmultiplier
# Combined damage per record = property damage + crop damage
econ$combinedDMG <- econ$PROPDMG_mult + econ$CROPDMG_mult
# Mean combined damage per record for each event type (NA values are ignored)
meanEcon <- with(
  econ,
  aggregate(combinedDMG, list(event = EVTYPE), mean, na.rm = TRUE)
)
names(meanEcon) <- c("Event", "CombinedDmg")
# Rank event types by mean combined damage and keep the top 5. head() returns
# fewer rows when there are fewer than 5 event types, whereas indexing with
# [1:5] would pad the result with all-NA rows.
topmeanEcon <- head(
  meanEcon[order(meanEcon$CombinedDmg, decreasing = TRUE), ],
  5
)
The analysis indicates that the top 5 most harmful events for public health were 1. Heat Wave, 2. Tropical Storm Gordon, 3. Wild Fires, 4. ThunderStorm, and 5. Tornadoes, Tsunami, Wind, Hail.
# Create a horizontal bar plot of the top 5 most impactful events with respect
# to population health.
library(ggplot2)
# Put each word of an event name on its own line so the axis labels stay narrow
topmeancombined$Event <- gsub(" ", "\n", topmeancombined$Event, fixed = TRUE)
# reorder() sorts the bars by the Combined value instead of alphabetically
phealth <- ggplot(
  topmeancombined,
  aes(x = reorder(Event, Combined), y = Combined)
)
# coord_flip() puts the event names on the vertical axis
phealth +
  geom_bar(stat = "identity") +
  labs(
    x = "Event",
    y = "Casualties",
    title = "Top 5 most harmful events with respect to population health"
  ) +
  theme(axis.text.y = element_text(size = 8)) +
  coord_flip()
The top 5 most economically impactful events were 1. Tornadoes, Tsunami, Wind, Hail, 2. Hurricane/Typhoon, 3. Hurricane/Opal, 4. River Flood, and 5. Hurricane.
# Horizontal bar plot of the 5 event types with the largest CombinedDmg values.
library(ggplot2)
# Replace every space in the event names with a line break for the axis labels
topmeanEcon$Event <- gsub(pattern = " ", replacement = "\n", x = topmeanEcon$Event)
# Bars are ordered by CombinedDmg rather than alphabetically by event name
pEcon <- ggplot(
  data = topmeanEcon,
  mapping = aes(x = reorder(Event, CombinedDmg), y = CombinedDmg)
)
# Add the layers one per line; coord_flip() lays the bars out horizontally
pEcon +
  geom_bar(stat = "identity") +
  labs(
    x = "Event",
    y = "Economical Damage",
    title = "Top 5 most economically impactful events"
  ) +
  theme(axis.text.y = element_text(size = 8)) +
  coord_flip()