The goal of this assignment is to explore the NOAA Storm Database and examine the effects of severe weather events on both population and economy. The database covers the period from 1950 to November 2011.
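The following analysis investigates which types of severe weather events are most harmful to: (1) population health, measured by fatalities and injuries, and (2) the economy, measured by property and crop damage.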
Information about the data used: Documentation
Download link: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
# Locate the compressed storm data, downloading it only when no local copy
# exists. A copy may be present either under the default name the file gets
# when saved from a browser, or under the name this script downloads it to.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "repdata%2Fdata%2FStormData.csv.bz2"
if (!file.exists(storm_file)) {
  storm_file <- "./FILEZIP.bz2"
  if (!file.exists(storm_file)) {
    # mode = "wb" keeps the binary archive intact on Windows.
    download.file(storm_url, destfile = storm_file, mode = "wb")
  }
}
# The file is a bzip2-compressed CSV, not a zip archive, so unzip() cannot
# extract it. Reading through a bzfile() connection decompresses it on the
# fly, and the result is converted to a data.table for the steps below.
DATA <- as.data.table(read.csv(bzfile(storm_file), stringsAsFactors = FALSE))
# Columns required for the health and economic impact analysis.
keep_cols <- c("EVTYPE", "FATALITIES", "INJURIES",
               "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
# Names of every other column. setdiff() works on the names alone, so the
# table is not copied just to find out which columns to discard.
temp <- setdiff(colnames(DATA), keep_cols)
# Remove the unneeded columns by reference; skip when nothing is left to drop.
if (length(temp) > 0) {
  DATA[, (temp) := NULL]
}
# Keep only events with a known type that caused at least one injury or
# fatality, or a positive amount of property or crop damage.
DATA <- DATA[EVTYPE != "?" &
               (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)]
Details regarding the variables PROPDMGEXP and CROPDMGEXP are given in the documentation.
Converting these representative symbols into actual values can help in numerical calculations.
# Upper-case both exponent columns so that e.g. "k" and "K" share one key.
exp_cols <- c("PROPDMGEXP", "CROPDMGEXP")
DATA[, (exp_cols) := lapply(.SD, toupper), .SDcols = exp_cols]

# Multipliers for the property damage exponent codes, as powers of ten.
propertyDamage_KEY <- 10^c("\"\"" = 0, "-" = 0, "+" = 0,
                           "0" = 0, "1" = 1, "2" = 2, "3" = 3, "4" = 4,
                           "5" = 5, "6" = 6, "7" = 7, "8" = 8, "9" = 9,
                           "H" = 2, "K" = 3, "M" = 6, "B" = 9)

# Multipliers for the crop damage exponent codes, as powers of ten.
cropDamage_KEY <- 10^c("\"\"" = 0, "?" = 0, "0" = 0,
                       "K" = 3, "M" = 6, "B" = 9)

# Replace each exponent code with its multiplier. A code that is absent from
# the relevant key looks up as NA and is then given a multiplier of 1.
exp_keys <- list(PROPDMGEXP = propertyDamage_KEY, CROPDMGEXP = cropDamage_KEY)
for (exp_col in names(exp_keys)) {
  exp_multiplier <- exp_keys[[exp_col]][as.character(DATA[[exp_col]])]
  exp_multiplier[is.na(exp_multiplier)] <- 1
  DATA[, (exp_col) := exp_multiplier]
}
Let propertyCost = PROPDMG x PROPDMGEXP
Let cropCost = CROPDMG x CROPDMGEXP
# Rebuild the table with the seven analysis columns plus the monetary value of
# property and crop damage (damage amount times its numeric multiplier).
DATA <- DATA[, list(
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP,
  propertyCost = PROPDMG * PROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]
totalCost is a data table holding, for each event type, the total property cost, the total crop cost, and their sum. It is sorted in descending order of total cost, and only the ten costliest event types are kept.
# Sum property and crop damage per event type, plus their combined total.
totalCost <- DATA[, .(property_cost = sum(propertyCost),
                      crop_cost = sum(cropCost),
                      total_cost = sum(propertyCost) + sum(cropCost)),
                  by = .(EVTYPE)]
# Rank event types from most to least costly overall.
totalCost <- totalCost[order(-total_cost)]
# Keep the ten costliest. head() returns fewer rows when fewer than ten event
# types exist, whereas indexing with 1:10 would pad the table with NA rows.
totalCost <- head(totalCost, 10)
totalCasualty is a data table holding, for each event type, the total number of lives lost, the total number of injuries sustained, and their sum. It is sorted in descending order of lives lost, and only the ten deadliest event types are kept.
# Sum fatalities and injuries per event type, plus their combined total.
totalCasualty <- DATA[, .(fatalityCount = sum(FATALITIES),
                          injuryCount = sum(INJURIES),
                          totalCasualty = sum(FATALITIES) + sum(INJURIES)),
                      by = .(EVTYPE)]
# Rank event types from most to fewest fatalities.
totalCasualty <- totalCasualty[order(-fatalityCount)]
# Keep the ten deadliest. head() returns fewer rows when fewer than ten event
# types exist, whereas indexing with 1:10 would pad the table with NA rows.
totalCasualty <- head(totalCasualty, 10)
# Reshape to long form: one row per event type and casualty measure, with the
# measure name in EVENT and its count in value.
casualty_long <- melt(totalCasualty, id.vars = "EVTYPE",
                      variable.name = "EVENT")
# Dodged bar chart of the casualty measures per event type, with event types
# ordered along the x axis by reorder(EVTYPE, -value), rotated tick labels and
# a centred title.
CHART <- ggplot(casualty_long, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = EVENT), position = "dodge") +
  ylab("Frequency Count") +
  xlab("Event") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Most harmful natural events") +
  theme(plot.title = element_text(hjust = 0.5))
CHART
# Reshape to long form: one row per event type and cost measure, with the
# measure name in DamageType and its amount in value.
cost_long <- melt(totalCost, id.vars = "EVTYPE",
                  variable.name = "DamageType")
# Dodged bar chart of the cost measures per event type, with event types
# ordered along the x axis by reorder(EVTYPE, -value), rotated tick labels and
# a centred title.
CHART <- ggplot(cost_long, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = DamageType), position = "dodge") +
  xlab("Event") +
  ylab("Cost (in $)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Events causing maximum economic consequences") +
  theme(plot.title = element_text(hjust = 0.5))
CHART