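This study is conducted on National Weather Service Storm Data collected across the United States for the period 1950-2011. The objective of the study is to find which types of storm events are most harmful to population health and which have the greatest economic impact.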
The raw data is downloaded from the course website 'https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2' in bzip2-compressed format and then loaded into memory for analysis.
# Download the compressed storm data to a temporary file and load it.
# The file extension is kept so the temporary file is recognisable on disk;
# read.csv() is given the compressed file directly, as in the original.
rawdatafile <- tempfile(fileext = ".csv.bz2")
download_status <- download.file(
  "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
  destfile = rawdatafile,
  method = "curl",
  mode = "wb"
)
# download.file() returns 0 on success. External methods such as "curl" only
# warn on failure, so stop here instead of failing later inside read.csv().
if (download_status != 0 || !file.exists(rawdatafile)) {
  stop(
    "Download of the storm data failed (status ", download_status, ")",
    call. = FALSE
  )
}
data <- read.csv(rawdatafile)
The following attributes of the data are used for health impact analysis: 'EVTYPE', 'FATALITIES' and 'INJURIES'.
The following attributes of the data are used for economy impact analysis: 'EVTYPE', 'CROPDMG', 'CROPDMGEXP', 'PROPDMG' and 'PROPDMGEXP'.
'CROPDMGEXP' and 'PROPDMGEXP' contain garbage values, so rows with unrecognised units are filtered out before analysis.
# Unit codes accepted in the *DMGEXP columns: thousands, millions and
# billions, in either case. Rows with any other code are dropped.
validunits <- c("k", "K", "m", "M", "b", "B")
# Separate subsets are kept because a row may have a valid crop unit but an
# invalid property unit (or vice versa).
cropdamage <- data[data$CROPDMGEXP %in% validunits, ]
propdamage <- data[data$PROPDMGEXP %in% validunits, ]
The units represented by characters are then replaced by their numeric equivalents.
# Replace the unit letters (k/m/b, either case) with numeric multipliers.
# The lookup goes through as.character() so it works whether read.csv()
# produced factor or character columns, and the columns end up as plain
# numerics so a later as.numeric() returns the multiplier itself rather
# than a factor level code.
# NOTE(review): estimates built from these multipliers are real dollar
# amounts; anything downstream that assumed smaller magnitudes (for example
# fixed axis limits) has to scale accordingly.
unit_values <- c(K = 1e+03, M = 1e+06, B = 1e+09)
cropdamage$CROPDMGEXP <- unname(
  unit_values[toupper(as.character(cropdamage$CROPDMGEXP))]
)
propdamage$PROPDMGEXP <- unname(
  unit_values[toupper(as.character(propdamage$PROPDMGEXP))]
)
The total damage estimate is calculated by multiplying the crop/property damage figures by their respective units.
# Convert a damage-unit column to its numeric multiplier.
#
# `unit` may hold the raw letters (k/m/b, any case), numeric labels such as
# "1000" or "1e+06", or plain numbers, stored as factor, character or numeric.
# The value is always read from the label via as.character(): calling
# as.numeric() directly on a factor would return integer level codes instead.
# Returns a numeric vector the same length as `unit`; unrecognised entries
# become NA.
unit_multiplier <- function(unit) {
  unit <- as.character(unit)
  multiplier <- unname(c(K = 1e+03, M = 1e+06, B = 1e+09)[toupper(unit)])
  # Anything that is not a unit letter is expected to be a number already.
  not_letter <- is.na(multiplier)
  multiplier[not_letter] <- suppressWarnings(as.numeric(unit[not_letter]))
  multiplier
}

# Damage estimate in dollars = reported figure x unit multiplier.
# NOTE(review): these are real dollar amounts; anything downstream that
# assumed smaller magnitudes (for example fixed axis limits) has to scale
# accordingly.
cropdamage$estimate <- as.numeric(cropdamage$CROPDMG) *
  unit_multiplier(cropdamage$CROPDMGEXP)
propdamage$estimate <- as.numeric(propdamage$PROPDMG) *
  unit_multiplier(propdamage$PROPDMGEXP)
The adverse effect of an event on population health is judged by the number of fatalities/injuries caused by the event. The top 5 event types with the most adverse effect on population health are shown in the chart below.
# Total injuries and fatalities per event type, largest first, keeping at
# most the five largest. seq_len(min(...)) avoids the NA padding that [1:5]
# produces when fewer than five event types are present.
injuries <- sort(
  tapply(data$INJURIES, data$EVTYPE, sum, na.rm = TRUE),
  decreasing = TRUE
)
injuries <- injuries[seq_len(min(5L, length(injuries)))]
fatalities <- sort(
  tapply(data$FATALITIES, data$EVTYPE, sum, na.rm = TRUE),
  decreasing = TRUE
)
fatalities <- fatalities[seq_len(min(5L, length(fatalities)))]

# Two bar charts side by side: fatalities on the left, injuries on the right.
par(mfrow = c(1, 2))
colors <- 1:5
# NOTE(review): "fateful" in the second title presumably means
# "injury-causing"; consider rewording the title.
health_panels <- list(
  list(
    counts = fatalities / 1000,
    ylab = "fatalities (in thousands)",
    main = "Top 5 fatal storm events"
  ),
  list(
    counts = injuries / 1000,
    ylab = "injuries (in thousands)",
    main = "Top 5 fateful storm events"
  )
)
for (panel in health_panels) {
  bars <- seq_along(panel$counts)
  # Bars are labelled A-E on the axis; the legend maps letters to event types.
  keys <- LETTERS[bars]
  # The y-range follows the data (with 10% headroom) instead of a fixed limit.
  mp <- barplot(
    panel$counts,
    ylim = c(0, 1.1 * max(panel$counts)),
    xaxt = "n",
    col = colors[bars],
    ylab = panel$ylab,
    xlab = "storm event type"
  )
  title(main = panel$main, cex.main = 1)
  axis(1, labels = keys, at = mp)
  legend(
    "topright",
    pch = keys,
    col = colors[bars],
    text.col = colors[bars],
    legend = names(panel$counts),
    cex = 0.75,
    pt.cex = 0.5
  )
}
The impact of an event on the economy is judged by the total damage caused by the event. The top 5 event types with the most adverse effect on the economy are shown in the chart below.
# Total crop and property damage estimates per event type, largest first,
# keeping at most the five largest. seq_len(min(...)) avoids the NA padding
# that [1:5] produces when fewer than five event types are present.
cropdmg <- sort(
  tapply(cropdamage$estimate, cropdamage$EVTYPE, sum, na.rm = TRUE),
  decreasing = TRUE
)
cropdmg <- cropdmg[seq_len(min(5L, length(cropdmg)))]
propdmg <- sort(
  tapply(propdamage$estimate, propdamage$EVTYPE, sum, na.rm = TRUE),
  decreasing = TRUE
)
propdmg <- propdmg[seq_len(min(5L, length(propdmg)))]

# Two bar charts side by side: crop damage on the left, property damage on
# the right, both expressed in millions.
par(mfcol = c(1, 2))
colors <- 1:5
damage_panels <- list(
  list(
    amounts = cropdmg / 1e+06,
    ylab = "crop damage estimate (in millions)",
    main = "Top 5 storm events \ncausing maximum crop damage"
  ),
  list(
    amounts = propdmg / 1e+06,
    ylab = "prop damage estimate (in millions)",
    main = "Top 5 storm events \ncausing maximum prop damage"
  )
)
for (panel in damage_panels) {
  bars <- seq_along(panel$amounts)
  # Bars are labelled A-E on the axis; the legend maps letters to event types.
  keys <- LETTERS[bars]
  # The y-range follows the data (with 10% headroom): a fixed limit clips or
  # overflows as soon as the magnitude of the estimates changes.
  mp <- barplot(
    panel$amounts,
    ylim = c(0, 1.1 * max(panel$amounts)),
    xaxt = "n",
    col = colors[bars],
    ylab = panel$ylab,
    xlab = "storm event type"
  )
  title(main = panel$main, cex.main = 1)
  axis(1, labels = keys, at = mp)
  legend(
    "topright",
    pch = keys,
    col = colors[bars],
    text.col = colors[bars],
    legend = names(panel$amounts),
    cex = 0.75,
    pt.cex = 0.5
  )
}
TORNADO, EXCESSIVE HEAT and LIGHTNING have the most adverse impact on population health, while TORNADO, FLASH FLOOD and TSTM WIND have the greatest adverse impact on the economy.
.