1. Pre Processing the Data
1.1 Load the data into R
# Fetch the NOAA storm data (once) and load it into `storm`.
# All paths are relative to the current working directory; no user-specific
# setwd() is needed, so the script runs unchanged on another machine.
if (!dir.exists("./data")) {
  dir.create("./data")
}
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
stormFile <- "./data/StormData.csv.bz2"
# The download is a bzip2-compressed CSV, not a zip archive, so unzip() cannot
# extract it. Keep the .bz2 extension, write it in binary mode, and skip the
# download when the file is already on disk.
if (!file.exists(stormFile)) {
  download.file(fileUrl, destfile = stormFile, mode = "wb")
}
# read.csv() reads bzip2-compressed files transparently, so the file that was
# just downloaded is the one that is loaded.
storm <- read.csv(stormFile)
# The processing sections below operate on `data`, which no other statement in
# this script creates. Build it here from the columns those sections use.
data <- storm[, c("EVTYPE", "FATALITIES", "INJURIES",
                  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
1.2 Load the required R packages
library(plyr)
library(ggplot2)
library(grid)
library(gridExtra)
2 Data Processing
2.1 Review the questions to be answered
#1.Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
#2.Across the United States, which types of events have the greatest economic consequences?
2.3 Find Property Damage
unique(data$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Translate each PROPDMGEXP code into a numeric multiplier (PROPEXP), then
# scale PROPDMG by it to get the damage value (PROPDMGVAL).
#   ""            -> 1
#   H / K / M / B -> 1e2 / 1e3 / 1e6 / 1e9 (letters matched case-insensitively)
#   digit d (0-8) -> 10^d
#   + - ?         -> 0
# Two parallel vectors are used with match() because a named vector cannot be
# indexed by the empty string.
prop_exp_codes <- c("", "H", "K", "M", "B", as.character(0:8), "+", "-", "?")
prop_exp_values <- c(1, 1e+02, 1e+03, 1e+06, 1e+09, 10^(0:8), 0, 0, 0)
prop_codes_raw <- as.character(data$PROPDMGEXP)
data$PROPEXP <- prop_exp_values[match(toupper(prop_codes_raw), prop_exp_codes)]
# Codes outside the table leave PROPEXP as NA, and the formula-based
# aggregate() later drops those rows; report them instead of losing them
# silently.
prop_unmapped <- !is.na(prop_codes_raw) & is.na(data$PROPEXP)
if (any(prop_unmapped)) {
  warning(
    "Unrecognised PROPDMGEXP code(s): ",
    paste(unique(prop_codes_raw[prop_unmapped]), collapse = ", "),
    call. = FALSE
  )
}
data$PROPDMGVAL <- data$PROPDMG * data$PROPEXP
2.4 Find crop damage
unique(data$CROPDMGEXP)
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
# Translate each CROPDMGEXP code into a numeric multiplier (CROPEXP), then
# scale CROPDMG by it to get the damage value (CROPDMGVAL). The table is the
# same one used for property damage, so both columns are decoded consistently:
#   ""            -> 1
#   H / K / M / B -> 1e2 / 1e3 / 1e6 / 1e9 (letters matched case-insensitively)
#   digit d (0-8) -> 10^d
#   + - ?         -> 0
# Two parallel vectors are used with match() because a named vector cannot be
# indexed by the empty string.
crop_exp_codes <- c("", "H", "K", "M", "B", as.character(0:8), "+", "-", "?")
crop_exp_values <- c(1, 1e+02, 1e+03, 1e+06, 1e+09, 10^(0:8), 0, 0, 0)
crop_codes_raw <- as.character(data$CROPDMGEXP)
data$CROPEXP <- crop_exp_values[match(toupper(crop_codes_raw), crop_exp_codes)]
# Codes outside the table leave CROPEXP as NA, and the formula-based
# aggregate() later drops those rows; report them instead of losing them
# silently.
crop_unmapped <- !is.na(crop_codes_raw) & is.na(data$CROPEXP)
if (any(crop_unmapped)) {
  warning(
    "Unrecognised CROPDMGEXP code(s): ",
    paste(unique(crop_codes_raw[crop_unmapped]), collapse = ", "),
    call. = FALSE
  )
}
data$CROPDMGVAL <- data$CROPDMG * data$CROPEXP
2.5 Total the data by event
# Sum one column of `data` within each EVTYPE. The formula interface of
# aggregate() is kept, so rows with an NA in either variable are omitted
# exactly as before. Returns a data frame with columns EVTYPE and <measure>.
total_by_event <- function(measure) {
  aggregate(reformulate("EVTYPE", response = measure), data = data, FUN = sum)
}

fatal <- total_by_event("FATALITIES")
injury <- total_by_event("INJURIES")
propdmg <- total_by_event("PROPDMGVAL")
cropdmg <- total_by_event("CROPDMGVAL")
2.6 Find and plot events with the highest fatalities and injuries (i.e., most harmful)
# Keep the eight event types with the largest totals. head() returns only the
# rows that exist, whereas indexing with [1:8, ] pads the result with NA rows
# when fewer than eight event types are present.
fatal8 <- head(fatal[order(-fatal$FATALITIES), ], 8)
injury8 <- head(injury[order(-injury$INJURIES), ], 8)
# Two panels side by side, with a deep bottom margin for the vertical event
# labels (las = 3). par() returns the previous settings so they can be
# restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(fatal8$FATALITIES, las = 3, names.arg = fatal8$EVTYPE,
        main = "Events with Highest Fatalities",
        ylab = "Number of fatalities", col = "blue")
barplot(injury8$INJURIES, las = 3, names.arg = injury8$EVTYPE,
        main = "Events with Highest Injuries",
        ylab = "Number of injuries", col = "blue")
par(old_par)

2.7 Find and plot events with the highest property and crop damage
# Keep the eight event types with the largest damage totals. head() returns
# only the rows that exist, whereas indexing with [1:8, ] pads the result with
# NA rows when fewer than eight event types are present.
propdmg8 <- head(propdmg[order(-propdmg$PROPDMGVAL), ], 8)
cropdmg8 <- head(cropdmg[order(-cropdmg$CROPDMGVAL), ], 8)
# Two panels side by side, with a deep bottom margin for the vertical event
# labels (las = 3). par() returns the previous settings so they can be
# restored once both panels are drawn.
old_par <- par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
# Values are divided by 10^9 so the y axis reads in billions of dollars.
barplot(propdmg8$PROPDMGVAL / (10^9), las = 3, names.arg = propdmg8$EVTYPE,
        main = "Events with Highest Property Damages",
        ylab = "Damage Cost ($ billions)", col = "blue")
barplot(cropdmg8$CROPDMGVAL / (10^9), las = 3, names.arg = cropdmg8$EVTYPE,
        main = "Events With Highest Crop Damages",
        ylab = "Damage Cost ($ billions)", col = "blue")
par(old_par)

Results
# The most harmful weather events were identified using fatalities and injuries as proxies for "harmfulness." Tornadoes cause by far the most fatalities *and* injuries of any weather event. Excessive heat and flash floods caused the second and third most fatalities, respectively. TSTM (thunderstorm) wind and floods caused the second and third most injuries, respectively.
# The economic consequences of these weather events were analyzed using property damage and crop damage as proxies for "economic consequence." Floods caused the most property damage, followed by hurricanes/typhoons and tornadoes. Droughts caused the most crop damage, followed by floods and river floods.
# Overall, tornados appear to be the most harmful weather event, and floods (considering property and crop damages together) appear to have the most *negative* economic consequences.