# Remote location of the bzip2-compressed storm data CSV and the local file
# name it is stored under.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destFile <- "repdata%2Fdata%2FStormData.csv.bz2"
# Download only when no local copy exists, so re-running the script does not
# fetch the archive again. The transfer goes to a ".part" file first and is
# renamed on success, so an interrupted download is never mistaken for a
# complete archive. mode = "wb" requests a binary transfer on every platform.
if (!file.exists(destFile)) {
  partFile <- paste0(destFile, ".part")
  status <- download.file(fileUrl, destfile = partFile, mode = "wb")
  if (status != 0 || !file.rename(partFile, destFile)) {
    stop("Could not download ", fileUrl, call. = FALSE)
  }
}
# read.csv() reads the compressed file directly; no manual decompression needed.
Data <- read.csv(destFile)
# loading the packages
library(data.table)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Work on a data.table copy of the raw data frame.
DataDT <- as.data.table(Data)
# Inspect the available column names.
colnames(DataDT)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Only the event type plus the health and damage columns are needed; every
# other column is deleted from DataDT by reference.
keepCols <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP",
              "CROPDMG", "CROPDMGEXP")
cols2Remove <- setdiff(colnames(DataDT), keepCols)
DataDT[, c(cols2Remove) := NULL]
# Keep events with a known type ("?" is excluded) that caused at least one
# injury or fatality, or any property or crop damage.
DataDT <- DataDT[
  EVTYPE != "?" &
    (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  keepCols,
  with = FALSE
]
## Converting exponent columns into numeric multipliers instead of codes (-, +, H, K, etc.)
# Map property damage exponent codes to numeric multipliers. Codes are
# upper-cased before lookup, so only upper-case letters are listed. A blank
# code cannot be a name in this vector; it, like any other unlisted code, gets
# the default multiplier of 1 assigned below.
propDmgKey <- c("-" = 10^0,
                "+" = 10^0,
                "0" = 10^0,
                "1" = 10^1,
                "2" = 10^2,
                "3" = 10^3,
                "4" = 10^4,
                "5" = 10^5,
                "6" = 10^6,
                "7" = 10^7,
                "8" = 10^8,
                "9" = 10^9,
                "H" = 10^2,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)
# Map crop damage exponent codes to numeric multipliers (same conventions).
cropDmgKey <- c("?" = 10^0,
                "0" = 10^0,
                "K" = 10^3,
                "M" = 10^6,
                "B" = 10^9)
# Upper-case the codes first: without this, lower-case codes such as "k" or "m"
# miss the lookup table and would silently be treated as a multiplier of 1.
propCodes <- toupper(as.character(DataDT[["PROPDMGEXP"]]))
cropCodes <- toupper(as.character(DataDT[["CROPDMGEXP"]]))
# Replace each code column with its numeric multiplier; unname() drops the
# lookup names so the column is a plain numeric vector.
DataDT[, PROPDMGEXP := unname(propDmgKey[propCodes])]
# Codes without a mapping (including blank) produce NA; treat them as 1.
DataDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]
DataDT[, CROPDMGEXP := unname(cropDmgKey[cropCodes])]
DataDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]
# Making economic cost columns: reported damage figure times its multiplier.
# Added by reference with data.table's `:=` so DataDT stays a regular
# data.table rather than a copy rebuilt by dplyr::mutate().
DataDT[, `:=`(propCost = PROPDMG * PROPDMGEXP,
              cropCost = CROPDMG * CROPDMGEXP)]
# Calculating total property, crop, and combined cost per event type.
totalCostDT <- DataDT[, .(propCost = sum(propCost),
                          cropCost = sum(cropCost),
                          Total_Cost = sum(propCost) + sum(cropCost)),
                      by = .(EVTYPE)]
# Calculating total fatalities, injuries, and their sum per event type.
totalInjuriesDT <- DataDT[, .(FATALITIES = sum(FATALITIES),
                              INJURIES = sum(INJURIES),
                              TotalIF = sum(FATALITIES) + sum(INJURIES)),
                          by = .(EVTYPE)]
### Events that are Most Harmful to Population Health
# Keep only the ten event types with the highest combined fatalities and
# injuries so the chart matches its "Top 10" title; head() also copes with
# fewer than ten event types.
topInjuriesDT <- head(totalInjuriesDT[order(-TotalIF)], 10)
# Long format: one row per (event type, measure) pair for grouped bars.
bad_stuff <- melt(topInjuriesDT, id.vars = "EVTYPE", variable.name = "bad_thing")
# Grouped bar chart; reorder() sorts event types from highest to lowest value.
healthChart <- ggplot(bad_stuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = bad_thing), position = "dodge") +
  ylab("Frequency Count") + xlab("Event Type") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  ggtitle("Top 10 US Killers") +
  theme(plot.title = element_text(hjust = 0.5))
healthChart
### Events that have the Greatest Economic Consequences
# Long format: one row per (event type, damage type) pair for grouped bars.
econ_consequences <- melt(totalCostDT, id.vars = "EVTYPE", variable.name = "Damage_Type")
head(econ_consequences, 5)
## EVTYPE Damage_Type value
## 1: TORNADO propCost 56935880688
## 2: TSTM WIND propCost 4484928495
## 3: HAIL propCost 15730367518
## 4: ICE STORM/FLASH FLOOD propCost 0
## 5: WINTER STORM propCost 6688497251
# Plot only the ten event types with the highest total cost so the chart
# matches its "Top 10" title; head() also copes with fewer than ten event types.
topCostEvents <- head(totalCostDT[order(-Total_Cost)], 10)[["EVTYPE"]]
econChart <- ggplot(econ_consequences[EVTYPE %in% topCostEvents],
                    aes(x = reorder(EVTYPE, -value), y = value)) +
  # Plot data as a grouped bar chart, one bar per damage type.
  geom_bar(stat = "identity", aes(fill = Damage_Type), position = "dodge") +
  # Axis labels.
  ylab("Cost (dollars)") +
  xlab("Event Type") +
  # Rotate x-axis tick labels so long event names stay readable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  # Set chart title and center it.
  ggtitle("Top 10 US Storm Events causing Economic Consequences") +
  theme(plot.title = element_text(hjust = 0.5))
econChart