Analysis of the Adverse Health and Economic Impacts of US Storms

Reading the Data

# Location of the compressed storm data set and the local file it is cached as.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destFile <- "repdata%2Fdata%2FStormData.csv.bz2"

# Download only when no local copy exists, so re-running the analysis does not
# fetch the archive again. mode = "wb" requests a binary transfer explicitly so
# the compressed file is written byte-for-byte.
if (!file.exists(destFile)) {
  download.file(fileUrl, destfile = destFile, mode = "wb")
}

# read.csv() is given the .bz2 path directly; no separate unpacking step is used.
Data <- read.csv(destFile)
# loading the packages
library(data.table)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Build a data.table from the raw data.frame
DataDT <- as.data.table(Data)
# List the available column names
names(DataDT)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
# Every column except the event type and the health/damage fields is dropped.
cols2Remove <- base::setdiff(
  colnames(DataDT),
  c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP")
)
# Remove the unwanted columns by reference.
DataDT[, (cols2Remove) := NULL]

# Keep only rows whose event type is not "?" and that recorded at least one
# fatality or injury, or a positive property or crop damage figure.

DataDT <- DataDT[
  EVTYPE != "?" &
    (FATALITIES > 0 | INJURIES > 0 | CROPDMG > 0 | PROPDMG > 0),
  .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
]

## Converting the exponent code columns (-, +, H, K, etc.) into numeric multipliers

# Lookup from property damage exponent code to numeric multiplier.
# NOTE: the first key is the literal two-character string made of two quote
# marks, not the empty string.
propDmgKey <- c(
  # codes that leave the damage figure unscaled
  setNames(rep(10^0, 4), c("\"\"", "-", "+", "0")),
  # a digit d stands for 10^d
  setNames(10^(1:9), as.character(1:9)),
  # letter codes: hundred, thousand, million, billion
  c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
)
# Lookup from crop damage exponent code to numeric multiplier.
# NOTE: the first key is the literal two-character string made of two quote
# marks, not the empty string.
cropDmgKey <- setNames(
  10^c(0, 0, 0, 3, 6, 9),
  c("\"\"", "?", "0", "K", "M", "B")
)
# Replace each exponent code with its numeric multiplier. Codes are upper-cased
# before the lookup so lowercase variants ("h", "k", "m") resolve to the same
# multiplier as their uppercase forms instead of silently falling through to 1.
# unname() keeps the lookup's names from being carried into the column.
DataDT[, PROPDMGEXP := unname(propDmgKey[toupper(as.character(PROPDMGEXP))])]
# Codes with no entry in the key (including blanks) come back NA: treat as x1.
DataDT[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]

DataDT[, CROPDMGEXP := unname(cropDmgKey[toupper(as.character(CROPDMGEXP))])]
# Same NA fallback for crop damage codes that have no entry in the key.
DataDT[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]

# Economic cost columns: reported damage figure times its numeric multiplier

DataDT[, `:=`(
  propCost = PROPDMG * PROPDMGEXP,
  cropCost = CROPDMG * CROPDMGEXP
)]

# Calculating total property cost, crop cost and their sum per event type

totalCostDT <- DataDT[,
  .(
    propCost = sum(propCost),
    cropCost = sum(cropCost),
    Total_Cost = sum(propCost) + sum(cropCost)
  ),
  by = "EVTYPE"
]

# Calculating total fatalities, injuries and their sum per event type

totalInjuriesDT <- DataDT[,
  .(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    TotalIF = sum(FATALITIES) + sum(INJURIES)
  ),
  by = "EVTYPE"
]

###Events that are Most Harmful to Population Health

# Long format: one row per (event type, measure), where the measures are the
# FATALITIES, INJURIES and TotalIF columns of totalInjuriesDT.
bad_stuff <- melt(totalInjuriesDT, id.vars = "EVTYPE", variable.name = "bad_thing")

# The chart is titled "Top 10", so plot only the ten event types with the
# largest combined fatality + injury total rather than every event type.
topHealthEvents <- head(totalInjuriesDT[order(-TotalIF)], 10)[["EVTYPE"]]
topBadStuff <- bad_stuff[EVTYPE %in% topHealthEvents]

# Dodged bars per measure; event types are ordered on the x axis by
# decreasing value.
healthChart <- ggplot(topBadStuff, aes(x = reorder(EVTYPE, -value), y = value)) +
  geom_bar(stat = "identity", aes(fill = bad_thing), position = "dodge") +
  ylab("Frequency Count") +
  xlab("Event Type") +
  ggtitle("Top 10 US Killers") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # slanted tick labels
    plot.title = element_text(hjust = 0.5)             # centred title
  )
healthChart

###Events that have the Greatest Economic Consequences

# Long format: one row per (event type, damage type)
econ_consequences <- melt(
  totalCostDT,
  id.vars = "EVTYPE",
  measure.vars = c("propCost", "cropCost", "Total_Cost"),
  variable.name = "Damage_Type"
)
# Preview the first five rows
head(econ_consequences, n = 5L)
##                   EVTYPE Damage_Type       value
## 1:               TORNADO    propCost 56935880688
## 2:             TSTM WIND    propCost  4484928495
## 3:                  HAIL    propCost 15730367518
## 4: ICE STORM/FLASH FLOOD    propCost           0
## 5:          WINTER STORM    propCost  6688497251
# The chart is titled "Top 10", so plot only the ten event types with the
# highest combined (property + crop) cost rather than every event type.
totalCostRows <- econ_consequences[Damage_Type == "Total_Cost"]
topEconEvents <- head(totalCostRows[order(-value)], 10)[["EVTYPE"]]
econTop <- econ_consequences[EVTYPE %in% topEconEvents]

# Dodged bars per damage type; event types are ordered on the x axis by
# decreasing value.
econChart <- ggplot(econTop, aes(x = reorder(EVTYPE, -value), y = value)) +
  # Plot data as bar chart
  geom_bar(stat = "identity", aes(fill = Damage_Type), position = "dodge") +
  # Axis labels
  ylab("Cost (dollars)") +
  xlab("Event Type") +
  # Chart title
  ggtitle("Top 10 US Storm Events causing Economic Consequences") +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # rotate x tick labels
    plot.title = element_text(hjust = 0.5)             # centre the title
  )

econChart